# Omniscient Policy / Oracle

This notebook implements an omniscient policy (an oracle) that knows all of the true probability distributions. At every step it makes the best decision available given that knowledge, so it does not have to learn anything. Because the oracle already has the optimal parameters $\theta$, it is expected to maximize reward in fewer rounds. 

In [170]:
import numpy as np
import pandas as pd
from scipy.stats import bernoulli
from sklearn import linear_model
# from sklearn.preprocessing import LabelEncoder

# Multi-threading

In [171]:
import threading
from queue import Queue
import time

# Stand-in for the future LinUCB class: exampleJob would be replaced by the
# learn method, and a dedicated lock would be created for it.
print("Starting ExampleJobClass")
class ExampleJobClass:
    """Toy job used to demonstrate the worker-thread pattern."""

    # Class-level lock, shared by every instance, that serialises printing.
    print_lock = threading.Lock()

    def exampleJob(self, worker):
        """Pause for half a second, then print the current thread name and ``worker``."""
        time.sleep(.5)  # pretend to do some work.
        with self.print_lock:
            thread_name = threading.current_thread().name
            print(thread_name, worker)

print("About to create ExampleJobClass")
ej_obj = ExampleJobClass()
# Placeholder for the function that will iterate over topics and call the
# LinUCB learn method.
def threader():
    """Worker loop: take items from the global queue ``q`` and run the example job.

    The loop never returns; it is meant to run in a daemon thread.
    """
    while True:
        # Blocks until an item is available in the queue.
        worker = q.get()
        try:
            ej_obj.exampleJob(worker)
        finally:
            # Always acknowledge the item, even if the job raised; otherwise
            # q.join() in the main thread would block forever.
            q.task_done()
        
# Create the job queue that the threader workers consume from.
q = Queue()

# This would live in the Simulator class to simulate the students created; the
# loop would iterate once per student.
# Number of worker threads to start.
for x in range(10):
    print("Inside for loop : 10 threads created")
    t = threading.Thread(target=threader)

    # Mark as a daemon so the thread dies when the main thread exits.
    t.daemon = True

    # Start the thread; the daemon flag must be set before this call.
    t.start()

start = time.time()

# Enqueue 20 jobs.
for worker in range(20):
    print("Inside for loop : 20 jobs assigned")
    q.put(worker)

# Block until every queued job has been marked done with task_done()
# (the worker threads themselves keep looping and never terminate).
q.join()

# With 10 workers and 20 tasks of .5 seconds each, the whole job takes
# ~1 second using threading. Run sequentially, 20 tasks of .5 seconds each
# would take 10 seconds.
print('Entire job took:',time.time() - start)

Starting ExampleJobClass
About to create ExampleJobClass
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for lo

# DataSet

In [172]:
class Students:
    """Holder for the full student data set.

    It is made to carry every attribute of the students; StudentContext is
    meant to take a subset of the attributes stored here.
    """

    def getStudentsFeatures(self):
        """Return the data previously stored with ``setStudentsFeatures``."""
        return self.studentsFeatures

    def setStudentsFeatures(self, student_data):
        """Store ``student_data``, the student data created during data generation."""
        self.studentsFeatures = student_data
    
class Content:
    """Holder for the full content data set.

    It is made to carry every attribute of contents and topics; ContentContext
    takes a subset of these attributes.
    """

    # --- content features -------------------------------------------------
    def setContentData(self, courseContent):
        """Store ``courseContent``, the content data created during data generation."""
        self.contentsFeatures = courseContent

    def getContentData(self):  # Rename to content data
        """Return the content data stored with ``setContentData``."""
        return self.contentsFeatures

    # --- topics -----------------------------------------------------------
    def setTopics(self, topics):
        """Store ``topics``, the topics created during data generation."""
        self.topics = topics

    def getTopics(self):
        """Return the topics stored with ``setTopics``."""
        return self.topics

    # --- topic -> content mapping ------------------------------------------
    def setTopicContent(self, topicsContent):
        """Store the topic-to-content data ``topicsContent``."""
        self.topicsContent = topicsContent

    def getTopicContent(self):
        """Return the data stored with ``setTopicContent``."""
        return self.topicsContent

class DataGenerator:
    """Encapsulates the student and content data generators.

    It uses StudentDataGen and ContentDataGen to create the data and keeps the
    results so they can be read back through the getters.
    """

    def __init__(self):
        self.studentDataGen = StudentDataGen()
        self.contentDataGenerator = ContentDataGen()

    # --- student data -------------------------------------------------------
    def createStudentData(self):
        """Generate the student data and keep it on this object."""
        generated = self.studentDataGen.create()
        self.studentData = generated

    def getStudentData(self):
        """Return the data produced by ``createStudentData``."""
        return self.studentData

    # --- content data -------------------------------------------------------
    def createContentData(self):
        """Generate content features, the topic-to-content mapping and the topic list."""
        generator = self.contentDataGenerator
        # getContentsFeatures() regenerates the topic/content ids, so it is
        # called before the two getters that read them.
        self.contentsFeatures = generator.getContentsFeatures()
        self.topicContent = generator.getTopicContent()
        self.topics = generator.getTopics()

    def getContentData(self):
        """Return the content features produced by ``createContentData``."""
        return self.contentsFeatures

    def getTopicContent(self):
        """Return the topic-to-content mapping produced by ``createContentData``."""
        return self.topicContent

    def getTopics(self):
        """Return the topic list produced by ``createContentData``."""
        return self.topics

class StudentDataGen:
    """Student data generator.

    Draws one row of independent 0/1 preference flags per student.

    Args:
        number_of_students: Number of students taking the course
            (defaults to 5, the value that used to be hard-coded).
    """

    def __init__(self, number_of_students=5):
        self.number_of_students = number_of_students  # Students taking the course.
        self.student_context = ['video', 'audio', 'reading', 'kinesthetic']  # Student preferences
        # Probability of a student having each preference, in the same order
        # as student_context (kept as an attribute, like
        # ContentDataGen.prob_content_context, instead of being buried in create()).
        self.prob_student_context = [0.7, 0.6, 0.5, 0.4]
        # TO-DO : Have student preferences & probability of having those preferences as a tuple.

    def create(self):
        """Return a DataFrame of Bernoulli draws, one row per student and one
        column per entry of ``student_context``."""
        shape = (self.number_of_students, len(self.student_context))
        draws = np.random.binomial(1, self.prob_student_context, size=shape)
        return pd.DataFrame(data=draws, columns=self.student_context)
    
class ContentDataGen:
    """Content data generator.

    Builds topic ids (``T_<i>``), content ids (``C_<i>_<j>``) and a table of
    random 0/1 content features.
    """

    def __init__(self):
        self.number_of_topics = 5 # Number of topics in the course
        self.content_context = ['A','B','C','D','E','F'] # Content features. Add meaningful features.
        self.prob_content_context = [0.8,0.7,0.6,0.5,0.4,0.3]
        self.no_contents_per_topic = np.random.randint(2,5,self.number_of_topics) # Variable number of contents per topic.
        # Build the ids up front so getTopicContent()/getTopics() work even if
        # getContentsFeatures() has not been called yet. create() draws no
        # random numbers, so this does not disturb the random stream.
        self.topic_content , self.all_topics , self.all_contents = self.create()

    def create(self):
        """Build the id structures from ``no_contents_per_topic``.

        Returns:
            A tuple ``(topic_content, all_topics, all_contents)``: a dict
            mapping each topic id to its list of content ids, the list of
            topic ids, and the flat list of all content ids.
        """
        all_contents = []
        all_topics = []
        topic_content = {}
        for topic_number, n_contents in enumerate(self.no_contents_per_topic, start=1):
            topic_id = 'T_' + str(topic_number) # e.g : T_10
            # e.g : C_10_2 : Content number 2 for topic 10
            content_ids = ['C_' + str(topic_number) + '_' + str(content_number)
                           for content_number in range(1, n_contents + 1)]
            topic_content[topic_id] = content_ids
            all_topics.append(topic_id)
            all_contents.extend(content_ids)
        return topic_content , all_topics , all_contents

    # Content related features
    def getContentsFeatures(self):
        """Return a freshly drawn 0/1 feature table indexed by content id.

        The ids are rebuilt from ``no_contents_per_topic`` on every call, so
        changes made to that attribute after construction are honoured.
        """
        self.topic_content , self.all_topics , self.all_contents = self.create()
        shape = (len(self.all_contents), len(self.content_context))
        draws = np.random.binomial(1 , self.prob_content_context , size=shape)
        return pd.DataFrame(data=draws , columns=self.content_context , index=self.all_contents)

    def getTopicContent(self):
        """Return the dict mapping topic id to its list of content ids."""
        return self.topic_content

    def getTopics(self):
        """Return the list of topic ids."""
        return self.all_topics


# Contexts

In [173]:
class Context:
    """Context data for learning.

    Stores the student, content and topic contexts and combines a student
    with a set of contents into one feature table.
    """

    def getStudentContext(self):
        """Return the value stored with ``setStudentContext``."""
        return self.studentContext

    def setStudentContext(self , studentFeatures):
        """Store the student features."""
        self.studentContext = studentFeatures

    def getContentContext(self):
        """Return the value stored with ``setContentContext``."""
        return self.contentContext

    def setContentContext(self , courseContent):
        """Store the content features."""
        self.contentContext = courseContent

    def setTopicContext(self,topics):
        """Store the topics."""
        self.topicContext=topics

    def getTopicContext(self):
        """Return the value stored with ``setTopicContext``."""
        return self.topicContext

    def prepareContext(self,studentContext,contentContext):
        """Combine one student's features with each content's features.

        Args:
            studentContext: Series of the student's features.
            contentContext: DataFrame with one row per content, indexed by
                content id.

        Returns:
            A DataFrame indexed by content id (index name ``Content_id``)
            whose rows hold the student features followed by that content's
            features. An empty ``contentContext`` yields an empty frame.
        """
        # Series.append / DataFrame.append were removed in pandas 2.0, so the
        # rows are built with pd.concat and assembled in a single step.
        rows = [pd.concat([studentContext, contentContext.loc[content]])
                for content in contentContext.index]
        return pd.DataFrame(rows, index=pd.Index(contentContext.index, name='Content_id'))

# Skip Classifier

In [174]:
# Series 
# student = pd.Series([1,0,1,0] , index=['V','A','R','K'])
# encoded_topic = 10
# pta = 0.5
# next_topic_pta = 0.7

# X = pd.Series()
# X = X.append([student,pd.Series([encoded_topic,pta,next_topic_pta],index=['encoded_topic','pta','next_topic_pta'])])
# type(X.values)
# # X = X.append([encoded_topic,pta,next_topic_pta])
# # X

# label = 1
# type(np.array([label]))
# # pd.Series().append([label])

# prediction = np.array([1])[0]
# prediction

# Scratch example: fit an SGDClassifier on four 2-D points with labels 1 and 2,
# then query it for a single new point.
import numpy as np
from sklearn import linear_model
X = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])  # training samples, one row each
Y = np.array([1, 1, 2, 2])  # class label for each row of X
clf = linear_model.SGDClassifier(max_iter=1000)
clf.fit(X, Y)
new_X = np.array([[-2,-1]])  # one sample, kept 2-D as the estimator expects
print(clf.predict(new_X))  # predicted class label
print(clf.decision_function(new_X))  # confidence score for the sample

[1]
[-9.6662043]


In [192]:
# Online Stochastic Gradient Descent. This classifier decides whether or not to skip to the next topic. 
# TO-DO : Change loss functions (Log,Hinge,Others) to find if they impact performance. Try different values of parameters 
# For instance SGD has a parameter alpha, SVM has a parameter C. To optimize, you can train a mini-batch of samples, 
# rather than one data point at a time. Try different values of learning_rate . Look at the class_weight parameter if you 
# want to give more weight to samples of one class over the other. Need to understand about warm_start parameter
# We need to record predictions made by the classifier to evaluate its performance over rounds 
from sklearn.exceptions import NotFittedError
class SkipClassifier:
    """Online classifier that decides whether to skip to the next topic.

    Wraps an ``SGDClassifier`` trained one sample at a time with
    ``partial_fit`` on the classes 0 and 1.
    """

    def __init__(self):
        self.clf = linear_model.SGDClassifier()

    def check_fitted(self,clf):
        """Return True if ``clf`` has a ``classes_`` attribute, i.e. has been trained."""
        return hasattr(clf, "classes_")

    def _features(self,student,pta,next_topic_pta):
        """Build the 1 x d feature matrix: student features, then ``pta`` and ``next_topic_pta``."""
        # Series.append was removed in pandas 2.0; pd.concat is the replacement.
        payoffs = pd.Series([pta,next_topic_pta],index=['pta','next_topic_pta'])
        return np.array([pd.concat([student,payoffs]).values])

    def train(self,student,pta,next_topic_pta,label):
        """Update the classifier with a single labelled sample.

        Args:
            student: Series of student features.
            pta: Expected payoff for the current topic.
            next_topic_pta: Expected payoff for the next topic.
            label: Target class, 0 or 1.
        """
        X = self._features(student,pta,next_topic_pta)
        Y = np.array([label])
        # classes must be given because a single sample cannot reveal both labels.
        self.clf.partial_fit(X,Y,classes=np.array([0,1]))

    def predict(self,student,pta,next_topic_pta):
        """Return ``(skip_decision, confidence_score)`` for one sample.

        Before the first call to ``train`` the classifier cannot predict, so
        ``(0, 0)`` is returned.
        """
        # next_topic_encoded : We're not using it, as it would influence the decision incorrectly, based on the label assigned to it.
        # For e.g : Labels with high encoded values would have more influence on the decision than labels with lower
        # encoded values. We should instead use one hot encoding for labels.
        if not self.check_fitted(self.clf):
            return 0 , 0
        X = self._features(student,pta,next_topic_pta)
        Y = self.clf.predict(X)[0]
        confidence_score = self.clf.decision_function(X)[0]
        return Y , confidence_score

In [193]:
# test_var is accessible outside the scope of 'if' block

# if True:
#     test_var = 1
# else:
#     print('nothing')
# print(test_var)


# skip_decision = 0
# confidence_score = 70
# confidence_threshold = 60
# if skip_decision and confidence_score < confidence_threshold:
#     print('We\'re changing our decision')
# else:
#     print('We\'re sticking to our decision')

# skip_decision = 1
# actual_payoff = 0
# if skip_decision == 0 and actual_payoff == 0:
#     label_ = 1
# else:
#     label_ = 0
# print(label_)
    

In [194]:
class SkipTopic:
    """Decides whether a student should skip the current topic.

    Combines the oracle's expected payoff for the next topic with the
    prediction of a ``SkipClassifier``.
    """

    def __init__(self):
        self.skipClassifier = SkipClassifier()
        # A skip decision is only trusted when the classifier's confidence
        # score is at least this value; otherwise it is overridden to "no skip".
        self.confidence_threshold = 60
        self.skipTopic_lock = threading.Lock()

    def skipTopic(self,student,pta,topic_number,context_obj,topic_content,oracle):
        """Return ``(skip_decision, next_topic_pta)`` for the current topic.

        Args:
            student: Series of student features.
            pta: Expected payoff of the best arm of the current topic.
            topic_number: Id of the current topic; must be in the topic list.
            context_obj: Object providing ``getContentContext``,
                ``getTopicContext`` and ``prepareContext``.
            topic_content: Mapping from topic id to its content ids.
            oracle: Object providing ``expectedPayoff(contexts, arms)``.

        Returns:
            The (possibly overridden) skip decision and the expected payoff of
            the next topic, which is 0 when the current topic is the last one.
        """
        contentContext = context_obj.getContentContext() # Get the content dataframe.
        topicContext = context_obj.getTopicContext() # Get the topic list.
        next_topic_index = topicContext.index(topic_number) + 1
        if next_topic_index < len(topicContext): # Check to see if we're going out of bounds
            next_topic_contents = topic_content[topicContext[next_topic_index]]
            X = context_obj.prepareContext(student,contentContext.loc[next_topic_contents])
            # Only the payoff is needed here; the arm itself is not pulled.
            _ , next_topic_pta = oracle.expectedPayoff(X,next_topic_contents)
        else:
            # Current topic is the last topic. No more topics to complete.
            next_topic_pta = 0
        skip_decision , confidence_score = self.skipClassifier.predict(student,pta,next_topic_pta)
        if skip_decision and confidence_score < self.confidence_threshold:
            # Not confident enough in the skip: report it and fall back to "no skip".
            print('Actual decision made by classifier : ' , skip_decision)
            print('Confidence score returned is {0}, which is less than threshold {1}'.format(confidence_score , self.confidence_threshold))
            skip_decision = 0
        return skip_decision,next_topic_pta

    def setLabel(self,skip_decision,actual_payoff):
        """Derive the training label from a non-skipped round.

        Returns:
            1 when the topic was not skipped and the payoff was 0, and
            0 when the topic was not skipped and the payoff was 1.

        Raises:
            ValueError: If ``skip_decision`` is not 0 or ``actual_payoff`` is
                neither 0 nor 1. These inputs have no defined label and
                previously failed with an UnboundLocalError.
        """
        if skip_decision != 0:
            raise ValueError('setLabel is only defined for skip_decision == 0, got {0!r}'.format(skip_decision))
        if actual_payoff == 0:
            return 1
        if actual_payoff == 1:
            return 0
        raise ValueError('actual_payoff must be 0 or 1, got {0!r}'.format(actual_payoff))

    def train(self,student,pta,pta_next_topic,label):
        """Forward one labelled sample to the skip classifier."""
        self.skipClassifier.train(student,pta,pta_next_topic,label)
            

# Omniscient Policy / Oracle

In [195]:
class Oracle:
    '''
    Omniscient policy that holds the true parameters for every arm.

    arms: Content ids
    contexts: Features
    '''
    def __init__(self):
        self.rounds = 0 # Number of rounds played
        self.rounds_data = pd.DataFrame() # Rounds data required for Skip Algorithm
        self.oracle_lock = threading.Lock()
        self.skipTopic = SkipTopic()

    def setParameters(self, features , arms):
        '''
        Draw a random parameter vector for every arm and store the result in
        self.theta_df (one row per arm, one column per feature).

        Each row is normalized to sum to 1.
        '''
        parameters = np.random.uniform(size=(len(arms) , len(features)))
        # Normalize every row in one vectorized step.
        parameters /= parameters.sum(axis=1 , keepdims=True)
        # np.float was removed in NumPy 1.24; the builtin float is the same dtype.
        self.theta_df = pd.DataFrame(data = parameters ,  index = arms , columns = features , dtype = float)

    def expectedPayoff(self,contexts,arms):
        '''
        Return (arm_pulled, expected_payoff) for the best of the given arms.

        contexts: DataFrame with one row of features per arm, indexed by arm id.
        arms: Non-empty sequence of arm ids to choose from.

        The payoff of an arm is the dot product of its context row and its
        parameter row. The first arm with the highest payoff is returned.
        Reward simulation and the skip decision are not done here; they are
        handled by Simulator (getPayoff / takeCourse).
        '''
        # Vector dim : (1 * d) (d * 1) for every arm.
        arms_payoff = [contexts.loc[arm].dot(self.theta_df.loc[arm]) for arm in arms]
        arm_index = int(np.argmax(arms_payoff))
        return arms[arm_index] , arms_payoff[arm_index]

# Simulator

In [208]:
class Simulator:
    """Runs every generated student through the course, one thread per student."""

    def __init__(self):
        # Generate the raw data.
        self.dataGenerator = DataGenerator()
        self.dataGenerator.createStudentData()
        self.dataGenerator.createContentData()
        # Copy the generated data into the holder objects.
        self.students = Students()
        self.students.setStudentsFeatures(self.dataGenerator.getStudentData())
        self.contents = Content()
        self.contents.setContentData(self.dataGenerator.getContentData())
        self.contents.setTopics(self.dataGenerator.getTopics())
        self.contents.setTopicContent(self.dataGenerator.getTopicContent())
        # Contexts used by the oracle and the skip logic.
        self.contexts = Context()
        self.contexts.setStudentContext(self.students.getStudentsFeatures())
        self.contexts.setContentContext(self.contents.getContentData())
        self.contexts.setTopicContext(self.contents.getTopics())
        self.oracle = Oracle()
        self.skipTopic = SkipTopic()
        self.simulator_lock = threading.Lock()
        self.rounds=0

    def getPayoff(self,pta):
        """Simulate the student's response with a Bernoulli draw of probability ``pta``.

        Also increments ``self.oracle.rounds`` and prints the reward, both
        under ``simulator_lock``.
        """
        reward = bernoulli.rvs(size=1,p=pta)[0] # Simulate student's response
        with self.simulator_lock:
            self.oracle.rounds += 1
            print('Actual Reward : ', reward)
        return reward

    def takeCourse(self,student_number,student,contentContext,topicContext,topic_content):
        """Run one student through every topic in ``topicContext``.

        For each topic the oracle's best remaining arm is considered. If the
        skip decision is positive the topic is left without pulling the arm.
        Otherwise the arm is pulled, the skip classifier is trained on the
        outcome, and the topic ends on a reward of 1 or once no arms remain.
        """
        for topic in topicContext:
            contents = topic_content[topic] # All arms associated with this topic
            t_c = contentContext.loc[contents]
            contexts = self.contexts.prepareContext(student,t_c)
            arms = list(t_c.index)
            while arms:
                arm , pta = self.oracle.expectedPayoff(contexts,arms)
                # Skip-topic
                skip_decision , pta_next_topic = self.skipTopic.skipTopic(student,pta,topic,self.contexts,topic_content,self.oracle)
                if skip_decision:
                    break # Decision is to skip. Hence, we won't pull the arm.
                # Reward
                actual_payoff = self.getPayoff(pta)
                with self.simulator_lock:
                    self.rounds+=1
                print('Completed topic {0} for student {1}'.format(topic,student_number))
                # Train
                # NOTE(review): self.skipTopic is shared by all student threads
                # and is trained here without a lock - confirm this is intended.
                label = self.skipTopic.setLabel(skip_decision,actual_payoff) # Set Label
                self.skipTopic.train(student,pta,pta_next_topic,label)
                if actual_payoff == 1:
                    break # Move to the next topic
                arms.remove(arm) # This arm failed; try the next best one.

    def main(self):
        """Start one thread per student, wait for all of them, then print the round count."""
        studentContext = self.contexts.getStudentContext() # Student dataframe
        contentContext = self.contexts.getContentContext() # Content Dataframe
        topicContext = self.contexts.getTopicContext() # List of topics.
        topic_content = self.contents.getTopicContent() # Topics Data, which includes topics to content mapping.
        features = list(studentContext.columns) + list(contentContext.columns)
        self.oracle.setParameters(features , contentContext.index)
        threads = []
        for student_number , student in studentContext.iterrows():
            t = threading.Thread(target=self.takeCourse, args=(student_number,student,contentContext,topicContext,topic_content))
            t.daemon = True # classifying as a daemon, so they will die when the main dies
            t.start() # begins, must come after daemon definition
            threads.append(t)
        # Join every thread, not just the last one started; otherwise the total
        # below can be printed while other students are still running.
        for t in threads:
            t.join()
        print('Total Number of rounds : ', self.rounds)

# Build the simulator (its constructor generates the data and creates the
# oracle and skip logic), then run the simulation for every student.
simulator = Simulator()
simulator.main()
# print('Rounds Data:')
# #         print(self.linUCB.getRoundsData())
# print('Total Number of rounds : ', self.rounds)  

Actual Reward :  1
Completed topic T_1 for student 1
Actual Reward :  0
Completed topic T_1 for student 0
Actual Reward :  0
Completed topic T_1 for student 2
Actual Reward :  0
Completed topic T_1 for student 4
Actual decision made by classifier :  1
Confidence score returned is 2.6302195640648014, which is less than threshold 60
Actual Reward :  1
Completed topic T_1 for student 3
Actual decision made by classifier :  1
Confidence score returned is 12.631661498288722, which is less than threshold 60
Actual Reward :  1
Completed topic T_1 for student 0
Actual Reward :  0
Completed topic T_2 for student 1
Actual decision made by classifier :  1
Confidence score returned is 36.181139531991064, which is less than threshold 60
Actual Reward :  1
Completed topic T_1 for student 2
Actual decision made by classifier :  1
Confidence score returned is 23.261036270098366, which is less than threshold 60
Actual Reward :  1
Completed topic T_1 for student 4
Actual Reward :  1
Completed topic T_2 

In [200]:
'print'

'print'