# Omniscient Policy / Oracle

This notebook represents an omniscient policy that knows all of the probability distributions. This algorithm knows, every step of the way, the best decision based on its knowledge of the true distributions. It does not have to learn anything. The oracle has optimal parameters $\theta$, hence it is expected to maximize reward in fewer rounds. 

In [5]:
import numpy as np
import pandas as pd
from scipy.stats import bernoulli



In [6]:
# TO-DO : 1) Ability to print, execute statements after the threads have completed execution. Currently, the main thread runs
# the student threads & goes about with its own business. I want, in certain situations for the main thread to wait for the 
# student threads before it executes other statements. HOME-WORK


# Multi-threading

In [7]:
import threading
from queue import Queue
import time

# This would be our LinUCB class, & exampleJob would be replace by learn. There would be a lock we'll create. 
print("Starting ExampleJobClass")
class ExampleJobClass:
    print_lock = threading.Lock()

    def exampleJob(self, worker):
        time.sleep(.5) # pretend to do some work.
        with self.print_lock:
            print(threading.current_thread().name,worker)

print("About to create ExampleJobClass")
ej_obj = ExampleJobClass()
# This would be a new function we'll create. This is where we would iterate over topics. Call LinkUCB learn method from here
# The threader thread pulls an worker from the queue and processes it
def threader():
    while True:
        # gets an worker from the queue
        worker = q.get()
        # Run the example job with the avail worker in queue (thread)
        ej_obj.exampleJob(worker)
        # completed with the job
        q.task_done()
        
# Create the queue and threader 
q = Queue()

# This would be in the Simulator class to simulate the students created. The loop would be iterated depending on the number of students
# how many threads are we going to allow for
for x in range(10):
    print("Inside for loop : 10 threads created")
    t = threading.Thread(target=threader)

     # classifying as a daemon, so they will die when the main dies
    t.daemon = True

     # begins, must come after daemon definition
    t.start()

start = time.time()

# 20 jobs assigned.
for worker in range(20):
    print("Inside for loop : 20 jobs assigned")
    q.put(worker)

# wait until the thread terminates.
q.join()

# with 10 workers and 20 tasks, with each task being .5 seconds, then the completed job
# is ~1 second using threading. Normally 20 tasks with .5 seconds each would take 10 seconds.
print('Entire job took:',time.time() - start)

Starting ExampleJobClass
About to create ExampleJobClass
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for lo

# DataSet

In [8]:
'''
This class holds student data. Its made to have all attributes of the students. StudentContext is meant to takes a subset 
of attributes from this class
'''
class Students:
    '''
    student_data created during data generation
    '''
    def setStudentsFeatures(self , student_data):
        self.studentsFeatures = student_data
    
    def getStudentsFeatures(self):
        return self.studentsFeatures
    
'''
This class holds content data. Its made to have all attributes of contents & topics. ContentContext takes a subset of 
attributes of content & topics
'''
class Content:
        
    def getContentData(self): # Rename to content data
        return self.contentsFeatures
    '''
    courseContent created during data generation
    '''
    def setContentData(self,courseContent):
        self.contentsFeatures = courseContent
        
    def getTopics(self):
        return self.topics
    '''
    topics created during data generation
    '''   
    def setTopics(self,topics):
        self.topics = topics
        
    def getTopicContent(self):
        return self.topicsContent
        
    def setTopicContent(self,topicsContent):
        self.topicsContent = topicsContent

'''
Class that enscapulates student & content data generators. Its uses the StudentDataGen & ContentDataGen to create data. 
'''
class DataGenerator:
    
    def __init__(self):
        self.studentDataGen = StudentDataGen()
        self.contentDataGenerator = ContentDataGen()
        
    def createStudentData(self):
        self.studentData =  self.studentDataGen.create()

    def getStudentData(self):
        return self.studentData
    
    def createContentData(self):
        self.contentsFeatures = self.contentDataGenerator.getContentsFeatures() 
        self.topicContent = self.contentDataGenerator.getTopicContent()
        self.topics = self.contentDataGenerator.getTopics()
        
    def getContentData(self):
        return self.contentsFeatures
    
    def getTopicContent(self):
        return self.topicContent
    
    def getTopics(self):
        return self.topics

'''
This is the student data generator
'''
class StudentDataGen:
    def __init__(self):
        self.number_of_students = 5 # Students taking the course. 
        self.student_context = ['video','audio','reading','kinesthetic'] # Student preferences
        # TO-DO : Have student preferences & probability of having those preferences as a tuple. 
    
    def create(self):
        ## Create Student Context Data
        student_context_df = pd.DataFrame(data=np.random.binomial(1 , [0.7,0.6,0.5,0.4] , 
                            size=(self.number_of_students,len(self.student_context))) , columns = self.student_context)
        return student_context_df
    
'''
This is the content data generator
'''
class ContentDataGen:
    
    def __init__(self):
        self.number_of_topics = 5 # Number of topics in the course
        self.content_context = ['A','B','C','D','E','F'] # Content features. Add meaningful features.
        self.prob_content_context = [0.8,0.7,0.6,0.5,0.4,0.3]
        self.no_contents_per_topic = np.random.randint(2,5,self.number_of_topics) # Variable number of contents per topic.
    
    def create(self):
        all_contents = list()
        all_topics = list()
        topic_content = {}
        for i,j in enumerate(self.no_contents_per_topic):
            topic_id = "T_" + str(i+1) # e.g : T_10
            content_ids = [] # Temporary variable to help map topic to content. 
            for j_1 in range(1,j+1) : # Number of contents
                c_id = 'C_' + str(i+1) + '_' + str(j_1) # e.g : C_10_2 : Content number 2 for topics 10
                content_ids.append(c_id)
                all_contents.append(c_id)
            topic_content[topic_id] = content_ids 
            all_topics.append(topic_id)
        return topic_content , all_topics , all_contents
    
    # Content related features
    def getContentsFeatures(self):
        self.topic_content , self.all_topics , self.all_contents = self.create()
        content_context_df = pd.DataFrame(data=np.random.binomial(1 , self.prob_content_context, 
                             size=(sum(self.no_contents_per_topic),len(self.content_context))) , 
                             columns = self.content_context , index=self.all_contents)
        return content_context_df
    
    def getTopicContent(self):
        return self.topic_content
    
    def getTopics(self):
        return self.all_topics


# Contexts

In [15]:
import pickle
'''
Context data for learning
'''
class Context:
    
    # Student Preferences: 
    # Visual (S_V) , Text (S_T) , Demo-based (S_D) , Practical (S_P), Step-by-step (S_S) ,Activity / Task based (S_AT), 
    # Lecture (S_L) , Audio (S_A) , Self-evaluation (S_SE) , Pre-assessment (S_PA)
    # Students preference to learning via various ways can also be evaluated on a scale of 10, rather being binary. 
    def getStudentContext(self):
        return self.studentContext
    
    def setStudentContext(self):
         with open('student.pickle', 'rb') as student_file:
            self.studentContext= pickle.load(student_file)
    
    # Content Features 
    # Ease of understanding (C_E) , Simple / Intuitive (C_I) , Surface / In-depth (C_ID) , Brief / Concise (C_C), 
    # Thorough (C_T), Preference / Well reviewed / Well rated (C_R) , Theoritical / Abstract (C_A), 
    # Practical / Hands on (C_P), Experimental / Task-based (C_ETB)
    # Content preference to learning via various ways can also be evaluated on a scale of 10, rather being binary. 
    def getContentContext(self):
        return self.contentContext
   
    def setContentContext(self):
        with open('content.pickle', 'rb') as content_file:
            self.contentContext= pickle.load(content_file)
        
    def getTopic(self):
        return self.topic
    
    def setTopic(self):
        with open('topic.pickle', 'rb') as topic_file:
            self.topic = pickle.load(topic_file)
    
    def getTopicContent(self):
        return self.topic_content
    
    def setTopicContent(self):
        with open('topic_content.pickle', 'rb') as topic_content_file:
            self.topic_content= pickle.load(topic_content_file)
                
    def prepareContext(self,studentContext,contentContext):
        context = pd.DataFrame() 
        for content in list(contentContext.index):
            c = pd.Series()
            c = c.append([studentContext,contentContext.loc[content]]) # Combine student & content. 
            c['Content_id'] = content
            context = context.append(c, ignore_index=True)
        context = context.set_index('Content_id')
        return context
    
    def loadData(self):
        self.setStudentContext()
        self.setContentContext()
        self.setTopic()
        self.setTopicContent()
        print(self.getStudentContext())
        print(type(self.getStudentContext()))
        print('*********************************')
        print(self.getContentContext())
        print(type(self.getContentContext()))
        print('*********************************')
        print(self.getTopic())
        print(type(self.getTopic()))
        print('*********************************')
        print(self.getTopicContent())
        print(type(self.getTopicContent()))
                
c_test = Context()
c_test.loadData()

    S_V   S_T   S_D   S_P   S_S  S_AT   S_L   S_A  S_SE  S_PA
0  0.57  0.90  0.41  0.21  0.71  0.33  0.03  0.68  0.42  0.22
1  0.47  0.02  0.65  0.23  0.68  0.67  0.80  0.40  0.10  0.85
2  0.84  0.93  0.30  0.93  0.91  0.58  0.01  0.27  0.43  0.89
3  0.80  0.98  0.66  0.02  0.10  0.15  0.67  0.18  0.22  0.75
4  0.83  0.22  0.28  0.51  0.68  0.06  0.73  0.55  0.90  0.08
<class 'pandas.core.frame.DataFrame'>
*********************************
        C_E   C_I  C_ID   C_C   C_T   C_R   C_A   C_P  C_ETB
C_1_1  0.27  0.59  0.56  0.42  0.82  0.27  0.84  0.22   0.75
C_1_2  0.14  0.41  0.80  0.91  0.41  0.84  0.09  0.70   0.73
C_1_3  0.48  0.77  0.98  0.86  0.75  0.90  0.79  0.11   0.32
C_1_4  0.48  0.64  0.35  0.63  0.45  0.10  0.09  0.83   0.11
C_2_1  0.97  0.92  0.99  0.22  0.90  0.34  0.43  0.50   0.27
C_2_2  0.52  0.00  0.40  0.97  0.21  0.80  0.73  0.41   0.11
C_2_3  0.58  0.83  0.01  0.74  0.51  0.31  0.98  0.83   0.99
C_3_1  0.76  0.02  0.53  0.23  0.62  0.11  0.50  0.02   0.04
C_3_2  

# Skip Classifier

In [16]:
# Online Stochastic Gradient Descent. This classifier decides whether or not to skip to the next topic. 
# TO-DO : Change loss functions (Log,Hinge,Others) to find if they impact performance. Try different values of parameters 
# For instance SGD has a parameter alpha, SVM has a parameter C. To optimize, you can train a mini-batch of samples, 
# rather than one data point at a time. Try different values of learning_rate . Look at the class_weight parameter if you 
# want to give more weight to samples of one class over the other. Need to understand about warm_start parameter
# We need to record predictions made by the classifier to evaluate its performance over rounds.
from sklearn import linear_model
from sklearn.exceptions import NotFittedError
class SkipClassifier:
    
    def __init__(self):
        self.clf = linear_model.SGDClassifier()
        
    def check_fitted(self,clf): 
        return hasattr(clf, "classes_")
    
    def train(self,student,pta,next_topic_pta,label):
        X = pd.Series()
        X = X.append([student,pd.Series([pta,next_topic_pta],index=['pta','next_topic_pta'])])
        X = np.array([X.values])
        Y = np.array([label])
        clf = self.clf.partial_fit(X,Y,classes=np.array([0,1]))
        
        
    def predict(self,student,pta,next_topic_pta):
        X = pd.Series()
        X = X.append([student,pd.Series([pta,next_topic_pta],index=['pta','next_topic_pta'])])
        if self.check_fitted(self.clf):
            Y = self.clf.predict([X.values])[0]
            confidence_score = self.clf.decision_function([X.values])[0]
        else:
            Y = 0
            confidence_score = 0
        #print('Skip Prediction : {0} & Confidence Score : {1}'.format(Y,confidence_score))
        # Get confidence score 
        return Y , confidence_score 

In [19]:
class SkipTopic:
    def __init__(self):
        self.skipClassifier = SkipClassifier()
        self.confidence_threshold = 60 # It the confidence score returned by the classifier is greater than this, then we trust in the decision made by the classifier. 
                
    def skipTopic(self,student,pta,topic_number,context_obj,topic_content,oracle):
        contentContext = context_obj.getContentContext() # Get the content dataframe.
#         print("contentContext : " , contentContext )
        topic = context_obj.getTopic() # Get the topic list. 
        current_topic_index = topic.index(topic_number) # Get the index number of the current topic
        next_topic_index = current_topic_index + 1
        next_topic = '' # Initialized to make it accessible outside the if statement. 
        if next_topic_index < len(topic): # Check to see if we're going out of bounds
            next_topic = topic[next_topic_index]
            next_topic_contents = topic_content[next_topic]
            t_c = contentContext.loc[next_topic_contents]
            X = context_obj.prepareContext(student,t_c)
            arm_pulled , next_topic_pta = oracle.expectedPayoff(X,next_topic_contents)
        else:
            # Will be going out of bounds. Current topic is the last topic. No more topics to complete. 
            next_topic_pta = 0
            #return # There is no topic to skip to. OR is might be better to exit the system. 
        skip_decision , confidence_score = self.skipClassifier.predict(student,pta,next_topic_pta)
        if skip_decision and confidence_score < self.confidence_threshold:
    #             with self.skipTopic_lock:
            print('Actual decision made by classifier : ' , skip_decision)
            print('Confidence score returned is {0}, which is less than threshold {1}'.format(confidence_score , self.confidence_threshold))
            skip_decision = 0
        #skip_decision = self.skipClassifier.predict(student,le.transform(next_topic),pta,next_topic_pta)
        return skip_decision,next_topic_pta

    def setLabel(self,skip_decision,actual_payoff):
        if skip_decision == 0 and actual_payoff == 0:
            label = 1
        if skip_decision == 0 and actual_payoff == 1:
            label = 0
        return label
    
    def train(self,student,pta,pta_next_topic,label):
        self.skipClassifier.train(student,pta,pta_next_topic,label)    
            

# Omniscient Policy / Oracle

In [20]:
class Oracle : 
    '''
    arms: Content ids
    contexts: Features
    '''
    def __init__(self):
        self.rounds = 0 # Number of round played
        self.rounds_data = pd.DataFrame() # Rounds data required for Skip Algorithm
        self.oracle_lock = threading.Lock()
        self.skipTopic = SkipTopic()
    
    def setParameters(self, features , arms): # Setting optimal parameter theta
        parameters = np.random.uniform(size=(len(arms) , len(features)))
        # Normalize parameters
        for i in range(parameters.shape[0]): # Have it in a list comprehension.
            parameters[i] = parameters[i] / np.sum(parameters[i])
        self.theta_df = pd.DataFrame(data = parameters ,  index = arms , columns = features , dtype= np.float)
    
    def expectedPayoff(self,contexts,arms):
        arms_payoff = list()
        for arm in arms:
            arm_theta = self.theta_df.loc[arm]
            X = contexts.loc[arm]
#                 print('X.type {0} , X.shape {1} , X = {2} '.format(type(X),X.shape,X))
#                 print('arm_theta.type {0} , arm_theta.shape {1} , arm_theta = {2}  : '.format(type(arm_theta),arm_theta.shape,arm_theta))
            pta = pd.Series.dot(X,arm_theta) # Vector dim : (1 * d) (d * 1).
            arms_payoff.append(pta)
        arm_index = np.argmax(arms_payoff)
        arm_pulled = arms[arm_index]
        expected_payoff = np.max(arms_payoff)
        return arm_pulled,expected_payoff

# Simulator

In [22]:
class Simulator:
    
    def __init__(self):
#         self.dataGenerator = DataGenerator()
#         self.dataGenerator.createStudentData()
#         self.dataGenerator.createContentData()
#         self.students = Students()
#         self.students.setStudentsFeatures(self.dataGenerator.getStudentData())
#         self.contents = Content()
#         self.contents.setContentData(self.dataGenerator.getContentData())
#         self.contents.setTopics(self.dataGenerator.getTopics())
#         self.contents.setTopicContent(self.dataGenerator.getTopicContent())
        
        self.context = Context()
        self.context.setStudentContext()
        self.context.setContentContext()
        self.context.setTopic()
        self.context.setTopicContent()
        self.oracle = Oracle()
        self.skipTopic = SkipTopic()
        self.simulator_lock = threading.Lock()
        self.rounds=0

    def getPayoff(self,pta):
        reward = bernoulli.rvs(size=1,p=pta)[0] # Simulate student's response
        with self.simulator_lock:
            self.rounds += 1
            print('Actual Reward : ', reward)        
        return reward
    
    def takeCourse(self,student_number,student,contentContext,topic,topic_content):
        for i in topic:
            contents = topic_content[i] # You now have all arm associated with the topic 't'
            t_c = contentContext.loc[contents]
            with self.simulator_lock:
                print('Student Number is on topic  : ', i)
            contexts = self.context.prepareContext(student,t_c)
            arms = list(t_c.index)
            while arms:
                arm , pta = self.oracle.expectedPayoff(contexts,arms)
                skip_decision , pta_next_topic = self.skipTopic.skipTopic(student,pta,i,self.context,topic_content,self.oracle)
                if skip_decision:
                    break # Decision is to skip. Hence, we won't pull the arm. 
                else:
                    actual_payoff = self.getPayoff(pta)
                    with self.simulator_lock:
                        print('topic {0} completed by :{1} with reward {2} '.format(i,student_number,actual_payoff))
                        self.rounds+=1
                    label = self.skipTopic.setLabel(skip_decision,actual_payoff) # Set Label
                    self.skipTopic.train(student,pta,pta_next_topic,label)
                if actual_payoff != 1:
                    arms.remove(arm)
                else:
                    break # Move to the next topic 

    def main(self):
        studentContext = self.context.getStudentContext() # Student dataframe
        contentContext = self.context.getContentContext() # Content Dataframe
        topic = self.context.getTopic() # List of topics. 
        topic_content = self.context.getTopicContent() # Topics Data, which includes topics to content mapping.
        features = list(studentContext.columns) + list(contentContext.columns)
        self.oracle.setParameters(features , contentContext.index)  
        for student_number , student in studentContext.iterrows():
            t = threading.Thread(target=self.takeCourse, args=(student_number,student,contentContext,topic,topic_content))
            t.daemon = True # classifying as a daemon, so they will die when the main dies
            t.start() # begins, must come after daemon definition
#         t.join()
        print('Total Number of rounds : ', self.rounds)  
    
simulator = Simulator()
simulator.main()

Student Number is on topic  :  T_1
Student Number is on topic  :  T_1
Student Number is on topic  :  T_1
Student Number is on topic  :  T_1
Total Number of rounds :  0
Student Number is on topic  :  T_1
Actual Reward :  0
topic T_1 completed by :1 with reward 0 
Actual decision made by classifier :  1
Confidence score returned is 34.74350650764082, which is less than threshold 60
Actual Reward :  1
topic T_1 completed by :0 with reward 1 
Student Number is on topic  :  T_2
Actual decision made by classifier :  1
Confidence score returned is 1.0340119604543025, which is less than threshold 60
Actual Reward :  0
topic T_1 completed by :3 with reward 0 
Actual decision made by classifier :  1
Confidence score returned is 12.833669724063064, which is less than threshold 60
Actual Reward :  1
topic T_1 completed by :1 with reward 1 
Student Number is on topic  :  T_2
Actual Reward :  0
topic T_2 completed by :0 with reward 0 
Actual Reward :  0
topic T_1 completed by :4 with reward 0 
Actua