# Omniscient Policy / Oracle

This notebook implements an omniscient policy (an oracle) that knows all of the true probability distributions. At every step it makes the best decision based on that knowledge, so it does not have to learn anything. Because the oracle already has the optimal parameters $\theta$, it is expected to maximize reward in fewer rounds.

In [1]:
import numpy as np
import pandas as pd
from scipy.stats import bernoulli

# Multi-threading

In [2]:
import threading
from queue import Queue
import time

# This would be our LinUCB class, and exampleJob would be replaced by learn. We would also create a lock for it.
print("Starting ExampleJobClass")
class ExampleJobClass:
    '''
    Toy job used to demonstrate the thread pool: each job sleeps briefly and then
    prints which thread handled which work item.
    '''

    # Class-level lock, so every instance shares it; it keeps print output from
    # different threads from interleaving.
    print_lock = threading.Lock()

    def exampleJob(self, worker):
        '''
        Simulate half a second of work, then print the current thread's name
        followed by `worker` while holding the shared print lock.
        '''
        time.sleep(0.5)  # stand-in for real work
        with self.print_lock:
            thread_name = threading.current_thread().name
            print(thread_name, worker)

print("About to create ExampleJobClass")
ej_obj = ExampleJobClass()
# This would be a new function we'll create. This is where we would iterate over topics
# and call the LinUCB learn method.
def threader():
    '''
    Worker loop run by each pool thread.

    Repeatedly takes a work item from the module-level queue `q`, runs
    `ej_obj.exampleJob` on it and marks the item as done. The loop never
    returns; the threads running it are expected to be daemon threads.
    '''
    while True:
        # Blocks until a work item is available in the queue.
        worker = q.get()
        try:
            # Run the example job on the item taken from the queue.
            ej_obj.exampleJob(worker)
        finally:
            # Always mark the item as done, even if the job raised; otherwise
            # q.join() in the main thread would wait forever for this item.
            q.task_done()
        
# Create the work queue that the threader threads consume from.
q = Queue()

# This would be in the Simulator class to simulate the students created. The loop would be
# iterated depending on the number of students.
# Number of worker threads we allow (the message below is printed once per thread).
for x in range(10):
    print("Inside for loop : 10 threads created")
    t = threading.Thread(target=threader)

    # Mark the thread as a daemon so it dies when the main thread exits.
    t.daemon = True

    # Start the thread; this must come after the daemon flag is set.
    t.start()

start = time.time()

# Enqueue 20 jobs (the message below is printed once per job).
for worker in range(20):
    print("Inside for loop : 20 jobs assigned")
    q.put(worker)

# Block until every queued job has been marked done with q.task_done().
# Note: this waits for the queue to drain, not for the threads to terminate.
q.join()

# With 10 workers and 20 tasks of .5 seconds each, the whole job takes ~1 second
# using threading. Run one after another, 20 tasks of .5 seconds would take 10 seconds.
print('Entire job took:',time.time() - start)

Starting ExampleJobClass
About to create ExampleJobClass
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 10 threads created
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for loop : 20 jobs assigned
Inside for lo

# DataSet

In [9]:
class Students:
    '''
    Holds the student data. It is meant to carry all attributes of the students;
    the student context is meant to take a subset of the attributes stored here.
    '''

    def getStudentsFeatures(self):
        '''Return whatever was last stored with setStudentsFeatures.'''
        return self.studentsFeatures

    def setStudentsFeatures(self , student_data):
        '''Store student_data (the student data created during data generation).'''
        self.studentsFeatures = student_data
    
class Content:
    '''
    Holds the content data. It is meant to carry all attributes of contents and topics;
    the content context takes a subset of those attributes.
    '''

    def setContentData(self,courseContent):
        '''Store courseContent (the content data created during data generation).'''
        self.contentsFeatures = courseContent

    def getContentData(self):
        '''Return whatever was last stored with setContentData.'''
        return self.contentsFeatures

    def setTopics(self,topics):
        '''Store topics (the topic data created during data generation).'''
        self.topicContent = topics

    def getTopics(self):
        '''Return whatever was last stored with setTopics.'''
        return self.topicContent

class DataGenerator:
    '''
    Encapsulates the student and content data generators. It uses StudentDataGen and
    ContentDataGen to create the data and keeps the results for later retrieval.
    '''

    def __init__(self):
        # Student generator is built first, then the content generator.
        self.studentDataGen = StudentDataGen()
        self.contentDataGenerator = ContentDataGen()

    def createStudentData(self):
        '''Generate the student data and keep it on this object.'''
        generated = self.studentDataGen.create()
        self.studentData = generated

    def createContentData(self):
        '''Generate the content features and fetch the topic data from the content generator.'''
        content_gen = self.contentDataGenerator
        # Features are requested before the topic data, in this order.
        self.contentsFeatures = content_gen.getContentsFeatures()
        self.topicContent = content_gen.getTopicContent()

    def getStudentData(self):
        '''Return the data produced by createStudentData.'''
        return self.studentData

    def getContentData(self):
        '''Return the content features produced by createContentData.'''
        return self.contentsFeatures

    def getTopicData(self):
        '''Return the topic data produced by createContentData.'''
        return self.topicContent

class StudentDataGen:
    '''
    Student data generator.

    Draws one 0/1 value per student and per preference from independent Bernoulli
    distributions.

    Parameters (all optional; the defaults reproduce the original hard-coded setup):
        number_of_students: number of students taking the course (rows generated).
        student_context: names of the student preferences (columns generated).
        prob_student_context: probability of a student having each preference,
            in the same order as student_context.

    Raises:
        ValueError: if the number of probabilities differs from the number of preferences.
    '''

    def __init__(self, number_of_students=2, student_context=None, prob_student_context=None):
        self.number_of_students = number_of_students  # Students taking the course.
        # Student preferences.
        self.student_context = (['video', 'audio', 'reading', 'kinesthetic']
                                if student_context is None else list(student_context))
        # Probability of a student having each preference.
        self.prob_student_context = ([0.7, 0.6, 0.5, 0.4]
                                     if prob_student_context is None else list(prob_student_context))
        if len(self.prob_student_context) != len(self.student_context):
            raise ValueError('prob_student_context must have one probability per entry of student_context')

    def create(self):
        '''
        Create the student context data.

        Returns a DataFrame with one row per student and one column per preference,
        holding 0/1 draws from the global NumPy random generator.
        '''
        draws = np.random.binomial(1, self.prob_student_context,
                                   size=(self.number_of_students, len(self.student_context)))
        return pd.DataFrame(data=draws, columns=self.student_context)
    
class ContentDataGen:
    '''
    Content data generator.

    Builds topic ids ("T_<topic>"), content ids ("C_<topic>_<content>") and a 0/1
    feature table for every content.

    Parameters (all optional; the defaults reproduce the original hard-coded setup):
        number_of_topics: number of topics in the course.
        content_context: names of the content features (columns generated).
        prob_content_context: probability of each feature being 1, in the same
            order as content_context.

    Raises:
        ValueError: if the number of probabilities differs from the number of features.
    '''

    def __init__(self, number_of_topics=5, content_context=None, prob_content_context=None):
        self.number_of_topics = number_of_topics  # Number of topics in the course
        # Content features. Add meaningful features.
        self.content_context = (['A', 'B', 'C', 'D', 'E', 'F']
                                if content_context is None else list(content_context))
        self.prob_content_context = ([0.8, 0.7, 0.6, 0.5, 0.4, 0.3]
                                     if prob_content_context is None else list(prob_content_context))
        if len(self.prob_content_context) != len(self.content_context):
            raise ValueError('prob_content_context must have one probability per entry of content_context')
        # Variable number of contents per topic: 2, 3 or 4 (randint's upper bound is exclusive).
        self.no_contents_per_topic = np.random.randint(2, 5, self.number_of_topics)
        # Filled in on first use by getContentsFeatures / getTopicContent.
        self.topic_content = None
        self.all_contents = None

    def create(self):
        '''
        Build the content ids from no_contents_per_topic.

        Returns (topic_content, all_contents): a dict mapping each topic id to the
        list of its content ids, and the flat list of all content ids in topic order.
        '''
        topic_content = {}
        all_contents = []
        for topic_number, n_contents in enumerate(self.no_contents_per_topic, start=1):
            topic_id = 'T_' + str(topic_number)  # e.g : T_10
            # e.g : C_10_2 : content number 2 for topic 10
            content_ids = ['C_' + str(topic_number) + '_' + str(k) for k in range(1, n_contents + 1)]
            topic_content[topic_id] = content_ids
            all_contents.extend(content_ids)
        return topic_content, all_contents

    # Content related features
    def getContentsFeatures(self):
        '''
        Rebuild the topic/content ids and return a DataFrame of 0/1 feature draws,
        indexed by content id, with one column per content feature.
        '''
        self.topic_content, self.all_contents = self.create()
        draws = np.random.binomial(1, self.prob_content_context,
                                   size=(len(self.all_contents), len(self.content_context)))
        return pd.DataFrame(data=draws, columns=self.content_context, index=self.all_contents)

    def getTopicContent(self):
        '''
        Return the dict mapping topic ids to their content ids.

        The mapping is built on demand, so this works even when
        getContentsFeatures has not been called yet.
        '''
        if self.topic_content is None:
            self.topic_content, self.all_contents = self.create()
        return self.topic_content


# Contexts

In [10]:
class Context:
    '''
    Context data for learning: stores the student and content contexts and combines
    them into one feature table per student.
    '''

    def getStudentContext(self):
        '''Return whatever was last stored with setStudentContext.'''
        return self.studentContext

    def setStudentContext(self , studentFeatures):
        '''Store the student features to use as student context.'''
        self.studentContext = studentFeatures

    def getContentContext(self):
        '''Return whatever was last stored with setContentContext.'''
        return self.contentContext

    def setContentContext(self , courseContent):
        '''Store the course content to use as content context.'''
        self.contentContext = courseContent

    def prepareContext(self,studentContext,contentContext):
        '''
        Combine one student's features with the features of every content.

        studentContext: Series of one student's features (labels are feature names).
        contentContext: DataFrame of content features, indexed by content id.

        Returns a new DataFrame indexed by content id (index named 'Content_id')
        whose columns are the student features followed by the content features;
        the student's values are repeated on every row. An empty contentContext
        yields an empty frame with the same columns. The inputs are not modified.
        '''
        # Repeat the student's feature vector once per content row. This replaces the
        # row-by-row Series.append / DataFrame.append calls, which no longer exist in
        # pandas 2.0 and copied the whole frame on every iteration.
        student_block = pd.DataFrame(
            np.tile(np.asarray(studentContext), (len(contentContext.index), 1)),
            index=contentContext.index,
            columns=studentContext.index,
        )
        context = pd.concat([student_block, contentContext], axis=1)
        # rename_axis returns a new object, so the caller's index keeps its own name.
        return context.rename_axis('Content_id')

# Omniscient Policy / Oracle

In [18]:
class Oracle :
    '''
    Omniscient policy: it holds the true parameters of every arm and always offers
    the arm with the highest expected payoff.

    arms: Content ids
    contexts: Features
    '''

    def __init__(self):
        self.rounds = 0 # Number of rounds played
        self.rounds_data = pd.DataFrame() # Rounds data required for Skip Algorithm

    def setParameters(self, features , arms):
        '''
        Draw the true parameters for every arm.

        features: feature names (columns of theta_df).
        arms: content ids (index of theta_df).

        Each arm gets a uniformly drawn parameter vector normalized to sum to 1,
        so the payoff of a 0/1 context stays within [0, 1]. The result is stored
        in self.theta_df.
        '''
        parameters = np.random.uniform(size=(len(arms) , len(features)))
        # Normalize every row in one vectorized step.
        parameters = parameters / parameters.sum(axis=1, keepdims=True)
        # Use the builtin float: the np.float alias was removed in NumPy 1.24.
        self.theta_df = pd.DataFrame(data = parameters ,  index = arms , columns = features , dtype = float)

    def expectedPayoff(self,student,topic_contents,context_obj):
        '''
        Play one topic for one student.

        student: the student's features.
        topic_contents: DataFrame of the topic's content features, indexed by arm id.
        context_obj: object whose prepareContext(student, topic_contents) returns a
            frame with one row of combined features per arm.

        Arms are offered in decreasing order of expected payoff (ties keep their
        original order). Each offer draws a Bernoulli reward with that payoff as
        the success probability and counts as one round in self.rounds. Play stops
        at the first reward of 1.

        Returns the id of the arm that was rewarded, or None if every arm failed.
        '''
        contexts = context_obj.prepareContext(student,topic_contents)
        arms = list(topic_contents.index)
        # The payoffs do not change between retries, so compute them once.
        payoffs = [float(contexts.loc[arm].dot(self.theta_df.loc[arm])) for arm in arms] # (1 * d) (d * 1)
        # sorted() is stable, also with reverse=True, so equal payoffs keep arm order.
        ranked = sorted(zip(arms, payoffs), key=lambda pair: pair[1], reverse=True)
        for arm, payoff in ranked:
            # Guard against floating-point drift just outside the valid probability range.
            probability = float(np.clip(payoff, 0.0, 1.0))
            reward = bernoulli.rvs(size=1,p=probability)[0] # Simulate student's response
            self.rounds += 1
            if reward == 1:
                return arm
        return None

# Simulator

In [23]:
class Simulator:
    '''
    Wires the generated data into the Students, Content and Context holders, creates
    the Oracle, and replays every (student, topic) pair against it.
    '''

    def __init__(self):
        # Generate student data first, then content data.
        generator = DataGenerator()
        generator.createStudentData()
        generator.createContentData()
        self.dataGenerator = generator

        self.students = Students()
        self.students.setStudentsFeatures(generator.getStudentData())

        self.contents = Content()
        self.contents.setContentData(generator.getContentData())
        self.contents.setTopics(generator.getTopicData())

        self.contexts = Context()
        self.contexts.setStudentContext(self.students.getStudentsFeatures())
        self.contexts.setContentContext(self.contents.getContentData())

        # A LinUCB learner is intended to be plugged in alongside the oracle later.
        self.oracle = Oracle()

    def main(self):
        '''
        Set the oracle's parameters over all features and all contents, let it play
        every topic for every student, then print the total number of rounds.
        '''
        student_frame = self.contexts.getStudentContext() # Student dataframe
        content_frame = self.contexts.getContentContext() # Content dataframe
        topic_map = self.contents.getTopics() # Topic to content mapping
        feature_names = list(student_frame.columns) + list(content_frame.columns)
        self.oracle.setParameters(feature_names , content_frame.index)
        for _ , student in student_frame.iterrows():
            for topic_id in topic_map:
                arm_ids = topic_map[topic_id] # All arms associated with this topic
                arm_features = content_frame.loc[arm_ids]
                self.oracle.expectedPayoff(student , arm_features , self.contexts)
        print('Rounds Data: ')
        print('Total Number of rounds : ', self.oracle.rounds)
        
simulator = Simulator()
simulator.main()

Rounds Data: 
Total Number of rounds :  12
