In [1]:
# This is the learning algorithm that will be used. It has topic skipping and multi-threading enabled.

In [18]:
import numpy as np
import pandas as pd
from scipy.stats import bernoulli
from sklearn import linear_model
import threading
# from sklearn.preprocessing import LabelEncoder

# Dataset

In [10]:
'''
This class holds student data. It is meant to hold all attributes of the students. StudentContext is meant to take a subset 
of the attributes from this class.
'''
class Students:
    """Holder for the student data produced during data generation."""

    def getStudentsFeatures(self):
        """Return the object last stored through ``setStudentsFeatures``.

        Raises AttributeError if nothing has been stored yet.
        """
        return self.studentsFeatures

    def setStudentsFeatures(self, student_data):
        """Remember ``student_data`` (created during data generation) on this instance."""
        self.studentsFeatures = student_data
    
'''
This class holds content data. It is meant to hold all attributes of contents & topics. ContentContext takes a subset of 
the attributes of contents & topics.
'''
class Content:
    """Holder for the course content data produced during data generation.

    Three independent values are kept: the content feature data, the list of
    topics, and the topic -> content mapping. Each getter raises AttributeError
    until the matching setter has been called.
    """

    # --- content features -------------------------------------------------
    def setContentData(self, courseContent):
        """Store ``courseContent`` (created during data generation)."""
        self.contentsFeatures = courseContent

    def getContentData(self):
        """Return the value stored by ``setContentData``."""
        return self.contentsFeatures

    # --- topics -----------------------------------------------------------
    def setTopics(self, topics):
        """Store ``topics`` (created during data generation)."""
        self.topics = topics

    def getTopics(self):
        """Return the value stored by ``setTopics``."""
        return self.topics

    # --- topic -> content mapping ------------------------------------------
    def setTopicContent(self, topicsContent):
        """Store the topic-to-content mapping."""
        self.topicsContent = topicsContent

    def getTopicContent(self):
        """Return the value stored by ``setTopicContent``."""
        return self.topicsContent

'''
Class that encapsulates the student & content data generators. It uses StudentDataGen & ContentDataGen to create data. 
'''
class DataGenerator:
    """Facade over the student and content generators.

    ``createStudentData`` / ``createContentData`` run the generators and cache
    their results on the instance; the ``get*`` methods hand the cached
    results back (AttributeError if the matching ``create*`` was never run).
    """

    def __init__(self):
        self.studentDataGen = StudentDataGen()
        self.contentDataGenerator = ContentDataGen()

    # ---- students ----------------------------------------------------------
    def createStudentData(self):
        """Run the student generator and keep its result."""
        generated = self.studentDataGen.create()
        self.studentData = generated

    def getStudentData(self):
        """Return the result cached by ``createStudentData``."""
        return self.studentData

    # ---- content -----------------------------------------------------------
    def createContentData(self):
        """Run the content generator and keep features, mapping and topic list.

        The feature call comes first on purpose: it is the call that makes the
        content generator build its topic/content structures.
        """
        generator = self.contentDataGenerator
        self.contentsFeatures = generator.getContentsFeatures()
        self.topicContent = generator.getTopicContent()
        self.topics = generator.getTopics()

    def getContentData(self):
        """Return the content features cached by ``createContentData``."""
        return self.contentsFeatures

    def getTopicContent(self):
        """Return the topic-to-content mapping cached by ``createContentData``."""
        return self.topicContent

    def getTopics(self):
        """Return the topic list cached by ``createContentData``."""
        return self.topics

'''
This is the student data generator
'''
class StudentDataGen:
    """Generator of random binary student-preference data.

    Parameters
    ----------
    number_of_students : int, default 5
        Number of students taking the course (rows of the generated frame).
    student_context : sequence of str, optional
        Preference names (columns). Defaults to
        ``['video', 'audio', 'reading', 'kinesthetic']``.
    prob_student_context : sequence of float, optional
        Probability that a student has each preference, one entry per name in
        ``student_context``. Defaults to ``[0.7, 0.6, 0.5, 0.4]``.

    Raises
    ------
    ValueError
        If the number of probabilities differs from the number of preferences.
    """

    def __init__(self, number_of_students=5, student_context=None, prob_student_context=None):
        self.number_of_students = number_of_students  # Students taking the course.
        if student_context is None:
            student_context = ['video', 'audio', 'reading', 'kinesthetic']
        if prob_student_context is None:
            prob_student_context = [0.7, 0.6, 0.5, 0.4]
        self.student_context = list(student_context)  # Student preferences
        # Kept next to the preference names (instead of being a literal inside
        # create()) so the two lists cannot silently drift apart.
        self.prob_student_context = list(prob_student_context)
        if len(self.prob_student_context) != len(self.student_context):
            raise ValueError(
                'prob_student_context must have one probability per entry of student_context '
                '(got {0} probabilities for {1} preferences)'.format(
                    len(self.prob_student_context), len(self.student_context)))

    def create(self):
        """Return a DataFrame of 0/1 draws, one row per student and one column per preference.

        Each cell is an independent Bernoulli draw from the global NumPy random
        state, using that column's probability.
        """
        draws = np.random.binomial(
            1,
            self.prob_student_context,
            size=(self.number_of_students, len(self.student_context)))
        return pd.DataFrame(data=draws, columns=self.student_context)
    
'''
This is the content data generator
'''
class ContentDataGen:
    """Generator of topics, their contents, and random binary content features.

    Parameters
    ----------
    number_of_topics : int, default 5
        Number of topics in the course.
    content_context : sequence of str, optional
        Content feature names. Defaults to ``['A', 'B', 'C', 'D', 'E', 'F']``.
    prob_content_context : sequence of float, optional
        Probability of each content feature being 1, one entry per feature.
        Defaults to ``[0.8, 0.7, 0.6, 0.5, 0.4, 0.3]``.

    The number of contents per topic is drawn once, at construction time, with
    ``np.random.randint(2, 5, number_of_topics)``.

    Raises
    ------
    ValueError
        If the number of probabilities differs from the number of features.
    """

    def __init__(self, number_of_topics=5, content_context=None, prob_content_context=None):
        self.number_of_topics = number_of_topics  # Number of topics in the course
        if content_context is None:
            content_context = ['A', 'B', 'C', 'D', 'E', 'F']
        if prob_content_context is None:
            prob_content_context = [0.8, 0.7, 0.6, 0.5, 0.4, 0.3]
        self.content_context = list(content_context)  # Content features. Add meaningful features.
        self.prob_content_context = list(prob_content_context)
        if len(self.prob_content_context) != len(self.content_context):
            raise ValueError(
                'prob_content_context must have one probability per entry of content_context '
                '(got {0} probabilities for {1} features)'.format(
                    len(self.prob_content_context), len(self.content_context)))
        # Variable number of contents per topic.
        self.no_contents_per_topic = np.random.randint(2, 5, self.number_of_topics)

    def create(self):
        """Build topic ids, content ids and the topic -> contents mapping.

        Returns
        -------
        (topic_content, all_topics, all_contents)
            ``topic_content`` maps ``'T_<i>'`` to its list of ``'C_<i>_<k>'`` ids,
            ``all_topics`` lists the topic ids in order and ``all_contents``
            lists every content id in topic order. The result depends only on
            ``no_contents_per_topic``, so repeated calls give equal results.
        """
        all_contents = list()
        all_topics = list()
        topic_content = {}
        for position, n_contents in enumerate(self.no_contents_per_topic):
            topic_id = "T_" + str(position + 1)  # e.g : T_10
            # e.g : C_10_2 : Content number 2 for topic 10
            content_ids = ['C_' + str(position + 1) + '_' + str(k)
                           for k in range(1, int(n_contents) + 1)]
            all_contents.extend(content_ids)
            topic_content[topic_id] = content_ids
            all_topics.append(topic_id)
        return topic_content, all_topics, all_contents

    def _ensureCreated(self):
        """Build the topic/content structures if no earlier call has done so."""
        if not hasattr(self, 'topic_content'):
            self.topic_content, self.all_topics, self.all_contents = self.create()

    # Content related features
    def getContentsFeatures(self):
        """Return a DataFrame of 0/1 feature draws, indexed by content id.

        Also (re)builds ``topic_content``, ``all_topics`` and ``all_contents``.
        Every call draws fresh random features from the global NumPy state.
        """
        self.topic_content, self.all_topics, self.all_contents = self.create()
        draws = np.random.binomial(
            1,
            self.prob_content_context,
            size=(len(self.all_contents), len(self.content_context)))
        return pd.DataFrame(data=draws, columns=self.content_context, index=self.all_contents)

    def getTopicContent(self):
        """Return the topic -> contents mapping, building it on first use."""
        self._ensureCreated()
        return self.topic_content

    def getTopics(self):
        """Return the ordered list of topic ids, building it on first use."""
        self._ensureCreated()
        return self.all_topics

# Contexts

In [11]:
'''
Context data for learning
'''
class Context:
    """Context data for learning.

    Holds the student, content and topic context handed over by the simulator
    and builds the combined student+content context rows.
    """

    def getStudentContext(self):
        """Return the value stored by ``setStudentContext``."""
        return self.studentContext

    def setStudentContext(self, studentFeatures):
        """Store the student context."""
        self.studentContext = studentFeatures

    def getContentContext(self):
        """Return the value stored by ``setContentContext``."""
        return self.contentContext

    def setContentContext(self, courseContent):
        """Store the content context."""
        self.contentContext = courseContent

    def setTopicContext(self, topics):
        """Store the topic context."""
        self.topicContext = topics

    def getTopicContext(self):
        """Return the value stored by ``setTopicContext``."""
        return self.topicContext

    def prepareContext(self, studentContext, contentContext):
        """Combine one student's features with every content row.

        Parameters
        ----------
        studentContext : pandas.Series
            Features of a single student, indexed by feature name.
        contentContext : pandas.DataFrame
            One row per content, indexed by content id.

        Returns
        -------
        pandas.DataFrame
            Indexed by content id (index name ``'Content_id'``), in the row
            order of ``contentContext``. Each row holds the student's features
            followed by that content's features. An empty ``contentContext``
            gives an empty frame with the same columns.

        Notes
        -----
        The frame is assembled with a single ``pd.concat`` call rather than
        ``Series.append`` / ``DataFrame.append``, which pandas 2.0 removed.
        """
        n_contents = len(contentContext.index)
        # Repeat the student's feature vector once per content row.
        student_block = pd.DataFrame(
            np.tile(np.asarray(studentContext.values), (n_contents, 1)),
            index=contentContext.index,
            columns=studentContext.index)
        context = pd.concat([student_block, contentContext], axis=1)
        # Index.rename returns a new Index, so the caller's frame keeps its own index name.
        context.index = contentContext.index.rename('Content_id')
        return context

# Skip Classifier

In [12]:
# Online Stochastic Gradient Descent. This classifier decides whether or not to skip to the next topic. 
# TO-DO : Change the loss function (log, hinge, others) to find out whether it impacts performance. Try different parameter values: 
# for instance, SGD has a parameter alpha and SVM has a parameter C. To optimize, train on a mini-batch of samples 
# rather than one data point at a time. Try different values of learning_rate. Look at the class_weight parameter if you 
# want to give more weight to samples of one class over the other. The warm_start parameter still needs to be understood.
# We need to record the predictions made by the classifier to evaluate its performance over rounds. 
from sklearn.exceptions import NotFittedError
class SkipClassifier:
    """Online binary classifier deciding whether to skip to the next topic.

    Wraps ``linear_model.SGDClassifier`` and trains it one sample at a time
    through ``partial_fit``. A sample is the student's features followed by
    ``pta`` (expected payoff of the chosen arm on the current topic) and
    ``next_topic_pta`` (best expected payoff on the next topic).
    """

    def __init__(self):
        self.clf = linear_model.SGDClassifier()

    def check_fitted(self, clf):
        """Return True once ``clf`` exposes ``classes_``, i.e. has been trained at least once."""
        return hasattr(clf, "classes_")

    def _features(self, student, pta, next_topic_pta):
        """Return the (1, n_features) float sample shared by ``train`` and ``predict``.

        Built with NumPy instead of ``Series.append``, which pandas 2.0 removed.
        """
        sample = np.concatenate([np.asarray(student, dtype=float), [pta, next_topic_pta]])
        return sample.reshape(1, -1)

    def train(self, student, pta, next_topic_pta, label):
        """Update the classifier with a single labelled sample.

        ``label`` is expected to be 0 or 1; both classes are declared on every
        call so the very first ``partial_fit`` already knows the label set.
        """
        X = self._features(student, pta, next_topic_pta)
        Y = np.array([label])
        self.clf.partial_fit(X, Y, classes=np.array([0, 1]))

    def predict(self, student, pta, next_topic_pta):
        """Return ``(decision, confidence_score)`` for one sample.

        Before any training has happened the result is ``(0, 0)``: do not skip,
        with zero confidence. Afterwards ``decision`` is the predicted class and
        ``confidence_score`` is the classifier's ``decision_function`` value
        for the sample.

        The next topic's identity is deliberately not a feature: an integer
        encoding would make topics with larger codes weigh more. A one-hot
        encoding would be needed to add it.
        """
        if not self.check_fitted(self.clf):
            return 0, 0
        X = self._features(student, pta, next_topic_pta)
        Y = self.clf.predict(X)[0]
        confidence_score = self.clf.decision_function(X)[0]
        return Y, confidence_score

# Skip Topic

In [13]:
class SkipTopic:
    """Decides whether a student should skip to the next topic and trains that decision.

    Combines a ``SkipClassifier`` with a confidence threshold: a "skip"
    prediction is only honoured when its confidence score reaches the threshold.
    """

    def __init__(self):
        self.skipClassifier = SkipClassifier()
        # If the confidence score returned by the classifier is at least this,
        # we trust the decision made by the classifier.
        self.confidence_threshold = 60
        self.skipTopic_lock = threading.Lock()

    def skipTopic(self, student, pta, topic_number, context_obj, topic_content, linUCB):
        """Return ``(skip_decision, next_topic_pta)`` for the current round.

        Parameters
        ----------
        student : features of the student taking the course.
        pta : expected payoff of the arm chosen for the current topic.
        topic_number : id of the current topic; must be in the topic context list.
        context_obj : object providing ``getContentContext``, ``getTopicContext``
            and ``prepareContext``.
        topic_content : mapping from topic id to the list of its content ids.
        linUCB : learner providing ``expectedPayoff(contexts, arms)``.

        ``next_topic_pta`` is the best expected payoff on the following topic,
        or 0 when the current topic is the last one. ``skip_decision`` is the
        classifier's prediction, forced to 0 when the classifier said "skip"
        with a confidence score below ``confidence_threshold``.
        """
        contentContext = context_obj.getContentContext()  # Content dataframe.
        topicContext = context_obj.getTopicContext()  # Topic list.
        next_topic_index = topicContext.index(topic_number) + 1
        if next_topic_index < len(topicContext):  # There is a next topic to look at.
            next_topic_contents = topic_content[topicContext[next_topic_index]]
            t_c = contentContext.loc[next_topic_contents]
            X = context_obj.prepareContext(student, t_c)
            # Only the payoff is needed here; the arm that would be pulled is ignored.
            _, next_topic_pta = linUCB.expectedPayoff(X, next_topic_contents)
        else:
            # Current topic is the last topic: there is nothing to skip to.
            next_topic_pta = 0
        skip_decision, confidence_score = self.skipClassifier.predict(student, pta, next_topic_pta)
        if skip_decision and confidence_score < self.confidence_threshold:
            print('Actual decision made by classifier : ' , skip_decision)
            print('Confidence score returned is {0}, which is less than threshold {1}'.format(confidence_score , self.confidence_threshold))
            skip_decision = 0
        return skip_decision, next_topic_pta

    def setLabel(self, skip_decision, actual_payoff):
        """Return the training label for a round in which the arm was pulled.

        With ``skip_decision == 0`` the label is 1 when the pull paid nothing
        (``actual_payoff == 0``) and 0 when it paid off (``actual_payoff == 1``).

        Raises
        ------
        ValueError
            For any other combination. No label is defined for those cases;
            this replaces the ``UnboundLocalError`` raised previously.
        """
        if skip_decision != 0:
            raise ValueError(
                'setLabel is only defined for rounds where the topic was not skipped '
                '(got skip_decision={0!r})'.format(skip_decision))
        if actual_payoff == 0:
            return 1
        if actual_payoff == 1:
            return 0
        raise ValueError('actual_payoff must be 0 or 1 (got {0!r})'.format(actual_payoff))

    def train(self, student, pta, pta_next_topic, label):
        """Forward one labelled sample to the underlying skip classifier."""
        self.skipClassifier.train(student, pta, pta_next_topic, label)

# Nature / Environment / Universe

In [14]:
class Oracle:
    """Holds the hidden per-arm parameters used to simulate rewards."""

    def setParameters(self, contexts, arms):
        """Draw random parameters and store them as ``self.theta_df``.

        Parameters
        ----------
        contexts : sequence
            Feature names; they become the columns of ``theta_df``.
        arms : sequence
            Content ids; they become the index of ``theta_df``.

        Each row is drawn uniformly from [0, 1) using the global NumPy random
        state and then scaled so that the row sums to 1.
        """
        parameters = np.random.uniform(size=(len(arms), len(contexts)))
        # Normalize each arm's parameters so that they sum to 1.
        parameters = parameters / parameters.sum(axis=1, keepdims=True)
        # The builtin ``float`` replaces the ``np.float`` alias, which NumPy 1.24 removed.
        self.theta_df = pd.DataFrame(data=parameters, index=arms, columns=contexts, dtype=float)
    
#     '''
#     X: Context information. 
#     arm_id: Id of the arm pulled. 
#     '''
#     def getReward(self,X,arm_id):
#         arm_theta = self.theta_df.loc[arm_id] #Get parameters for the arm predicted by the learning algo
#         print('X.type {0} , X.shape {1} , X = {2} '.format(type(X),X.shape,X))
#         print('arm_theta.type {0} , arm_theta.shape {1} , arm_theta = {2}  : '.format(type(arm_theta),arm_theta.shape,arm_theta))
#         expected_reward = pd.Series.dot(X,arm_theta) # Vector dim : (1 * d) (d * 1).
#         print('expected_reward : ', expected_reward)
#         reward = bernoulli.rvs(size=1,p=expected_reward)[0] # Simulate student's response
#         print('Actual Reward : ', reward)
#         return reward

# Learning Algorithm

In [20]:
class LinUCB:
    """LinUCB contextual bandit with one independent linear model per arm.

    Parameters
    ----------
    alpha : float, default 0.5
        Hyper parameter scaling the confidence-bound term of the payoff.

    Attributes
    ----------
    arm_params : dict
        Maps a content id (arm) to its ``Arm`` state object.
    LinUCB_lock : threading.Lock
        Guards ``arm_params`` and the per-arm state. The simulator shares a
        single LinUCB instance between one thread per student, so
        ``expectedPayoff`` and ``updateParams`` hold this lock while they read
        or modify arm state.
    """

    def __init__(self, alpha=0.5):
        self.alpha = alpha  # Hyper parameter required for LinUCB to adjust confidence bounds.
        self.arm_params = {}  # Maps content to arm object
        self.LinUCB_lock = threading.Lock()

    def expectedPayoff(self, contexts, arms):
        """Score every arm and return the best one.

        Parameters
        ----------
        contexts : pandas.DataFrame
            One row of student+content features per arm, indexed by arm id.
        arms : list
            Ids of the arms that may be pulled; must not be empty.

        Returns
        -------
        (arm_pulled, expected_payoff)
            The id of the arm with the highest payoff (first one on ties) and
            that payoff.

        Raises
        ------
        ValueError
            If ``arms`` is empty.

        Arms seen for the first time get a fresh ``Arm`` sized to the context.
        """
        if len(arms) == 0:
            raise ValueError('expectedPayoff needs at least one arm to choose from')
        with self.LinUCB_lock:
            arms_payoff = list()
            for arm in arms:
                # Plain float vector: keeps the arithmetic independent of pandas dtypes.
                X = np.asarray(contexts.loc[arm], dtype=float)
                if arm not in self.arm_params:  # New content: create its parameters.
                    self.arm_params[arm] = Arm(len(X))
                arm_obj = self.arm_params[arm]
                self.getTheta(arm_obj)  # Refreshes arm_obj.theta, which getPta reads.
                arms_payoff.append(self.getPta(X, arm_obj))  # pta : pay-off at round 't' for arm 'a'.
            expected_payoff = np.max(arms_payoff)  # Used as input data for the skip algorithm
            print('arms_payoff : ', arms_payoff)  # Expected pay-off of all arms.
            arm_index = np.argmax(arms_payoff)  # Index of the arm with max pay-off
            print('Index of arm with max payoff : ', arm_index)
            arm_pulled = arms[arm_index]
            print('Arm pulled : ', arm_pulled)
            return arm_pulled, expected_payoff

    def getTheta(self, arm):
        """Recompute ``arm.theta = Ainv . b``, store it on ``arm`` and return it.

        arm: Arm object
        """
        arm.theta = np.dot(arm.Ainv, arm.b)  # A vector
        return arm.theta

    def getMean(self, context, arm):
        """Return the estimated mean reward ``theta^T . context`` for ``arm``."""
        mean = np.dot(arm.theta.T, context)
        return mean

    def getUCB(self, context, arm):
        """Return the confidence width ``sqrt(context^T . Ainv . context)`` for ``arm``."""
        ucb = np.sqrt(np.dot(np.dot(context.T, arm.Ainv), context))
        return ucb

    def getPta(self, context, arm):
        """Return the payoff: mean estimate plus ``alpha`` times the confidence width."""
        payoff = self.getMean(context, arm) + self.alpha * self.getUCB(context, arm)
        return payoff

    def updateParams(self, arm, context, reward):
        """Fold one observed ``reward`` for ``arm`` under ``context`` into its state.

        Updates ``A += context . context^T`` and ``b += reward * context``, then
        recomputes ``Ainv``.

        Raises
        ------
        KeyError
            If ``arm`` has never been scored by ``expectedPayoff``.
        """
        # Coerce to a float ndarray so A and b stay plain ndarrays whatever the caller passes.
        x = np.asarray(context, dtype=float)
        with self.LinUCB_lock:
            arm_obj = self.arm_params[arm]
            arm_obj.A += np.outer(x, x)
            arm_obj.b += reward * x
            arm_obj.Ainv = np.linalg.inv(arm_obj.A)

class Arm:
    """State of a single arm.

    ``A`` starts as the identity matrix and ``b`` as the zero vector, both of
    size ``dimensions``; ``Ainv`` is the inverse of ``A`` and ``theta`` is
    ``Ainv . b``.
    """

    def __init__(self, dimensions):
        design = np.identity(dimensions)
        response = np.zeros(dimensions)
        inverse = np.linalg.inv(design)
        self.A, self.b, self.Ainv = design, response, inverse
        self.theta = inverse.dot(response)
    
#     def updateParams(self, context, reward):
#         self.A += np.outer(context,context.T)
#         self.b += reward * context
#         self.Ainv = np.linalg.inv(self.A)   

# Simulator

In [22]:
class Simulator:
    """Wires data generation, the oracle and the learners together and runs the course.

    ``main`` starts one thread per student; every thread runs ``takeCourse``
    against the same ``LinUCB`` and ``SkipTopic`` instances. ``rounds`` counts
    the arms pulled across all students and is updated under ``simulator_lock``.
    """

    def __init__(self):
        self.dataGenerator = DataGenerator()
        self.dataGenerator.createStudentData()
        self.dataGenerator.createContentData()
        self.students = Students()
        self.students.setStudentsFeatures(self.dataGenerator.getStudentData())
        self.contents = Content()
        self.contents.setContentData(self.dataGenerator.getContentData())
        self.contents.setTopics(self.dataGenerator.getTopics())
        self.contents.setTopicContent(self.dataGenerator.getTopicContent())
        self.contexts = Context()
        self.contexts.setStudentContext(self.students.getStudentsFeatures())
        self.contexts.setContentContext(self.contents.getContentData())
        self.contexts.setTopicContext(self.contents.getTopics())
        self.oracle = Oracle()
        self.linUCB = LinUCB()
        self.skipTopic = SkipTopic()
        self.simulator_lock = threading.Lock()
        self.rounds = 0

    def getPayoff(self, X, arm, pta):
        """Simulate the student's response to ``arm`` and return the 0/1 reward.

        Parameters
        ----------
        X : pandas.Series
            Student+content context of the pulled arm.
        arm : id of the pulled arm (a row label of ``oracle.theta_df``).
        pta : expected payoff computed by the learner; not used here.

        The success probability is the dot product of ``X`` with the oracle's
        parameters for ``arm``. It is clipped to [0, 1] before being handed to
        ``bernoulli.rvs``, so a value that leaves that interval (for instance
        through floating-point rounding) cannot fail the draw.
        """
        arm_theta = self.oracle.theta_df.loc[arm]  # Hidden parameters of the pulled arm
        expected_reward = pd.Series.dot(X, arm_theta)  # Vector dim : (1 * d) (d * 1).
        expected_reward = float(np.clip(expected_reward, 0.0, 1.0))
        reward = bernoulli.rvs(size=1, p=expected_reward)[0]  # Simulate student's response
        with self.simulator_lock:
            print('Actual Reward : ', reward)
        return reward

    def takeCourse(self, student_number, student, contentContext, topicContext, topic_content):
        """Walk one student through every topic.

        For each topic the learner proposes arms until one of the following
        happens: the skip logic decides to skip (the arm is not pulled), a
        pulled arm pays off (reward 1), or the topic runs out of arms. Every
        pulled arm updates the learner and trains the skip classifier.
        """
        for i in topicContext:
            contents = topic_content[i]  # All arms associated with this topic
            t_c = contentContext.loc[contents]
            contexts = self.contexts.prepareContext(student, t_c)
            arms = list(t_c.index)
            while arms:
                arm, pta = self.linUCB.expectedPayoff(contexts, arms)
                skip_decision, pta_next_topic = self.skipTopic.skipTopic(
                    student, pta, i, self.contexts, topic_content, self.linUCB)
                if skip_decision:
                    break  # Decision is to skip. Hence, we won't pull the arm.
                actual_payoff = self.getPayoff(contexts.loc[arm], arm, pta)
                with self.simulator_lock:
                    self.rounds += 1
                self.linUCB.updateParams(arm, contexts.loc[arm], actual_payoff)
                print('Completed topic {0} for student {1}'.format(i,student_number))
                label = self.skipTopic.setLabel(skip_decision, actual_payoff)  # Set Label
                self.skipTopic.train(student, pta, pta_next_topic, label)
                if actual_payoff == 1:
                    break  # Move to the next topic
                arms.remove(arm)  # No reward: try another arm of the same topic.

    def main(self):
        """Set the oracle parameters, run every student in its own thread and report the rounds.

        All student threads are joined before the total is printed, so the
        count covers every student, and a course with no students finishes
        with a total of 0.
        """
        studentContext = self.contexts.getStudentContext()  # Student dataframe
        contentContext = self.contexts.getContentContext()  # Content Dataframe
        topicContext = self.contexts.getTopicContext()  # List of topics.
        topic_content = self.contents.getTopicContent()  # Topic to content mapping.
        features = list(studentContext.columns) + list(contentContext.columns)
        self.oracle.setParameters(features, contentContext.index)
        workers = []
        for student_number, student in studentContext.iterrows():
            t = threading.Thread(
                target=self.takeCourse,
                args=(student_number, student, contentContext, topicContext, topic_content))
            t.daemon = True  # Daemon threads die when the main thread dies
            t.start()  # Must come after the daemon flag is set
            workers.append(t)
        for t in workers:
            t.join()
        print('Total Number of rounds : ', self.rounds)

# Build the simulated course (data, contexts, oracle and learners are created in
# Simulator.__init__) and run every student through it; main() prints the total
# number of rounds played.
simulator = Simulator()
simulator.main()
# print('Rounds Data:')
# #         print(self.linUCB.getRoundsData())
# print('Total Number of rounds : ', self.rounds)  

arms_payoff :  [1.3228756555322954, 1.224744871391589]
Index of arm with max payoff :  0
Arm pulled :  C_1_1
arms_payoff :  [1.4142135623730951, 1.3228756555322954]
Index of arm with max payoff :  0
Arm pulled :  C_1_1
arms_payoff :  [1.3228756555322954, 1.224744871391589]
Index of arm with max payoff :  0
Arm pulled :  C_1_1
arms_payoff : arms_payoff :  [1.118033988749895, 1.118033988749895]
Index of arm with max payoff :  0
Arm pulled :  [1.224744871391589, 1.224744871391589]
Index of arm with max payoff :  0
Arm pulled :  C_2_1
 C_2_1
arms_payoff :  [1.3228756555322954, 1.224744871391589]
Index of arm with max payoff :  0
Arm pulled :  C_1_1
Actual Reward :  1
Actual Reward :  1
arms_payoff :  [1.3228756555322954, 1.224744871391589]
Index of arm with max payoff :  0
Arm pulled :  C_1_1
Completed topic T_1 for student 0
Completed topic T_1 for student 1
arms_payoff :  [1.118033988749895, 1.118033988749895]
Index of arm with max payoff :  0
Arm pulled :  C_2_1
arms_payoff :  [1.118033

arms_payoff :  [1.4626060145123057, 0.8660254037844386, 0.7791937224739796]
Index of arm with max payoff :  0
Arm pulled :  C_5_1
Actual decision made by classifier :  1
Confidence score returned is 10.960942360296762, which is less than threshold 60
Actual Reward :  0
Completed topic T_4 for student 2
arms_payoff :  [1.365028367816617, 0.8660254037844386, 0.7791937224739796]
Index of arm with max payoff :  0
Arm pulled :  C_5_1
Actual Reward :  1
Completed topic T_4 for student 4
arms_payoff : arms_payoff :  [1.3650283678166168, 0.8660254037844386, 0.7791937224739796]
Index of arm with max payoff :  0
Arm pulled :  C_5_1
Actual Reward :  0
Completed topic T_5 for student 0
 [1.4626060145123057, 0.8660254037844386, 0.7791937224739796]
Index of arm with max payoff :  0
Arm pulled :  C_5_1
Actual decision made by classifier :  1
Confidence score returned is 39.177311782859, which is less than threshold 60
arms_payoff :  [0.8660254037844386, 0.7791937224739796]
Index of arm with max payof