In [69]:
# This is the learning algorithm that will be used. It has topic skipping and multi-threading enabled.

In [27]:
import numpy as np
import pandas as pd
from scipy.stats import bernoulli
from sklearn import linear_model
import threading

# Contexts

In [28]:
import os,pickle

# Directory that holds the pickled dataset (student / content / topic / topic_content).
# Swap the last path component to run against a different dataset size:
#   'very_small' | 'small' | 'medium' | 'large' | 'very_large'
file_path = os.path.join(os.path.curdir, os.pardir, 'dataset', 'very_small')

# Context data for learning
class Context:
    """
    Contextual information required by contextual bandit algorithms to make better predictions. It encapsulates all
    data about the students, topics & content that the learning algorithm needs.

    The data is read from pickle files located in the module-level ``file_path`` directory. Each ``set*`` method loads
    one file into an attribute; the matching ``get*`` method returns that attribute (and raises AttributeError if the
    setter has not been called yet).
    """

    def _loadPickle(self, file_name):
        """
        Load and return the object pickled in ``file_name`` inside the ``file_path`` directory.

        NOTE(security): pickle.load can execute arbitrary code. Only point ``file_path`` at dataset files you
        generated yourself / trust.
        """
        with open(os.path.join(file_path, file_name), 'rb') as pickle_file:
            return pickle.load(pickle_file)

    def getStudentContext(self):
        """
        Student Preferences:
        Visual (S_V) , Text (S_T) , Demo-based (S_D) , Practical (S_P), Step-by-step (S_S) ,Activity / Task based (S_AT),
        Lecture (S_L) , Audio (S_A) , Self-evaluation (S_SE) , Pre-assessment (S_PA)
        Students' preferences for the various ways of learning are rated on a scale from 0 to 1, rather than being binary.
        """
        return self.studentContext

    def setStudentContext(self):
        """
        Loads the student context from 'student.pickle' and stores it in ``self.studentContext``.
        """
        self.studentContext = self._loadPickle('student.pickle')

    def getContentContext(self):
        """
        Content Features
        Ease of understanding (C_E) , Simple / Intuitive (C_I) , Surface / In-depth (C_ID) , Brief / Concise (C_C),
        Thorough (C_T), Preference / Well reviewed / Well rated (C_R) , Theoretical / Abstract (C_A),
        Practical / Hands on (C_P), Experimental / Task-based (C_ETB)
        Content features are rated on a scale from 0 to 1, rather than being binary.
        """
        return self.contentContext

    def setContentContext(self):
        """
        Loads the content context from 'content.pickle' and stores it in ``self.contentContext``.
        """
        self.contentContext = self._loadPickle('content.pickle')

    def getTopic(self):
        """
        Gives the topics that are part of the course.
        """
        return self.topic

    def setTopic(self):
        """
        Loads & sets the topics that are part of the course (from 'topic.pickle').
        """
        self.topic = self._loadPickle('topic.pickle')

    def getTopicContent(self):
        """
        Gets the topic content. topic_content is a map of topics to content. So for every topic, it gives the content
        available for the topic. In education parlance, for any given topic, it shows the different ways of teaching
        this topic (via contents).
        """
        return self.topic_content

    def setTopicContent(self):
        """
        Sets the topic_content variable to the one in the serialized object ('topic_content.pickle'). topic_content is
        a map of topics to content: for every topic it gives the content available for that topic.
        """
        self.topic_content = self._loadPickle('topic_content.pickle')

    def prepareContext(self, studentContext, contentContext):
        """
        Given the student & content context available for a round, combine them into a single contextual frame.

        Inputs :

        studentContext : pandas Series with one student's features (index = feature names).
        contentContext : pandas DataFrame with one row per content / arm (index = content ids).

        Returns :

        context : DataFrame indexed by content id (index name 'Content_id'). Every row holds the student features
                  followed by the features of that content. The inputs are not modified.
        """
        # Repeat the student vector once per content row, then glue the content features on the right.
        # (Series.append / DataFrame.append were removed in pandas 2.0, and growing a frame row by row is quadratic.)
        student_block = pd.DataFrame(
            np.tile(np.asarray(studentContext), (len(contentContext.index), 1)),
            index=contentContext.index,
            columns=studentContext.index,
        )
        context = pd.concat([student_block, contentContext], axis=1)
        # rename_axis returns a new object, so the caller's contentContext index keeps its own name.
        return context.rename_axis('Content_id')

    def loadData(self):
        """
        Method used to test data retrieval. Data generator handles the data generation. This method loads every
        dataset file and prints each object followed by its type. It is not invoked in the main program.
        """
        self.setStudentContext()
        self.setContentContext()
        self.setTopic()
        self.setTopicContent()
        getters = (self.getStudentContext, self.getContentContext, self.getTopic, self.getTopicContent)
        for position, getter in enumerate(getters):
            if position:  # separator between sections, none before the first or after the last
                print('*********************************')
            value = getter()
            print(value)
            print(type(value))
                
# Smoke test: load every dataset pickle and print each object together with its type.
c_test = Context()
c_test.loadData()

     S_V   S_T   S_D   S_P   S_S  S_AT   S_L   S_A  S_SE  S_PA
0   0.60  0.65  0.77  0.06  0.97  0.10  0.14  0.75  0.53  0.92
1   0.89  0.94  0.76  0.67  0.33  0.43  0.57  0.22  0.21  0.08
2   0.66  0.14  0.06  0.60  0.79  0.37  0.29  0.62  0.95  0.48
3   0.86  0.39  0.14  0.65  0.79  0.44  0.57  0.19  0.76  0.59
4   0.19  0.65  0.84  0.07  0.87  0.67  0.42  0.51  0.79  0.03
5   0.48  0.63  0.13  0.23  0.31  0.85  0.92  0.36  0.17  0.09
6   0.00  0.44  0.16  0.43  0.30  0.06  0.12  0.13  0.54  0.41
7   0.58  0.03  0.05  0.58  0.18  0.41  0.54  0.96  0.15  0.93
8   0.22  0.20  0.80  0.92  0.20  0.48  0.05  0.72  0.67  0.47
9   0.17  0.20  0.33  0.58  0.38  0.50  0.55  0.33  0.38  0.22
10  0.96  0.23  0.19  0.29  0.20  0.78  0.73  0.57  0.56  0.41
11  0.58  0.32  0.99  0.08  0.94  0.09  0.13  0.55  0.49  0.83
12  0.38  0.52  0.67  0.97  0.67  0.26  0.66  0.12  0.73  0.57
13  0.40  0.30  0.63  0.19  0.19  0.23  0.48  0.53  0.96  0.12
14  0.88  0.26  0.53  0.46  0.69  0.17  0.94  0.47  0.3

# Skip Classifier

In [29]:
# Online stochastic gradient descent: this classifier decides whether or not to skip to the next topic.
# TODO: Try different loss functions (log, hinge, others) to see whether they impact performance, and try different
# parameter values (for instance, SGD has a parameter alpha and SVM has a parameter C). To optimize, train on a
# mini-batch of samples rather than one data point at a time. Try different values of learning_rate. Look at the
# class_weight parameter to give more weight to samples of one class over the other. The warm_start parameter still
# needs to be understood.
# We need to record the predictions made by the classifier to evaluate its performance over rounds.

class SkipClassifier:
    """
    A classifier which predicts whether or not to move to the next topic. This is important, because we want
    students to learn content which the algorithm is confident would help the student learn. The skip classifier is
    trained online, hence we use a confidence threshold, to be conservative & minimize skipping topics. Skipping is
    not preferred, but if the classifier is confident the next round would help gain higher rewards, then we should
    skip. Ideally, we want to consider skipping after the first pulled arm has failed, to avoid frustrating the
    student.

    The feature vector is the student's feature values followed by ``pta`` and ``next_topic_pta``.
    """
    def __init__(self, n_features=12):
        """
        Create the SGD classifier and prime it with a single all-zero sample labelled 0, so that both classes are
        registered before the first real prediction is requested.

        n_features : length of the feature vector (student features + 2 pay-off values). Defaults to 12, i.e.
                     10 student features plus ``pta`` and ``next_topic_pta``.
        """
        self.clf = linear_model.SGDClassifier()
        self.clf.partial_fit(np.zeros((1, n_features)), np.array([0]), classes=np.array([0, 1]))

    def _features(self, student, pta, next_topic_pta):
        """
        Build the (1, n_features) sample: student feature values followed by pta and next_topic_pta.
        """
        # Series.append was removed in pandas 2.0; plain numpy concatenation also accepts lists / arrays.
        student_values = np.asarray(student, dtype=float).ravel()
        return np.append(student_values, [pta, next_topic_pta]).reshape(1, -1)

    def check_fitted(self, clf):
        """
        Check if the classifier is fit before asking for prediction. Our classifier is trained in online mode, hence
        it could be asked to predict before fitting. Returns True when ``clf`` exposes a ``classes_`` attribute.
        """
        return hasattr(clf, "classes_")

    def train(self, student, pta, next_topic_pta, label):
        """
        Train the classifier online on one data point. In future we might want to consider training in mini-batches,
        rather than for every data point.

        student        : student feature vector (pandas Series or array-like).
        pta            : expected pay-off of the arm chosen for the current topic.
        next_topic_pta : expected pay-off of the best arm of the next topic.
        label          : 1 if skipping would have been right, else 0.
        """
        X = self._features(student, pta, next_topic_pta)
        Y = np.array([label])
        self.clf.partial_fit(X, Y, classes=np.array([0, 1]))

    def predict(self, student, pta, next_topic_pta):
        """
        Get the prediction from the classifier, along with the confidence score (the classifier's
        ``decision_function`` value) to help determine how reliable the prediction is.

        Returns (prediction as int, confidence score). If the classifier has not been fit yet, returns (0, 0).
        """
        X = self._features(student, pta, next_topic_pta)
        if not self.check_fitted(self.clf):
            return 0, 0
        Y = self.clf.predict(X)[0]
        confidence_score = self.clf.decision_function(X)[0]
        return int(Y), confidence_score

# Skip Topic

In [30]:
class SkipTopic:
    """
    Thin layer in front of the Skip Classifier. Before prediction it works out the expected pay-off of the next topic
    (validating that a next topic exists); after prediction it applies the confidence threshold to turn the raw
    classifier output into the final skip / don't-skip decision.
    """
    def __init__(self):
        """
        Creates the underlying skip classifier and fixes the confidence threshold used for skip decisions.
        """
        # A positive prediction is trusted only when its confidence score is strictly above this value.
        self.confidence_threshold = 55
        self.skipClassifier = SkipClassifier()

    def skipTopic(self,student,pta,topic_number,context_obj,topic_content,linUCB):
        """
        Decide whether the student should skip the current topic.

        Looks up the topic that follows ``topic_number``; if there is one, asks ``linUCB`` for the best expected
        pay-off among its contents, otherwise uses 0. The skip classifier is then queried with the current and next
        pay-offs, and its answer is accepted only above the confidence threshold.

        Returns (actual_decision, confidence_score, skip_decision, next_topic_pta).
        """
        content_frame = context_obj.getContentContext() # Content dataframe
        topics = context_obj.getTopic() # Ordered topic list
        following = topics.index(topic_number) + 1 # Position of the topic after the current one
        if following >= len(topics):
            # The current topic is the last one: there is nothing to skip to.
            next_topic_pta = 0
        else:
            upcoming_contents = topic_content[topics[following]]
            upcoming_frame = content_frame.loc[upcoming_contents]
            upcoming_context = context_obj.prepareContext(student,upcoming_frame)
            _, next_topic_pta = linUCB.expectedPayoff(upcoming_context,upcoming_contents)
        actual_decision , confidence_score = self.skipClassifier.predict(student,pta,next_topic_pta)
        confident_skip = actual_decision and confidence_score > self.confidence_threshold
        skip_decision = 1 if confident_skip else 0
        return actual_decision,confidence_score,skip_decision,next_topic_pta

    def setLabel(self,actual_payoff):
        """
        Label used to train the skip classifier: 1 when the pulled arm earned no reward, 0 otherwise.
        """
        return 1 if actual_payoff == 0 else 0

    def train(self,student,pta,pta_next_topic,label):
        """
        Forwards one training example to the skip classifier.
        """
        self.skipClassifier.train(student,pta,pta_next_topic,label)

# Nature / Environment / Universe

In [31]:
class Oracle:
    """
    It has the optimal parameters to maximize rewards. The learning algorithm updates its own parameters to emulate
    these. It is an omniscient policy that knows all the probability distributions. At every step of the way it makes
    the best decision based on its knowledge of the true distributions. It does not have to learn anything.
    """
    def setParameters(self, contexts , arms):
        """
        Sets the optimal parameters for the omniscient policy.

        Inputs :

        contexts : Feature names (one column per feature).
        arms : Arm / content ids (one row per arm).

        Result :

        self.theta_df : float DataFrame of shape (len(arms), len(contexts)). Each row is drawn uniformly at random
                        (numpy's global random state) and scaled so that it sums to 1.
        """
        parameters = np.random.uniform(size=(len(arms) , len(contexts)))
        # Normalize every row to sum to 1 (vectorised; keepdims makes the row sums broadcast across columns).
        parameters = parameters / parameters.sum(axis=1, keepdims=True)
        # np.float was removed in NumPy 1.24; the builtin float is the same dtype (float64).
        self.theta_df = pd.DataFrame(data = parameters ,  index = arms , columns = contexts , dtype = float)

# Learning Algorithm

In [32]:
class LinUCB:
    """
    The learning algorithm: it suggests which arm to pull / which content to show, and updates the parameters of the
    pulled arm from the student's feedback. This follows the disjoint linear model described in the paper
    [LinUCB](https://arxiv.org/abs/1003.0146).
    """
    def __init__(self,alpha=0.5):
        """
        alpha : Scales the confidence-bound term of every arm's pay-off; a larger alpha gives the bound more weight.
                This is a hyper parameter that should be tuned.
        arm_params : Maps each arm / content id to the Arm object holding that arm's parameters.
        """
        self.arm_params = {}
        self.alpha = alpha

    def expectedPayoff(self,contexts,arms):
        """
        Scores every available arm for this round and reports the best one.

        Input :

        contexts : Contextual data for the round (student & content context combined), indexed by arm.
        arms : Arms / contents available in this round.

        Returns :

        arm_pulled : The arm with the highest expected pay-off.
        expected_payoff : That pay-off, rounded to 2 decimals.

        The arm is only suggested here, not pulled: the skip classifier still has a say before the pull happens.
        Arms seen for the first time get a fresh Arm object.
        """
        scores = []
        for candidate in arms:
            features = contexts.loc[candidate] # Student & content context of this arm
            if candidate not in self.arm_params: # First time we see this content: create its parameters
                self.arm_params[candidate] = Arm(len(features.index))
            state = self.arm_params[candidate]
            self.getTheta(state) # Refreshes state.theta, which getPta reads through getMean
            scores.append(self.getPta(features , state)) # pay-off at round 't' for arm 'a'
        best_position = np.argmax(scores) # Index of the arm with the max pay-off
        best_score = np.max(scores) # Also used as input data for the skip algorithm
        return arms[best_position],np.round(best_score,2)

    def getTheta(self,arm):
        """
        Recomputes the arm's theta (Ainv . b), stores it on the arm and returns it.
        """
        theta = np.dot(arm.Ainv , arm.b) # A vector
        arm.theta = theta
        return theta

    def getMean(self, context , arm):
        """
        Mean expected reward of an arm for the given context (theta . context).
        """
        return np.dot(arm.theta.T , context)

    def getUCB(self , context ,arm):
        """
        Confidence-bound term of an arm for the given context: sqrt(context . Ainv . context).
        """
        projected = np.dot(context.T , arm.Ainv)
        return np.sqrt(np.dot(projected , context))

    def getPta(self, context , arm):
        """
        Expected pay-off of an arm: mean reward plus alpha times the confidence-bound term.
        """
        exploit = self.getMean(context,arm)
        explore = self.getUCB(context , arm)
        return exploit + self.alpha * explore

    def updateParams(self, arm , context, reward):
        """
        Folds the observed reward into the parameters (A, b, Ainv) of the pulled arm.
        """
        state = self.arm_params[arm]
        state.A += np.outer(context,context.T)
        state.b += reward * context
        state.Ainv = np.linalg.inv(state.A)
    
class Arm:
    """
    Holds the per-arm parameters that are updated each time the arm is pulled.
    """
    def __init__(self,dimensions):
        """
        Start from an identity matrix A, a zero vector b, and the Ainv / theta derived from them.
        """
        identity = np.identity(dimensions)
        zeros = np.zeros(dimensions)
        self.A = identity
        self.b = zeros
        self.Ainv = np.linalg.inv(identity)
        self.theta = np.dot(self.Ainv , zeros)

# Simulator

In [38]:
class Simulator:
    """
    It represents the teaching system. Several students log into it to take courses. Every student runs in its own
    thread; all threads share one learning algorithm (LinUCB), one skip classifier and one log.
    """

    # Column order of the round log. 'confidence_score' is last to match the order the log file has always had.
    _LOG_COLUMNS = ['student_number','topic','arm_pulled','pay-off','pay-off_next_topic','actual_decision',
                    'skip_decision','reward','confidence_score']

    def __init__(self):
        """
        Initialize the teaching system by loading data about students, topics & content. Also, initialize other
        objects to be used with the learning system, such as the learning algorithm (LinUCB), the omniscient policy
        (Oracle) and skip topic, which optimizes rewards by skipping topics whose content has low expected rewards.
        """
        self.context = Context()
        self.context.setStudentContext()
        self.context.setContentContext()
        self.context.setTopic()
        self.context.setTopicContent()
        self.oracle = Oracle()
        self.linUCB = LinUCB()
        self.skipTopic = SkipTopic()
        self.simulator_lock = threading.Lock() # Guards rounds, the log and the shared learners
        self.rounds=0
        self._log_records = [] # One dict per round; turned into self.logs by _refreshLogs
        self.logs = pd.DataFrame(columns = self._LOG_COLUMNS)

    def _refreshLogs(self):
        """
        Rebuild ``self.logs`` from the collected round records. The caller must hold ``simulator_lock``.
        """
        self.logs = pd.DataFrame(self._log_records, columns=self._LOG_COLUMNS)

    def getPayoff(self,X,arm,pta):
        """
        Student shares feedback about the content / understanding of the topic.

        Input :

        X : Context vector for the round (pandas Series whose index matches the oracle's feature columns).
        arm : Arm to be pulled / Content to be shown
        pta : Payoff at round 't' for pulling arm 'a' (currently unused, kept for interface compatibility)

        Returns :

        reward : Reward / Feedback from student for the content shown / arm pulled (0 or 1)
        """
        arm_theta = self.oracle.theta_df.loc[arm] # True parameters of the arm chosen by the learning algo
        expected_reward = X.dot(arm_theta) # Vector dim : (1 * d) (d * 1).
        # The dot product is used as a probability; clip it so floating-point drift cannot push it outside [0, 1].
        expected_reward = float(np.clip(expected_reward, 0.0, 1.0))
        reward = bernoulli.rvs(size=1,p=expected_reward)[0] # Simulate student's response
        return reward

    def takeCourse(self,student_number,studentContext,contentContext,topic,topic_content):
        """
        This method simulates a student taking a course. For every topic the learning algorithm proposes the content
        with the best expected pay-off and the skip classifier says whether the topic should be skipped. Skipping is
        only allowed after at least one content of the topic has failed. If the topic is not skipped, the student
        gives feedback, the pulled arm and the skip classifier are trained with it, and we either move on (reward 1)
        or try the next-best remaining content (reward other than 1). Every round is logged for analysis.

        Each round runs under ``simulator_lock``: the learners are shared by all student threads, and they are both
        read and mutated while choosing an arm and while training.

        Inputs :

        student_number : Student Id
        studentContext : Student context vector.
        contentContext : Contents context. This has context of all contents.
        topic : All the topics to be taught as part of the course.
        topic_content : Relates all topics to the contents available for every topic

        """
        for i in topic:
            skip_enabled = False
            contents = topic_content[i] # All arms associated with the topic 'i'
            t_c = contentContext.loc[contents] # Context rows of those arms
            contexts = self.context.prepareContext(studentContext,t_c) # Prepare context for this topic
            arms = list(t_c.index) # Arms still available for this topic
            while arms: # While we still have arms remaining.
                with self.simulator_lock:
                    arm , pta = self.linUCB.expectedPayoff(contexts,arms) # Best arm & its expected pay-off
                    actual_decision , confidence_score, skip_decision , pta_next_topic = self.skipTopic.skipTopic(studentContext,pta,i,self.context,topic_content,self.linUCB) # Check if it makes sense to skip this topic
                    record = {'student_number': student_number, 'topic': i, 'arm_pulled': arm, 'pay-off': pta,
                              'pay-off_next_topic': pta_next_topic, 'actual_decision': actual_decision,
                              'confidence_score': confidence_score, 'skip_decision': skip_decision}
                    if skip_decision and skip_enabled:
                        print('We\'re skipping. Student {0} is on topic {1} was expected to be shown content {2}. Expected Pay-off of this arm is {3}, compared to expected pay-off of next round is {4}. Actual decision was {5} with confidence {6} Decision of skip classifier is {7}'
                              .format(student_number,i,arm,pta,pta_next_topic,actual_decision,confidence_score,skip_decision))
                        self._log_records.append(record) # No 'reward': the arm is not pulled
                        break # Decision is to skip. Hence, we won't pull the arm.
                    actual_payoff = self.getPayoff(contexts.loc[arm],arm,pta) # Student shares feedback
                    record['reward'] = actual_payoff
                    self.rounds+=1
                    print('Student {0} is on topic {1} is shown content {2} feedback recd is {3}. Expected Pay-off of this arm is {4}, compared to expected pay-off of next round is {5}. Actual decision was {6} with confidence {7}. Decision of skip classifier is {8}'
                          .format(student_number,i,arm,actual_payoff,pta,pta_next_topic,actual_decision,confidence_score,skip_decision))
                    self._log_records.append(record)
                    self.linUCB.updateParams(arm,contexts.loc[arm], actual_payoff) # Update arm parameters for this round
                    label = self.skipTopic.setLabel(actual_payoff) # Set label to train skip classifier
                    self.skipTopic.train(studentContext,pta,pta_next_topic,label) # Train skip classifier.
                if actual_payoff != 1: # If we got no reward for this round
                    arms.remove(arm) # Drop that arm and try the next-best one
                    skip_enabled = True
                else:
                    break # Move to the next topic
        with self.simulator_lock:
            self._refreshLogs() # Keep self.logs current for callers that run takeCourse directly

    def main(self):
        """
        Runs the whole simulation: sets up the oracle, starts one thread per student, waits for all of them and then
        writes the round log to 'logs_linUCB_verySmall' (CSV format) and prints the total number of rounds.
        """
        studentContext = self.context.getStudentContext() # Student dataframe
        contentContext = self.context.getContentContext() # Content Dataframe
        topic = self.context.getTopic() # List of topics.
        topic_content = self.context.getTopicContent() # Topics Data, which includes topics to content mapping.
        features = list(studentContext.columns) + list(contentContext.columns)
        self.oracle.setParameters(features , contentContext.index)
        student_thread = list() # Keep track of students taking the course.
        for student_number , student in studentContext.iterrows():
            t = threading.Thread(target=self.takeCourse, args=(student_number,student,contentContext,topic,topic_content))
            student_thread.append(t)
            # Non-daemon threads keep running even if the main thread finishes first. We want every student to
            # complete the course, hence daemon is set to False.
            t.daemon = False
            t.start() # Must come after the daemon flag is set
        for t in student_thread: # Save the logs only after all students have completed the course.
            t.join()
        with self.simulator_lock:
            self._refreshLogs() # Also picks up rounds of a student thread that ended with an error
        self.logs.to_csv('logs_linUCB_verySmall',index=False)
        print('Total Number of rounds : ', self.rounds)

# Build the simulator (this loads the dataset pickles) and run every student through the course.
simulator = Simulator()
simulator.main()

Student 0 is on topic T_1 is shown content C_1_1 feedback recd is 1. Expected Pay-off of this arm is 1.45, compared to expected pay-off of next round is 1.41. Actual decision was 0 with confidence -10.0. Decision of skip classifier is 0
Student 2 is on topic T_1 is shown content C_1_1 feedback recd is 0. Expected Pay-off of this arm is 1.37, compared to expected pay-off of next round is 1.33. Actual decision was 0 with confidence -10.0. Decision of skip classifier is 0
Student 1 is on topic T_1 is shown content C_1_1 feedback recd is 1. Expected Pay-off of this arm is 1.39, compared to expected pay-off of next round is 1.35. Actual decision was 0 with confidence -10.0. Decision of skip classifier is 0
Student 12 is on topic T_1 is shown content C_1_1 feedback recd is 1. Expected Pay-off of this arm is 1.41, compared to expected pay-off of next round is 1.37. Actual decision was 1 with confidence 65.42914171656686. Decision of skip classifier is 1
Student 11 is on topic T_1 is shown con

Student 37 is on topic T_1 is shown content C_1_1 feedback recd is 1. Expected Pay-off of this arm is 1.41, compared to expected pay-off of next round is 1.37. Actual decision was 1 with confidence 58.59742533845747. Decision of skip classifier is 1
Student 8 is on topic T_1 is shown content C_1_1 feedback recd is 1. Expected Pay-off of this arm is 1.36, compared to expected pay-off of next round is 1.32. Actual decision was 1 with confidence 58.37774342990876. Decision of skip classifier is 1
Student 43 is on topic T_1 is shown content C_1_1 feedback recd is 1. Expected Pay-off of this arm is 1.24, compared to expected pay-off of next round is 1.2. Actual decision was 1 with confidence 48.181918380206966. Decision of skip classifier is 0
Student 49 is on topic T_1 is shown content C_1_1 feedback recd is 1. Expected Pay-off of this arm is 1.57, compared to expected pay-off of next round is 1.38. Actual decision was 1 with confidence 67.5606460144018. Decision of skip classifier is 1
St

Student 14 is on topic T_1 is shown content C_1_2 feedback recd is 0. Expected Pay-off of this arm is 1.29, compared to expected pay-off of next round is 1.39. Actual decision was 0 with confidence -17.737554422504363. Decision of skip classifier is 0
Student 41 is on topic T_2 is shown content C_2_3 feedback recd is 1. Expected Pay-off of this arm is 1.24, compared to expected pay-off of next round is 1.24. Actual decision was 0 with confidence -10.81999865140834. Decision of skip classifier is 0
Student 6 is on topic T_1 is shown content C_1_2 feedback recd is 1. Expected Pay-off of this arm is 0.98, compared to expected pay-off of next round is 1.11. Actual decision was 1 with confidence 54.94836896789707. Decision of skip classifier is 0
Student 9 is on topic T_2 is shown content C_2_3 feedback recd is 0. Expected Pay-off of this arm is 1.17, compared to expected pay-off of next round is 1.16. Actual decision was 1 with confidence 51.38441547952496. Decision of skip classifier is 0

Student 7 is on topic T_2 is shown content C_2_4 feedback recd is 0. Expected Pay-off of this arm is 1.26, compared to expected pay-off of next round is 1.31. Actual decision was 0 with confidence -36.77868434098183. Decision of skip classifier is 0
Student 5 is on topic T_2 is shown content C_2_4 feedback recd is 1. Expected Pay-off of this arm is 1.22, compared to expected pay-off of next round is 1.27. Actual decision was 0 with confidence -8.25424825075627. Decision of skip classifier is 0
Student 1 is on topic T_3 is shown content C_3_3 feedback recd is 0. Expected Pay-off of this arm is 1.35, compared to expected pay-off of next round is 1.27. Actual decision was 0 with confidence -14.262706897372823. Decision of skip classifier is 0
Student 13 is on topic T_2 is shown content C_2_4 feedback recd is 0. Expected Pay-off of this arm is 1.18, compared to expected pay-off of next round is 1.24. Actual decision was 0 with confidence -23.136547308259185. Decision of skip classifier is 

Student 8 is on topic T_2 is shown content C_2_4 feedback recd is 1. Expected Pay-off of this arm is 1.27, compared to expected pay-off of next round is 1.32. Actual decision was 1 with confidence 21.97484464688168. Decision of skip classifier is 0
Student 27 is on topic T_3 is shown content C_3_3 feedback recd is 1. Expected Pay-off of this arm is 1.44, compared to expected pay-off of next round is 1.36. Actual decision was 0 with confidence -47.040161322131055. Decision of skip classifier is 0
Student 44 is on topic T_3 is shown content C_3_3 feedback recd is 1. Expected Pay-off of this arm is 1.29, compared to expected pay-off of next round is 1.2. Actual decision was 0 with confidence -25.909853683604254. Decision of skip classifier is 0
Student 34 is on topic T_3 is shown content C_3_3 feedback recd is 0. Expected Pay-off of this arm is 1.41, compared to expected pay-off of next round is 1.34. Actual decision was 0 with confidence -54.11592679105673. Decision of skip classifier is

Student 15 is on topic T_2 is shown content C_2_2 feedback recd is 1. Expected Pay-off of this arm is 1.29, compared to expected pay-off of next round is 1.34. Actual decision was 1 with confidence 22.98155852893324. Decision of skip classifier is 0
Student 20 is on topic T_3 is shown content C_3_1 feedback recd is 0. Expected Pay-off of this arm is 1.25, compared to expected pay-off of next round is 1.2. Actual decision was 0 with confidence -42.67164811063746. Decision of skip classifier is 0
Student 45 is on topic T_2 is shown content C_2_2 feedback recd is 1. Expected Pay-off of this arm is 1.29, compared to expected pay-off of next round is 1.34. Actual decision was 1 with confidence 13.753331561752528. Decision of skip classifier is 0
Student 35 is on topic T_4 is shown content C_4_2 feedback recd is 0. Expected Pay-off of this arm is 1.2, compared to expected pay-off of next round is 1.25. Actual decision was 0 with confidence -68.98159482466943. Decision of skip classifier is 0

Student 28 is on topic T_3 is shown content C_3_3 feedback recd is 1. Expected Pay-off of this arm is 1.33, compared to expected pay-off of next round is 1.17. Actual decision was 1 with confidence 38.1273333170348. Decision of skip classifier is 0
Student 2 is on topic T_4 is shown content C_4_2 feedback recd is 0. Expected Pay-off of this arm is 1.43, compared to expected pay-off of next round is 1.3. Actual decision was 0 with confidence -1.571086444554782. Decision of skip classifier is 0
Student 13 is on topic T_3 is shown content C_3_2 feedback recd is 1. Expected Pay-off of this arm is 1.07, compared to expected pay-off of next round is 1.12. Actual decision was 0 with confidence -1.4266324462744866. Decision of skip classifier is 0
Student 5 is on topic T_4 is shown content C_4_2 feedback recd is 1. Expected Pay-off of this arm is 1.18, compared to expected pay-off of next round is 1.23. Actual decision was 1 with confidence 1.5338146732439966. Decision of skip classifier is 0


Student 14 is on topic T_4 is shown content C_4_1 feedback recd is 1. Expected Pay-off of this arm is 1.22, compared to expected pay-off of next round is 1.35. Actual decision was 0 with confidence -20.14667608376901. Decision of skip classifier is 0
Student 18 is on topic T_5 is shown content C_5_1 feedback recd is 1. Expected Pay-off of this arm is 1.39, compared to expected pay-off of next round is 1.35. Actual decision was 1 with confidence 31.217696813632642. Decision of skip classifier is 0
Student 25 is on topic T_2 is shown content C_2_1 feedback recd is 1. Expected Pay-off of this arm is 1.08, compared to expected pay-off of next round is 0.93. Actual decision was 0 with confidence -13.571806925540235. Decision of skip classifier is 0
Student 16 is on topic T_3 is shown content C_3_3 feedback recd is 0. Expected Pay-off of this arm is 1.02, compared to expected pay-off of next round is 1.12. Actual decision was 0 with confidence -7.908320877686706. Decision of skip classifier 

Student 29 is on topic T_2 is shown content C_2_3 feedback recd is 0. Expected Pay-off of this arm is 0.78, compared to expected pay-off of next round is 1.31. Actual decision was 0 with confidence -20.896445119542648. Decision of skip classifier is 0
Student 21 is on topic T_3 is shown content C_3_2 feedback recd is 1. Expected Pay-off of this arm is 1.08, compared to expected pay-off of next round is 1.26. Actual decision was 0 with confidence -37.8074655277059. Decision of skip classifier is 0
Student 10 is on topic T_3 is shown content C_3_2 feedback recd is 1. Expected Pay-off of this arm is 1.15, compared to expected pay-off of next round is 1.35. Actual decision was 0 with confidence -42.41539660233623. Decision of skip classifier is 0
Student 6 is on topic T_4 is shown content C_4_2 feedback recd is 1. Expected Pay-off of this arm is 0.79, compared to expected pay-off of next round is 0.93. Actual decision was 1 with confidence 22.425517254983696. Decision of skip classifier is

Student 12 is on topic T_6 is shown content C_6_1 feedback recd is 1. Expected Pay-off of this arm is 1.51, compared to expected pay-off of next round is 1.38. Actual decision was 0 with confidence -1.5790293553769823. Decision of skip classifier is 0
Student 13 is on topic T_5 is shown content C_5_4 feedback recd is 0. Expected Pay-off of this arm is 1.3, compared to expected pay-off of next round is 1.35. Actual decision was 0 with confidence -3.2890371104177527. Decision of skip classifier is 0
Student 28 is on topic T_5 is shown content C_5_4 feedback recd is 0. Expected Pay-off of this arm is 1.32, compared to expected pay-off of next round is 1.43. Actual decision was 0 with confidence -52.92976205035295. Decision of skip classifier is 0
Student 38 is on topic T_5 is shown content C_5_4 feedback recd is 0. Expected Pay-off of this arm is 1.49, compared to expected pay-off of next round is 1.56. Actual decision was 0 with confidence -63.68903592564892. Decision of skip classifier 

Student 27 is on topic T_6 is shown content C_6_3 feedback recd is 1. Expected Pay-off of this arm is 1.32, compared to expected pay-off of next round is 1.44. Actual decision was 0 with confidence -28.513614035230688. Decision of skip classifier is 0
Student 16 is on topic T_4 is shown content C_4_3 feedback recd is 0. Expected Pay-off of this arm is 1.29, compared to expected pay-off of next round is 1.13. Actual decision was 0 with confidence -32.856630584669176. Decision of skip classifier is 0
Student 17 is on topic T_4 is shown content C_4_2 feedback recd is 0. Expected Pay-off of this arm is 0.94, compared to expected pay-off of next round is 1.12. Actual decision was 0 with confidence -24.233565569189302. Decision of skip classifier is 0
Student 34 is on topic T_6 is shown content C_6_3 feedback recd is 1. Expected Pay-off of this arm is 1.3, compared to expected pay-off of next round is 1.42. Actual decision was 0 with confidence -44.65229621934407. Decision of skip classifier

Student 8 is on topic T_4 is shown content C_4_2 feedback recd is 0. Expected Pay-off of this arm is 1.1, compared to expected pay-off of next round is 1.33. Actual decision was 1 with confidence 20.57465332929875. Decision of skip classifier is 0
Student 29 is on topic T_4 is shown content C_4_3 feedback recd is 0. Expected Pay-off of this arm is 1.21, compared to expected pay-off of next round is 1.42. Actual decision was 0 with confidence -49.278699825168076. Decision of skip classifier is 0
Student 10 is on topic T_5 is shown content C_5_3 feedback recd is 1. Expected Pay-off of this arm is 1.33, compared to expected pay-off of next round is 1.35. Actual decision was 0 with confidence -60.24775196000966. Decision of skip classifier is 0
Student 41 is on topic T_6 is shown content C_6_3 feedback recd is 1. Expected Pay-off of this arm is 1.1, compared to expected pay-off of next round is 1.23. Actual decision was 0 with confidence -46.78754174979947. Decision of skip classifier is 0

Student 26 is on topic T_7 is shown content C_7_1 feedback recd is 0. Expected Pay-off of this arm is 1.09, compared to expected pay-off of next round is 1.33. Actual decision was 1 with confidence 50.271550734874076. Decision of skip classifier is 0
Student 11 is on topic T_8 is shown content C_8_1 feedback recd is 1. Expected Pay-off of this arm is 1.34, compared to expected pay-off of next round is 1.34. Actual decision was 1 with confidence 72.24066838193289. Decision of skip classifier is 1
We're skipping. Student 32 is on topic T_5 was expected to be shown content C_5_2. Expected Pay-off of this arm is 1.23, compared to expected pay-off of next round is 1.26. Actual decision was 1 with confidence 58.40551257089428 Decision of skip classifier is 1
Student 0 is on topic T_7 is shown content C_7_3 feedback recd is 1. Expected Pay-off of this arm is 1.18, compared to expected pay-off of next round is 1.38. Actual decision was 1 with confidence 66.83994386925391. Decision of skip clas

Student 34 is on topic T_7 is shown content C_7_1 feedback recd is 0. Expected Pay-off of this arm is 1.15, compared to expected pay-off of next round is 1.39. Actual decision was 1 with confidence 12.49872839548022. Decision of skip classifier is 0
Student 36 is on topic T_7 is shown content C_7_1 feedback recd is 1. Expected Pay-off of this arm is 1.2, compared to expected pay-off of next round is 1.43. Actual decision was 1 with confidence 14.818163542722122. Decision of skip classifier is 0
Student 7 is on topic T_5 is shown content C_5_2 feedback recd is 0. Expected Pay-off of this arm is 1.14, compared to expected pay-off of next round is 1.44. Actual decision was 1 with confidence 16.47024388705496. Decision of skip classifier is 0
Student 44 is on topic T_8 is shown content C_8_1 feedback recd is 1. Expected Pay-off of this arm is 1.26, compared to expected pay-off of next round is 1.26. Actual decision was 1 with confidence 8.413572437987524. Decision of skip classifier is 0
S

Student 24 is on topic T_5 is shown content C_5_3 feedback recd is 0. Expected Pay-off of this arm is 1.14, compared to expected pay-off of next round is 0.94. Actual decision was 1 with confidence 5.31022138627138. Decision of skip classifier is 0
Student 8 is on topic T_5 is shown content C_5_4 feedback recd is 1. Expected Pay-off of this arm is 0.98, compared to expected pay-off of next round is 1.09. Actual decision was 1 with confidence 16.73829156170996. Decision of skip classifier is 0
Student 25 is on topic T_4 is shown content C_4_2 feedback recd is 1. Expected Pay-off of this arm is 1.09, compared to expected pay-off of next round is 0.98. Actual decision was 0 with confidence -35.88620094585629. Decision of skip classifier is 0
Student 30 is on topic T_7 is shown content C_7_1 feedback recd is 1. Expected Pay-off of this arm is 1.37, compared to expected pay-off of next round is 1.68. Actual decision was 0 with confidence -57.73757757234739. Decision of skip classifier is 0


Student 32 is on topic T_6 is shown content C_6_3 feedback recd is 0. Expected Pay-off of this arm is 1.07, compared to expected pay-off of next round is 0.96. Actual decision was 0 with confidence -18.85221285799034. Decision of skip classifier is 0
Student 19 is on topic T_8 is shown content C_8_2 feedback recd is 0. Expected Pay-off of this arm is 1.14, compared to expected pay-off of next round is 1.03. Actual decision was 1 with confidence 4.449791944319636. Decision of skip classifier is 0
Student 0 is on topic T_9 is shown content C_9_2 feedback recd is 1. Expected Pay-off of this arm is 1.21, compared to expected pay-off of next round is 1.4. Actual decision was 0 with confidence -9.71012569466458. Decision of skip classifier is 0
Student 38 is on topic T_7 is shown content C_7_3 feedback recd is 1. Expected Pay-off of this arm is 0.98, compared to expected pay-off of next round is 1.34. Actual decision was 1 with confidence 10.336318490704107. Decision of skip classifier is 0


Student 1 is on topic T_8 is shown content C_8_2 feedback recd is 1. Expected Pay-off of this arm is 1.25, compared to expected pay-off of next round is 0.8. Actual decision was 1 with confidence 43.88501017059953. Decision of skip classifier is 0
Student 36 is on topic T_9 is shown content C_9_2 feedback recd is 1. Expected Pay-off of this arm is 1.26, compared to expected pay-off of next round is 1.44. Actual decision was 1 with confidence 32.542498417745264. Decision of skip classifier is 0
Student 17 is on topic T_7 is shown content C_7_2 feedback recd is 1. Expected Pay-off of this arm is 1.14, compared to expected pay-off of next round is 1.38. Actual decision was 1 with confidence 16.32234103888049. Decision of skip classifier is 0
Student 46 is on topic T_8 is shown content C_8_2 feedback recd is 0. Expected Pay-off of this arm is 1.32, compared to expected pay-off of next round is 0.75. Actual decision was 1 with confidence 47.24928473861241. Decision of skip classifier is 0
S

Student 42 is on topic T_6 is shown content C_6_3 feedback recd is 0. Expected Pay-off of this arm is 0.71, compared to expected pay-off of next round is 1.06. Actual decision was 0 with confidence -2.0991826400556786. Decision of skip classifier is 0
Student 30 is on topic T_9 is shown content C_9_2 feedback recd is 0. Expected Pay-off of this arm is 0.82, compared to expected pay-off of next round is 1.45. Actual decision was 0 with confidence -13.159550094123919. Decision of skip classifier is 0
Student 18 is on topic T_9 is shown content C_9_1 feedback recd is 1. Expected Pay-off of this arm is 0.72, compared to expected pay-off of next round is 1.41. Actual decision was 0 with confidence -12.061781065252529. Decision of skip classifier is 0
Student 15 is on topic T_8 is shown content C_8_3 feedback recd is 1. Expected Pay-off of this arm is 1.26, compared to expected pay-off of next round is 0.93. Actual decision was 1 with confidence 22.034019459681858. Decision of skip classifie

Student 9 is on topic T_6 is shown content C_6_1 feedback recd is 1. Expected Pay-off of this arm is 0.71, compared to expected pay-off of next round is 0.87. Actual decision was 1 with confidence 6.538159269078797. Decision of skip classifier is 0
Student 4 is on topic T_9 is shown content C_9_2 feedback recd is 1. Expected Pay-off of this arm is 1.11, compared to expected pay-off of next round is 1.26. Actual decision was 1 with confidence 6.282525012800633. Decision of skip classifier is 0
Student 28 is on topic T_7 is shown content C_7_3 feedback recd is 1. Expected Pay-off of this arm is 0.85, compared to expected pay-off of next round is 1.18. Actual decision was 0 with confidence -1.8024717676243505. Decision of skip classifier is 0
Student 23 is on topic T_8 is shown content C_8_3 feedback recd is 0. Expected Pay-off of this arm is 1.11, compared to expected pay-off of next round is 0.79. Actual decision was 0 with confidence -37.80051524677951. Decision of skip classifier is 0

Student 16 is on topic T_7 is shown content C_7_3 feedback recd is 0. Expected Pay-off of this arm is 0.78, compared to expected pay-off of next round is 0.86. Actual decision was 1 with confidence 1.710917924771726. Decision of skip classifier is 0
Student 31 is on topic T_10 is shown content C_10_1 feedback recd is 1. Expected Pay-off of this arm is 1.46, compared to expected pay-off of next round is 0. Actual decision was 1 with confidence 23.31657830213025. Decision of skip classifier is 0
Student 49 is on topic T_9 is shown content C_9_2 feedback recd is 0. Expected Pay-off of this arm is 0.99, compared to expected pay-off of next round is 1.58. Actual decision was 1 with confidence 3.0203518870358774. Decision of skip classifier is 0
Student 12 is on topic T_10 is shown content C_10_1 feedback recd is 0. Expected Pay-off of this arm is 1.24, compared to expected pay-off of next round is 0. Actual decision was 1 with confidence 5.134606202782091. Decision of skip classifier is 0
S

Student 48 is on topic T_8 is shown content C_8_2 feedback recd is 1. Expected Pay-off of this arm is 0.72, compared to expected pay-off of next round is 0.93. Actual decision was 0 with confidence -18.615953382183115. Decision of skip classifier is 0
Student 27 is on topic T_10 is shown content C_10_1 feedback recd is 0. Expected Pay-off of this arm is 1.27, compared to expected pay-off of next round is 0. Actual decision was 0 with confidence -2.1588811899227034. Decision of skip classifier is 0
Student 24 is on topic T_8 is shown content C_8_3 feedback recd is 0. Expected Pay-off of this arm is 0.84, compared to expected pay-off of next round is 1.07. Actual decision was 0 with confidence -13.86736827479358. Decision of skip classifier is 0
Student 34 is on topic T_10 is shown content C_10_2 feedback recd is 1. Expected Pay-off of this arm is 0.64, compared to expected pay-off of next round is 0. Actual decision was 0 with confidence -2.0589850558035963. Decision of skip classifier 

Student 37 is on topic T_10 is shown content C_10_1 feedback recd is 1. Expected Pay-off of this arm is 1.1, compared to expected pay-off of next round is 0. Actual decision was 1 with confidence 10.164896271747327. Decision of skip classifier is 0
Student 29 is on topic T_7 is shown content C_7_2 feedback recd is 0. Expected Pay-off of this arm is 1.01, compared to expected pay-off of next round is 1.25. Actual decision was 1 with confidence 0.2627599364439428. Decision of skip classifier is 0
Student 14 is on topic T_10 is shown content C_10_2 feedback recd is 1. Expected Pay-off of this arm is 0.42, compared to expected pay-off of next round is 0. Actual decision was 0 with confidence -10.726190172693876. Decision of skip classifier is 0
Student 42 is on topic T_7 is shown content C_7_1 feedback recd is 1. Expected Pay-off of this arm is 0.77, compared to expected pay-off of next round is 0.82. Actual decision was 0 with confidence -20.44964548163208. Decision of skip classifier is 

Student 29 is on topic T_8 is shown content C_8_2 feedback recd is 0. Expected Pay-off of this arm is 1.01, compared to expected pay-off of next round is 0.88. Actual decision was 0 with confidence -3.019182808042588. Decision of skip classifier is 0
Student 43 is on topic T_8 is shown content C_8_2 feedback recd is 0. Expected Pay-off of this arm is 0.54, compared to expected pay-off of next round is 0.81. Actual decision was 1 with confidence 17.536596065377736. Decision of skip classifier is 0
Student 42 is on topic T_8 is shown content C_8_3 feedback recd is 1. Expected Pay-off of this arm is 0.8, compared to expected pay-off of next round is 0.9. Actual decision was 1 with confidence 13.237110504700121. Decision of skip classifier is 0
Student 48 is on topic T_10 is shown content C_10_2 feedback recd is 1. Expected Pay-off of this arm is 0.85, compared to expected pay-off of next round is 0. Actual decision was 0 with confidence -12.010074847015407. Decision of skip classifier is 

Student 2 is on topic T_9 is shown content C_9_2 feedback recd is 0. Expected Pay-off of this arm is 0.99, compared to expected pay-off of next round is 0.93. Actual decision was 1 with confidence 6.279862954564692. Decision of skip classifier is 0
Student 13 is on topic T_9 is shown content C_9_1 feedback recd is 1. Expected Pay-off of this arm is 0.89, compared to expected pay-off of next round is 0.87. Actual decision was 1 with confidence 25.980239291946802. Decision of skip classifier is 0
Student 32 is on topic T_9 is shown content C_9_2 feedback recd is 0. Expected Pay-off of this arm is 0.53, compared to expected pay-off of next round is 0.66. Actual decision was 0 with confidence -35.43079538454266. Decision of skip classifier is 0
Student 8 is on topic T_10 is shown content C_10_1 feedback recd is 0. Expected Pay-off of this arm is 1.14, compared to expected pay-off of next round is 0. Actual decision was 0 with confidence -31.92385198287878. Decision of skip classifier is 0


Student 43 is on topic T_10 is shown content C_10_1 feedback recd is 0. Expected Pay-off of this arm is 0.63, compared to expected pay-off of next round is 0. Actual decision was 1 with confidence 14.322143241442525. Decision of skip classifier is 0
Student 9 is on topic T_9 is shown content C_9_2 feedback recd is 0. Expected Pay-off of this arm is 0.36, compared to expected pay-off of next round is 0.58. Actual decision was 0 with confidence -7.2651435045431025. Decision of skip classifier is 0
Student 25 is on topic T_8 is shown content C_8_1 feedback recd is 1. Expected Pay-off of this arm is 1.05, compared to expected pay-off of next round is 0.76. Actual decision was 1 with confidence 17.47846049990629. Decision of skip classifier is 0
Student 9 is on topic T_10 is shown content C_10_2 feedback recd is 1. Expected Pay-off of this arm is 0.58, compared to expected pay-off of next round is 0. Actual decision was 0 with confidence -1.2421733579638463. Decision of skip classifier is 0

In [35]:
# Read the run log 'logs_linUCB_verySmall' back into a DataFrame for inspection.
# NOTE(review): the rendered output carries an 'Unnamed: 0' column, which suggests the
# CSV was saved together with its index; passing index_col=0 may be appropriate --
# confirm that no later cell relies on that column before changing it.
df = pd.read_csv('logs_linUCB_verySmall')

# Alternative views kept for exploration (uncomment one and make it the last line):
#   df                                -> the whole log
#   df.shape                          -> (number of rows, number of columns)
#   df.loc[df['reward'].isnull()]     -> rows whose reward is NaN, i.e. rounds where
#                                        the topic was skipped

# Display every logged round that belongs to student 1.
df.loc[df['student_number'].eq(1)]

Unnamed: 0,student_number,topic,arm_pulled,pay-off,pay-off_next_topic,actual_decision,skip_decision,reward,confidence_score
1,1,T_1,C_1_1,1.39,1.35,1,1,0.0,64.747253
41,1,T_1,C_1_2,1.25,1.35,1,1,,55.866829
94,1,T_2,C_2_3,1.35,1.35,1,1,0.0,70.834858
139,1,T_2,C_2_4,1.3,1.35,0,0,1.0,-23.922711
191,1,T_3,C_3_1,1.32,1.27,1,0,1.0,54.519186
246,1,T_4,C_4_1,1.19,1.32,1,0,1.0,26.955388
301,1,T_5,C_5_4,1.22,1.27,1,0,1.0,2.58631
351,1,T_6,C_6_1,1.27,1.36,0,0,0.0,-37.617634
397,1,T_6,C_6_3,1.23,1.36,0,0,1.0,-32.262874
451,1,T_7,C_7_3,1.34,1.32,0,0,0.0,-53.266266
