In [57]:
#This is the learning algoritm which would be used. It has skipping, multi-threading enabled. 

# Put alpha = 0 & evaluate performance. In that case it would employ greedy strategy. Pick the arm with max expected reward. 
# Set confidenc_threshold = 50 to minimize skipping



In [58]:
import numpy as np
import pandas as pd

# Contexts

In [59]:
import os,pickle

file_path = os.path.join(os.path.curdir, '..' , 'dataset' , 'very_small')
# file_path = os.path.join(os.path.curdir, '..' , 'dataset' , 'small')
# file_path = os.path.join(os.path.curdir, '..' , 'dataset' , 'medium')
# file_path = os.path.join(os.path.curdir, '..' , 'dataset' , 'large')
# file_path = os.path.join(os.path.curdir, '..' , 'dataset' , 'very_large')
'''
Context data for learning
'''
class Context:
    """
    Contextual information required by contextual bandit algorithms to make better predictions. It enscapsulates all data
    about the student , topics & content to experiment with the learning algorithm. 
    """
    
    def getStudentContext(self):
        """
        Student Preferences: 
        Visual (S_V) , Text (S_T) , Demo-based (S_D) , Practical (S_P), Step-by-step (S_S) ,Activity / Task based (S_AT), 
        Lecture (S_L) , Audio (S_A) , Self-evaluation (S_SE) , Pre-assessment (S_PA)
        Students preference to learning via various ways are evaluated on a scale from 0 to 1, rather being binary. 
        """
        return self.studentContext
    
    def setStudentContext(self):
        """
        Load the student data
        """
        with open(os.path.join(file_path , 'student.pickle'), 'rb') as student_file:
            self.studentContext= pickle.load(student_file)
    
    
    def getContentContext(self):
        """
        Content Features 
        Ease of understanding (C_E) , Simple / Intuitive (C_I) , Surface / In-depth (C_ID) , Brief / Concise (C_C), 
        Thorough (C_T), Preference / Well reviewed / Well rated (C_R) , Theoritical / Abstract (C_A), 
        Practical / Hands on (C_P), Experimental / Task-based (C_ETB)
        Content preference to learning via various ways are evaluated on a scale from 0 to 1, rather being binary. 
        """
        return self.contentContext
   
    def setContentContext(self):
        """
        Load the content data
        """
        with open(os.path.join(file_path ,'content.pickle'), 'rb') as content_file:
            self.contentContext= pickle.load(content_file)
        
    def getTopic(self):
        """
        Gives the topics part of the course.
        """
        return self.topic
    
    def setTopic(self):
        """
        Loads the topics part of the course
        """
        with open(os.path.join(file_path ,'topic.pickle'), 'rb') as topic_file:
            self.topic = pickle.load(topic_file)
    
    def getTopicContent(self):
        """
         Gets the topic content. topic_content is a map of topics to content. So for every topic, it gives the content 
         available for the topic. In education parlance, for any given topic, it shows the different ways of teaching this
         topic (via contents)
        """
        return self.topic_content
    
    def setTopicContent(self):
        """
        Sets the topic_content variable to the one in the serialized object. topic_content is a map of topics to content. So
        for every topic, it gives the content available for the topic. In education parlance, for any given topic, it shows
        the different ways of teaching this topic (via contents)
        """
        with open(os.path.join(file_path ,'topic_content.pickle'), 'rb') as topic_content_file:
            self.topic_content= pickle.load(topic_content_file)
                
    def prepareContext(self,studentContext,contentContext):
        """
           Given the student & content context available for a round, this method combines them to form a single contextual
           variable
           
           Inputs : 
           
           studentContext: Student contextual information.
           contentContext: Contents contextual information. 
           
           Returns :
           
           context : A combined output of student & content context.
        """
        context = pd.DataFrame() 
        for content in list(contentContext.index):
            c = pd.Series()
            c = c.append([studentContext,contentContext.loc[content]]) # Combine student & content. 
            c['Content_id'] = content
            context = context.append(c, ignore_index=True)
        context = context.set_index('Content_id')
        return context
    
    
    def loadData(self):
        """
        Method used to test data retrieval. Data generator handles the data generation. This method checks we can retrieve
        data. This is a dummy method used to test data retrieval. Its not invoked in the main program.
        """
        self.setStudentContext()
        self.setContentContext()
        self.setTopic()
        self.setTopicContent()
        print(self.getStudentContext())
        print(type(self.getStudentContext()))
        print('*********************************')
        print(self.getContentContext())
        print(type(self.getContentContext()))
        print('*********************************')
        print(self.getTopic())
        print(type(self.getTopic()))
        print('*********************************')
        print(self.getTopicContent())
        print(type(self.getTopicContent()))
                
c_test = Context()
c_test.loadData()

     S_V   S_T   S_D   S_P   S_S  S_AT   S_L   S_A  S_SE  S_PA
0   0.73  0.66  0.39  0.51  0.01  0.24  0.83  0.93  0.14  0.59
1   0.68  0.11  0.19  0.86  0.37  0.73  0.33  0.72  0.53  0.64
2   0.13  0.84  0.32  0.49  0.47  0.88  0.16  0.85  0.27  0.02
3   0.62  0.64  0.33  0.18  0.41  0.45  0.46  0.19  0.36  0.51
4   0.69  0.74  0.71  0.11  0.73  0.67  0.29  0.45  0.78  0.09
5   0.93  0.89  0.80  0.85  0.03  0.23  0.64  0.40  0.91  0.59
6   0.77  0.27  0.32  0.06  0.67  0.38  0.23  0.36  0.92  0.99
7   0.53  0.49  0.49  0.42  0.57  0.06  0.82  0.17  0.64  0.58
8   0.86  0.08  0.17  0.35  0.89  0.95  0.70  0.05  0.17  0.86
9   0.79  0.42  0.28  0.91  0.80  0.13  0.27  0.50  0.70  0.76
10  0.29  0.18  0.68  0.36  0.51  0.18  0.88  0.73  0.55  0.56
11  0.74  0.76  0.80  0.97  0.28  0.91  0.57  0.88  0.04  0.35
12  0.61  0.86  0.18  0.59  0.24  0.51  0.59  0.15  0.19  0.06
13  0.12  0.29  0.16  0.20  0.26  0.71  0.85  0.08  0.12  0.88
14  0.92  0.66  0.74  0.85  0.88  0.07  0.04  0.39  0.3

# Skip Classifier

In [60]:
# Online Stochastic Gradient Descent. This classifier decides whether or not to skip to the next topic. 
# TO-DO : Change loss functions (Log,Hinge,Others) to find if they impact performance. Try different values of parameters 
# For instance SGD has a parameter alpha, SVM has a parameter C. To optimize, you can train a mini-batch of samples, 
# rather than one data point at a time. Try different values of learning_rate . Look at the class_weight parameter if you 
# want to give more weight to samples of one class over the other. Need to understand about warm_start parameter
# We need to record predictions made by the classifier to evaluate its performance over rounds 
from sklearn import linear_model
import os.path
import threading
class SkipClassifier:
    """
    A classifier which gives prediction, whether or not to move to the next topic. This is important, because we want 
    students to learn content which the algorithm is confident would help the student learn. The skip classifier is trained
    online, hence we use a confidence threshold, to be conservative & minimize skipping topics. Skipping is not preferred, 
    but if the classifier is confident the next round would help gain higher rewards, then we should skip. Ideally, we want 
    to consider skipping after the first pulled arm has failed, to avoid frustrating the student. 
    """
    def __init__(self):
        if os.path.exists('skip_classifier_small.sav'):
            self.clf = pickle.load(open('skip_classifier_small.sav', 'rb'))
        else:
            self.clf = linear_model.SGDClassifier()
            self.clf.partial_fit(np.array([[0,0,0,0,0,0,0,0,0,0,0,0]]),np.array([0]),classes=np.array([0,1])) # Used to initialize the skip classifier
        self.classifier_lock = threading.Lock()
        
    def check_fitted(self,clf): 
        """
        Check if the classifier is fit before asking for prediction. Our classifier is trained in online mode, hence it would
        be asked to predict before fitting. This method makes sure we only ask for prediction after a data point has been 
        fit to the estimator/model
        """
        return hasattr(clf, "classes_")
    
    def train(self,student,pta,next_topic_pta,label):
        """
        Used to train the classifier in online mode, over every data point. In future we might want to consider training in 
        mini-batches, rather than for every data point. 
        """
        X = pd.Series()
        X = X.append([student,pd.Series([pta,next_topic_pta],index=['pta','next_topic_pta'])])
        X = np.array([X.values])
        Y = np.array([label])
        clf = self.clf.partial_fit(X,Y,classes=np.array([0,1]))
        pickle.dump(clf, open('skip_classifier_small.sav', 'wb'))
            
    def predict(self,student,pta,next_topic_pta):
        """
        Gets predictions from the classifier, along with the confidence score to help determine the reliability / confidence
        level of the prediction being made. 
        """
#         with self.classifier_lock:
#             print('pta : {0} , next_topic_pta : {1}'.format(pta,next_topic_pta))
        X = pd.Series()
        X = X.append([student,pd.Series([pta,next_topic_pta],index=['pta','next_topic_pta'])])
        if self.check_fitted(self.clf):
            Y = self.clf.predict([X.values])[0]
            confidence_score = self.clf.decision_function([X.values])[0]
        else:
            Y = 0
            confidence_score = 0
        return int(Y) , confidence_score

# Skip Topic

In [61]:
class SkipTopic:
    """
    A wrapper around the Skip Classifier to validate the inputs, before sending it to Skip Classifier for prediction. 
    It post-processes the results of the prediction made by skip classifier to check for confidence threshold, 
    before sending out the decision to skip or not. 
    """
    def __init__(self):
        """
        Initializes the SkipTopic class & sets confidence threshold to make confident skip decisions.
        """
        self.skipClassifier = SkipClassifier()
        self.confidence_threshold = 100 # It the confidence score returned by the classifier is greater than this, then we trust in the decision made by the classifier. 
                
    def skipTopic(self,student,pta,topic_number,context_obj,topic_content,linUCB):
        """
        Pre-validates the topic number before asking the skip classifier for a prediction. Then checks the confidence 
        of the prediction before sending out the decision to skip or not. 
        """
        contentContext = context_obj.getContentContext() # Get the content dataframe.
        topic = context_obj.getTopic() # Get the topic list. 
        current_topic_index = topic.index(topic_number) # Get the index number of the current topic
        next_topic_index = current_topic_index + 1
        next_topic = '' # Initialized to make it accessible outside the if statement. 
        if next_topic_index < len(topic): # Check to see if we're going out of bounds
            next_topic = topic[next_topic_index]
            next_topic_contents = topic_content[next_topic]
            t_c = contentContext.loc[next_topic_contents]
            X = context_obj.prepareContext(student,t_c)
            arm_pulled , next_topic_pta = linUCB.expectedPayoff(X,next_topic_contents)
        else:
            # Will be going out of bounds. Current topic is the last topic. No more topics to complete. 
            next_topic_pta = 0
        actual_decision , confidence_score = self.skipClassifier.predict(student,pta,next_topic_pta)
        if actual_decision and confidence_score > self.confidence_threshold:
            skip_decision = 1
        else:
            skip_decision = 0
        return actual_decision,confidence_score,skip_decision,next_topic_pta
        
    def setLabel(self,actual_payoff):
        """
        Sets the label for training the skip classifier
        """
        if actual_payoff == -1:
            label = 1
        if actual_payoff == 1:
            label = 0
        return label
    
    def train(self,student,pta,pta_next_topic,label):
        """
        Training the skip classifier
        """
        self.skipClassifier.train(student,pta,pta_next_topic,label)

# Nature / Environment / Universe

In [62]:
class Oracle : 
    """
    It has the optimal parameters to maximize rewards. The learning algorithm updates its parameters to emulate its parameters
    It is an omniscient policy that knows all the probability distributions. At every step of the way, makes the best 
    decision based on its knowledge of the true distributions. Iit does not have to learn anything. 
    """
    def setParameters(self, contexts , arms):
        """
        Sets the optimal parameters for the omniscient policy. 
        """
        parameters = np.random.uniform(size=(len(arms) , len(contexts)))
        # Normalize parameters
#         for i in range(parameters.shape[0]): # Have it in a list comprehension.
#             parameters[i] = parameters[i] / np.sum(parameters[i])
        self.theta_df = pd.DataFrame(data = parameters ,  index = arms , columns = contexts , dtype= np.float)

# Learning Algorithm

In [63]:
import threading
class LinUCB:
    """
    The learning algorithm, which suggest the arm to be pulled / content to be shown. Based on students feedback it updates
    the parameters of the pulled arm. We're using the disjoint linear model described in this paper[LinUCB](https://arxiv.org/abs/1003.0146) 
    This is the most cited contextual bandit algorithm. 
    """
#     def __init__(self,alpha=0.001):
    def __init__(self,alpha=0.1):
        """
        Initialize the variables 
        alpha : To adjust confidence bounds. Higher alpha, implies arms would have higher confidence bounds. This parameter 
        should be tuned to be optimal.
        arm_params : A map , which for every arm stores the corresponding arm object used to update parameters of an arm.
        """
        self.alpha = alpha # Hyper parameter required for LinUCB to adjust confidence bounds.
        self.arm_params = {} # Maps content to arm object
        self.LinUCB_lock = threading.Lock()
                
    def expectedPayoff(self,contexts,arms):
        """
        Gives the max expected pay-off for a round with the given context & available arms. 
        
        Input : 
        
        contexts : Contextual data available in the round. Its a combination of student & content context
        arms : Arms / Content available in this round. 
        
        Returns : 
        
        arm_pulled : The arm that should be pulled 
        expected_payoff : Expected pay-off for the pulled suggested to be pulled. 
        
        The arm is not pulled up here as we also depend of the decision from the skip classifer before the arm is actually pulled        
        """
        arms_payoff = list()
        for arm in arms:
            X = contexts.loc[arm] # Give student & content context for an arm 
            if arm not in self.arm_params: # If new content is added, then parameters would be created for it. 
                self.arm_params[arm] = Arm(len(X.index)) # Arm class below, has arm specific parameters
            arm_obj = self.arm_params[arm]
            theta = self.getTheta(arm_obj) # Arm parameter. 
            pta = self.getPta(X , arm_obj) # pta : pay-off/reward at round 't' for arm 'a'. 
            arms_payoff.append(pta)
            # Commenting the normalization code to help the skipping feature make better decisions. 
#         for i in range(len(arms_payoff)): # Normalize arms_payoff. Required for cases when alpha > 1. Have it in a list comprehension.
#             arms_payoff[i] = arms_payoff[i] / np.sum(arms_payoff)
        expected_payoff = np.max(arms_payoff) # To be used a input data for skip algorithm
        arm_index = np.argmax(arms_payoff) # Find the index of the arm which max pay-off
        arm_pulled = arms[arm_index] # Give me the arm with max pay-off 
        return arm_pulled,np.round(expected_payoff,2)    
        
    def getTheta(self,arm):  
        """
        Get theta which is used to compute the mean reward for an arm
        """
        arm.theta = np.dot(arm.Ainv , arm.b) # A vector
        return arm.theta
    
    def getMean(self, context , arm):
        """
        Get mean expected reward for an arm 
        """
        mean = np.dot(arm.theta.T , context)
        return mean
        
    def getUCB(self , context ,arm):
        """
        Get upper confidence bound for an arm 
        """
        ucb = np.sqrt(np.dot(np.dot(context.T , arm.Ainv) , context))
        return ucb
    
    def getPta(self, context , arm):
        """
        Get expected pay-off for an arm 
        """
        payoff = self.getMean(context,arm) + self.alpha * self.getUCB(context , arm)
        return payoff
    
    def updateParams(self, arm , context, reward):
        """
        Update parameters for the pulled arm. 
        """
        arm_obj = self.arm_params[arm]
        arm_obj.A += np.outer(context,context.T)
        arm_obj.b += reward * context
        arm_obj.Ainv = np.linalg.inv(arm_obj.A)               
    
class Arm:
    """
    Arm class which enscapulates arm parameters, which are updated for an arm when its pulled. 
    """
    def __init__(self,dimensions):
        """
        Initialize the arm parameters. 
        """
        self.A = np.identity(dimensions)
        self.b = np.zeros(dimensions)
        self.Ainv = np.linalg.inv(self.A)
        self.theta = np.dot(self.Ainv , self.b)

# Simulator

In [64]:
# Normalize pta values before they are sent for prediction

In [65]:
import threading 
from scipy.stats import bernoulli
class Simulator:
    """
    It represents the teaching system. Several students log into it to take courses. 
    """
    
    def __init__(self):
        """
        Initialize the teaching system by loading data about students, topics & content. Also, initialize other objects to
        be used with the learning system, such as the learning algorithm (LinUCB), the omniscient policy (Oracle) , skip topic
        to optimize rewards, by skipping topics which have content with low expected rewards. 
        """
        self.context = Context()
        self.context.setStudentContext()
        self.context.setContentContext()
        self.context.setTopic()
        self.context.setTopicContent()
        self.oracle = Oracle()
        self.linUCB = LinUCB()
        self.skipTopic = SkipTopic()
        self.simulator_lock = threading.Lock()
        self.rounds=0
        self.logs = pd.DataFrame(columns = ['student_number','topic','arm_pulled','pay-off','pay-off_next_topic','actual_decision','skip_decision','skip_enabled'
                                            ,'reward']) 

    def getPayoff(self,X,arm,pta):
        """
        Student shares feedback about the content / understanding of the topic. 
        
        Input : 
        
        X : Context vector for the round. 
        arm : Arm to be pulled / Content to be shown
        pta : Payoff at round 't' for pulling arm 'a'
        
        Returns : 
        
        reward : Reward / Feedback from student for the content shown / arm pulled
        """
        arm_theta = self.oracle.theta_df.loc[arm] #Get parameters for the arm predicted by the learning algo
        expected_reward = pd.Series.dot(X,arm_theta) # Vector dim : (1 * d) (d * 1).
        reward = bernoulli.rvs(size=1,p=expected_reward)[0] # Simulate student's response    
        if reward == 0:
            reward = -1
        return reward , expected_reward
    
    def takeCourse(self,student_number,studentContext,contentContext,topic,topic_content):
        """
        This method simulates students taking a course. As part of it, students are presented content for various topics. 
        Students share their feedback, based on which we either move to the next topic or remain on the same topic.  
        We get the expected pay-off from the learning algoritm. We then decide whether to skip or remain on the same topic.
        If skip is true, then the student moves to the next topic, else the student remains on the same topic, shares feedback on 
        the content & we train the skip classifier with this feedback. This method drives the flow of the system, hence key data 
        elements available in this method are logged for analysis.
        
        Inputs : 
        
        student_number : Student Id 
        studentContext : Student context vector. 
        contentContext : Contents context. This has context of all contents for the topic. 
        topic : All the topics to be taught as part of the course. 
        topic_content : Relates all topics to the contents available for every topic     
         
        """
        for i in topic:
            skip_enabled = False
            contents = topic_content[i] # You now have all arms associated with the topic 'i'
            t_c = contentContext.loc[contents] # Get all arms/contents for topic 'i'
            contexts = self.context.prepareContext(studentContext,t_c) # Prepare context for this round
            arms = list(t_c.index) # Get a list of all arms
            while arms: # While we still have arms remaining. 
                arm , pta = self.linUCB.expectedPayoff(contexts,arms) # Get me the max expected pay-off for this round & the arm that would give that reward / pay-off
#                 with self.simulator_lock:
#                     print('PTA : ', pta)
                actual_decision , confidence_score, skip_decision , pta_next_topic = self.skipTopic.skipTopic(studentContext,pta,i,self.context,topic_content,self.linUCB) # Check if makes sense to skip this topic & move to the next one
                if skip_decision and skip_enabled: 
                    log = pd.Series([student_number,i,arm,pta,pta_next_topic,actual_decision,confidence_score,skip_decision,skip_enabled], 
                                        index=['student_number','topic','arm_pulled','pay-off',
                                                'pay-off_next_topic','actual_decision','confidence_score','skip_decision','skip_enabled']) # Print log for this round
                    with self.simulator_lock:
                        print('We\'re skipping. Student {0} is on topic {1} was expected to be shown content {2}. Expected Pay-off of this arm is {3}, compared to expected pay-off of next round is {4}. Actual decision was {5} with confidence {6} Decision of skip classifier is {7}'
                          .format(student_number,i,arm,pta,pta_next_topic,actual_decision,confidence_score,skip_decision))                    
                        self.logs = self.logs.append(log , ignore_index=True) # Log in a file
                    break # Decision is to skip. Hence, we won't pull the arm.  
                else:
                    actual_payoff , expected_reward = self.getPayoff(contexts.loc[arm],arm,pta) # Student shares feedback
                    log = pd.Series([student_number,i,arm,pta,pta_next_topic,actual_decision,confidence_score,skip_decision,skip_enabled,expected_reward,actual_payoff], 
                                        index=['student_number','topic','arm_pulled','pay-off',
                                                'pay-off_next_topic','actual_decision','confidence_score','skip_decision','skip_enabled','expected_reward','reward']) # Print log for this round
                    with self.simulator_lock:
                        self.rounds+=1
                        print('Student {0} is on topic {1} is shown content {2} feedback recd is {3}. Expected Pay-off of this arm is {4}, compared to expected pay-off of next round is {5}. Actual decision was {6} with confidence {7}. Decision of skip classifier is {8}. Expected reward was {9}'
                              .format(student_number,i,arm,actual_payoff,pta,pta_next_topic,actual_decision,confidence_score,skip_decision,expected_reward))
                        self.logs = self.logs.append(log , ignore_index=True) # Log in a file
                    self.linUCB.updateParams(arm,contexts.loc[arm], actual_payoff) # Update arm parameters for this round
                    label = self.skipTopic.setLabel(actual_payoff) # Set label to train skip classifier
                    self.skipTopic.train(studentContext,pta,pta_next_topic,label) # Train skip classifier. 
                if actual_payoff != 1: # If we got no reward for this round
                    arms.remove(arm) #  # lets the that arm. 
                    skip_enabled = True
                else:
                    break # Move to the next topic 

    def main(self):
        """
        Its the main method. Its in the name :)
        """
        studentContext = self.context.getStudentContext() # Student dataframe
        contentContext = self.context.getContentContext() # Content Dataframe
        topic = self.context.getTopic() # List of topics. 
        topic_content = self.context.getTopicContent() # Topics Data, which includes topics to content mapping.
        features = list(studentContext.columns) + list(contentContext.columns)
        self.oracle.setParameters(features , contentContext.index)
        student_thread = list() # Keep track of students taking the course. 
        for student_number , student in studentContext.iterrows():
            t = threading.Thread(target=self.takeCourse, args=(student_number,student,contentContext,topic,topic_content))
            student_thread.append(t)
            # Some threads do background tasks, like sending keepalive packets, or performing periodic garbage collection, or 
            # whatever. These are only useful when the main program is running, and it's okay to kill them off once the other, 
            # non-daemon, threads have exited. Once the main thread finishes & one of the student is still working through the course. 
            # we will wait for the student to complete the course, since the main thread is completed. We want all students 
            # to complete the course. Hence, setting daemon to False
            t.daemon = False # classifying as a non-daemon, so they will npt die even when the main thread does. 
            t.start() # begins, must come after daemon definition
        for t in student_thread: # This is done to ensure, we proceed to save the logs only after all students have completed the course. 
            t.join()
        self.logs.to_csv('logs_linUCB_verySmall_reward',index=False)
        print('Total Number of rounds : ', self.rounds)  

simulator = Simulator()
simulator.main()

Exception in thread Thread-457:
Traceback (most recent call last):
  File "/usr/lib/python3.5/threading.py", line 914, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.5/threading.py", line 862, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-65-ff8a69cf4f51>", line 87, in takeCourse
    actual_payoff , expected_reward = self.getPayoff(contexts.loc[arm],arm,pta) # Student shares feedback
  File "<ipython-input-65-ff8a69cf4f51>", line 43, in getPayoff
    reward = bernoulli.rvs(size=1,p=expected_reward)[0] # Simulate student's response
  File "/home/vagrant/.local/lib/python3.5/site-packages/scipy/stats/_distn_infrastructure.py", line 2809, in rvs
    return super(rv_discrete, self).rvs(*args, **kwargs)
  File "/home/vagrant/.local/lib/python3.5/site-packages/scipy/stats/_distn_infrastructure.py", line 940, in rvs
    raise ValueError("Domain error in arguments.")
ValueError: Domain error in arguments.

Exception in thread Thread-455:
Traceback (mo

Exception in thread Thread-466:
Traceback (most recent call last):
  File "/usr/lib/python3.5/threading.py", line 914, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.5/threading.py", line 862, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-65-ff8a69cf4f51>", line 87, in takeCourse
    actual_payoff , expected_reward = self.getPayoff(contexts.loc[arm],arm,pta) # Student shares feedback
  File "<ipython-input-65-ff8a69cf4f51>", line 43, in getPayoff
    reward = bernoulli.rvs(size=1,p=expected_reward)[0] # Simulate student's response
  File "/home/vagrant/.local/lib/python3.5/site-packages/scipy/stats/_distn_infrastructure.py", line 2809, in rvs
    return super(rv_discrete, self).rvs(*args, **kwargs)
  File "/home/vagrant/.local/lib/python3.5/site-packages/scipy/stats/_distn_infrastructure.py", line 940, in rvs
    raise ValueError("Domain error in arguments.")
ValueError: Domain error in arguments.

Exception in thread Thread-485:
Traceback (mo

Exception in thread Thread-481:
Traceback (most recent call last):
  File "/usr/lib/python3.5/threading.py", line 914, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.5/threading.py", line 862, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-65-ff8a69cf4f51>", line 87, in takeCourse
    actual_payoff , expected_reward = self.getPayoff(contexts.loc[arm],arm,pta) # Student shares feedback
  File "<ipython-input-65-ff8a69cf4f51>", line 43, in getPayoff
    reward = bernoulli.rvs(size=1,p=expected_reward)[0] # Simulate student's response
  File "/home/vagrant/.local/lib/python3.5/site-packages/scipy/stats/_distn_infrastructure.py", line 2809, in rvs
    return super(rv_discrete, self).rvs(*args, **kwargs)
  File "/home/vagrant/.local/lib/python3.5/site-packages/scipy/stats/_distn_infrastructure.py", line 940, in rvs
    raise ValueError("Domain error in arguments.")
ValueError: Domain error in arguments.

Exception in thread Thread-476:
Traceback (mo

Total Number of rounds :  0


Exception in thread Thread-469:
Traceback (most recent call last):
  File "/usr/lib/python3.5/threading.py", line 914, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.5/threading.py", line 862, in run
    self._target(*self._args, **self._kwargs)
  File "<ipython-input-65-ff8a69cf4f51>", line 87, in takeCourse
    actual_payoff , expected_reward = self.getPayoff(contexts.loc[arm],arm,pta) # Student shares feedback
  File "<ipython-input-65-ff8a69cf4f51>", line 43, in getPayoff
    reward = bernoulli.rvs(size=1,p=expected_reward)[0] # Simulate student's response
  File "/home/vagrant/.local/lib/python3.5/site-packages/scipy/stats/_distn_infrastructure.py", line 2809, in rvs
    return super(rv_discrete, self).rvs(*args, **kwargs)
  File "/home/vagrant/.local/lib/python3.5/site-packages/scipy/stats/_distn_infrastructure.py", line 940, in rvs
    raise ValueError("Domain error in arguments.")
ValueError: Domain error in arguments.

Exception in thread Thread-479:
Traceback (mo

## Experiements 

1) $\alpha$ = 2 . Confidence threshold = 100 (No skipping). very_small data size. Contents between 5 - 20 
2) $\alpha$ = 0.5 . Confi thresh = 100

## Analysis 

- How much do the rounds increase of increasing values of $\alpha$ ? 
- How well are contents explored for different values of $\alpha$ ? This would show the need to not make $\alpha$ too small or too big
- 

## Evaluation 

#### Very Small - Upto 20 contents per topic. 

When the number of contents per topic are limited upto a certain range, the learning algorithm performs as well as the omniscient policy. However, as the number of contents per topic increases, it performs sub-optimally. $\textit{Show the chart when number of contents per topic can be upto 20}$ 

When we enabled skipping & 

We found the algorithm performs on par with the omniscient policy when the number of contents per topic is upto 10. However, when the number of contents per topic goes beyond 10 its performance degrades. This is because it has to explore more contents.

Skipping algorithm , learns in online mode. Its not been pre-trained. It would be interesting to see how it performs when its pre-trained. 

PS: Whenever we give range, its inclusive of both numbers specified in the range

# Report

## Experiments

- We'll refer to the learning algorithm as learner 
- After finding the best content for different context, the learner now knows the best arm to pull. We refer to this as exploitation. This is inline with Exploration - Exploitation dilemma explained in Chapter 2 \ref{exploreExploit}. Exploration refers to finding the best content for different contexts, where as exploitation refers to using the knowledge acquired during exploration to present the best content for a student. 
- 

Since, we have created the dataset, we had the option to experiment against different combinations. 

- Very Small : 50 students with 10 topics
- Small :  100 students with 25 topics
- Medium : 200 students with 50 topics
- Large : 400 students with 100 topics
- Very Large : 800 students with 200 topics

Students - Varied from 50 to 800

Topics - Varied from 10 to 200

Contents - When the number of contents per topic was less say upto 4, then the learning algorithm performed as well as the omniscient policy. This would be because there was limited exploration of contents & the learner was able to find the best content for every topic for different students relatively quickly. However, when the number of contents per topic was increased to say upto 20, then the learner performed sub-optimally. This was natural, as now there were many more contents to explore. Hence the learner would need more rounds to converge to the best content for every topic \& student. 

$\alpha$ : Having a higher value of $\alpha , like 2.0 encourged the learner to explore more, before exploting. 

confidence_threshold : To disabled skipping set this threshold to 100. A low value on the skipping threshold could encourage skipping at the first attempt of failing to understand a topic. Hence, setting this threshold to an acceptable value, is the perogative of the instructor / teacher. 

Performance of skipping algorithm : The skipping algorithm is an online algorithm, which learns in real-time. 

## Evaluation

Modelling non-stationary student responses is non-trivial as there are external factors beyond the control of the system. Concepts like attitude, study patterns, motivation are abstract concepts which are well understood, but cannot be measured / derived mathematically.

## Exp

- For small, make number of students as 25 & increases contents to 100. 
- 

## small size upto 20 contents. No skipping. 

We observe that the cumulative reward increases linearly for the omniscient policy. Compared to it, the learner's cumulative reward is sub-optimal, which is expected, as it has to learn the optimal parameters. 

## For different values of $\alpha$ 

From experiments we found that setting $\alpha$ too high reduces cumulative reward. This is because the learner spends more time exploring. However, setting a low value of $\alpha$ prevents exploration \& restricts learning, as the learner is not able to evaluate different contents / explanations. Based on empirical data, we found $\alpha = 0.5$ to be optimal. This can be shown by the graph which shows that this value of alpha helps the learner increase its cumulative reward. 





## Conclusion

In [10]:
import matplotlib.pyplot as plt
%matplotlib inline

df = pd.read_csv('logs_linUCB_small')
df[df['confidence_score'] < 0]


Unnamed: 0,student_number,topic,arm_pulled,pay-off,pay-off_next_topic,actual_decision,skip_decision,reward,confidence_score,expected_reward
0,2,T_1,C_1_14,0.44,0.48,0,0,1.0,-10.000000,0.622314
1,4,T_1,C_1_14,0.43,0.48,0,0,1.0,-10.000000,0.645088
2,0,T_1,C_1_14,0.44,0.48,0,0,0.0,-10.000000,0.635686
5,53,T_1,C_1_14,0.42,0.46,0,0,0.0,-10.518954,0.567077
6,67,T_1,C_1_14,0.45,0.49,0,0,1.0,-2.588675,0.667801
7,7,T_1,C_1_14,0.41,0.45,0,0,0.0,-5.699233,0.513402
9,74,T_1,C_1_14,0.42,0.46,0,0,0.0,-5.540866,0.584603
15,75,T_1,C_1_14,0.45,0.46,0,0,0.0,-14.322840,0.568732
16,19,T_1,C_1_14,0.45,0.46,0,0,1.0,-4.012978,0.556236
17,14,T_1,C_1_14,0.48,0.48,0,0,1.0,-9.314895,0.669952
