# Feedback Loops in Adaptive Learning Systems

In [12]:
%matplotlib  inline
from collections import OrderedDict
from math import sqrt, log
from random import random, randint, choice
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

In [22]:
class ProxyStudent:
    """System-side belief about a student's skill, refined after every answer.

    ``skill`` is the current probability that the student has the skill; each
    observed answer updates it with Bayes' theorem.
    """

    def __init__(self, p_learn, p_good, init_skill=0.1):
        """
        Args:
            p_learn = probability of getting learned after 1 question
                (stored only; update_skill does not use it)
            p_good = [probability of good performance for skill=0, -||- skill=1]
            init_skill = initial probability that the student has the skill
        """
        self.p_learn = p_learn
        self.p_good = p_good
        self.skill = init_skill

    def update_skill(self, performance):
        """Called when student answers a question.

        Args:
            performance = observed answer, 1 (good) or 0 (bad); used as an index
        """
        # Performance at skill: p_perf_at_skill[performance][skill]
        p_perf_at_skill = [[1 - p for p in self.p_good], self.p_good]
        p_observed = p_perf_at_skill[performance]

        likelihood_good = self.skill * p_observed[1]
        likelihood_bad = (1 - self.skill) * p_observed[0]
        evidence = likelihood_good + likelihood_bad

        if evidence == 0:
            # The observed answer has zero probability under the current belief
            # (e.g. skill == 0 and p_good[0] == 0 with a good answer), so Bayes'
            # rule is undefined. Keep the belief as it is instead of dividing by 0.
            return

        # New skill according to Bayes theorem
        self.skill = likelihood_good / evidence
        
# Demo: feed ten random answers to a proxy student and print how the
# skill estimate moves after each one.
s = ProxyStudent(p_learn=0.1, p_good=[0.2, 0.7])
for i in range(10):
    # randint(0, 5) > 3 holds only for 4 and 5, so the answer is 1 in 2 of 6 draws.
    answer = 1 if randint(0, 5) > 3 else 0
    s.update_skill(answer)
    print(f'{answer} : {s.skill}')

0 : 0.04
0 : 0.015384615384615387
1 : 0.05185185185185186
1 : 0.16065573770491803
0 : 0.06697038724373577
0 : 0.026210995542347702
0 : 0.009992824502435894
0 : 0.0037708601532476056
0 : 0.0014174130990778546
1 : 0.004943428645481336


In [27]:
class TrueStudent:
    """Simulated student whose skill is either 0 or 1."""

    def __init__(self, p_learn, p_good, max_answers=20):
        """
        Args:
            p_learn = probability of getting learned after 1 question
            p_good = [probability of good performance for skill=0, -||- skill=1]
            max_answers = number of answers after which the student leaves
        """
        self.p_learn = p_learn
        self.p_good = p_good
        self.max_answers = max_answers
        # Every student starts without the skill and with no answers given.
        self.skill = 0
        self.n_answers = 0

    def update_skill(self, performance):
        """Called when student answers a question.

        The skill flips to 1 with probability p_learn; once it is 1 it stays
        there. The performance argument is not used.
        """
        # Short-circuit keeps random() from being drawn for a skilled student.
        if self.skill != 1 and random() < self.p_learn:
            self.skill = 1

    def answer(self):
        """Return a performance (0 or 1) for a new question and count the answer."""
        self.n_answers += 1
        success_chance = self.p_good[self.skill]
        return int(random() < success_chance)

    def leave(self, mastery_decision):
        """Tell whether the student stops: mastery was declared or the answer limit is hit."""
        if mastery_decision:
            return mastery_decision
        return self.n_answers >= self.max_answers
    
# Sanity check: with p_learn=1.0 a single update_skill call sets the skill to 1.
s = TrueStudent(p_learn=1.0, p_good=(0.2, 0.7))
s.update_skill(1)
# Bare expression on the last line so the notebook displays the value.
s.skill

1

In [30]:
def proxy_objective_solved_tasks(histories):
    """
    Count all answered tasks across the given students.

    Args:
        histories: history of each student (one sequence of performances per student)

    Returns:
        Total number of entries over all histories.
    """
    total = 0
    for history in histories:
        total += len(history)
    return total

def proxy_objective_successful_tasks(histories):
    """
    Sum the performances over all students' histories.

    Args:
        histories: history of each student (one sequence of performances per student)

    Returns:
        Sum of every entry of every history.
    """
    total = 0
    for history in histories:
        total += sum(history)
    return total

class LearningSystem:
    """Learning system that moves its mastery threshold towards the better of two trial values."""

    def __init__(self, threshold, proxy_objective, threshold_delta=0.05):
        """
        Args:
            threshold: starting mastery threshold
            proxy_objective: callable taking a list of student histories and
                returning a comparable score
            threshold_delta: step by which the threshold is probed and moved
        """
        self.proxy_objective = proxy_objective
        self.threshold_delta = threshold_delta
        self.threshold = threshold

    def do_iteration(self, true_students):
        """
        Performs the simulation for a single month.
        Will affect local attributes. (threshold)

        The first half of the students is processed with the threshold lowered
        by threshold_delta, the second half with it raised by threshold_delta.
        The threshold then moves up when the raised group scores strictly
        higher on the proxy objective, and down otherwise.
        """
        half = len(true_students) // 2
        arms = (
            (true_students[:half], self.threshold - self.threshold_delta),
            (true_students[half:], self.threshold + self.threshold_delta),
        )

        # The generator is consumed in order: lower group first, then upper group.
        lower_score, upper_score = (
            self.proxy_objective(
                [self.process_student(student, cutoff) for student in group]
            )
            for group, cutoff in arms
        )

        if upper_score > lower_score:
            self.threshold += self.threshold_delta
        else:
            self.threshold -= self.threshold_delta

    def process_student(self, true_student, threshold):
        """
        Let one student answer until they leave and return the list of performances.

        Mastery is declared once the proxy's skill estimate reaches the threshold.
        """
        # The proxy is given the student's own parameters.
        # Intentional, TODO: think about it again.
        estimate = ProxyStudent(true_student.p_learn, true_student.p_good)
        answers = []
        mastered = False

        while not true_student.leave(mastered):
            outcome = true_student.answer()
            answers.append(outcome)
            estimate.update_skill(outcome)
            mastered = estimate.skill >= threshold

        return answers

In [32]:
def perform_simulation(
        proxy_objective=proxy_objective_solved_tasks,
        p_learn=0.05,
        p_good=(0.2, 0.7),
        n_iters=100,
        n_students=100):
    """
    Run the threshold-adaptation loop and print the threshold after every iteration.

    Args:
        proxy_objective: callable scoring a list of student histories; the
            learning system moves its threshold according to this score
        p_learn: per-question learning probability given to every simulated student
        p_good: (probability of good performance for skill=0, -||- skill=1);
            an immutable tuple so the default cannot be shared and mutated
            across calls
        n_iters: number of iterations to simulate
        n_students: number of fresh students created for each iteration
    """
    als = LearningSystem(threshold=0.5, proxy_objective=proxy_objective)
    thresholds = []
    for _ in range(n_iters):
        # A new cohort every iteration: students do not carry over.
        true_students = [
            TrueStudent(p_learn=p_learn, p_good=p_good)
            for _ in range(n_students)
        ]
        als.do_iteration(true_students)
        thresholds.append(als.threshold)
    print(thresholds)

perform_simulation()

[0.45, 0.4, 0.35000000000000003, 0.4, 0.35000000000000003, 0.4, 0.45, 0.5, 0.45, 0.5, 0.55, 0.6000000000000001, 0.6500000000000001, 0.7000000000000002, 0.6500000000000001, 0.6000000000000001, 0.6500000000000001, 0.6000000000000001, 0.6500000000000001, 0.7000000000000002, 0.7500000000000002, 0.8000000000000003, 0.8500000000000003, 0.9000000000000004, 0.9500000000000004, 0.9000000000000004, 0.8500000000000003, 0.8000000000000003, 0.7500000000000002, 0.7000000000000002, 0.7500000000000002, 0.7000000000000002, 0.6500000000000001, 0.6000000000000001, 0.6500000000000001, 0.7000000000000002, 0.7500000000000002, 0.8000000000000003, 0.7500000000000002, 0.7000000000000002, 0.6500000000000001, 0.6000000000000001, 0.55, 0.5, 0.55, 0.5, 0.45, 0.5, 0.45, 0.5, 0.45, 0.5, 0.55, 0.6000000000000001, 0.6500000000000001, 0.7000000000000002, 0.7500000000000002, 0.7000000000000002, 0.7500000000000002, 0.8000000000000003, 0.8500000000000003, 0.8000000000000003, 0.8500000000000003, 0.9000000000000004, 0.95000