In [1]:
import pandas as pd
import numpy as np
import csv, time
import warnings
import random as r
warnings.filterwarnings('ignore')

# Relative path of the CSV that holds one row per recorded student step.
dataset_path = "../input/assignmentdata/AssignmentData.csv"

# Knowledge-component (KC) columns of the CSV that the model reads.
data_labels = [
    "KC_1",
    "KC_27",
    "KC_24",
    "KC_14",
    "KC_22",
    "KC_20",
    "KC_21",
]

# Load the full table, then bucket its rows by the "Student" column so each
# student can be replayed independently.
df = pd.read_csv(filepath_or_buffer=dataset_path)
students = df.groupby(by="Student")

In [2]:
class Student():
    """Per-student knowledge-tracing model over a set of knowledge components (KCs).

    Each KC has a history list in `experiences`. `prob_correct` predicts whether
    a step will be answered correctly from those histories, and `learn` records
    the outcome of a practice opportunity.

    Attributes:
        name: identifier passed to the constructor.
        experiences: dict mapping KC name -> list of values appended by `learn`.
        T: p(T), probability a skill is learned on each opportunity to use it.
        S: p(S), probability the student knows the skill but slips.
        G: p(G), probability the student guesses the step correctly without
            knowing the skill.
    """

    def __init__(self, name):
        self.name = name

        # One history list per known KC; KCs not listed here get a list
        # created lazily the first time `learn` records something for them.
        self.experiences = {'KC_1': [], 'KC_27': [], 'KC_24': [], 'KC_14': [], 'KC_22': [], 'KC_20': [], 'KC_21': []}

        # Learning parameters (Lecture 11):
        #   p(L0) probability the student already knows the skill before the
        #         first opportunity. It is not stored: `prob_latent_knowledge`
        #         returns 0.0 for a KC with no history.
        #   p(T)  probability the skill is learned on each opportunity.
        # Performance parameters (Lecture 11):
        #   p(G)  probability of guessing the step correctly without knowing the rule.
        #   p(S)  probability of knowing the skill but slipping and getting it wrong.
        #
        # Current values: p(T) = 0.50, p(S) = 0.1, p(G) = 0.5.
        self.T, self.S, self.G = (0.50, 0.1, 0.5)

    def learn(self, KCs, percent):
        """Record one practice opportunity for every KC in `KCs`.

        For each KC an independent draw from `r.random()` is compared with p(T);
        when the draw is <= p(T), `percent` is appended to that KC's history.

        Args:
            KCs: iterable of KC names exercised by the step.
            percent: value to store; the caller in this notebook passes the
                correctness probability returned by `prob_correct`.
        """
        for kc in KCs:
            if r.random() <= self.T:
                # setdefault keeps this consistent with prob_latent_knowledge,
                # which already tolerates KCs missing from the initial dict.
                self.experiences.setdefault(kc, []).append(percent)

    def prob_latent_knowledge(self, kc):
        """Return the current knowledge estimate p(Ln) for `kc`.

        Follows p(Ln) = p(Ln-1|evidence) + (1 - p(Ln-1|evidence)) * p(T): the
        probability the rule was already in the learned state, plus the
        probability it moves to the learned state if it was not.

        The last entry of the KC's history is used as p(Ln-1|evidence).
        NOTE(review): `learn` stores whatever `percent` it is given, so this
        term is the previously predicted correctness rather than a posterior
        conditioned on the observed answer -- confirm this is intended.

        Returns:
            0.0 when the KC has no recorded history, else the updated estimate.
        """
        history = self.experiences.get(kc)
        if not history:
            return 0.0
        last_instance = history[-1]
        return last_instance + ((1 - last_instance) * self.T)

    def prob_correct(self, KCs):
        """Predict whether a step requiring `KCs` will be answered correctly.

        For each KC the probability of a correct answer is
        p(Ln) * (1 - p(S)) + (1 - p(Ln)) * p(G); the per-KC values are averaged.

        Args:
            KCs: sized iterable of KC names (a list or a dict keyed by KC name).

        Returns:
            Tuple `(predicted_correct, probability)` where `predicted_correct`
            is True when the averaged probability is >= 0.80.
            When `KCs` is empty there is no knowledge estimate to use, so the
            guess probability p(G) is returned instead of dividing by zero.
        """
        correct_threshold = 0.80

        if len(KCs) == 0:
            correctness = self.G
            return (correctness >= correct_threshold, correctness)

        correctness = 0.0
        for kc in KCs:
            learned_state = self.prob_latent_knowledge(kc)
            no_slip = 1 - self.S
            not_learned = 1 - learned_state
            guess = self.G
            correctness += (learned_state * no_slip) + (not_learned * guess)
        correctness = correctness / len(KCs)
        return (correctness >= correct_threshold, correctness)

In [3]:
# Replay every student's steps through a fresh Student model and count how
# often the model's correct/incorrect prediction matches the recorded outcome.

# Student.learn draws from `r.random()`; seeding here makes the reported
# accuracy identical on every run of this cell.
RANDOM_SEED = 0
r.seed(RANDOM_SEED)

correctly_guessed = 0
total_instances = 0
# BREAK_POINT, i and j are not read anywhere in this cell; they are kept only
# because other cells may reference them -- TODO confirm and remove.
BREAK_POINT = 100
i = 0
for student, stud_data in students:
    i += 1
    s = Student(student)
    j = 0
    # Steps are visited in the order produced by grouping on "StepID".
    for step, step_data in stud_data.groupby("StepID"):
        # Walk the rows of the group instead of calling int() on a whole
        # column: int(Series) is deprecated in pandas and fails outright when
        # a student has more than one row for the same step.
        for _, row in step_data.iterrows():
            j += 1
            total_instances += 1

            # KCs flagged with 1 on this row are the ones the step requires.
            required_KCs = {kc: 1 for kc in data_labels if int(row[kc]) == 1}
            correct = bool(int(row["Correct"]))

            # Predict first, then let the model learn from this opportunity.
            prediction, percent = s.prob_correct(required_KCs)
            if correct == prediction:
                correctly_guessed += 1

            s.learn(required_KCs, percent)

In [4]:
# Report prediction accuracy. The "out of" figure is the number of instances
# actually scored (total_instances), i.e. the same denominator the percentage
# uses, so the two printed numbers always agree with each other.
string = "Correctly Guessed: {0} (out of {2})\nPercent Correctly Guessed: {1:.4}%"
# Guard the empty case so this cell still runs when nothing was scored.
percent_correct = (correctly_guessed / total_instances * 100) if total_instances else 0.0
print(string.format(correctly_guessed, percent_correct, total_instances))

Correctly Guessed: 30669 (out of 36627)
Percent Correctly Guessed: 83.73%


In [5]:
# Show the recorded history of the student currently bound to `s`:
# one line per KC (sorted by label), each value at 4 significant digits.
print(s.name)
for kc in sorted(data_labels):
    history = s.experiences.get(kc)
    rendered = ["{0:.4}".format(value) for value in history]
    print(kc, ' '.join(rendered))

stu98
KC_1 0.5
KC_14 0.5 0.8 0.86 0.872 0.8744 0.8749 0.875 0.875 0.875 0.875
KC_20 0.5 0.8 0.86 0.872 0.8744 0.8749 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875
KC_21 0.5 0.8 0.86 0.872 0.8744 0.8734 0.8747 0.8749 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.8735 0.8747
KC_22 0.5 0.8 0.86 0.8735 0.8748 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875
KC_24 0.5 0.8 0.86 0.872 0.8744 0.8749 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875 0.875
KC_27 0.5 0.8 0.86 0.872 0.8744 0.8749 0.875 0.875 0.875 0.875
