In [3]:
 # pip install pyBKT

In [4]:
import sys
import numpy as np
from pyBKT.generate import synthetic_data, random_model_uni
from pyBKT.fit import EM_fit
from copy import deepcopy

In [6]:
# Model dimensions and fitting configuration.
#   num_learns: number of learning resources
#   num_gs:     number of guess/slip classes
num_learns, num_gs = 1, 1

# Number of random EM initializations to try when fitting
# (note: the fitting cell further down assigns this name again).
num_fit_initializations = 20

In [9]:
# True parameters used for synthetic data generation.
p_T = 0.30   # learn probability (stored in As[:, 1, 0])
p_F = 0.00   # forget probability (stored in As[:, 0, 1])
p_G = 0.10   # guess probability
p_S = 0.03   # slip probability
p_L0 = 0.10  # prior probability (second entry of pi_0)

# Build the ground-truth model dict that is passed to synthetic_data.
truemodel = {}

# One 2x2 matrix per learning resource. After the transpose each matrix is
# [[1-p_T, p_F], [p_T, 1-p_F]].
# np.float64 is used instead of the np.float_ alias, which was removed in
# NumPy 2.0 (np.float_ was an alias of np.float64, so values are unchanged).
truemodel["As"] = np.zeros((num_learns, 2, 2), dtype=np.float64)
for i in range(num_learns):
    truemodel["As"][i] = np.transpose([[1 - p_T, p_T], [p_F, 1 - p_F]])

# Per-resource learn / forget rates, sliced out of the matrices above.
truemodel["learns"] = truemodel["As"][:, 1, 0]
truemodel["forgets"] = truemodel["As"][:, 0, 1]

# Initial state distribution as a column vector; "prior" is its second entry.
truemodel["pi_0"] = np.array([[1 - p_L0], [p_L0]])
truemodel["prior"] = truemodel["pi_0"][1][0]

# One guess and one slip probability per guess/slip class.
truemodel["guesses"] = np.full(num_gs, p_G, dtype=np.float64)
truemodel["slips"] = np.full(num_gs, p_S, dtype=np.float64)

# Can optionally set the learn class sequence - set randomly by synthetic_data
# if not included.
# NOTE(review): the snippet below refers to num_resources, which is not defined
# in this notebook (presumably num_learns), and to observation_sequence_lengths,
# which is only defined in a later cell -- adjust before uncommenting.
#truemodel["resources"] = np.random.randint(1, high = num_resources, size = sum(observation_sequence_lengths))

In [10]:
# Show the assembled ground-truth model dict as this cell's output.
truemodel

{'As': array([[[0.7, 0. ],
         [0.3, 1. ]]]),
 'learns': array([0.3]),
 'forgets': array([0.]),
 'pi_0': array([[0.9],
        [0.1]]),
 'prior': 0.1,
 'guesses': array([0.1]),
 'slips': array([0.03])}

In [11]:
# Generate synthetic response data from the true model.
print("generating data...")
# 500 students with 100 observations each. The builtin int replaces the np.int
# alias (deprecated in NumPy 1.20, removed in 1.24); np.int was simply an alias
# of the builtin, so the resulting dtype is the same.
observation_sequence_lengths = np.full(500, 100, dtype=int)
data = synthetic_data.synthetic_data(truemodel, observation_sequence_lengths)

generating data...


In [12]:
# Inspect the generated dict (the output below shows its 'stateseqs', 'data' and 'starts' entries).
data

{'stateseqs': array([[0, 0, 1, ..., 1, 1, 1]], dtype=int32),
 'data': array([[1, 1, 2, ..., 2, 2, 2]], dtype=int32),
 'starts': array([    1,   101,   201,   301,   401,   501,   601,   701,   801,
          901,  1001,  1101,  1201,  1301,  1401,  1501,  1601,  1701,
         1801,  1901,  2001,  2101,  2201,  2301,  2401,  2501,  2601,
         2701,  2801,  2901,  3001,  3101,  3201,  3301,  3401,  3501,
         3601,  3701,  3801,  3901,  4001,  4101,  4201,  4301,  4401,
         4501,  4601,  4701,  4801,  4901,  5001,  5101,  5201,  5301,
         5401,  5501,  5601,  5701,  5801,  5901,  6001,  6101,  6201,
         6301,  6401,  6501,  6601,  6701,  6801,  6901,  7001,  7101,
         7201,  7301,  7401,  7501,  7601,  7701,  7801,  7901,  8001,
         8101,  8201,  8301,  8401,  8501,  8601,  8701,  8801,  8901,
         9001,  9101,  9201,  9301,  9401,  9501,  9601,  9701,  9801,
         9901, 10001, 10101, 10201, 10301, 10401, 10501, 10601, 10701,
        10801, 10901,

In [13]:
# Fit the model with EM from several random initializations and keep the run
# whose final log-likelihood is highest, then print it next to the true model.
print('fitting! each dot is a new EM initialization')

# NOTE(review): this reassignment overrides the value of 20 set in the
# parameters cell -- confirm which count is intended.
num_fit_initializations = 5
best_likelihood = float("-inf")
best_model = None  # stays None if no run ever beats -inf (e.g. NaN likelihoods)

for i in range(num_fit_initializations):
    # randomly set the initial parameter values for this EM run
    fitmodel = random_model_uni.random_model_uni(num_learns, num_gs)
    (fitmodel, log_likelihoods) = EM_fit.EM_fit(fitmodel, data)
    if log_likelihoods[-1] > best_likelihood:
        best_likelihood = log_likelihoods[-1]
        best_model = fitmodel
    # progress marker announced by the message above
    print('.', end='', flush=True)

if best_model is None:
    # Fail here with a clear message instead of a NameError in the report below.
    raise RuntimeError('no EM initialization produced a usable log-likelihood')

# compare the fit model to the true model

print('')  # terminates the line of progress dots
print('\ttruth\tlearned')
print('prior\t%.4f\t%.4f' % (truemodel['prior'], best_model["pi_0"][1][0]))
# learn rates for every resource first, then forget rates (original row order)
for r in range(num_learns):
    print('learn%d\t%.4f\t%.4f' % (r+1, truemodel['As'][r, 1, 0].squeeze(), best_model['As'][r, 1, 0].squeeze()))
for r in range(num_learns):
    print('forget%d\t%.4f\t%.4f' % (r+1, truemodel['As'][r, 0, 1].squeeze(), best_model['As'][r, 0, 1].squeeze()))

# guess rates for every guess/slip class first, then slip rates
for s in range(num_gs):
    print('guess%d\t%.4f\t%.4f' % (s+1, truemodel['guesses'][s], best_model['guesses'][s]))
for s in range(num_gs):
    print('slip%d\t%.4f\t%.4f' % (s+1, truemodel['slips'][s], best_model['slips'][s]))

fitting! each dot is a new EM initialization

	truth	learned
prior	0.1000	0.1022
learn1	0.3000	0.2998
forget1	0.0000	0.0000
guess1	0.1000	0.0854
slip1	0.0300	0.0312
