## Codes from the article "An Introduction to Bayesian Knowledge Tracing with pyBKT" by Okan Bulut, Jinnie Shin, Seyma N. Yildirim-Erbasli, Guher Gorgun and Zachary A. Pardos.

In [1]:
# Installing necessary Python libraries
! pip install pyBKT
from pyBKT.models import Model
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

Collecting pyBKT
  Downloading pyBKT-1.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (24 kB)
Downloading pyBKT-1.4.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pyBKT
Successfully installed pyBKT-1.4.1


In [3]:
# The Cognitive Tutor dataset can be downloaded from:
# https://raw.githubusercontent.com/CAHLR/pyBKT−examples/master/data/ct.csv

# Reading the Cognitive Tutor dataset
!wget https://raw.githubusercontent.com/CAHLR/pyBKT-examples/master/data/ct.csv
df = pd.read_csv('ct.csv',encoding='latin')

--2025-01-25 10:08:43--  https://raw.githubusercontent.com/CAHLR/pyBKT-examples/master/data/ct.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4368817 (4.2M) [text/plain]
Saving to: ‘ct.csv’


2025-01-25 10:08:44 (44.6 MB/s) - ‘ct.csv’ saved [4368817/4368817]



In [4]:
# Initialize the model and set the seed for replicability purposes
model = Model(seed=42, num_fits=1)

# Train a simple BKT model on plot-related knowledge components (with file location)
# model.fit(data_path='ct.csv', skills=".*Plot.*")

# Or, train the model using the Pandas dataframe
model.fit(data=df, skills=".*Plot.*")

In [5]:
# Create a list of unique skills from the 'KC(Default)' column in the dataframe
skill_list = list(df['KC(Default)'].unique())

# Train the model for each skill in the skill list
for s in skill_list:
    model.fit(data_path='ct.csv', skills=s)

In [7]:
skill_list

['Plot non-terminating improper fraction',
 'Plot imperfect radical',
 'Plot terminating proper fraction',
 'Plot pi',
 'Plot whole number',
 'Plot decimal - thousandths',
 'Calculate unit rate',
 'Calculate part in proportion with fractions',
 'Calculate total in proportion with fractions',
 'Finding the intersection, Mixed',
 'Finding the intersection, GLF',
 'Finding the intersection, SIF']

In [8]:
# Update the column mappings (i.e., defaults)
defaults = {
    'order_id': 'person',
    'skill_name': 'skill_math',
    'correct': 'answer'
}

# Fit the BKT model using the updated defaults
model.fit(data=df, defaults=defaults)

In [9]:
# Print estimated model parameters
model.params()

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,value
skill,param,class,Unnamed: 3_level_1
Plot non-terminating improper fraction,prior,default,0.70629
Plot non-terminating improper fraction,learns,default,0.17198
Plot non-terminating improper fraction,guesses,default,0.0611
Plot non-terminating improper fraction,slips,default,0.29488
Plot non-terminating improper fraction,forgets,default,0.0
Plot imperfect radical,prior,default,0.28261
Plot imperfect radical,learns,default,0.10949
Plot imperfect radical,guesses,default,0.09872
Plot imperfect radical,slips,default,0.28728
Plot imperfect radical,forgets,default,0.0


In [10]:
# Model evaluation
model.evaluate(data_path='ct.csv', metric=['rmse', 'accuracy', 'auc'])

[0.45403495354513523, 0.6434122323070535, 0.7051642819384785]

In [11]:
# Three-fold cross-validation
model.crossvalidate(data_path='ct.csv', folds=3)

Unnamed: 0_level_0,rmse
skill,Unnamed: 1_level_1
Plot non-terminating improper fraction,0.48601
Plot imperfect radical,0.43715
Plot terminating proper fraction,0.49984
Plot pi,0.46071
Plot whole number,0.2927
Plot decimal - thousandths,0.47294
Calculate unit rate,0.48387
Calculate part in proportion with fractions,0.42708
Calculate total in proportion with fractions,0.42413
"Finding the intersection, Mixed",0.50438


In [12]:
# Training BKT variants
model.fit(data_path='ct.csv', multilearn=True, forgets=True, multigs=True)

In [20]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split


class IRT:
    def __init__(self, lr, iterations, user_size, question_size):
        self.lr = lr
        self.iterations = iterations
        self.user_size = user_size
        self.question_size = question_size
        self.theta = np.zeros(user_size)
        self.beta = np.zeros(question_size)
        self.val_acc_lst = []
        self.neg_lld_lst = []
        self.val_lld_lst = []

    def sigmoid(self, x):
        """Apply sigmoid function."""
        return np.exp(x) / (1 + np.exp(x))

    def neg_log_likelihood(self, data):
        """Compute the negative log-likelihood."""
        log_likelihood = 0.0
        for ind in np.arange(len(data["is_correct"])):
            i = data["user_id"][ind]
            j = data["question_id"][ind]
            c_ij = data["is_correct"][ind]

            theta_i = self.theta[i]
            beta_j = self.beta[j]
            diff = theta_i - beta_j
            log_likelihood += c_ij * diff - np.log(1 + np.exp(diff))
        return -log_likelihood

    def update_theta_beta(self, data):
        """Update theta and beta using gradient descent."""
        diff_theta_beta = np.expand_dims(self.theta, axis=1) - np.expand_dims(self.beta, axis=0)
        sig = self.sigmoid(diff_theta_beta)

        grad_theta = np.zeros_like(diff_theta_beta)
        grad_beta = np.zeros_like(diff_theta_beta)

        for ind in np.arange(len(data["is_correct"])):
            i = data["user_id"][ind]
            j = data["question_id"][ind]
            c_ij = data["is_correct"][ind]

            grad_theta[i, j] = c_ij - sig[i, j]
            grad_beta[i, j] = sig[i, j] - c_ij

        self.theta = self.theta + self.lr * np.sum(grad_theta, axis=1)
        self.beta = self.beta + self.lr * np.sum(grad_beta, axis=0)

    def evaluate(self, data):
        """Evaluate the model given data and return the accuracy."""
        pred = []
        for i, q in enumerate(data["question_id"]):
            u = data["user_id"][i]
            x = (self.theta[u] - self.beta[q]).sum()
            p_a = self.sigmoid(x)
            pred.append(p_a >= 0.5)
        return np.sum((data["is_correct"] == np.array(pred))) / len(data["is_correct"])

    def irt(self, train_data, val_data):
        for i in range(self.iterations):
            neg_lld = self.neg_log_likelihood(train_data)
            score = self.evaluate(val_data)
            self.val_acc_lst.append(score)
            self.neg_lld_lst.append(neg_lld)
            self.val_lld_lst.append(self.neg_log_likelihood(val_data))
            print("NLLK: {}\tScore: {}".format(neg_lld, score))
            self.update_theta_beta(train_data)

        return self.theta, self.beta, self.val_acc_lst, self.neg_lld_lst, self.val_lld_lst

In [21]:
# Knowledge prediction with BKT and IRT
def main(skill='Plot imperfect radical'):
    # Convert 'skill' to string to avoid potential issues with boolean values
    skill = str(skill)
    train_data = df.loc[df['KC(Default)'] == skill]
    train_data = train_data[[ 'Anon Student Id', 'Problem Name', 'Correct First Attempt' ]]
    train_data.columns = ['user_id', 'question_id', 'is_correct']

    # Factorize the 'user_id' column
    labels, levels = pd.factorize(train_data['user_id'])
    train_data['user_id'] = labels
    user_dic = dict(zip(levels, list(range(len(levels)))))

    # Factorize the 'question_id' column
    labels, levels = pd.factorize(train_data['question_id'])
    train_data['question_id'] = labels
    question_dic = dict(zip(levels, list(range(len(levels)))))

    # Use 30% of the dataset for testing and 30% of the test data for validation
    train_data, test_data = train_test_split(train_data, test_size=0.3)
    val_data, test_data = train_test_split(test_data, test_size=0.3)

    # Reset index and clean up the datasets
    train_data = train_data.reset_index().drop(columns=['index'])
    test_data = test_data.reset_index().drop(columns=['index'])
    val_data = val_data.reset_index().drop(columns=['index'])

    # Define learning rate and number of iterations
    lr = 1e-2
    num_iteration = 50

    # Initialize and train the IRT model
    irt_model = IRT(lr, num_iteration, len(user_dic), len(question_dic))
    theta, beta, val_acc_lst, neg_lld_lst, val_lld_lst = irt_model.irt(train_data, val_data)

    # Print validation and test accuracy
    print("Validation accuracy: {}".format(val_acc_lst[-1]))
    test_acc = irt_model.evaluate(test_data)
    print("Test accuracy: {}".format(test_acc))

    return test_acc, user_dic, question_dic, theta, beta

# Process skills from the dataframe
skill_list = list(df['KC(Default)'].unique())
skill_acc = []

for s in skill_list:
    # Train the model on the skill-specific data
    test_acc, user_dic, question_dic, theta, beta = main(s)
    skill_acc.append(test_acc)

NLLK: 352.118767724453	Score: 0.5197368421052632
NLLK: 349.1178513094916	Score: 0.5986842105263158
NLLK: 346.42096008770625	Score: 0.6118421052631579
NLLK: 343.9691224352335	Score: 0.6118421052631579
NLLK: 341.7155881272335	Score: 0.6118421052631579
NLLK: 339.6233322671811	Score: 0.6118421052631579
NLLK: 337.66302823908046	Score: 0.6118421052631579
NLLK: 335.8114248719198	Score: 0.6118421052631579
NLLK: 334.0500590211625	Score: 0.6118421052631579
NLLK: 332.3642399722739	Score: 0.6118421052631579
NLLK: 330.7422508375758	Score: 0.6118421052631579
NLLK: 329.1747215358721	Score: 0.6118421052631579
NLLK: 327.65413665889884	Score: 0.6118421052631579
NLLK: 326.17444902610225	Score: 0.6118421052631579
NLLK: 324.7307759195262	Score: 0.6118421052631579
NLLK: 323.31915997555035	Score: 0.6118421052631579
NLLK: 321.9363806625925	Score: 0.6118421052631579
NLLK: 320.5798053769802	Score: 0.6118421052631579
NLLK: 319.2472716111881	Score: 0.6118421052631579
NLLK: 317.93699353259484	Score: 0.611842105263

In [22]:
# Python code example linking BKT and IRT parameters.

# Initialize an empty list to store correlation results
correal_result = []

# Iterate over all unique skills in the dataset
for s in list(set(df['KC(Default)'])):
    # Initialize the BKT model
    model = Model(seed=42, num_fits=1)

    # Fit the model on the specified skill with multiple settings enabled
    model.fit(
        data_path='ct.csv',
        skills=s,
        multigs=True,
        forgets=True,
        multilearn=True
    )

    # Evaluate the model accuracy
    acc = model.evaluate(data_path='ct.csv', metric='accuracy')

    # Extract learn parameters
    learn_params = {}
    for i, j in dict(model.params()['value']).items():
        if i[-2] == 'learns':
            learn_params[i[-1]] = j

    # Extract forget parameters
    forget_params = {}
    for i, j in dict(model.params()['value']).items():
        if i[-2] == 'forgets':
            forget_params[i[-1]] = j

    # Extract guess parameters
    guess_params = {}
    for i, j in dict(model.params()['value']).items():
        if i[-2] == 'guesses':
            guess_params[i[-1]] = j

    # Extract slip parameters
    slip_params = {}
    for i, j in dict(model.params()['value']).items():
        if i[-2] == 'slips':
            slip_params[i[-1]] = j

    # Train IRT model
    test_acc, user_dic, question_dic, theta, beta = main(s)

    # Function to convert BKT parameters to IRT parameters
    def bkt_to_irt(q_id):
        # Extract BKT parameters
        p_guess = guess_params[q_id]

        # Convert BKT parameters to IRT parameters
        difficulty = np.log(p_guess)

        # Return IRT parameters
        irt_params = {
            'difficulty': difficulty
        }
        return irt_params

    # Convert BKT parameters for all questions
    beta_bkt = []
    for q_id in list(question_dic.keys()):
        irt_parameters = bkt_to_irt(q_id)
        beta_bkt.append(irt_parameters['difficulty'])

    # Print and calculate correlations
    print(s)
    correlation = pd.DataFrame(zip(beta_bkt, beta)).corr().abs()
    print(correlation)
    correal_result.append(correlation)

NLLK: 352.118767724453	Score: 0.5328947368421053
NLLK: 348.30029198714476	Score: 0.5328947368421053
NLLK: 344.9484517450642	Score: 0.5328947368421053
NLLK: 341.9618123388379	Score: 0.5328947368421053
NLLK: 339.26239824833914	Score: 0.5328947368421053
NLLK: 336.79037659405856	Score: 0.5328947368421053
NLLK: 334.49980334475157	Score: 0.5328947368421053
NLLK: 332.3553160814343	Score: 0.5328947368421053
NLLK: 330.32960386642	Score: 0.5328947368421053
NLLK: 328.4014878859304	Score: 0.5328947368421053
NLLK: 326.5544706313823	Score: 0.5328947368421053
NLLK: 324.7756397833389	Score: 0.5328947368421053
NLLK: 323.05483886772896	Score: 0.5328947368421053
NLLK: 321.38403807863705	Score: 0.5328947368421053
NLLK: 319.75685533549097	Score: 0.5328947368421053
NLLK: 318.16819032030367	Score: 0.5460526315789473
NLLK: 316.61394373423565	Score: 0.5460526315789473
NLLK: 315.0908010665059	Score: 0.5328947368421053
NLLK: 313.5960653927098	Score: 0.5328947368421053
NLLK: 312.1275275874872	Score: 0.53947368421

  difficulty = np.log(p_guess)


NLLK: 273.099989140619	Score: 0.3697478991596639
NLLK: 266.8176174089276	Score: 0.6134453781512605
NLLK: 261.3384270312867	Score: 0.6134453781512605
NLLK: 256.54212944990076	Score: 0.6134453781512605
NLLK: 252.32571141750685	Score: 0.6134453781512605
NLLK: 248.60165285943333	Score: 0.6134453781512605
NLLK: 245.29598825346508	Score: 0.6134453781512605
NLLK: 242.34640717269144	Score: 0.6134453781512605
NLLK: 239.70050187244644	Score: 0.6134453781512605
NLLK: 237.31421074006326	Score: 0.6218487394957983
NLLK: 235.15046989045092	Score: 0.6218487394957983
NLLK: 233.17806455144324	Score: 0.6218487394957983
NLLK: 231.37066162457444	Score: 0.6134453781512605
NLLK: 229.70600088743032	Score: 0.6134453781512605
NLLK: 228.1652219917335	Score: 0.6134453781512605
NLLK: 226.7323059698287	Score: 0.6134453781512605
NLLK: 225.39361234379317	Score: 0.6134453781512605
NLLK: 224.13749553875735	Score: 0.6134453781512605
NLLK: 222.95398681805761	Score: 0.6134453781512605
NLLK: 221.83453023291892	Score: 0.613

  difficulty = np.log(p_guess)


NLLK: 447.7730786417257	Score: 0.24870466321243523
NLLK: 433.35836461205076	Score: 0.7564766839378239
NLLK: 421.14553570522946	Score: 0.7564766839378239
NLLK: 410.7444761612119	Score: 0.7564766839378239
NLLK: 401.8314107878438	Score: 0.7564766839378239
NLLK: 394.1406827880342	Score: 0.7564766839378239
NLLK: 387.45580097104136	Score: 0.7564766839378239
NLLK: 381.6009440764619	Score: 0.7564766839378239
NLLK: 376.4334629710024	Score: 0.7564766839378239
NLLK: 371.8375391482985	Score: 0.7564766839378239
NLLK: 367.7189589309054	Score: 0.7564766839378239
NLLK: 364.00087530787465	Score: 0.7564766839378239
NLLK: 360.6204032923512	Score: 0.7564766839378239
NLLK: 357.525899092915	Score: 0.7564766839378239
NLLK: 354.6747907340508	Score: 0.7564766839378239
NLLK: 352.03184885000513	Score: 0.7564766839378239
NLLK: 349.5678068197392	Score: 0.7564766839378239
NLLK: 347.2582574035889	Score: 0.7564766839378239
NLLK: 345.0827680898465	Score: 0.7564766839378239
NLLK: 343.02416957629686	Score: 0.75647668393