Commit

Merge branch 'main' into main

weizhehuang0827 committed Dec 4, 2023
2 parents 21b4b24 + ff426dd commit 0802d5a

Showing 18 changed files with 1,072 additions and 1 deletion.
2 changes: 2 additions & 0 deletions AUTHORS.md
@@ -12,4 +12,6 @@

[Weizhe Huang](https://github.com/weizhehuang0827)

[Bihan Xu](https://github.com/xbh0720)

The starred author is the corresponding author.
189 changes: 189 additions & 0 deletions EduKTM/LBKT/LBKT.py
@@ -0,0 +1,189 @@
# coding: utf-8
# 2023/11/21 @ xubihan

from sklearn import metrics
from sklearn.metrics import mean_squared_error
import logging
import torch
import torch.nn as nn
import numpy as np
from .model import Recurrent
from EduKTM import KTM
from tqdm import tqdm


device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def compute_auc(all_target, all_pred):
return metrics.roc_auc_score(all_target, all_pred)


def compute_accuracy(all_target, all_pred):
all_pred[all_pred > 0.5] = 1.0
all_pred[all_pred <= 0.5] = 0.0
return metrics.accuracy_score(all_target, all_pred)


def binary_entropy(target, pred):
loss = target * np.log(np.maximum(1e-10, pred)) \
+ (1.0 - target) * np.log(np.maximum(1e-10, 1.0 - pred))
return np.average(loss) * -1.0


def train_one_epoch(recurrent, optimizer, criterion,
batch_size, Topics_all, Resps_all,
time_factor_all, attempts_factor_all, hints_factor_all):
recurrent.train()
all_pred = []
all_target = []
    # shuffle students and iterate over full batches (a trailing partial batch is dropped)
    n = len(Topics_all) // batch_size
    shuffled_ind = np.arange(len(Topics_all))
    np.random.shuffle(shuffled_ind)
Topics_all = Topics_all[shuffled_ind]
Resps_all = Resps_all[shuffled_ind]
time_factor_all = time_factor_all[shuffled_ind]
attempts_factor_all = attempts_factor_all[shuffled_ind]
hints_factor_all = hints_factor_all[shuffled_ind]

for idx in tqdm(range(n)):
optimizer.zero_grad()

Topics = Topics_all[idx * batch_size: (idx + 1) * batch_size, :]
Resps = Resps_all[idx * batch_size: (idx + 1) * batch_size, :]
time_factor = time_factor_all[idx * batch_size:
(idx + 1) * batch_size, :]
attempts_factor = attempts_factor_all[idx * batch_size:
(idx + 1) * batch_size, :]
hints_factor = hints_factor_all[idx * batch_size:
(idx + 1) * batch_size, :]

input_topics = torch.from_numpy(Topics).long().to(device)
input_resps = torch.from_numpy(Resps).long().to(device)
input_time_factor = torch.from_numpy(time_factor).float().to(device)
input_attempts_factor = torch.from_numpy(
attempts_factor).float().to(device)
input_hints_factor = torch.from_numpy(hints_factor).float().to(device)

y_pred = recurrent(input_topics, input_resps, input_time_factor,
input_attempts_factor, input_hints_factor)

        # evaluate only non-padded positions (topic id 0 marks padding)
        mask = input_topics[:, 1:] > 0
        masked_pred = y_pred[:, 1:][mask]
        masked_truth = input_resps[:, 1:][mask]
        loss = criterion(masked_pred, masked_truth.float()).sum()
loss.backward()
optimizer.step()

masked_pred = masked_pred.detach().cpu().numpy()
masked_truth = masked_truth.detach().cpu().numpy()

all_pred.append(masked_pred)
all_target.append(masked_truth)

all_pred = np.concatenate(all_pred, axis=0)
all_target = np.concatenate(all_target, axis=0)

loss = binary_entropy(all_target, all_pred)
auc = compute_auc(all_target, all_pred)
acc = compute_accuracy(all_target, all_pred)

return loss, auc, acc


def test_one_epoch(recurrent, batch_size, Topics_all, Resps_all,
time_factor_all, attempts_factor_all, hints_factor_all):
recurrent.eval()
all_pred, all_target = [], []
n = len(Topics_all) // batch_size
for idx in range(n):
Topics = Topics_all[idx * batch_size:
(idx + 1) * batch_size, :]
Resps = Resps_all[idx * batch_size:
(idx + 1) * batch_size, :]
time_factor = time_factor_all[idx * batch_size:
(idx + 1) * batch_size, :]
attempts_factor = attempts_factor_all[idx * batch_size:
(idx + 1) * batch_size, :]
hints_factor = hints_factor_all[idx * batch_size:
(idx + 1) * batch_size, :]

input_topics = torch.from_numpy(Topics).long().to(device)
input_resps = torch.from_numpy(Resps).long().to(device)
input_time_factor = torch.from_numpy(time_factor).float().to(device)
input_attempts_factor = torch.from_numpy(attempts_factor)\
.float().to(device)
input_hints_factor = torch.from_numpy(hints_factor)\
.float().to(device)

with torch.no_grad():
y_pred = recurrent(input_topics, input_resps, input_time_factor,
input_attempts_factor, input_hints_factor)

mask = input_topics[:, 1:] > 0
masked_pred = y_pred[:, 1:][mask]
masked_truth = input_resps[:, 1:][mask]

masked_pred = masked_pred.detach().cpu().numpy()
masked_truth = masked_truth.detach().cpu().numpy()

all_pred.append(masked_pred)
all_target.append(masked_truth)

all_pred = np.concatenate(all_pred, axis=0)
all_target = np.concatenate(all_target, axis=0)

loss = binary_entropy(all_target, all_pred)
auc = compute_auc(all_target, all_pred)
rmse = mean_squared_error(all_target, all_pred, squared=False)
acc = compute_accuracy(all_target, all_pred)

return loss, auc, acc, rmse


class LBKT(KTM):
def __init__(self, num_topics, dim_tp, num_resps, num_units,
dropout, dim_hidden, memory_size, BATCH_SIZE, q_matrix):
super(LBKT, self).__init__()
q_matrix = torch.from_numpy(q_matrix).float().to(device)
self.recurrent = Recurrent(num_topics, dim_tp, num_resps, num_units,
dropout, dim_hidden, memory_size,
BATCH_SIZE, q_matrix).to(device)
self.batch_size = BATCH_SIZE

def train(self, train_data, test_data, epoch: int,
lr, lr_decay_step=1, lr_decay_rate=0.5) -> ...:
optimizer = torch.optim.Adam(self.recurrent.parameters(), lr=lr,
eps=1e-8, betas=(0.1, 0.999),
weight_decay=1e-6)
scheduler = torch.optim.lr_scheduler.StepLR(
optimizer, lr_decay_step, gamma=lr_decay_rate)
criterion = nn.BCELoss(reduction='none')

best_test_auc = 0
for idx in range(epoch):
train_loss, _, _ = train_one_epoch(self.recurrent,
optimizer, criterion,
self.batch_size, *train_data)
print("[Epoch %d] LogisticLoss: %.6f" % (idx, train_loss))
scheduler.step()
if test_data is not None:
_, valid_auc, valid_acc, valid_rmse = self.eval(test_data)
print("[Epoch %d] auc: %.6f, accuracy: %.6f, rmse: %.6f" % (
idx, valid_auc, valid_acc, valid_rmse))
if valid_auc > best_test_auc:
best_test_auc = valid_auc
return best_test_auc

def eval(self, test_data) -> ...:
self.recurrent.eval()
return test_one_epoch(self.recurrent, self.batch_size, *test_data)

    def save(self, filepath) -> ...:
        torch.save(self.recurrent.state_dict(), filepath)
logging.info("save parameters to %s" % filepath)

def load(self, filepath) -> ...:
self.recurrent.load_state_dict(torch.load(filepath))
logging.info("load parameters from %s" % filepath)
4 changes: 4 additions & 0 deletions EduKTM/LBKT/__init__.py
@@ -0,0 +1,4 @@
# coding: utf-8
# 2023/11/21 @ xubihan

from .LBKT import LBKT
161 changes: 161 additions & 0 deletions EduKTM/LBKT/model.py
@@ -0,0 +1,161 @@
# coding: utf-8
# 2023/11/21 @ xubihan

import torch
import torch.nn as nn

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


class Layer1(nn.Module):
def __init__(self, num_units, d=10, k=0.3, b=0.3, name='lb'):
super(Layer1, self).__init__()
self.weight = nn.Parameter(torch.Tensor(2 * num_units, num_units))
self.bias = nn.Parameter(torch.zeros(1, num_units))

nn.init.xavier_normal_(self.weight)
nn.init.xavier_normal_(self.bias)

self.d = d
self.k = k
self.b = b

def forward(self, factor, interact_emb, h):
k = self.k
d = self.d
b = self.b

gate = k + (1 - k) / (1 + torch.exp(-d * (factor - b)))

w = torch.cat([h, interact_emb], -1).matmul(self.weight) + self.bias

w = nn.Sigmoid()(w * gate)
return w


class LBKTcell(nn.Module):
def __init__(self, num_units, memory_size, dim_tp,
dropout=0.2, name='lbktcell'):
super(LBKTcell, self).__init__()
self.num_units = num_units
self.memory_size = memory_size
self.dim_tp = dim_tp
self.r = 4
self.factor_dim = 50

self.time_gain = Layer1(self.num_units, name='time_gain')
self.attempt_gain = Layer1(self.num_units, name='attempt_gain')
self.hint_gain = Layer1(self.num_units, name='hint_gain')

self.time_weight = nn.Parameter(torch.Tensor(self.r, num_units + 1, num_units))
nn.init.xavier_normal_(self.time_weight)

self.attempt_weight = nn.Parameter(torch.Tensor(self.r, num_units + 1, num_units))
nn.init.xavier_normal_(self.attempt_weight)

self.hint_weight = nn.Parameter(torch.Tensor(self.r, num_units + 1, num_units))
nn.init.xavier_normal_(self.hint_weight)

self.Wf = nn.Parameter(torch.Tensor(1, self.r))
nn.init.xavier_normal_(self.Wf)

self.bias = nn.Parameter(torch.Tensor(1, num_units))
nn.init.xavier_normal_(self.bias)

self.gate3 = nn.Linear(2 * num_units + 3 * self.factor_dim, num_units)
torch.nn.init.xavier_normal_(self.gate3.weight)

self.dropout = nn.Dropout(dropout)
self.output_layer = nn.Linear(dim_tp + num_units, num_units)
torch.nn.init.xavier_normal_(self.output_layer.weight)
self.sig = nn.Sigmoid()

def forward(self, interact_emb, correlation_weight, topic_emb,
time_factor, attempt_factor, hint_factor, h_pre):
        # (bs, 1, memory_size) bmm (bs, memory_size, d_k) -> (bs, d_k)
        h_pre_tilde = torch.squeeze(
            torch.bmm(correlation_weight.unsqueeze(1), h_pre), 1)
        # predict performance
        preds = torch.sum(self.sig(self.output_layer(
            torch.cat([h_pre_tilde, topic_emb], -1))), -1) / self.num_units  # (bs,)

# characterize each behavior's effect
time_gain = self.time_gain(time_factor, interact_emb, h_pre_tilde)
attempt_gain = self.attempt_gain(attempt_factor, interact_emb, h_pre_tilde)
hint_gain = self.hint_gain(hint_factor, interact_emb, h_pre_tilde)

# capture the dependency among different behaviors
        pad = torch.ones_like(time_factor)  # (bs, 1)
        time_gain1 = torch.cat([time_gain, pad], -1)  # (bs, num_units + 1)
        attempt_gain1 = torch.cat([attempt_gain, pad], -1)
        hint_gain1 = torch.cat([hint_gain, pad], -1)
        # (bs, num_units + 1) x (r, num_units + 1, num_units) -> (r, bs, num_units)
fusion_time = torch.matmul(time_gain1, self.time_weight)
fusion_attempt = torch.matmul(attempt_gain1, self.attempt_weight)
fusion_hint = torch.matmul(hint_gain1, self.hint_weight)
fusion_all = fusion_time * fusion_attempt * fusion_hint
        # (1, r) x (bs, r, num_units) -> (bs, 1, num_units) -> (bs, num_units)
        fusion_all = torch.matmul(
            self.Wf, fusion_all.permute(1, 0, 2)).squeeze(1) + self.bias
learning_gain = torch.relu(fusion_all)

        LG = torch.matmul(correlation_weight.unsqueeze(-1),
                          learning_gain.unsqueeze(1))

# forget effect
        forget_gate = self.gate3(torch.cat([
            h_pre,
            interact_emb.unsqueeze(1).repeat(1, self.memory_size, 1),
            time_factor.unsqueeze(1).repeat(1, self.memory_size, self.factor_dim),
            attempt_factor.unsqueeze(1).repeat(1, self.memory_size, self.factor_dim),
            hint_factor.unsqueeze(1).repeat(1, self.memory_size, self.factor_dim),
        ], -1))
LG = self.dropout(LG)
h = h_pre * self.sig(forget_gate) + LG

return preds, h


class Recurrent(nn.Module):
def __init__(self, num_topics, dim_tp, num_resps, num_units, dropout,
dim_hidden, memory_size, batch_size, q_matrix):
super(Recurrent, self).__init__()

self.embedding_topic = nn.Embedding(num_topics + 10, dim_tp)
torch.nn.init.xavier_normal_(self.embedding_topic.weight)

self.embedding_resps = nn.Embedding(num_resps, dim_hidden)
torch.nn.init.xavier_normal_(self.embedding_resps.weight)

self.memory_size = memory_size
self.num_units = num_units
self.dim_tp = dim_tp
self.q_matrix = q_matrix

self.input_layer = nn.Linear(dim_tp + dim_hidden, num_units)
torch.nn.init.xavier_normal_(self.input_layer.weight)

self.lbkt_cell = LBKTcell(num_units, memory_size,
dim_tp, dropout=dropout, name='lbkt')

self.init_h = nn.Parameter(torch.Tensor(memory_size, num_units))
nn.init.xavier_normal_(self.init_h)

def forward(self, topics, resps, time_factor, attempt_factor, hint_factor):
batch_size, seq_len = topics.size(0), topics.size(1)
topic_emb = self.embedding_topic(topics)
resps_emb = self.embedding_resps(resps)

correlation_weight = self.q_matrix[topics]
acts_emb = torch.relu(self.input_layer(torch.cat([topic_emb, resps_emb], -1)))

time_factor = time_factor.unsqueeze(-1)
attempt_factor = attempt_factor.unsqueeze(-1)
hint_factor = hint_factor.unsqueeze(-1)

h_init = self.init_h.unsqueeze(0).repeat(batch_size, 1, 1)
h_pre = h_init
preds = torch.zeros(batch_size, seq_len).to(device)
for t in range(0, seq_len):
pred, h = self.lbkt_cell(acts_emb[:, t], correlation_weight[:, t],
topic_emb[:, t], time_factor[:, t],
attempt_factor[:, t], hint_factor[:, t], h_pre)
h_pre = h

preds[:, t] = pred

return preds
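
As a quick sanity check of the tensor shapes flowing through `Recurrent` and `LBKTcell`, a toy forward pass such as the following could be used (arbitrary sizes and random inputs; the import path simply mirrors the file layout added in this commit):

```python
# Hypothetical shape check for the Recurrent module (toy sizes, random inputs).
import torch
from EduKTM.LBKT.model import Recurrent

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

bs, seq_len, num_topics, memory_size, num_units = 4, 12, 30, 8, 32
q_matrix = torch.rand(num_topics + 10, memory_size, device=device)

net = Recurrent(num_topics=num_topics, dim_tp=16, num_resps=2,
                num_units=num_units, dropout=0.2, dim_hidden=16,
                memory_size=memory_size, batch_size=bs,
                q_matrix=q_matrix).to(device)

topics = torch.randint(1, num_topics, (bs, seq_len), device=device)
resps = torch.randint(0, 2, (bs, seq_len), device=device)
time_f = torch.rand(bs, seq_len, device=device)
attempt_f = torch.rand(bs, seq_len, device=device)
hint_f = torch.rand(bs, seq_len, device=device)

preds = net(topics, resps, time_f, attempt_f, hint_f)
assert preds.shape == (bs, seq_len)  # one P(correct) per interaction, values in (0, 1)
```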
1 change: 1 addition & 0 deletions EduKTM/__init__.py
@@ -11,3 +11,4 @@
from .GKT import GKT
from .DKVMN import DKVMN
from .SKT import SKT
from .LBKT import LBKT
10 changes: 9 additions & 1 deletion docs/DKT.md
@@ -1,6 +1,14 @@
# Deep Knowledge Tracing (DKT)

If the reader wants to know the details of DKT, please refer to the Appendix of the paper: *[Deep Knowledge Tracing](http://stanford.edu/~cpiech/bio/papers/deepKnowledgeTracing.pdf)*.
Deep knowledge tracing (DKT) was the first approach to introduce deep learning into knowledge tracing (KT). It uses recurrent neural networks (RNNs) to model the student learning process: the RNN processes the sequence of learning interactions over time while maintaining a hidden state that implicitly represents the student's knowledge state, which evolves based on both the previous knowledge state and the current learning interaction.

![DKT model](_static/DKT.png)

The above figure shows the data flow of the DKT model: $x_i$ are the input embeddings of the students' learning interactions, $h_i$ are the hidden states representing the students' knowledge states, and $y_i$ are the predicted responses. The high-dimensional, continuous representation of the knowledge state makes DKT better able to model the complex learning process. In practice, the RNN variant long short-term memory (LSTM) is more commonly used to implement DKT, since its gating mechanism also accounts for forgetting; we likewise use an LSTM to implement the DKT model in our library.
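
A minimal sketch of this data flow, assuming one-hot encoded (skill, correctness) inputs and illustrative layer sizes (the actual implementation in this library may differ):

```python
import torch
import torch.nn as nn


class DKTSketch(nn.Module):
    """Toy DKT: one-hot (skill, correctness) interactions -> LSTM -> per-skill predictions."""

    def __init__(self, num_skills, hidden_size=128):
        super(DKTSketch, self).__init__()
        # each interaction x_i is a 2 * num_skills one-hot vector (skill id x correctness)
        self.lstm = nn.LSTM(2 * num_skills, hidden_size, batch_first=True)
        self.out = nn.Linear(hidden_size, num_skills)

    def forward(self, x):                    # x: (batch, seq_len, 2 * num_skills)
        h, _ = self.lstm(x)                  # h_i: hidden knowledge states
        return torch.sigmoid(self.out(h))    # y_i: P(correct) for every skill at each step
```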



If the reader wants to know the details of DKT, please refer to the paper: *[Deep Knowledge Tracing](http://stanford.edu/~cpiech/bio/papers/deepKnowledgeTracing.pdf)*.
```bibtex
@article{piech2015dkt,
title={Deep Knowledge Tracing},