In [1]:
def pnn1(inputs, embed_size, hidden_dim, keep_prob):
#     inputs = [inp.cuda() for inp in inputs]  # 把输入移动到GPU

    num_inputs = len(inputs)
#     for o in inputs:
#         print(o.shape)
    num_pairs = int(num_inputs * (num_inputs - 1) / 2)

    xw = torch.cat(inputs, 1)
    xw3d = xw.view(-1, num_inputs, embed_size)

    row = []
    col = []
    for i in range(num_inputs - 1):
        for j in range(i + 1, num_inputs):
            row.append(i)
            col.append(j)

    p = xw3d[:, row, :].transpose(1, 0)
    q = xw3d[:, col, :].transpose(1, 0)

    p = p.contiguous().view(-1, num_pairs, embed_size)
    q = q.contiguous().view(-1, num_pairs, embed_size)

    ip = (p * q).sum(-1).view(-1, num_pairs)
    l = torch.cat([xw, ip], 1)

    h = nn.Linear(l.size(1), hidden_dim)(l)
    h = nn.ReLU()(h)
    h = nn.Dropout(p=1 - keep_prob)(h)

    p = nn.Linear(hidden_dim, 1)(h).view(-1)

    return h, p

In [2]:
import os
import torch
import numpy as np
import math
from scipy import sparse
from torch.nn import functional as F


import torch
import torch.nn as nn




# 加载数据
data_folder = 'assist09'
con_sym = ';'

pro_skill_coo = sparse.load_npz(os.path.join(data_folder, 'pro_skill_sparse.npz'))
skill_skill_coo = sparse.load_npz(os.path.join(data_folder, 'skill_skill_sparse.npz'))
pro_pro_coo = sparse.load_npz(os.path.join(data_folder, 'pro_pro_sparse.npz'))

pro_num, skill_num = pro_skill_coo.shape
print(f'问题数目{pro_num}, 技能数目{skill_num}')

pro_skill = pro_skill_coo.toarray()
pro_pro = pro_pro_coo.toarray()
skill_skill = skill_skill_coo.toarray()
pro_skill_tensor = torch.from_numpy(pro_skill)
skill_skill_tensor = torch.from_numpy(skill_skill)
pro_pro_tensor = torch.from_numpy(pro_pro)

pro_feat = np.load(os.path.join(data_folder, 'pro_feat.npz'))['pro_feat']
pro_feat_tensor = torch.from_numpy(pro_feat)
print('问题特征形状:', pro_feat.shape)

diff_feat_dim = pro_feat.shape[1] - 1
embed_dim = 64
hidden_dim = 128
dropout = 0.5
lr = 0.001
batch_size = 256
epochs = 200

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


# 定义模型
class PEGB(torch.nn.Module):
    def __init__(self, pro_num, skill_num, diff_feat_dim, embed_dim):
        super(PEGB, self).__init__()

        self.pro_embeddings = torch.nn.Embedding(pro_num, embed_dim)
        self.skill_embeddings = torch.nn.Embedding(skill_num, embed_dim)
        self.diff_embeddings = torch.nn.Linear(diff_feat_dim, embed_dim)



    def forward(self, pro, diff_feat, pro_skill, pro_pro, skill_skill):
        pro_embed = self.pro_embeddings(pro)
        skill_embed = self.skill_embeddings(skill_skill)
        diff_feat_embed = self.diff_embeddings(diff_feat)

        # pro-skill
        pro_skill_logits = (pro_embed @ skill_embed.t()).view(-1)
        pro_skill_loss = F.binary_cross_entropy_with_logits(pro_skill_logits, pro_skill.view(-1))

        # pro-pro
        pro_pro_logits = (pro_embed @ pro_embed.t()).view(-1)
        # print(pro_pro_logits.shape,pro_pro.shape)
        pro_pro_loss = F.binary_cross_entropy_with_logits(pro_pro_logits, pro_pro.contiguous().view(-1))

        # skill-skill
        skill_skill_logits = (skill_embed @ skill_embed.t()).view(-1)
        skill_skill_loss = F.binary_cross_entropy_with_logits(skill_skill_logits, skill_skill_tensor.view(-1))

        # 特征融合
        skill_embed = pro_skill @ skill_embed / pro_skill.sum(1, keepdim=True)
        h,p= pnn1([pro_embed, skill_embed, diff_feat_embed],embed_dim, hidden_dim, 0.5)
#         h,p = self.pnn([pro_embed, skill_embed, diff_feat_embed])
        # pro_final_embed = None
#         print(p.shape,pro_feat_tensor.shape)
        mse = ((p-diff_feat[:,-1])**2).mean()

        return pro_skill_loss, pro_pro_loss, skill_skill_loss,mse,h


model = PEGB(pro_num, skill_num, diff_feat_dim, embed_dim).to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)




问题数目15911, 技能数目123
问题特征形状: (15911, 7)


In [None]:
for epoch in range(1000):
    model.train()
    train_loss = 0

    for i in range(0, pro_num, batch_size):
        batch_pro = torch.arange(i, min(i + batch_size, pro_num)).long().to(device)
        batch_pro_skill = torch.Tensor(pro_skill[i:i + batch_size]).to(device)
        batch_pro_pro = torch.Tensor(pro_pro[i:i + batch_size,i:i + batch_size]).to(device)
        batch_diff_feat = torch.Tensor(pro_feat[i:i + batch_size, :-1]).to(device)
        batch_skill_skill = torch.arange(skill_num).to(device)

        pro_skill_loss, pro_pro_loss, skill_skill_loss, mse = model(batch_pro,
                                                                  batch_diff_feat,
                                                                  batch_pro_skill,
                                                                  batch_pro_pro,
                                                                  batch_skill_skill)
        loss = pro_skill_loss + pro_pro_loss + skill_skill_loss + mse

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        train_loss += loss.item()

    train_loss /= math.ceil(pro_num / batch_size)
    print(f'Epoch {epoch}, Loss {train_loss:.4f}')

Epoch 0, Loss 9.4473
Epoch 1, Loss 8.9229
Epoch 2, Loss 8.4589
Epoch 3, Loss 8.0568
Epoch 4, Loss 7.6895
Epoch 5, Loss 7.3615
Epoch 6, Loss 7.0605
Epoch 7, Loss 6.7796
Epoch 8, Loss 6.5295
Epoch 9, Loss 6.2889
Epoch 10, Loss 6.0691
Epoch 11, Loss 5.8686
Epoch 12, Loss 5.6734
Epoch 13, Loss 5.4985
Epoch 14, Loss 5.3273
Epoch 15, Loss 5.1666
Epoch 16, Loss 5.0175
Epoch 17, Loss 4.8739
Epoch 18, Loss 4.7335
Epoch 19, Loss 4.6026
Epoch 20, Loss 4.4809
Epoch 21, Loss 4.3615
Epoch 22, Loss 4.2441
Epoch 23, Loss 4.1350
Epoch 24, Loss 4.0317
Epoch 25, Loss 3.9227
Epoch 26, Loss 3.8230
Epoch 27, Loss 3.7277
Epoch 28, Loss 3.6320
Epoch 29, Loss 3.5371
Epoch 30, Loss 3.4509
Epoch 31, Loss 3.3633
Epoch 32, Loss 3.2803
Epoch 33, Loss 3.1970
Epoch 34, Loss 3.1182
Epoch 35, Loss 3.0408
Epoch 36, Loss 2.9687
Epoch 37, Loss 2.8959
Epoch 38, Loss 2.8274
Epoch 39, Loss 2.7568
Epoch 40, Loss 2.6922
Epoch 41, Loss 2.6310
Epoch 42, Loss 2.5715
Epoch 43, Loss 2.5156
Epoch 44, Loss 2.4600
Epoch 45, Loss 2.405

Epoch 361, Loss 0.9032
Epoch 362, Loss 0.9085
Epoch 363, Loss 0.9052
Epoch 364, Loss 0.9033
Epoch 365, Loss 0.9061
Epoch 366, Loss 0.9088
Epoch 367, Loss 0.9002
Epoch 368, Loss 0.8984
Epoch 369, Loss 0.8988
Epoch 370, Loss 0.8974
Epoch 371, Loss 0.8980
Epoch 372, Loss 0.9047
Epoch 373, Loss 0.8986
Epoch 374, Loss 0.8978
Epoch 375, Loss 0.8969
Epoch 376, Loss 0.8922
Epoch 377, Loss 0.8934
Epoch 378, Loss 0.8895
Epoch 379, Loss 0.8931
Epoch 380, Loss 0.8978
Epoch 381, Loss 0.8897
Epoch 382, Loss 0.8892
Epoch 383, Loss 0.8834
Epoch 384, Loss 0.8889
Epoch 385, Loss 0.8846
Epoch 386, Loss 0.8811
Epoch 387, Loss 0.8918
Epoch 388, Loss 0.8916
Epoch 389, Loss 0.8913
Epoch 390, Loss 0.8856
Epoch 391, Loss 0.8902
Epoch 392, Loss 0.8810
Epoch 393, Loss 0.8830
Epoch 394, Loss 0.8776
Epoch 395, Loss 0.8847
Epoch 396, Loss 0.8783
Epoch 397, Loss 0.8835
Epoch 398, Loss 0.8998
Epoch 399, Loss 0.8750
Epoch 400, Loss 0.8792
Epoch 401, Loss 0.8799
Epoch 402, Loss 0.8778
Epoch 403, Loss 0.8835
Epoch 404, 

In [None]:
# 保存训练好的embedding
model.eval()
with torch.no_grad():
    pro_embeddings = model.pro_embeddings.weight.cpu().numpy()
    skill_embeddings = model.skill_embeddings.weight.cpu().numpy()

    batch_pro = torch.arange(pro_num).long()
    batch_pro_skill = torch.Tensor(pro_skill)
    batch_diff_feat = torch.Tensor(pro_feat[:, :-1])
    batch_skill_skill = torch.arange(skill_num).to(device)

# 处理技能embedding
with open(os.path.join(data_folder, 'skill_id_dict.txt'), 'r') as f:
    skill_id_dict = eval(f.read())

joint_skill_num = len(skill_id_dict)
skill_embeddings_new = np.zeros((joint_skill_num, skill_embeddings.shape[1]))
skill_embeddings_new[:skill_num] = skill_embeddings

for s in skill_id_dict:
    if con_sym in s:
        tmp_skill_id = skill_id_dict[s]
        tmp_skills = [skill_id_dict[t] for t in s.split(con_sym)]
        skill_embeddings_new[tmp_skill_id] = np.mean(skill_embeddings[tmp_skills], axis=0)

In [None]:
pro_embeddings.shape

In [None]:
skill_embeddings.shape

In [None]:
pro_skill_tensor

In [None]:
torch.sigmoid(model.pro_embeddings.weight @ model.skill_embeddings.weight.t())

In [None]:
pro_skill_tensor

In [None]:
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt

# 准备数据
data_tensor = model.pro_embeddings.weight

# 初始化t-SNE模型
tsne = TSNE(n_components=2, perplexity=30, learning_rate=100, n_iter=1000)

# 执行t-SNE降维
embedded_data = tsne.fit_transform(data_tensor.detach().numpy())

# 可视化结果
# plt.scatter(embedded_data[:, 0], embedded_data[:, 1])
for label in range(5):
    pids = pro_skill_tensor[:,label].nonzero().view(-1).detach().numpy()
#     print(pids)
    red =  np.random.randint(0, 256)
    green = np.random.randint(0, 256)
    blue = np.random.randint(0, 256)
    color = (red / 255, green / 255, blue / 255)
    class_data = embedded_data[pids]
    plt.scatter(class_data[:, 0], class_data[:, 1],c=color, label=str(label))
plt.show()