In [1]:
import pandas as pd

from src.data_loader.data_loader import BooksDataset
from src.models.mm_model import MmModel
from src.train import Trainer
import torch

In [2]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
# Instantiate the project's BooksDataset on the repository-relative data directory.
# NOTE(review): presumably this reads every sub-dataset requested later through
# dataset.get_dataset(...) from ../data/books - confirm in BooksDataset.
dataset = BooksDataset(data_dir="../data/books")

In [4]:
# Smoke-test the sampler with two batches of three and one batch of one.
# NOTE(review): no seed is set in this notebook, so the printed ids presumably
# change between runs - confirm how BooksDataset.sample draws its randomness.
for sample_size in (3, 3, 1):
    print(dataset.sample(sample_size))

([8783, 4640, 5551], [27163, 2587, 15276], [24726, 27634, 19277])
([12042, 6444, 5439], [29098, 7090, 7608], [13506, 18450, 23183])
([9506], [28669], [6998])


In [5]:
# Build the MmModel from the dataset's user/item counts and its named sub-datasets.
model = MmModel(
    n_users=dataset.n_users,
    n_items=dataset.n_items,
    adjacency_matrix=dataset.get_dataset("adjacency_matrix"),
    interactions=dataset.get_dataset("interactions"),
    image_embeddings_data=dataset.get_dataset("images"),
    text_embeddings_data=dataset.get_dataset("text"),
    embed_size=64,
    n_layers=3,
    user_profiles_data=dataset.get_dataset("user_profiles"),
    book_attributes_data=dataset.get_dataset("books_attributes"),
)
# Kept as the cell's last expression so the module summary is displayed.
model.to(device)

MmModel(
  (E0): Embedding(48752, 64)
  (text_feat): Linear(in_features=1024, out_features=64, bias=True)
  (text_feat_dropout): Dropout(p=0.2, inplace=False)
  (image_feat): Linear(in_features=1024, out_features=64, bias=True)
  (image_feat_dropout): Dropout(p=0.2, inplace=False)
  (user_profiles): Linear(in_features=768, out_features=64, bias=True)
  (user_profiles_dropout): Dropout(p=0.2, inplace=False)
  (book_attributes): Linear(in_features=768, out_features=64, bias=True)
  (book_attributes_dropout): Dropout(p=0.2, inplace=False)
)

In [6]:
# Bind the model and the dataset to a Trainer using a learning rate of 1e-3.
trainer = Trainer(
    model=model,
    dataset=dataset,
    lr=0.001,
)

In [7]:
# Train for 10 epochs with a batch size of 1024; the progress bars, metrics and
# loss of every epoch are printed below.
trainer.train(
    epochs=10,
    batch_size=1024,
)

100%|██████████| 15/15 [00:03<00:00,  4.09it/s]
100%|██████████| 1/1 [00:17<00:00, 17.68s/it]


{'precision': array([0.0095 , 0.00765, 0.00578]), 'recall': array([0.0190216 , 0.03165034, 0.05343636]), 'ndcg': array([0.03943236, 0.04938495, 0.06684249]), 'hit_ratio': array([0.084 , 0.1185, 0.1765]), 'auc': 0.0}
Epoch 0 Loss 0.7108594099680583 Time 21.528899669647217


100%|██████████| 15/15 [00:03<00:00,  4.37it/s]
100%|██████████| 1/1 [00:17<00:00, 17.95s/it]


{'precision': array([0.00965 , 0.007625, 0.00595 ]), 'recall': array([0.01968818, 0.03027852, 0.05332186]), 'ndcg': array([0.03996557, 0.04959582, 0.06877334]), 'hit_ratio': array([0.0835, 0.117 , 0.1845]), 'auc': 0.0}
Epoch 1 Loss 0.709807292620341 Time 21.576141357421875


100%|██████████| 15/15 [00:03<00:00,  4.18it/s]
100%|██████████| 1/1 [00:18<00:00, 18.08s/it]


{'precision': array([0.0095 , 0.00785, 0.00611]), 'recall': array([0.02096221, 0.03103344, 0.05501047]), 'ndcg': array([0.04036104, 0.05068025, 0.0706136 ]), 'hit_ratio': array([0.083, 0.121, 0.192]), 'auc': 0.0}
Epoch 2 Loss 0.7095495303471883 Time 21.83698058128357


100%|██████████| 15/15 [00:03<00:00,  4.32it/s]
100%|██████████| 1/1 [00:18<00:00, 18.18s/it]


{'precision': array([0.00955, 0.00815, 0.00614]), 'recall': array([0.0202523 , 0.03196716, 0.05391051]), 'ndcg': array([0.03971127, 0.05144756, 0.07012868]), 'hit_ratio': array([0.0835, 0.125 , 0.1895]), 'auc': 0.0}
Epoch 3 Loss 0.7093831578890483 Time 21.8381450176239


100%|██████████| 15/15 [00:03<00:00,  4.24it/s]
100%|██████████| 1/1 [00:18<00:00, 18.44s/it]


{'precision': array([0.00965 , 0.008425, 0.0063  ]), 'recall': array([0.02024744, 0.03132493, 0.05731786]), 'ndcg': array([0.03944977, 0.05178684, 0.07039691]), 'hit_ratio': array([0.083 , 0.13  , 0.1905]), 'auc': 0.0}
Epoch 4 Loss 0.7091845273971558 Time 22.151050806045532


100%|██████████| 15/15 [00:03<00:00,  4.13it/s]
100%|██████████| 1/1 [00:18<00:00, 18.57s/it]


{'precision': array([0.00995 , 0.008225, 0.00646 ]), 'recall': array([0.01855732, 0.0299559 , 0.05953419]), 'ndcg': array([0.03962305, 0.05080462, 0.07101474]), 'hit_ratio': array([0.0845, 0.126 , 0.1945]), 'auc': 0.0}
Epoch 5 Loss 0.7090351064999898 Time 22.44580864906311


100%|██████████| 15/15 [00:03<00:00,  4.35it/s]
100%|██████████| 1/1 [00:18<00:00, 18.39s/it]


{'precision': array([0.0101 , 0.00835, 0.00655]), 'recall': array([0.01851125, 0.03096051, 0.06027699]), 'ndcg': array([0.03917274, 0.05061981, 0.07119371]), 'hit_ratio': array([0.085 , 0.128 , 0.1975]), 'auc': 0.0}
Epoch 6 Loss 0.708818793296814 Time 22.010789394378662


100%|██████████| 15/15 [00:03<00:00,  4.10it/s]
100%|██████████| 1/1 [00:18<00:00, 18.47s/it]


{'precision': array([0.011  , 0.00865, 0.00663]), 'recall': array([0.02122097, 0.03223966, 0.06389418]), 'ndcg': array([0.0422908 , 0.05302782, 0.07407213]), 'hit_ratio': array([0.0915, 0.13  , 0.203 ]), 'auc': 0.0}
Epoch 7 Loss 0.7086076696713766 Time 22.313654899597168


100%|██████████| 15/15 [00:03<00:00,  4.25it/s]
100%|██████████| 1/1 [00:18<00:00, 18.48s/it]


{'precision': array([0.0108  , 0.008625, 0.00665 ]), 'recall': array([0.0208633 , 0.03280081, 0.06388346]), 'ndcg': array([0.04177265, 0.05256405, 0.07434835]), 'hit_ratio': array([0.0895, 0.1275, 0.205 ]), 'auc': 0.0}
Epoch 8 Loss 0.7083766102790833 Time 22.17875361442566


100%|██████████| 15/15 [00:03<00:00,  4.22it/s]
100%|██████████| 1/1 [00:18<00:00, 18.66s/it]

{'precision': array([0.011   , 0.008875, 0.00671 ]), 'recall': array([0.02137832, 0.0333308 , 0.06446632]), 'ndcg': array([0.04382614, 0.05497291, 0.0761081 ]), 'hit_ratio': array([0.091 , 0.1315, 0.2065]), 'auc': 0.0}
Epoch 9 Loss 0.7081830978393555 Time 22.452505111694336





In [2]:
import torch
import  pandas as pd

# Load the pickled object stored in train_matrix.pkl; it is later fed to csr_norm().
# NOTE(review): unpickling can execute arbitrary code - only load files from a trusted source.
data = pd.read_pickle("../data/books/train_matrix.pkl")

In [4]:
import numpy as np
import scipy.sparse as sp
def matrix_to_tensor(numpy_matrix):
    """Convert a matrix into a float32 ``torch`` sparse COO tensor.

    Args:
        numpy_matrix: Either a dense ``numpy`` array or a ``scipy.sparse``
            matrix (for example the value returned by ``csr_norm`` when it is
            given a sparse input).

    Returns:
        A ``torch.sparse_coo_tensor`` with the same shape as the input that
        stores only the non-zero entries, with dtype ``torch.float32``.
    """
    if sp.issparse(numpy_matrix):
        # Read the coordinates straight from the COO representation: fancy
        # indexing a scipy sparse matrix yields a 1 x nnz ``np.matrix`` whose
        # shape does not line up with the index array.
        coo = numpy_matrix.tocoo()
        keep = coo.data != 0  # drop explicitly stored zeros
        coords = np.vstack((coo.row[keep], coo.col[keep]))
        values = coo.data[keep]
        shape = coo.shape
    else:
        dense = np.asarray(numpy_matrix)
        nonzero = np.nonzero(dense)
        coords = np.vstack(nonzero)
        values = dense[nonzero]
        shape = dense.shape
    # torch requires int64 indices; scipy commonly stores int32 coordinates.
    return torch.sparse_coo_tensor(torch.from_numpy(coords.astype(np.int64)),
                                   torch.from_numpy(values),
                                   shape, dtype=torch.float32)


def csr_norm(csr_mat, mean_flag=False):  # TODO: check if this function exists in a python library
    """Scale a matrix by the inverse square roots of its row (and column) sums.

    Args:
        csr_mat: Matrix to normalise; it must support ``.sum(axis)`` and
            multiplication with ``scipy.sparse`` diagonal matrices.
        mean_flag: When equal to ``False`` (the default) both sides are scaled,
            ``D_r^-1/2 * A * D_c^-1/2``; otherwise only the rows are scaled,
            ``D_r^-1/2 * A``.

    Returns:
        The product of ``csr_mat`` with the diagonal scaling matrix/matrices.
    """

    def _inv_sqrt_diag(axis_sums):
        # The 1e-8 offset keeps all-zero rows/columns away from a division by zero.
        scale = np.power(np.array(axis_sums) + 1e-8, -0.5).flatten()
        scale[np.isinf(scale)] = 0.
        return sp.diags(scale)

    row_scale = _inv_sqrt_diag(csr_mat.sum(1))
    col_scale = _inv_sqrt_diag(csr_mat.sum(0))

    row_scaled = row_scale * csr_mat
    if mean_flag == False:  # equality (not truthiness) is kept on purpose
        return row_scaled * col_scale
    return row_scaled

In [5]:
# Normalise the loaded matrix and convert it to a torch sparse tensor.
# `data` is rebound to the result, so skip the conversion when this cell is
# re-run: csr_norm() cannot process the already converted tensor.
if not isinstance(data, torch.Tensor):
    data = matrix_to_tensor(csr_norm(data))

In [6]:
# Persist the normalised sparse tensor next to the source pickle
# (train_matrix.pkl -> train_matrix.pt).
torch.save(data, "../data/books/train_matrix.pt")

In [7]:
import json
# Load test.json. JSON text is UTF-8 by specification, so the encoding is set
# explicitly instead of relying on the platform's default locale encoding.
with open("../data/books/test.json", "r", encoding="utf-8") as f:
    test = json.load(f)

In [11]:
# Keep only the test entries of users 0..1999, re-keyed by integer user index
# (raises KeyError if any of those users is missing from `test`).
user_dict = {user_idx: test[str(user_idx)] for user_idx in range(2000)}
# WARNING: this overwrites the very test.json that was loaded into `test`, so
# the full test set is replaced on disk by the 2000-user subset.
with open("../data/books/test.json", "w") as f:
    json.dump(user_dict, f)

