In [2]:
import pandas as pd

from src.data_loader.data_loader import BooksDataset
from src.models.mm_model import MmModel
from src.train import Trainer
import torch

In [3]:
# Select the compute device: CUDA when torch reports it as available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [4]:
# Build the project's BooksDataset from the files under ../data/books.
# The path is relative, so it resolves against the notebook's working directory.
dataset = BooksDataset(data_dir="../data/books")

In [5]:
# Instantiate MmModel. Every data argument is looked up by name on `dataset`;
# embed_size and n_layers are fixed here (128 and 3).
model = MmModel(
    n_users=dataset.n_users,
    n_items=dataset.n_items,
    adjacency_matrix=dataset.get_dataset("adjacency_matrix"),
    interactions=dataset.get_dataset("interactions"),
    image_embeddings_data=dataset.get_dataset("images"),
    text_embeddings_data=dataset.get_dataset("text"),
    embed_size=128,
    n_layers=3,
    user_profiles_data=dataset.get_dataset("user_profiles"),
    book_attributes_data=dataset.get_dataset("books_attributes"),
)
# Move the model to the selected device. Kept as the cell's last expression so
# the notebook displays the returned module's repr.
model.to(device)

MmModel(
  (E0): Embedding(48752, 128)
  (text_feat): Linear(in_features=1024, out_features=128, bias=True)
  (text_feat_dropout): Dropout(p=0.2, inplace=False)
  (image_feat): Linear(in_features=1024, out_features=128, bias=True)
  (image_feat_dropout): Dropout(p=0.2, inplace=False)
  (user_profiles): Linear(in_features=768, out_features=128, bias=True)
  (user_profiles_dropout): Dropout(p=0.2, inplace=False)
  (book_attributes): Linear(in_features=768, out_features=128, bias=True)
  (book_attributes_dropout): Dropout(p=0.2, inplace=False)
)

In [6]:
# Create the Trainer for this model/dataset pair.
# NOTE(review): lr=0.01 is presumably the optimizer learning rate — confirm in src.train.
trainer = Trainer(model=model,dataset=dataset,lr=0.01)

In [7]:
# Run training with epochs=10 and batch_size=1024.
# NOTE(review): the output saved with this cell ends in a KeyboardInterrupt, so the
# recorded metrics come from a run that was stopped before all epochs completed.
trainer.train(epochs=10,batch_size=1024)

100%|██████████| 15/15 [00:06<00:00,  2.36it/s]
100%|██████████| 1/1 [00:21<00:00, 21.28s/it]


{'precision': array([0.0031 , 0.0024 , 0.00171]), 'recall': array([0.00307618, 0.00589307, 0.01184239]), 'ndcg': array([0.01337923, 0.01705062, 0.02296211]), 'hit_ratio': array([0.027 , 0.04  , 0.0645]), 'auc': 0.0}
Epoch 0 Loss 448.20237884521487 Time 27.889414072036743


100%|██████████| 15/15 [00:05<00:00,  2.78it/s]
  0%|          | 0/1 [00:21<?, ?it/s]


KeyboardInterrupt: 

In [2]:
import torch
import  pandas as pd

# Load the pickled training matrix.
# NOTE(review): presumably a SciPy sparse matrix, since it is later passed to csr_norm — confirm.
# NOTE(review): unpickling can execute arbitrary code; only load pickles from a trusted source.
data = pd.read_pickle("../data/books/train_matrix.pkl")

In [4]:
import numpy as np
import scipy.sparse as sp
def matrix_to_tensor(numpy_matrix):
    """Convert a dense NumPy array or a SciPy sparse matrix to a float32 sparse COO tensor.

    Only the non-zero entries of the input are stored in the result.

    Args:
        numpy_matrix: A dense array-like (converted with ``np.asarray``) or any
            SciPy sparse matrix (converted with ``.tocoo()``).

    Returns:
        A ``torch.sparse_coo_tensor`` with the same shape as the input and
        dtype ``torch.float32``. The tensor is not explicitly coalesced.
    """
    if sp.issparse(numpy_matrix):
        # Read coordinates and values straight from the COO representation.
        # Fancy-indexing a sparse matrix would yield a 2-D np.matrix of values,
        # which torch cannot use as the value vector.
        coo = numpy_matrix.tocoo()
        keep = coo.data != 0  # drop explicitly stored zeros
        coords = np.vstack((coo.row[keep], coo.col[keep]))
        values = coo.data[keep]
        shape = coo.shape
    else:
        dense = np.asarray(numpy_matrix)
        nonzero = np.nonzero(dense)  # computed once, reused for coordinates and values
        coords = np.vstack(nonzero)
        values = dense[nonzero]
        shape = dense.shape

    # torch requires int64 indices; SciPy commonly stores int32 index arrays.
    indices = torch.from_numpy(coords.astype(np.int64))
    return torch.sparse_coo_tensor(indices,
                                   torch.from_numpy(values),
                                   shape, dtype=torch.float32)


def csr_norm(csr_mat, mean_flag=False):  # TODO: check if this function exists in a python library
    """Scale a matrix by the inverse square root of its row (and column) sums.

    Each row ``i`` is multiplied by ``(rowsum_i + 1e-8) ** -0.5``. When
    ``mean_flag == False`` each column ``j`` is additionally multiplied by
    ``(colsum_j + 1e-8) ** -0.5``; otherwise only the row scaling is applied.

    Args:
        csr_mat: The matrix to normalize. Presumably a SciPy sparse matrix (the
            name suggests CSR) — ``*`` must act as a matrix product with the
            diagonal scaling matrices.
        mean_flag: Selects row-only scaling when it does not compare equal to False.

    Returns:
        The product of the diagonal scaling matrix/matrices with ``csr_mat``.
    """

    def _inv_sqrt_diag(axis):
        # Sum along `axis`, raise to the power -0.5 (offset by 1e-8 to avoid
        # dividing by zero) and put the factors on a diagonal matrix.
        totals = np.array(csr_mat.sum(axis))
        factors = np.power(totals + 1e-8, -0.5).flatten()
        factors[np.isinf(factors)] = 0.  # safeguard against infinite factors
        return sp.diags(factors)

    row_scale = _inv_sqrt_diag(1)
    col_scale = _inv_sqrt_diag(0)

    row_scaled = row_scale * csr_mat
    # Equality, not truthiness, is used on purpose: a value such as None does
    # not equal False and therefore selects the row-only result.
    if mean_flag == False:
        return row_scaled * col_scale
    return row_scaled

In [5]:
# Normalize the loaded matrix with csr_norm, then convert the result to a sparse torch tensor.
# NOTE(review): `data` is rebound from the matrix to the tensor here, so this cell cannot be
# re-run on its own — reload the pickle first if it has to be executed again.
data = matrix_to_tensor(csr_norm(data))

In [6]:
# Serialize the object currently bound to `data` with torch.save, writing it next to the
# source pickle as train_matrix.pt.
# NOTE(review): `data` is expected to be the tensor built in the previous cell — run that cell first.
torch.save(data, "../data/books/train_matrix.pt")

In [7]:
import json
# Load the test split from JSON. The encoding is passed explicitly because JSON text is
# UTF-8, whereas open() would otherwise fall back to the platform's locale encoding.
with open("../data/books/test.json", "r", encoding="utf-8") as f:
    test = json.load(f)

In [11]:
# Reduce the test split to the first N_TEST_USERS users (ids 0 .. N_TEST_USERS - 1) and
# write it back to test.json.
#
# The write replaces the file the split was loaded from, so a one-time backup of the
# on-disk file is taken first. If this cell was already run before the backup existed,
# the backup only holds the already-reduced split.
import os
import shutil

TEST_PATH = "../data/books/test.json"
TEST_BACKUP_PATH = "../data/books/test_full.json"
N_TEST_USERS = 2000

if not os.path.exists(TEST_BACKUP_PATH):
    shutil.copyfile(TEST_PATH, TEST_BACKUP_PATH)

# Keys in `test` are user ids as strings; a missing id raises KeyError rather than
# being silently skipped.
user_dict = {i: test[str(i)] for i in range(N_TEST_USERS)}

# save the user dict (json.dump converts the integer keys back to strings)
with open(TEST_PATH, "w") as f:
    json.dump(user_dict, f)

