In [1]:
import pandas as pd

from src.data_loader.data_loader import BooksDataset
from src.models.mm_model import MmModel
from src.train import Trainer
import torch

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
# Load the dataset stored under ../data/netflix.
# NOTE(review): the loader class is called BooksDataset but the directory is "netflix",
# while later cells read and write files under ../data/books - confirm which dataset
# this notebook is meant to use.
dataset = BooksDataset(data_dir="../data/netflix")

In [4]:
# Instantiate MmModel from the dataset's user/item counts and its named tables.
# Keyword arguments are kept in their original order so the get_dataset() calls
# run in the same sequence as before.
model = MmModel(
    n_users=dataset.n_users,
    n_items=dataset.n_items,
    adjacency_matrix=dataset.get_dataset("adjacency_matrix"),
    interactions=dataset.get_dataset("interactions"),
    image_embeddings_data=dataset.get_dataset("images"),
    text_embeddings_data=dataset.get_dataset("text"),
    embed_size=128,
    n_layers=3,
    user_profiles_data=dataset.get_dataset("user_profiles"),
    book_attributes_data=dataset.get_dataset("books_attributes"),
)
# Move the model to the selected device; as the last expression of the cell,
# the returned module is also echoed as the cell output.
model.to(device)

MmModel(
  (E0): Embedding(30553, 128)
  (image_feat): Linear(in_features=512, out_features=128, bias=True)
  (text_feat_dropout): Dropout(p=0.1, inplace=False)
  (text_feat): Linear(in_features=768, out_features=128, bias=True)
  (image_feat_dropout): Dropout(p=0.1, inplace=False)
  (user_profiles): Linear(in_features=1536, out_features=128, bias=True)
  (user_profiles_dropout): Dropout(p=0.1, inplace=False)
  (book_attributes): Linear(in_features=1536, out_features=128, bias=True)
  (book_attributes_dropout): Dropout(p=0.1, inplace=False)
)

In [5]:
# Build the Trainer for the model and dataset created in the earlier cells.
# NOTE(review): `lr` is presumably the optimizer learning rate; 0.1 is hard-coded
# here - confirm it is the intended value for this model.
trainer = Trainer(model=model,dataset=dataset,lr=0.1)

2024-04-11 23:06:  PID: 7388
2024-04-11 23:06:  
Dataset             Shape                
----------------------------------------
images              (17366, 512)
text                (17366, 768)
user_profiles       (13187, 1536)
books_attributes    (17366, 1536)
train_dict          13187
test_dict           5000
val_dict            5000
adjacency_matrix    30553
interactions        13187

Number of interactions: 68933
Sparsity: 99.97%


In [None]:
# Start training with epochs=10 and batch_size=1024.
# NOTE(review): the recorded output logs only epochs 0-2 and ends partway through
# the fourth pass, and it reports `auc: 0.0` after every logged epoch - confirm the
# run was interrupted on purpose and that the AUC metric is computed correctly.
trainer.train(epochs=10,batch_size=1024)

100%|██████████| 13/13 [00:02<00:00,  4.91it/s]
100%|██████████| 3/3 [00:24<00:00,  8.32s/it]


2024-04-11 23:06:  Epoch 0 Loss 5604.622565049392 Time 27.73566961288452
2024-04-11 23:06:  
precision: [0.00046  0.00043  0.000432]
recall: [0.0046 0.0086 0.0216]
ndcg: [0.00214707 0.00313305 0.00569635]
hit_ratio: [0.0046 0.0086 0.0216]
auc: 0.0


100%|██████████| 13/13 [00:02<00:00,  5.81it/s]
100%|██████████| 3/3 [00:24<00:00,  8.13s/it]


2024-04-11 23:06:  Epoch 1 Loss 749.1023301344651 Time 26.76872420310974
2024-04-11 23:06:  
precision: [0.00056  0.00055  0.000416]
recall: [0.0056 0.011  0.0208]
ndcg: [0.00205755 0.00342935 0.00534125]
hit_ratio: [0.0056 0.011  0.0208]
auc: 0.0


100%|██████████| 13/13 [00:02<00:00,  6.12it/s]
100%|██████████| 3/3 [00:24<00:00,  8.19s/it]


2024-04-11 23:07:  Epoch 2 Loss 172.7341079711914 Time 26.810803651809692
2024-04-11 23:07:  
precision: [0.00058  0.00056  0.000484]
recall: [0.0058 0.0112 0.0242]
ndcg: [0.0029919  0.00433758 0.00692855]
hit_ratio: [0.0058 0.0112 0.0242]
auc: 0.0


100%|██████████| 13/13 [00:02<00:00,  5.68it/s]
  0%|          | 0/3 [00:00<?, ?it/s]

In [4]:
import numpy as np
import scipy.sparse as sp
def matrix_to_tensor(numpy_matrix):
    """Convert a dense array or a SciPy sparse matrix into a float32 sparse COO tensor.

    Args:
        numpy_matrix: either a dense NumPy array, or a SciPy sparse matrix
            (any object exposing ``tocoo()``).

    Returns:
        torch.Tensor: a sparse COO tensor with the same shape as the input,
        dtype ``torch.float32``, storing only the non-zero entries.
    """
    if hasattr(numpy_matrix, "tocoo"):
        # SciPy sparse input: read coordinates and values from the COO form.
        # Fancy-indexing a sparse matrix yields a 2-D result, which does not
        # line up with the (2, nnz) index array expected by torch.
        coo = numpy_matrix.tocoo()
        keep = coo.data != 0  # drop explicitly stored zeros
        indices = np.vstack((coo.row[keep], coo.col[keep]))
        values = coo.data[keep]
        shape = coo.shape
    else:
        dense = np.asarray(numpy_matrix)
        nonzero = np.nonzero(dense)  # one index array per dimension
        indices = np.vstack(nonzero)
        values = dense[nonzero]
        shape = dense.shape

    # torch requires int64 indices; SciPy commonly stores int32 coordinates.
    sparse_tensor = torch.sparse_coo_tensor(torch.from_numpy(indices.astype(np.int64)),
                                            torch.from_numpy(np.ascontiguousarray(values)),
                                            shape, dtype=torch.float32)
    return sparse_tensor


def csr_norm(csr_mat, mean_flag=False):  # TODO: check if this function exists in a python library
    """Scale a matrix by the inverse square roots of its row and column sums.

    Each row is multiplied by ``(row_sum + 1e-8) ** -0.5``. Unless ``mean_flag``
    is set, each column is additionally multiplied by
    ``(col_sum + 1e-8) ** -0.5``.

    Args:
        csr_mat: matrix to normalize; it must provide ``sum(axis)`` and support
            ``*`` with SciPy diagonal matrices.
        mean_flag: when equal to ``False`` (the default) both row and column
            scaling are applied; otherwise only the row scaling is applied.

    Returns:
        The scaled matrix.
    """
    def _inv_sqrt_diag(axis):
        # Sum along `axis`, offset by 1e-8, raise to the power -0.5, and zero
        # out any infinite entry before placing the result on a diagonal.
        totals = np.array(csr_mat.sum(axis))
        weights = np.power(totals + 1e-8, -0.5).flatten()
        weights[np.isinf(weights)] = 0.
        return sp.diags(weights)

    row_scale = _inv_sqrt_diag(1)
    col_scale = _inv_sqrt_diag(0)
    row_scaled = row_scale * csr_mat

    # Compared with `== False` rather than by truthiness, so values such as
    # None keep selecting the row-only branch.
    if mean_flag == False:
        return row_scaled * col_scale
    return row_scaled

In [5]:
# Normalize `data` with csr_norm and convert the result with matrix_to_tensor.
# NOTE(review): no cell visible above assigns `data` before this point, and this
# cell rebinds `data` to its own output, so a fresh "Restart & Run All" (or running
# the cell twice) would likely not reproduce it - confirm where `data` is loaded.
data = matrix_to_tensor(csr_norm(data))

In [6]:
# Save the tensor currently bound to `data` to ../data/books/train_matrix.pt.
torch.save(data, "../data/books/train_matrix.pt")

In [7]:
import json
# Load ../data/books/test.json into `test`; the next cell looks entries up by
# string keys. JSON text is UTF-8, so the encoding is pinned instead of relying
# on the platform's locale default.
with open("../data/books/test.json", "r", encoding="utf-8") as f:
    test = json.load(f)

In [11]:
# Keep only the entries for indices 0..1999; `test` is keyed by the index as a string.
user_dict = {user_id: test[str(user_id)] for user_id in range(2000)}

# Write the subset to ../data/books/test.json (json.dump stores the int keys as strings).
# NOTE(review): this overwrites the same file `test` was loaded from, so the full
# original is lost after this cell runs - confirm that is intended.
with open("../data/books/test.json", "w") as f:
    json.dump(user_dict, f)



In [49]:
import json
# Load ../data/books/augmented_interactions_dict.json into `data`. JSON text is
# UTF-8, so the encoding is pinned instead of relying on the locale default.
# NOTE(review): `data` is also the name an earlier cell uses for the training
# tensor; this rebinds it to the loaded JSON object.
with open("../data/books/augmented_interactions_dict.json", "r", encoding="utf-8") as f:
    data = json.load(f)

In [51]:
# Collect into `mauvais` ("bad") every entry of `data` whose first or second
# element is not exactly of type int. Nothing is converted or saved here.
mauvais = {}
for key, pair in data.items():
    # The second element is only inspected when the first one is an int,
    # matching the short-circuit of the combined `or` test.
    if type(pair[0]) == int and type(pair[1]) == int:
        continue
    mauvais[key] = pair

    