In [1]:
import pandas as pd

from src.data_loader.data_loader import BooksDataset
from src.models.mm_model import MmModel
from src.train import Trainer
import torch
import random
import numpy as np

In [2]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def set_seed(seed):
    """Seed NumPy, Python's ``random`` module and PyTorch with the same value.

    Args:
        seed: Seed passed unchanged to every generator.
    """
    # Same call order as before: NumPy, random, torch (CPU), torch (all CUDA devices).
    seeders = (
        np.random.seed,
        random.seed,
        torch.manual_seed,
        torch.cuda.manual_seed_all,
    )
    for apply_seed in seeders:
        apply_seed(seed)
# Seed every random number generator with a fixed value for this session.
set_seed(2022)

In [3]:
# Load the dataset stored under ../data/netflix.
# NOTE(review): the class is named BooksDataset and later cells read/write ../data/books —
# confirm that ../data/netflix is the intended directory.
dataset = BooksDataset(data_dir="../data/netflix")

In [4]:
# Instantiate MmModel, passing every dataset component by keyword.
model = MmModel(
    n_users=dataset.n_users,
    n_items=dataset.n_items,
    adjacency_matrix=dataset.get_dataset("adjacency_matrix"),
    interactions=dataset.get_dataset("interactions"),
    interactions_t=dataset.get_dataset("interactions_T"),
    image_embeddings_data=dataset.get_dataset("images"),
    text_embeddings_data=dataset.get_dataset("text"),
    embed_size=64,
    n_layers=3,
    user_profiles_data=dataset.get_dataset("user_profiles"),
    book_attributes_data=dataset.get_dataset("books_attributes"),
)
# Move the model to the selected device; as the cell's last expression this also displays the module.
model.to(device)

MmModel(
  (E0): Embedding(30553, 64)
  (image_feat): Linear(in_features=512, out_features=64, bias=True)
  (text_feat_dropout): Dropout(p=0, inplace=False)
  (text_feat): Linear(in_features=768, out_features=64, bias=True)
  (image_feat_dropout): Dropout(p=0, inplace=False)
  (user_profiles): Linear(in_features=1536, out_features=64, bias=True)
  (user_profiles_dropout): Dropout(p=0, inplace=False)
  (book_attributes): Linear(in_features=1536, out_features=64, bias=True)
  (book_attributes_dropout): Dropout(p=0, inplace=False)
  (softmax): Softmax(dim=-1)
)

In [5]:
# Create the trainer for `model` on `dataset` with a learning rate of 1e-4.
trainer = Trainer(model=model,dataset=dataset,lr=0.0001)

2024-04-17 01:40:  PID: 15868
2024-04-17 01:40:  
Dataset             Shape                
----------------------------------------
images              (17366, 512)
text                (17366, 768)
user_profiles       (13187, 1536)
books_attributes    (17366, 1536)
train_dict          13187
test_dict           1845
val_dict            5000
adjacency_matrix    30553
interactions        13187
interactions_T      17366

Number of interactions: 68933
Sparsity: 99.97%


In [6]:
# Train for 10 epochs with a batch size of 1024.
# NOTE(review): the recorded output reports 'auc': 0.0 for every epoch and the run was
# stopped by a KeyboardInterrupt after epoch 7 — confirm the AUC metric is actually computed.
trainer.train(epochs=10,batch_size=1024)

100%|██████████| 68/68 [00:09<00:00,  6.80it/s]


Epoch 1/10, Loss=50.0883, Time=18.86
{'precision': array([0.00401084, 0.00303523, 0.00215718]), 'recall': array([0.0401084 , 0.06070461, 0.10785908]), 'ndcg': array([0.02106033, 0.02633168, 0.03556328]), 'hit_ratio': array([0.0401084 , 0.06070461, 0.10785908]), 'auc': 0.0}


100%|██████████| 68/68 [00:08<00:00,  7.65it/s]


Epoch 2/10, Loss=23.8793, Time=16.78
{'precision': array([0.00444444, 0.00303523, 0.00210298]), 'recall': array([0.04444444, 0.06070461, 0.10514905]), 'ndcg': array([0.02223423, 0.02636534, 0.0349263 ]), 'hit_ratio': array([0.04444444, 0.06070461, 0.10514905]), 'auc': 0.0}


100%|██████████| 68/68 [00:08<00:00,  7.72it/s]


Epoch 3/10, Loss=21.9356, Time=16.70
{'precision': array([0.00417344, 0.00319783, 0.00194038]), 'recall': array([0.04173442, 0.06395664, 0.09701897]), 'ndcg': array([0.0218173 , 0.02748676, 0.03405477]), 'hit_ratio': array([0.04173442, 0.06395664, 0.09701897]), 'auc': 0.0}


100%|██████████| 68/68 [00:08<00:00,  7.71it/s]


Epoch 4/10, Loss=20.7636, Time=16.84
{'precision': array([0.00444444, 0.00349593, 0.00228726]), 'recall': array([0.04444444, 0.0699187 , 0.11436314]), 'ndcg': array([0.02263804, 0.02901224, 0.03770953]), 'hit_ratio': array([0.04444444, 0.0699187 , 0.11436314]), 'auc': 0.0}


100%|██████████| 68/68 [00:08<00:00,  7.66it/s]


Epoch 5/10, Loss=19.8346, Time=16.70
{'precision': array([0.00401084, 0.00333333, 0.0020813 ]), 'recall': array([0.0401084 , 0.06666667, 0.10406504]), 'ndcg': array([0.02076542, 0.02740596, 0.03486152]), 'hit_ratio': array([0.0401084 , 0.06666667, 0.10406504]), 'auc': 0.0}


100%|██████████| 68/68 [00:08<00:00,  7.71it/s]


Epoch 6/10, Loss=20.1546, Time=16.89
{'precision': array([0.00422764, 0.00322493, 0.00243902]), 'recall': array([0.04227642, 0.06449864, 0.12195122]), 'ndcg': array([0.02198353, 0.02762923, 0.03896811]), 'hit_ratio': array([0.04227642, 0.06449864, 0.12195122]), 'auc': 0.0}


100%|██████████| 68/68 [00:08<00:00,  7.69it/s]


Epoch 7/10, Loss=19.7107, Time=16.89
{'precision': array([0.00368564, 0.00325203, 0.00209214]), 'recall': array([0.03685637, 0.06504065, 0.10460705]), 'ndcg': array([0.01881431, 0.02593801, 0.03359981]), 'hit_ratio': array([0.03685637, 0.06504065, 0.10460705]), 'auc': 0.0}


 60%|██████    | 41/68 [00:05<00:03,  7.71it/s]


KeyboardInterrupt: 

In [4]:
import numpy as np
import scipy.sparse as sp
def matrix_to_tensor(numpy_matrix):
    """Convert a dense NumPy array or a SciPy sparse matrix to a sparse COO tensor.

    Only entries different from zero are stored. Values are cast to
    ``torch.float32`` and the tensor keeps the shape of the input.

    Args:
        numpy_matrix: A ``numpy.ndarray`` or a ``scipy.sparse`` matrix.

    Returns:
        A tensor built with ``torch.sparse_coo_tensor`` holding float32 values.
    """
    if sp.issparse(numpy_matrix):
        # Read coordinates and values directly from the COO form. Dense-style
        # fancy indexing on a SciPy matrix returns a 2-D result, which does not
        # line up with the 1-D values expected by torch.sparse_coo_tensor.
        coo = numpy_matrix.tocoo()
        stored = coo.data != 0  # drop explicitly stored zeros, matching the dense path
        coords = (coo.row[stored], coo.col[stored])
        values = coo.data[stored]
        shape = coo.shape
    else:
        dense = np.asarray(numpy_matrix)
        coords = np.nonzero(dense)  # computed once, reused for indices and values
        values = dense[coords]
        shape = dense.shape
    # One row of indices per dimension, as int64 for torch.
    indices = torch.from_numpy(np.stack(coords).astype(np.int64))
    return torch.sparse_coo_tensor(indices, torch.from_numpy(values), shape, dtype=torch.float32)


def csr_norm(csr_mat, mean_flag=False):  # TODO: check if this function exists in a python library
    """Scale a sparse matrix by the inverse square roots of its row/column sums.

    With ``mean_flag`` false the result is ``D_r^-1/2 * A * D_c^-1/2``, where
    ``D_r`` and ``D_c`` are the diagonal matrices of row sums and column sums.
    With ``mean_flag`` true only the row scaling ``D_r^-1/2 * A`` is applied.

    NOTE(review): the ``mean_flag`` branch uses the exponent -0.5, not -1, so it
    is not a row average — confirm this is intended.

    Args:
        csr_mat: Matrix to normalize (SciPy sparse matrix).
        mean_flag: When true, skip the column scaling.

    Returns:
        The scaled matrix.
    """
    def _inv_sqrt_diag(axis):
        """Diagonal matrix of (sum along ``axis`` + 1e-8) ** -0.5."""
        sums = np.array(csr_mat.sum(axis))
        # The epsilon keeps an all-zero row/column from dividing by zero.
        scale = np.power(sums + 1e-8, -0.5).flatten()
        scale[np.isinf(scale)] = 0.
        return sp.diags(scale)

    row_scaled = _inv_sqrt_diag(1) * csr_mat
    if mean_flag:
        # Column sums are not needed here, so they are not computed.
        return row_scaled
    return row_scaled * _inv_sqrt_diag(0)

In [5]:
# Normalize `data` with csr_norm and convert the result to a sparse torch tensor.
# NOTE(review): `data` is overwritten with its own transformation, so re-running this cell
# feeds the converted tensor back into csr_norm. `data` is also not defined in any visible
# earlier cell — confirm where it is loaded.
data = matrix_to_tensor(csr_norm(data))

In [6]:
# Save the tensor held in `data` to ../data/books/train_matrix.pt.
torch.save(data, "../data/books/train_matrix.pt")

In [7]:
import json
# Read ../data/books/test.json into `test`.
with open("../data/books/test.json", "r") as f:
    test = json.load(f)

In [11]:
# Keep only the entries of `test` for user ids 0..1999, keyed by the integer id.
user_dict = {user_id: test[str(user_id)] for user_id in range(2000)}
# Write the subset to ../data/books/test.json.
# NOTE(review): this is the same path `test` was loaded from, so the original file is replaced.
with open("../data/books/test.json", "w") as f:
    json.dump(user_dict, f)



In [49]:
import json
# Read ../data/books/augmented_interactions_dict.json into `data`.
# NOTE(review): this rebinds `data`, which an earlier cell uses for the normalized tensor.
with open("../data/books/augmented_interactions_dict.json", "r") as f:
    data = json.load(f)

In [51]:
# Collect the entries of `data` whose first or second list element is not a plain int.
# `mauvais` ("bad" in French) only gathers the offending entries; the conversion to int
# and the save mentioned in the original note are not performed in this cell.
mauvais = {
    key: values
    for key, values in data.items()
    if not (type(values[0]) == int and type(values[1]) == int)
}

    

In [11]:
# Scratch check: adding a length-3 vector in place broadcasts it across every row.
a = torch.arange(1, 10).reshape(3, 3)
a.add_(torch.tensor([1, 2, 3]))
a

tensor([[ 2,  4,  6],
        [ 5,  7,  9],
        [ 8, 10, 12]])