In [1]:
import pandas as pd

from src.data_loader.data_loader import BooksDataset
from src.models.mm_model import MmModel
from src.train import Trainer
import torch

In [2]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
# Build the books dataset wrapper from the data directory (relative path).
dataset = BooksDataset(data_dir="../data/books")

In [4]:
# Instantiate the multimodal model from the dataset's sizes and auxiliary
# tables. Keyword order is kept as-is so the dataset lookups run in the same
# sequence.
model = MmModel(
    n_users=dataset.n_users,
    n_items=dataset.n_items,
    adjacency_matrix=dataset.get_dataset("adjacency_matrix"),
    interactions=dataset.get_dataset("interactions"),
    image_embeddings_data=dataset.get_dataset("images"),
    text_embeddings_data=dataset.get_dataset("text"),
    embed_size=128,
    n_layers=3,
    user_profiles_data=dataset.get_dataset("user_profiles"),
    book_attributes_data=dataset.get_dataset("books_attributes"),
)
# Move the model to the selected device; as the last expression of the cell,
# the returned module is displayed.
model.to(device)

MmModel(
  (E0): Embedding(48752, 128)
  (text_feat): Linear(in_features=1024, out_features=128, bias=True)
  (text_feat_dropout): Dropout(p=0.1, inplace=False)
  (image_feat): Linear(in_features=1024, out_features=128, bias=True)
  (image_feat_dropout): Dropout(p=0.1, inplace=False)
  (user_profiles): Linear(in_features=768, out_features=128, bias=True)
  (user_profiles_dropout): Dropout(p=0.1, inplace=False)
  (book_attributes): Linear(in_features=768, out_features=128, bias=True)
  (book_attributes_dropout): Dropout(p=0.1, inplace=False)
)

In [5]:
# Wire the model and dataset into the project's Trainer with lr=0.005.
trainer = Trainer(
    model=model,
    dataset=dataset,
    lr=0.005,
)

In [6]:
# Train for 10 epochs with batch_size=1024. Each epoch prints a metrics dict
# followed by an "Epoch <n> Loss <x> Time <t>" line.
trainer.train(
    epochs=10,
    batch_size=1024,
)

100%|██████████| 15/15 [00:05<00:00,  2.87it/s]
100%|██████████| 1/1 [00:17<00:00, 17.38s/it]


{'precision': array([0.0071  , 0.005725, 0.00448 ]), 'recall': array([0.01007669, 0.01545939, 0.02780003]), 'ndcg': array([0.02703461, 0.03381015, 0.04744178]), 'hit_ratio': array([0.0575, 0.0825, 0.1275]), 'auc': 0.0}
Epoch 0 Loss 88.86294682820638 Time 22.830434799194336


100%|██████████| 15/15 [00:04<00:00,  3.04it/s]
100%|██████████| 1/1 [00:18<00:00, 18.85s/it]


{'precision': array([0.00985, 0.0079 , 0.00544]), 'recall': array([0.0163394 , 0.02725497, 0.04636342]), 'ndcg': array([0.03905457, 0.04905186, 0.06384936]), 'hit_ratio': array([0.0815, 0.1165, 0.165 ]), 'auc': 0.0}
Epoch 1 Loss 9.569678656260173 Time 23.99320340156555


100%|██████████| 15/15 [00:05<00:00,  2.93it/s]
100%|██████████| 1/1 [00:17<00:00, 17.67s/it]


{'precision': array([0.0087 , 0.0074 , 0.00558]), 'recall': array([0.01529628, 0.02542705, 0.04753287]), 'ndcg': array([0.03460715, 0.04484972, 0.06210806]), 'hit_ratio': array([0.074 , 0.1095, 0.169 ]), 'auc': 0.0}
Epoch 2 Loss 4.300299978256225 Time 22.993314266204834


100%|██████████| 15/15 [00:05<00:00,  2.87it/s]
100%|██████████| 1/1 [00:18<00:00, 18.09s/it]


{'precision': array([0.0092  , 0.007475, 0.00552 ]), 'recall': array([0.01662056, 0.02772745, 0.05178429]), 'ndcg': array([0.03466605, 0.04483455, 0.061672  ]), 'hit_ratio': array([0.078, 0.115, 0.172]), 'auc': 0.0}
Epoch 3 Loss 4.410169967015585 Time 23.544707536697388


100%|██████████| 15/15 [00:04<00:00,  3.00it/s]
100%|██████████| 1/1 [00:18<00:00, 18.26s/it]


{'precision': array([0.01025, 0.0077 , 0.00577]), 'recall': array([0.01931972, 0.03041069, 0.0554625 ]), 'ndcg': array([0.03968843, 0.04873389, 0.06717434]), 'hit_ratio': array([0.085 , 0.1155, 0.186 ]), 'auc': 0.0}
Epoch 4 Loss 3.9063353379567465 Time 23.46857523918152


100%|██████████| 15/15 [00:05<00:00,  2.99it/s]
100%|██████████| 1/1 [00:18<00:00, 18.30s/it]


{'precision': array([0.01065 , 0.009025, 0.00667 ]), 'recall': array([0.0218447 , 0.03447718, 0.06362671]), 'ndcg': array([0.03794911, 0.05009674, 0.06983708]), 'hit_ratio': array([0.0895, 0.132 , 0.1995]), 'auc': 0.0}
Epoch 5 Loss 3.6012551466623943 Time 23.54867649078369


100%|██████████| 15/15 [00:05<00:00,  2.92it/s]
100%|██████████| 1/1 [00:19<00:00, 19.05s/it]


{'precision': array([0.0117  , 0.009925, 0.00702 ]), 'recall': array([0.02275981, 0.0377526 , 0.06468567]), 'ndcg': array([0.0410974 , 0.05472485, 0.07447643]), 'hit_ratio': array([0.095 , 0.1415, 0.2095]), 'auc': 0.0}
Epoch 6 Loss 3.6178314367930096 Time 24.392152070999146


100%|██████████| 15/15 [00:05<00:00,  2.98it/s]
100%|██████████| 1/1 [00:17<00:00, 17.99s/it]


{'precision': array([0.0122  , 0.010025, 0.007   ]), 'recall': array([0.02311014, 0.0374351 , 0.06730164]), 'ndcg': array([0.04426012, 0.05737085, 0.07724212]), 'hit_ratio': array([0.1005, 0.1435, 0.213 ]), 'auc': 0.0}
Epoch 7 Loss 4.2082620620727536 Time 23.256929636001587


100%|██████████| 15/15 [00:05<00:00,  2.95it/s]
100%|██████████| 1/1 [00:18<00:00, 18.71s/it]


{'precision': array([0.01155 , 0.010225, 0.00717 ]), 'recall': array([0.02094473, 0.03906412, 0.06877693]), 'ndcg': array([0.04124796, 0.05678427, 0.07681142]), 'hit_ratio': array([0.093, 0.148, 0.215]), 'auc': 0.0}
Epoch 8 Loss 3.6105868180592853 Time 23.997808933258057


100%|██████████| 15/15 [00:04<00:00,  3.01it/s]
100%|██████████| 1/1 [00:18<00:00, 18.37s/it]

{'precision': array([0.01145, 0.0102 , 0.00737]), 'recall': array([0.01879762, 0.0398198 , 0.07323002]), 'ndcg': array([0.03998525, 0.05539494, 0.07710433]), 'hit_ratio': array([0.092 , 0.1485, 0.2225]), 'auc': 0.0}
Epoch 9 Loss 4.678288062413533 Time 23.573596715927124





In [2]:
import torch
import  pandas as pd

# Load the pickled training matrix.
# NOTE(review): unpickling can execute arbitrary code — only load files from a trusted source.
# Presumably a SciPy sparse (or dense NumPy) matrix, given it is fed to csr_norm below — TODO confirm.
data = pd.read_pickle("../data/books/train_matrix.pkl")

In [4]:
import numpy as np
import scipy.sparse as sp
def matrix_to_tensor(numpy_matrix):
    """Convert a matrix into a float32 ``torch.sparse_coo_tensor``.

    Args:
        numpy_matrix: A dense ``numpy.ndarray`` or a SciPy sparse matrix
            (for example, what ``csr_norm`` returns for sparse input).

    Returns:
        A sparse COO tensor of dtype ``torch.float32`` with the same shape
        as the input, holding only the entries that are not equal to zero.
    """
    if sp.issparse(numpy_matrix):
        # Read the coordinates straight from the COO representation instead
        # of using dense-style fancy indexing, which on a SciPy sparse matrix
        # yields a 2-D matrix of values rather than a flat array.
        coo = numpy_matrix.tocoo()
        keep = coo.data != 0  # drop explicitly stored zeros, like the dense path
        indices = np.vstack((coo.row[keep], coo.col[keep])).astype(np.int64)
        values = coo.data[keep]
        shape = coo.shape
    else:
        dense = np.asarray(numpy_matrix)
        nonzero = np.nonzero(dense)  # computed once, reused for indices and values
        indices = np.vstack(nonzero).astype(np.int64)
        values = dense[nonzero]
        shape = dense.shape
    return torch.sparse_coo_tensor(torch.from_numpy(indices),
                                   torch.from_numpy(values),
                                   shape, dtype=torch.float32)


def csr_norm(csr_mat, mean_flag=False):  # TODO: check if this function exists in a python library
    """Scale a matrix by the inverse square roots of its row/column sums.

    Args:
        csr_mat: Matrix to normalize. It must support ``.sum(axis)`` and
            multiplication with a SciPy sparse diagonal matrix.
        mean_flag: When falsy (the default) both sides are scaled:
            ``diag(r)^-1/2 * A * diag(c)^-1/2``. When truthy only the rows
            are scaled: ``diag(r)^-1/2 * A``.
            NOTE(review): the row-only branch still uses the -1/2 power, not
            -1, so it is not a row mean despite the flag name — confirm intent.

    Returns:
        The scaled matrix, as produced by the sparse-diagonal products.
    """
    def _inv_sqrt_diag(sums):
        """Return diag((sums + 1e-8) ** -0.5) as a sparse diagonal matrix."""
        # The 1e-8 offset keeps all-zero rows/columns away from a division
        # by zero; the isinf reset stays as a safety net.
        scale = np.power(np.array(sums) + 1e-8, -0.5).flatten()
        scale[np.isinf(scale)] = 0.
        return sp.diags(scale)

    row_scale = _inv_sqrt_diag(csr_mat.sum(1))
    if mean_flag:
        # Row-only scaling: the column sums are not needed, so skip them.
        return row_scale * csr_mat
    col_scale = _inv_sqrt_diag(csr_mat.sum(0))
    return row_scale * csr_mat * col_scale

In [5]:
# Normalize the loaded matrix, then convert it to a torch sparse tensor.
# `data` is rebound to the result, so this cell must not be re-run without
# reloading the pickle first.
normalized_matrix = csr_norm(data)
data = matrix_to_tensor(normalized_matrix)

In [6]:
# Persist the tensor held in `data` next to the source pickle, as a .pt file.
torch.save(data, "../data/books/train_matrix.pt")

In [7]:
import json
# Load the test split. JSON is UTF-8 by specification, so the encoding is
# given explicitly instead of relying on the platform's locale default.
with open("../data/books/test.json", "r", encoding="utf-8") as f:
    test = json.load(f)

In [11]:
# Keep only the test entries for user ids 0..1999 (string keys in `test`);
# a missing id raises KeyError.
user_dict = {user_id: test[str(user_id)] for user_id in range(2000)}

# Write the subset back to disk.
# NOTE(review): this overwrites the same file `test` was loaded from, so the
# full test split is no longer on disk after this cell runs.
with open("../data/books/test.json", "w") as f:
    json.dump(user_dict, f)



In [49]:
import json
# Load the augmented interactions dict. JSON is UTF-8 by specification, so
# the encoding is given explicitly instead of relying on the locale default.
with open("../data/books/augmented_interactions_dict.json", "r", encoding="utf-8") as f:
    data = json.load(f)

In [51]:
# Collect the entries whose first or second list element is not a plain int
# ("mauvais" = "bad"). Nothing is converted or saved in this cell.
mauvais = {
    key: value
    for key, value in data.items()
    if type(value[0]) is not int or type(value[1]) is not int
}
    