In [1]:
import pandas as pd

from src.data_loader.data_loader import BooksDataset
from src.models.mm_model import MmModel
from src.train import Trainer
import torch

In [2]:
# Use the GPU when PyTorch reports a CUDA device; otherwise stay on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
# Instantiate the project's BooksDataset, pointing it at ../data/books.
dataset = BooksDataset(
    data_dir="../data/books",
)

In [4]:
# Build MmModel from the sizes and the named datasets exposed by `dataset`.
# One keyword per line so each input and hyper-parameter is easy to spot.
model = MmModel(
    n_users=dataset.n_users,
    n_items=dataset.n_items,
    adjacency_matrix=dataset.get_dataset("adjacency_matrix"),
    interactions=dataset.get_dataset("interactions"),
    image_embeddings_data=dataset.get_dataset("images"),
    text_embeddings_data=dataset.get_dataset("text"),
    embed_size=128,
    n_layers=3,
    user_profiles_data=dataset.get_dataset("user_profiles"),
    book_attributes_data=dataset.get_dataset("books_attributes"),
)
# Move the model to the selected device. As the last expression of the cell,
# the returned module is also what the notebook displays.
model.to(device)

MmModel(
  (E0): Embedding(48752, 128)
  (text_feat): Linear(in_features=1024, out_features=128, bias=True)
  (text_feat_dropout): Dropout(p=0.2, inplace=False)
  (image_feat): Linear(in_features=1024, out_features=128, bias=True)
  (image_feat_dropout): Dropout(p=0.2, inplace=False)
  (user_profiles): Linear(in_features=768, out_features=128, bias=True)
  (user_profiles_dropout): Dropout(p=0.2, inplace=False)
  (book_attributes): Linear(in_features=768, out_features=128, bias=True)
  (book_attributes_dropout): Dropout(p=0.2, inplace=False)
)

In [5]:
# Create the Trainer for this model/dataset pair (lr=0.01).
trainer = Trainer(
    model=model,
    dataset=dataset,
    lr=0.01,
)

In [6]:
# Train for 10 epochs with batch_size=1024; metrics and loss are printed per epoch.
trainer.train(epochs=10, batch_size=1024)

100%|██████████| 15/15 [00:05<00:00,  2.66it/s]
100%|██████████| 1/1 [00:21<00:00, 21.04s/it]


{'precision': array([0.00245 , 0.001675, 0.00123 ]), 'recall': array([0.00331187, 0.00460845, 0.00886342]), 'ndcg': array([0.01117216, 0.01289446, 0.01736534]), 'hit_ratio': array([0.022 , 0.028 , 0.0465]), 'auc': 0.0}
Epoch 0 Loss 449.0741333007812 Time 26.98769521713257


100%|██████████| 15/15 [00:05<00:00,  2.82it/s]
100%|██████████| 1/1 [00:22<00:00, 22.59s/it]


{'precision': array([0.0087  , 0.006875, 0.0046  ]), 'recall': array([0.0149535 , 0.02452307, 0.04023084]), 'ndcg': array([0.03467055, 0.04374699, 0.05643445]), 'hit_ratio': array([0.073 , 0.1025, 0.1505]), 'auc': 0.0}
Epoch 1 Loss 38.141219011942546 Time 28.167550086975098


100%|██████████| 15/15 [00:05<00:00,  2.59it/s]
100%|██████████| 1/1 [00:22<00:00, 22.82s/it]


{'precision': array([0.0104 , 0.0076 , 0.00534]), 'recall': array([0.02210794, 0.03236706, 0.05213052]), 'ndcg': array([0.04069744, 0.04931356, 0.06489174]), 'hit_ratio': array([0.0865, 0.1155, 0.1715]), 'auc': 0.0}
Epoch 2 Loss 11.694350369771321 Time 28.888647079467773


100%|██████████| 15/15 [00:05<00:00,  2.88it/s]
100%|██████████| 1/1 [00:23<00:00, 23.43s/it]


{'precision': array([0.00975 , 0.007675, 0.00574 ]), 'recall': array([0.01982568, 0.03034045, 0.05408188]), 'ndcg': array([0.0384941 , 0.04761186, 0.06571154]), 'hit_ratio': array([0.0835, 0.1155, 0.1805]), 'auc': 0.0}
Epoch 3 Loss 9.766905307769775 Time 28.887646675109863


100%|██████████| 15/15 [00:05<00:00,  2.84it/s]
100%|██████████| 1/1 [00:21<00:00, 21.44s/it]


{'precision': array([0.0094 , 0.00845, 0.00617]), 'recall': array([0.019507  , 0.03309177, 0.05966628]), 'ndcg': array([0.03766683, 0.05049013, 0.06879571]), 'hit_ratio': array([0.081 , 0.1275, 0.1885]), 'auc': 0.0}
Epoch 4 Loss 10.03747386932373 Time 26.95736050605774


100%|██████████| 15/15 [00:05<00:00,  2.78it/s]
100%|██████████| 1/1 [00:21<00:00, 21.72s/it]


{'precision': array([0.0099 , 0.0086 , 0.00618]), 'recall': array([0.01904434, 0.03512375, 0.05916217]), 'ndcg': array([0.03903013, 0.05188963, 0.06981637]), 'hit_ratio': array([0.0835, 0.1315, 0.1915]), 'auc': 0.0}
Epoch 5 Loss 10.002503331502279 Time 27.375131845474243


100%|██████████| 15/15 [00:05<00:00,  2.77it/s]
100%|██████████| 1/1 [00:20<00:00, 20.03s/it]


{'precision': array([0.0098 , 0.00835, 0.0064 ]), 'recall': array([0.0175183 , 0.03214436, 0.06209535]), 'ndcg': array([0.03723146, 0.0495492 , 0.07006462]), 'hit_ratio': array([0.0825, 0.127 , 0.1995]), 'auc': 0.0}
Epoch 6 Loss 9.488785934448241 Time 25.703593730926514


100%|██████████| 15/15 [00:05<00:00,  2.85it/s]
100%|██████████| 1/1 [00:20<00:00, 20.10s/it]


{'precision': array([0.0095  , 0.008675, 0.00669 ]), 'recall': array([0.01861959, 0.03779219, 0.06793873]), 'ndcg': array([0.03656006, 0.04992258, 0.07156366]), 'hit_ratio': array([0.0835, 0.132 , 0.2075]), 'auc': 0.0}
Epoch 7 Loss 9.11321210861206 Time 25.623244047164917


100%|██████████| 15/15 [00:05<00:00,  2.87it/s]
100%|██████████| 1/1 [00:19<00:00, 19.19s/it]


{'precision': array([0.0106 , 0.0091 , 0.00679]), 'recall': array([0.02129211, 0.03762478, 0.06667412]), 'ndcg': array([0.04144487, 0.05442822, 0.07485089]), 'hit_ratio': array([0.0915, 0.137 , 0.207 ]), 'auc': 0.0}
Epoch 8 Loss 8.221775118509928 Time 24.67738962173462


100%|██████████| 15/15 [00:05<00:00,  2.87it/s]
100%|██████████| 1/1 [00:19<00:00, 19.63s/it]

{'precision': array([0.01115, 0.00915, 0.00701]), 'recall': array([0.02412593, 0.03724605, 0.07001176]), 'ndcg': array([0.04299566, 0.05484236, 0.0769517 ]), 'hit_ratio': array([0.096 , 0.137 , 0.2135]), 'auc': 0.0}
Epoch 9 Loss 7.8775052706400555 Time 25.113595962524414





In [2]:
import pandas as pd
import torch

# NOTE(review): read_pickle unpickles the file, and unpickling can execute
# arbitrary code — only load pickles that come from a trusted source.
data = pd.read_pickle("../data/books/train_matrix.pkl")

In [4]:
import numpy as np
import scipy.sparse as sp
def matrix_to_tensor(numpy_matrix):
    """Convert a matrix into a float32 ``torch`` sparse COO tensor.

    Only the non-zero entries of the input are stored in the result.

    Args:
        numpy_matrix: A dense ``numpy`` array, or a ``scipy.sparse`` matrix.
            Sparse inputs are read through their COO form instead of being
            indexed element by element.

    Returns:
        A ``torch`` sparse COO tensor of dtype ``torch.float32`` with the same
        shape as the input.
    """
    if sp.issparse(numpy_matrix):
        coo = numpy_matrix.tocoo()
        # Drop explicitly stored zeros so both input kinds keep the same entries.
        keep = coo.data != 0
        indices = np.vstack((coo.row[keep], coo.col[keep]))
        values = coo.data[keep]
        shape = coo.shape
    else:
        dense = np.asarray(numpy_matrix)
        # Locate the non-zero entries once and reuse them for both the
        # coordinates and the values.
        nonzero = np.nonzero(dense)
        indices = np.vstack(nonzero)
        values = dense[nonzero]
        shape = dense.shape

    # scipy may hand back int32 coordinates; torch sparse indices are int64.
    indices = indices.astype(np.int64)
    return torch.sparse_coo_tensor(torch.from_numpy(indices),
                                   torch.from_numpy(values),
                                   shape, dtype=torch.float32)


def _inv_sqrt_degree(mat, axis):
    """Return ``(mat.sum(axis) + 1e-8) ** -0.5`` as a flat 1-D array."""
    degree = np.asarray(mat.sum(axis)).flatten()
    # The epsilon keeps all-zero rows/columns from dividing by zero.
    scale = np.power(degree + 1e-8, -0.5)
    scale[np.isinf(scale)] = 0.
    return scale


def csr_norm(csr_mat, mean_flag=False):  # TODO: check if this function exists in a python library
    """Scale a sparse matrix by the inverse square roots of its row/column sums.

    With ``R = diag((row_sum + 1e-8) ** -0.5)`` and
    ``C = diag((col_sum + 1e-8) ** -0.5)``:

    * ``mean_flag`` falsy (default): returns ``R @ csr_mat @ C``.
    * ``mean_flag`` truthy: returns ``R @ csr_mat`` (rows only).

    NOTE(review): the row-only branch still uses the -0.5 exponent, not -1, so
    it is not a plain row mean despite the flag's name — confirm this is
    intended.

    Args:
        csr_mat: Matrix to normalize; expected to be a ``scipy.sparse`` matrix.
        mean_flag: Selects row-only scaling when truthy.

    Returns:
        The scaled matrix, as produced by the scipy matrix products.
    """
    row_scale = sp.diags(_inv_sqrt_degree(csr_mat, 1))
    if mean_flag:
        # Column sums are not needed for row-only scaling, so skip them.
        return row_scale @ csr_mat
    col_scale = sp.diags(_inv_sqrt_degree(csr_mat, 0))
    # `@` is always a matrix product; `*` is element-wise for scipy sparse arrays.
    return row_scale @ csr_mat @ col_scale

In [5]:
# Normalize the loaded matrix and convert it to a sparse tensor. The result is
# stored back in `data`, so guard against re-running this cell: once `data` is
# already a tensor, passing it through csr_norm again would be wrong.
if not isinstance(data, torch.Tensor):
    data = matrix_to_tensor(csr_norm(data))

In [6]:
# Save the tensor currently held in `data` to ../data/books/train_matrix.pt
# (the same directory as the source train_matrix.pkl).
torch.save(data, "../data/books/train_matrix.pt")

In [7]:
import json

# Load the test mapping; the next cell looks entries up by string keys
# (`test[str(i)]`).
with open("../data/books/test.json") as test_file:
    test = json.load(test_file)

In [11]:
# Keep only the entries for keys "0" .. "1999" (2000 users).
# A missing key raises KeyError.
user_dict = {user_id: test[str(user_id)] for user_id in range(2000)}

# NOTE(review): this writes back to the same test.json that was loaded above,
# so the original file is replaced on disk. json.dump turns the integer keys
# back into strings.
with open("../data/books/test.json", "w") as out_file:
    json.dump(user_dict, out_file)



In [49]:
import json

# Load the augmented interactions mapping.
# NOTE(review): this rebinds `data`, a name an earlier cell uses for the
# training matrix/tensor.
with open("../data/books/augmented_interactions_dict.json") as interactions_file:
    data = json.load(interactions_file)

In [51]:
# Collect the entries whose first or second list item is not a plain int.
# ("mauvais" is French for "bad"; nothing is converted or saved in this cell.)
mauvais = {
    key: data[key]
    for key in data.keys()
    if type(data[key][0]) is not int or type(data[key][1]) is not int
}

    