In [1]:
import pandas as pd

from src.data_loader.data_loader import BooksDataset
from src.models.mm_model import MmModel
from src.train import Trainer
import torch

In [2]:
# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [3]:
# Build the project's BooksDataset from ../data/books. The resulting `dataset`
# object is what the sampling, model-construction and trainer cells consume.
dataset = BooksDataset(data_dir="../data/books")

In [4]:
# Smoke-test the sampler: draw two batches of 3 and one batch of 1 and print
# whatever dataset.sample returns for each request.
for sample_size in (3, 3, 1):
    print(dataset.sample(sample_size))

([3802, 84, 10943], [4732, 19139, 553], [24989, 22630, 8235])
([2935, 7591, 6272], [6548, 21847, 24327], [24894, 33220, 11931])
([3122], [31751], [28190])


In [5]:
# Assemble the model from the dataset's sizes and its named sub-datasets
# (graph, interactions, image/text embeddings, user profiles, book attributes),
# using 128-dimensional embeddings and 3 layers.
model = MmModel(
    n_users=dataset.n_users,
    n_items=dataset.n_items,
    adjacency_matrix=dataset.get_dataset("adjacency_matrix"),
    interactions=dataset.get_dataset("interactions"),
    image_embeddings_data=dataset.get_dataset("images"),
    text_embeddings_data=dataset.get_dataset("text"),
    embed_size=128,
    n_layers=3,
    user_profiles_data=dataset.get_dataset("user_profiles"),
    book_attributes_data=dataset.get_dataset("books_attributes"),
)
# Last expression of the cell: moves the model to `device` and displays its repr.
model.to(device)

MmModel(
  (E0): Embedding(48752, 128)
  (text_feat): Linear(in_features=1024, out_features=128, bias=True)
  (text_feat_dropout): Dropout(p=0.2, inplace=False)
  (image_feat): Linear(in_features=1024, out_features=128, bias=True)
  (image_feat_dropout): Dropout(p=0.2, inplace=False)
  (user_profiles): Linear(in_features=768, out_features=128, bias=True)
  (user_profiles_dropout): Dropout(p=0.2, inplace=False)
  (book_attributes): Linear(in_features=768, out_features=128, bias=True)
  (book_attributes_dropout): Dropout(p=0.2, inplace=False)
)

In [10]:
# Hand the model and dataset to the project's Trainer with lr=0.01.
trainer = Trainer(model=model,dataset=dataset,lr=0.01)

In [11]:
# Request 10 epochs with batch_size=1024. In the recorded output each epoch
# prints a metrics dict followed by "Epoch N Loss ... Time ..."; that run was
# stopped by a KeyboardInterrupt after epoch 5, so it did not reach 10 epochs.
# NOTE(review): 'auc' is 0.0 in every recorded epoch - looks like it is never
# computed; confirm in the Trainer's evaluation code.
trainer.train(epochs=10,batch_size=1024)

100%|██████████| 15/15 [00:05<00:00,  2.84it/s]
100%|██████████| 1/1 [00:17<00:00, 17.48s/it]


{'precision': array([0.01185, 0.0098 , 0.00747]), 'recall': array([0.02636094, 0.04117428, 0.07713581]), 'ndcg': array([0.04155191, 0.05483238, 0.07856825]), 'hit_ratio': array([0.1005, 0.144 , 0.231 ]), 'auc': 0.0}
Epoch 0 Loss 519.6659310658773 Time 22.978755474090576


100%|██████████| 15/15 [00:04<00:00,  3.00it/s]
100%|██████████| 1/1 [00:18<00:00, 18.03s/it]


{'precision': array([0.0118  , 0.010025, 0.00777 ]), 'recall': array([0.02615579, 0.04284015, 0.07999013]), 'ndcg': array([0.04246577, 0.05607903, 0.081241  ]), 'hit_ratio': array([0.101 , 0.146 , 0.2375]), 'auc': 0.0}
Epoch 1 Loss 31.857196871439616 Time 23.253037214279175


100%|██████████| 15/15 [00:05<00:00,  2.91it/s]
100%|██████████| 1/1 [00:18<00:00, 18.17s/it]


{'precision': array([0.01205 , 0.009825, 0.0076  ]), 'recall': array([0.02410037, 0.03976983, 0.0787781 ]), 'ndcg': array([0.04141911, 0.05343411, 0.07842947]), 'hit_ratio': array([0.101 , 0.1425, 0.231 ]), 'auc': 0.0}
Epoch 2 Loss 13.099110794067382 Time 23.550331830978394


100%|██████████| 15/15 [00:05<00:00,  2.97it/s]
100%|██████████| 1/1 [00:18<00:00, 18.36s/it]


{'precision': array([0.0123 , 0.0097 , 0.00783]), 'recall': array([0.02551683, 0.03787028, 0.07915771]), 'ndcg': array([0.04293453, 0.05420963, 0.08061644]), 'hit_ratio': array([0.1025, 0.1425, 0.235 ]), 'auc': 0.0}
Epoch 3 Loss 11.270422744750977 Time 23.632065534591675


100%|██████████| 15/15 [00:05<00:00,  2.91it/s]
100%|██████████| 1/1 [00:18<00:00, 18.58s/it]


{'precision': array([0.01235 , 0.010225, 0.0079  ]), 'recall': array([0.02614076, 0.0427463 , 0.08000244]), 'ndcg': array([0.04324917, 0.05671787, 0.08143748]), 'hit_ratio': array([0.102 , 0.1505, 0.2375]), 'auc': 0.0}
Epoch 4 Loss 10.02435925801595 Time 23.949123859405518


100%|██████████| 15/15 [00:05<00:00,  2.92it/s]
100%|██████████| 1/1 [00:19<00:00, 19.11s/it]


{'precision': array([0.0123 , 0.01045, 0.00782]), 'recall': array([0.02626696, 0.04447728, 0.08093863]), 'ndcg': array([0.04271601, 0.05682844, 0.08069163]), 'hit_ratio': array([0.1035, 0.153 , 0.2355]), 'auc': 0.0}
Epoch 5 Loss 9.336460145314534 Time 24.487983465194702


100%|██████████| 15/15 [00:05<00:00,  2.60it/s]
  0%|          | 0/1 [00:18<?, ?it/s]


KeyboardInterrupt: 

In [2]:
import torch
import  pandas as pd

# Load the pickled train matrix from disk.
# NOTE: unpickling can execute arbitrary code - only read pickle files that
# come from a trusted source.
data = pd.read_pickle("../data/books/train_matrix.pkl")

In [4]:
import numpy as np
import scipy.sparse as sp
def matrix_to_tensor(numpy_matrix):
    """Convert a matrix into a float32 ``torch`` sparse COO tensor.

    Only the non-zero entries of the input are stored in the result.

    Args:
        numpy_matrix: Either a dense ``numpy`` array (``numpy.matrix`` is
            accepted too) or a ``scipy.sparse`` matrix such as the one
            ``csr_norm`` returns for sparse input.

    Returns:
        torch.Tensor: A sparse COO tensor with ``numpy_matrix.shape`` and
        dtype ``torch.float32``.
    """
    if sp.issparse(numpy_matrix):
        # Fancy-indexing a scipy sparse matrix yields a 2-D (1, nnz) result
        # that does not line up with the (2, nnz) index array, so read the
        # coordinates and values straight from the COO representation.
        coo = numpy_matrix.tocoo()
        # Drop explicitly stored zeros so only real non-zeros are kept.
        keep = coo.data != 0
        coords = (coo.row[keep], coo.col[keep])
        values = coo.data[keep]
    else:
        # np.asarray turns numpy.matrix into a plain ndarray so that indexing
        # returns a flat 1-D vector of values.
        dense = np.asarray(numpy_matrix)
        coords = np.nonzero(dense)
        values = dense[coords]

    # torch expects int64 indices with one row per dimension.
    indices = torch.from_numpy(np.vstack(coords).astype(np.int64))
    return torch.sparse_coo_tensor(indices,
                                   torch.from_numpy(values),
                                   numpy_matrix.shape,
                                   dtype=torch.float32)


def csr_norm(csr_mat, mean_flag=False):  # TODO: check if this function exists in a python library
    """Scale a matrix by the inverse square roots of its row/column sums.

    Args:
        csr_mat: Matrix to normalise; it must support ``.sum(axis)`` and
            multiplication with the diagonal matrices built by ``sp.diags``.
        mean_flag: When equal to ``False`` (the default) both sides are
            scaled, giving ``R * csr_mat * C``. Otherwise only the row
            scaling ``R * csr_mat`` is applied.

    Returns:
        The scaled matrix, where ``R`` and ``C`` are diagonal matrices holding
        ``(row_sum + 1e-8) ** -0.5`` and ``(col_sum + 1e-8) ** -0.5``.
    """

    def _inv_sqrt_diag(axis):
        # Sum along `axis`, add a small epsilon to keep zero sums finite,
        # and raise to the power -1/2.
        sums = np.array(csr_mat.sum(axis))
        scale = np.power(sums + 1e-8, -0.5).flatten()
        # Any infinite entries that still appear are zeroed out.
        scale[np.isinf(scale)] = 0.
        return sp.diags(scale)

    row_scale = _inv_sqrt_diag(1)
    col_scale = _inv_sqrt_diag(0)

    # The equality test (rather than truthiness) is kept on purpose so that
    # non-bool flag values are routed exactly as before.
    if mean_flag == False:
        return row_scale * csr_mat * col_scale
    return row_scale * csr_mat

In [5]:
# Normalise the loaded matrix with csr_norm (default: row and column scaling)
# and convert the result to a torch sparse tensor.
# NOTE(review): `data` is rebound from the loaded matrix to the tensor, so
# re-running this cell would feed the tensor back into csr_norm - reload the
# pickle first.
data = matrix_to_tensor(csr_norm(data))

In [6]:
# Save the tensor held in `data` next to the source pickle, as train_matrix.pt.
torch.save(data, "../data/books/train_matrix.pt")

In [7]:
import json
# Read the test split from disk; `test` is indexed by string keys further down.
with open("../data/books/test.json", "r") as test_file:
    test = json.load(test_file)

In [11]:
# Keep only the entries for users 0..1999. `test` is looked up by the user id
# as a string, while the new dict is keyed by the integer id (json.dump writes
# those keys back out as strings).
# NOTE(review): the result overwrites ../data/books/test.json in place, so the
# full test set is lost unless a copy exists elsewhere.
user_dict = {user_id: test[str(user_id)] for user_id in range(2000)}
with open("../data/books/test.json", "w") as out_file:
    json.dump(user_dict, out_file)

