In [1]:
import pandas as pd

from src.data_loader.data_loader import BooksDataset
from src.models.mm_model import MmModel
from src.train import Trainer
import torch

In [2]:
# Run on the GPU when CUDA is usable, otherwise fall back to the CPU.
device_kind = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_kind)

In [3]:
# Instantiate the project's BooksDataset, pointing it at the ../data/books
# directory (the path is relative to the notebook's working directory).
dataset = BooksDataset(data_dir="../data/books")

In [4]:
# Smoke-test the sampler with a few request sizes. Each call prints a tuple of
# three equally long index lists (presumably users, positive items and
# negative items — confirm against BooksDataset.sample).
for n_samples in (3, 3, 1):
    print(dataset.sample(n_samples))

([14314, 3675, 5335], [27299, 2081, 28955], [21310, 30431, 13975])
([13323, 9463, 6984], [2714, 7366, 27138], [14043, 1182, 2311])
([224], [954], [2030])


In [5]:
# Build the multimodal model from the dataset's sizes and auxiliary tensors.
# Keyword arguments are kept in their original order so the get_dataset()
# calls are evaluated in the same sequence as before.
model = MmModel(
    n_users=dataset.n_users,
    n_items=dataset.n_items,
    adjacency_matrix=dataset.get_dataset("adjacency_matrix"),
    interactions=dataset.get_dataset("interactions"),
    image_embeddings_data=dataset.get_dataset("images"),
    text_embeddings_data=dataset.get_dataset("text"),
    embed_size=64,
    n_layers=3,
)
# Move the parameters to the selected device; as the cell's last expression
# this also displays the module summary.
model.to(device)

MmModel(
  (E0): Embedding(48752, 64)
  (text_feat): Linear(in_features=1024, out_features=64, bias=True)
  (text_feat_dropout): Dropout(p=0.2, inplace=False)
  (image_feat): Linear(in_features=1024, out_features=64, bias=True)
  (image_feat_dropout): Dropout(p=0.2, inplace=False)
)

In [6]:
# Wire the model and dataset into the project's Trainer.
# lr=0.001 is presumably the optimizer learning rate — confirm in src/train.
trainer = Trainer(model=model,dataset=dataset,lr=0.001)

In [7]:
# Train for 10 epochs with a batch size of 1024. After every epoch the run
# prints a metrics dict (precision, recall, ndcg, hit_ratio, auc) followed by
# the epoch loss and wall-clock time.
# NOTE(review): in the captured log the loss only moves from ~0.6943 to ~0.6922
# (i.e. it stays near ln 2) and 'auc' is 0.0 in every epoch — worth checking
# whether the model is actually learning and whether auc is computed at all.
trainer.train(epochs=10,batch_size=1024)

100%|██████████| 15/15 [00:02<00:00,  5.67it/s]
100%|██████████| 1/1 [00:17<00:00, 17.48s/it]


{'precision': array([0.01416667]), 'recall': array([0.007266]), 'ndcg': array([0.03350272]), 'hit_ratio': array([0.041]), 'auc': 0.0}
Epoch 0 Loss 0.6942788561185201 Time 20.2837393283844


100%|██████████| 15/15 [00:02<00:00,  5.85it/s]
100%|██████████| 1/1 [00:17<00:00, 17.55s/it]


{'precision': array([0.01433333]), 'recall': array([0.0071972]), 'ndcg': array([0.03343726]), 'hit_ratio': array([0.0415]), 'auc': 0.0}
Epoch 1 Loss 0.6933048288027446 Time 20.285451650619507


100%|██████████| 15/15 [00:02<00:00,  6.11it/s]
100%|██████████| 1/1 [00:17<00:00, 17.73s/it]


{'precision': array([0.01466667]), 'recall': array([0.00806657]), 'ndcg': array([0.03453102]), 'hit_ratio': array([0.043]), 'auc': 0.0}
Epoch 2 Loss 0.693122414747874 Time 20.32788848876953


100%|██████████| 15/15 [00:02<00:00,  6.11it/s]
100%|██████████| 1/1 [00:17<00:00, 17.84s/it]


{'precision': array([0.015]), 'recall': array([0.00817674]), 'ndcg': array([0.03501755]), 'hit_ratio': array([0.0445]), 'auc': 0.0}
Epoch 3 Loss 0.6930138389269511 Time 20.482635259628296


100%|██████████| 15/15 [00:02<00:00,  5.66it/s]
100%|██████████| 1/1 [00:17<00:00, 17.73s/it]


{'precision': array([0.0155]), 'recall': array([0.00849004]), 'ndcg': array([0.03575866]), 'hit_ratio': array([0.0445]), 'auc': 0.0}
Epoch 4 Loss 0.6929102420806885 Time 20.517237901687622


100%|██████████| 15/15 [00:02<00:00,  6.11it/s]
100%|██████████| 1/1 [00:18<00:00, 18.00s/it]


{'precision': array([0.01566667]), 'recall': array([0.00850312]), 'ndcg': array([0.03616195]), 'hit_ratio': array([0.0455]), 'auc': 0.0}
Epoch 5 Loss 0.69280477364858 Time 20.60312795639038


100%|██████████| 15/15 [00:02<00:00,  6.10it/s]
100%|██████████| 1/1 [00:17<00:00, 17.75s/it]


{'precision': array([0.01533333]), 'recall': array([0.00872221]), 'ndcg': array([0.03596555]), 'hit_ratio': array([0.0445]), 'auc': 0.0}
Epoch 6 Loss 0.6926687995592753 Time 20.348026752471924


100%|██████████| 15/15 [00:02<00:00,  5.84it/s]
100%|██████████| 1/1 [00:17<00:00, 17.66s/it]


{'precision': array([0.01516667]), 'recall': array([0.0085156]), 'ndcg': array([0.03467838]), 'hit_ratio': array([0.0435]), 'auc': 0.0}
Epoch 7 Loss 0.6925490657488506 Time 20.386708736419678


100%|██████████| 15/15 [00:02<00:00,  6.05it/s]
100%|██████████| 1/1 [00:17<00:00, 17.55s/it]


{'precision': array([0.01583333]), 'recall': array([0.00900694]), 'ndcg': array([0.03615306]), 'hit_ratio': array([0.046]), 'auc': 0.0}
Epoch 8 Loss 0.6923970897992452 Time 20.215401887893677


100%|██████████| 15/15 [00:02<00:00,  5.97it/s]
100%|██████████| 1/1 [00:17<00:00, 17.67s/it]

{'precision': array([0.015]), 'recall': array([0.00843883]), 'ndcg': array([0.03582413]), 'hit_ratio': array([0.044]), 'auc': 0.0}
Epoch 9 Loss 0.692205262184143 Time 20.33605194091797





In [2]:
import torch
import  pandas as pd

# Load the pickled training matrix from ../data/books/train_matrix.pkl.
# NOTE(review): unpickling can execute arbitrary code — only read pickle files
# that come from a trusted source.
data = pd.read_pickle("../data/books/train_matrix.pkl")

In [4]:
import numpy as np
import scipy.sparse as sp
def matrix_to_tensor(numpy_matrix):
    """Convert a dense array or a SciPy sparse matrix to a torch sparse COO tensor.

    Only the non-zero entries are stored. The coordinates are located once and
    reused for both the index and the value tensors, so the two are always
    aligned.

    Args:
        numpy_matrix: A NumPy array (any rank >= 1) or a ``scipy.sparse``
            matrix, e.g. the output of ``csr_norm``.

    Returns:
        A ``torch.float32`` sparse COO tensor with the same shape as the input.
    """
    if sp.issparse(numpy_matrix):
        # Read coordinates straight from the COO representation instead of
        # comparing the whole matrix against zero and fancy-indexing it
        # (which yields a 2-D np.matrix that does not fit as COO values).
        coo = numpy_matrix.tocoo()
        stored_nonzero = coo.data != 0  # drop explicitly stored zeros
        coords = (coo.row[stored_nonzero], coo.col[stored_nonzero])
        values = coo.data[stored_nonzero]
        shape = coo.shape
    else:
        dense = np.asarray(numpy_matrix)
        coords = np.nonzero(dense)  # single scan, shared by indices and values
        values = dense[coords]
        shape = dense.shape

    # Cast the (ndim, nnz) coordinates to int64 for torch; SciPy stores
    # them as int32.
    indices = np.vstack(coords).astype(np.int64, copy=False)
    return torch.sparse_coo_tensor(torch.from_numpy(indices),
                                   torch.from_numpy(values),
                                   shape, dtype=torch.float32)


def csr_norm(csr_mat, mean_flag=False):  # TODO: check if this function exists in a python library
    """Scale a matrix by the inverse square roots of its row/column sums.

    With ``R = diag((rowsum + 1e-8) ** -0.5)`` and
    ``C = diag((colsum + 1e-8) ** -0.5)``:

    * ``mean_flag`` falsy (default): returns ``R @ csr_mat @ C``.
    * ``mean_flag`` truthy: returns ``R @ csr_mat`` (row scaling only).
      Note that this is still an inverse *square root* of the row sum,
      not a division by the row sum.

    Args:
        csr_mat: Matrix to normalise (a ``scipy.sparse`` matrix in this
            notebook).
        mean_flag: Selects row-only scaling when truthy.

    Returns:
        The scaled matrix as produced by the sparse matrix products.
    """
    def _inv_sqrt(sums):
        # The 1e-8 offset keeps empty rows/columns from dividing by zero;
        # their entries are all zero, so the large scale factor has no effect.
        scale = np.power(np.array(sums) + 1e-8, -0.5).flatten()
        scale[np.isinf(scale)] = 0.
        return scale

    # `@` is an explicit matrix product; `*` is element-wise for the newer
    # SciPy sparse *array* types.
    row_scaled = sp.diags(_inv_sqrt(csr_mat.sum(1))) @ csr_mat
    if mean_flag:
        # Column sums are not needed here, so they are not computed.
        return row_scaled
    return row_scaled @ sp.diags(_inv_sqrt(csr_mat.sum(0)))

In [5]:
# Normalise the loaded matrix with csr_norm and convert the result to a torch
# sparse COO tensor.
# NOTE(review): `data` is rebound from the raw matrix to the tensor, so this
# cell is not idempotent — running it a second time feeds the tensor back into
# csr_norm. Re-run the loading cell first, or bind the result to a new name.
data = matrix_to_tensor(csr_norm(data))

In [6]:
# Persist the tensor currently bound to `data` as ../data/books/train_matrix.pt
# (same directory as the source pickle).
torch.save(data, "../data/books/train_matrix.pt")

In [7]:
import json
# Parse ../data/books/test.json into `test`. JSON object keys are strings, so
# entries are looked up with stringified ids.
with open("../data/books/test.json", "r") as f:
    test = json.load(f)

In [11]:
# Keep only the test entries for user ids 0..1999. The loaded JSON is keyed by
# strings, so each integer id is looked up through str(); a missing id raises
# KeyError.
user_dict = {user_id: test[str(user_id)] for user_id in range(2000)}

# Write the reduced dict back to disk.
# NOTE(review): this overwrites the very file `test` was loaded from, so the
# full test split is lost unless a copy exists elsewhere.
with open("../data/books/test.json", "w") as out_file:
    json.dump(user_dict, out_file)

