In [19]:
from tqdm import tqdm
import os

from sklearn.model_selection import train_test_split

from models.metrics import sentiment_score
from models.matrix_factorization import GMF, NeuMF, get_mf_loader, mf_train, mf_evaluate

import pandas as pd 
import random 
import numpy as np 
from torch import nn, optim 
import torch 

from torch.utils.data import Dataset, DataLoader

In [2]:
class args:
    seed = 42 
    batch_size = 512
    SAVE_PATH = os.path.basename(os.getcwd())
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    lr = 1e-3

In [5]:
DATA_PATH = 'data/sample.csv'
d_set = pd.read_csv(DATA_PATH, encoding='utf-8-sig')

d_set = d_set.astype({'user_id':'category', 'business_id':'category'})

d_set.stars = d_set.stars.apply(sentiment_score)

In [6]:
d_train, d_test = train_test_split(d_set, train_size=0.8, random_state=args.seed)
d_valid, d_test = train_test_split(d_test, train_size=0.5, random_state=args.seed)

In [7]:
u_cat = d_train.user_id.cat.categories
b_cat = d_train.business_id.cat.categories

In [8]:
d_valid.user_id = d_valid.user_id.cat.set_categories(u_cat)
d_valid.business_id = d_valid.business_id.cat.set_categories(b_cat)

d_test.user_id = d_test.user_id.cat.set_categories(u_cat)
d_test.business_id = d_test.business_id.cat.set_categories(b_cat)

In [9]:
d_train.user_id = d_train.user_id.cat.codes
d_train.business_id = d_train.business_id.cat.codes 

d_valid.user_id = d_valid.user_id.cat.codes
d_valid.business_id = d_valid.business_id.cat.codes 

d_test.user_id = d_test.user_id.cat.codes
d_test.business_id = d_test.business_id.cat.codes 

In [10]:
d_train = d_train.dropna()
d_valid = d_valid.dropna()
d_test = d_test.dropna()

d_train.reset_index(drop=True, inplace=True)
d_valid.reset_index(drop=True, inplace=True)
d_test.reset_index(drop=True, inplace=True)

In [16]:
args.num_users = d_train.user_id.max() + 1
args.num_items = d_train.business_id.max() + 1
args.latent_dim = 64
args.num_epochs = 10

In [17]:
gmf_model = GMF(args).to(args.device)

train_loader = get_mf_loader(args, d_train, num_workers=4)
valid_loader = get_mf_loader(args, d_valid, num_workers=4)
test_loader = get_mf_loader(args, d_test, num_workers=4)

optimizer = optim.SGD(gmf_model.parameters(), lr=args.lr)
criterion = nn.BCELoss()

In [18]:
losses = mf_train(args, gmf_model, train_loader, valid_loader, optimizer, criterion)

Training...: 100%|██████████| 79/79 [00:00<00:00, 146.30it/s]
Evaluating...: 100%|██████████| 10/10 [00:00<00:00, 71.29it/s]


Epoch: [1/10]
Train Loss: 0.71452
Valid Loss: 0.71602


Training...: 100%|██████████| 79/79 [00:00<00:00, 277.06it/s]
Evaluating...: 100%|██████████| 10/10 [00:00<00:00, 77.52it/s]


Epoch: [2/10]
Train Loss: 0.71026
Valid Loss: 0.71197


Training...: 100%|██████████| 79/79 [00:00<00:00, 274.33it/s]
Evaluating...: 100%|██████████| 10/10 [00:00<00:00, 70.63it/s]


Epoch: [3/10]
Train Loss: 0.70615
Valid Loss: 0.70807


Training...: 100%|██████████| 79/79 [00:00<00:00, 285.82it/s]
Evaluating...: 100%|██████████| 10/10 [00:00<00:00, 64.72it/s]


Epoch: [4/10]
Train Loss: 0.70219
Valid Loss: 0.70431


Training...: 100%|██████████| 79/79 [00:00<00:00, 266.15it/s]
Evaluating...: 100%|██████████| 10/10 [00:00<00:00, 70.22it/s]


Epoch: [5/10]
Train Loss: 0.69837
Valid Loss: 0.70068


Training...: 100%|██████████| 79/79 [00:00<00:00, 265.41it/s]
Evaluating...: 100%|██████████| 10/10 [00:00<00:00, 70.83it/s]


Epoch: [6/10]
Train Loss: 0.69469
Valid Loss: 0.69720


Training...: 100%|██████████| 79/79 [00:00<00:00, 254.85it/s]
Evaluating...: 100%|██████████| 10/10 [00:00<00:00, 74.40it/s]


Epoch: [7/10]
Train Loss: 0.69114
Valid Loss: 0.69384


Training...: 100%|██████████| 79/79 [00:00<00:00, 251.85it/s]
Evaluating...: 100%|██████████| 10/10 [00:00<00:00, 74.89it/s]


Epoch: [8/10]
Train Loss: 0.68772
Valid Loss: 0.69060


Training...: 100%|██████████| 79/79 [00:00<00:00, 252.72it/s]
Evaluating...: 100%|██████████| 10/10 [00:00<00:00, 72.67it/s]


Epoch: [9/10]
Train Loss: 0.68443
Valid Loss: 0.68749


Training...: 100%|██████████| 79/79 [00:00<00:00, 235.77it/s]
Evaluating...: 100%|██████████| 10/10 [00:00<00:00, 70.86it/s]


Epoch: [10/10]
Train Loss: 0.68126
Valid Loss: 0.68449


In [21]:
ncf_model = NeuMF(args).to(args.device)

optimizer = optim.SGD(ncf_model.parameters(), lr=args.lr)
criterion = nn.BCELoss()

In [22]:
losses = mf_train(args, ncf_model, train_loader, valid_loader, optimizer, criterion)

Training...: 100%|██████████| 79/79 [00:00<00:00, 146.93it/s]
Evaluating...: 100%|██████████| 10/10 [00:00<00:00, 68.75it/s]


Epoch: [1/10]
Train Loss: 0.71381
Valid Loss: 0.71120


Training...: 100%|██████████| 79/79 [00:00<00:00, 207.82it/s]
Evaluating...: 100%|██████████| 10/10 [00:00<00:00, 75.77it/s]


Epoch: [2/10]
Train Loss: 0.70849
Valid Loss: 0.70630


Training...: 100%|██████████| 79/79 [00:00<00:00, 216.78it/s]
Evaluating...: 100%|██████████| 10/10 [00:00<00:00, 70.06it/s]


Epoch: [3/10]
Train Loss: 0.70342
Valid Loss: 0.70164


Training...: 100%|██████████| 79/79 [00:00<00:00, 206.73it/s]
Evaluating...: 100%|██████████| 10/10 [00:00<00:00, 66.14it/s]


Epoch: [4/10]
Train Loss: 0.69858
Valid Loss: 0.69720


Training...: 100%|██████████| 79/79 [00:00<00:00, 212.12it/s]
Evaluating...: 100%|██████████| 10/10 [00:00<00:00, 73.33it/s]


Epoch: [5/10]
Train Loss: 0.69396
Valid Loss: 0.69297


Training...: 100%|██████████| 79/79 [00:00<00:00, 199.55it/s]
Evaluating...: 100%|██████████| 10/10 [00:00<00:00, 70.45it/s]


Epoch: [6/10]
Train Loss: 0.68956
Valid Loss: 0.68894


Training...: 100%|██████████| 79/79 [00:00<00:00, 195.23it/s]
Evaluating...: 100%|██████████| 10/10 [00:00<00:00, 75.85it/s]


Epoch: [7/10]
Train Loss: 0.68535
Valid Loss: 0.68509


Training...: 100%|██████████| 79/79 [00:00<00:00, 226.43it/s]
Evaluating...: 100%|██████████| 10/10 [00:00<00:00, 69.33it/s]


Epoch: [8/10]
Train Loss: 0.68134
Valid Loss: 0.68142


Training...: 100%|██████████| 79/79 [00:00<00:00, 220.32it/s]
Evaluating...: 100%|██████████| 10/10 [00:00<00:00, 73.02it/s]


Epoch: [9/10]
Train Loss: 0.67750
Valid Loss: 0.67793


Training...: 100%|██████████| 79/79 [00:00<00:00, 218.84it/s]
Evaluating...: 100%|██████████| 10/10 [00:00<00:00, 71.15it/s]


Epoch: [10/10]
Train Loss: 0.67384
Valid Loss: 0.67460
