In [1]:
from __init__ import * 
from models.MF import * 
from models.utils import * 


import pandas as pd
from torch import optim, nn

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Locate the pre-split Yelp CSVs under YELP_DIR.
train_path, valid_path = (
    os.path.join(YELP_DIR, csv_name) for csv_name in ('train.csv', 'valid.csv')
)

# 'utf-8-sig' strips a leading byte-order mark if the files carry one.
trainset, validset = (
    pd.read_csv(csv_path, encoding='utf-8-sig') for csv_path in (train_path, valid_path)
)

In [3]:
# Overwrite the 'stars' column of both splits with sentiment_score(stars).
# NOTE(review): sentiment_score is defined outside this notebook; it presumably
# maps ratings into [0, 1], since BCELoss is the training criterion — confirm.
# The assignment is in place, so re-running this cell applies sentiment_score
# to values that were already converted.
for _split in (trainset, validset):
    _split.loc[:, 'stars'] = _split.loc[:, 'stars'].apply(sentiment_score)

In [4]:
class args:
    """Hyperparameters and runtime settings for the training run."""

    # Reproducibility and hardware selection (falls back to CPU without CUDA).
    seed = 42
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Optimisation schedule.
    num_epochs = 20
    batch_size = 512
    learning_rate = 1e-4

    # Model size. The user/item counts are the number of distinct ids in the
    # training split.
    # NOTE(review): presumably used as embedding-table sizes, which assumes ids
    # are contiguous 0..n-1 and that valid/test hold no unseen ids — confirm.
    latent_dim = 32
    num_users = trainset['user_id'].nunique()
    num_items = trainset['business_id'].nunique()

In [5]:
# One loader per split, built with identical settings (num_workers=4 is passed
# straight to get_dataloader).
train_loader, valid_loader = (
    get_dataloader(args, frame, num_workers=4) for frame in (trainset, validset)
)

In [6]:
# Build the NeuMF recommender, then move it to the selected device.
# GMF(args) can be swapped in here as an alternative model.
model = NeuMF(args)
model = model.to(args.device)

In [8]:
# Binary cross-entropy objective, optimised with Adam at the configured rate.
criterion = nn.BCELoss()
optimizer = optim.Adam(params=model.parameters(), lr=args.learning_rate)

In [9]:
# Fit the model on train_loader and score it on valid_loader each epoch.
# NOTE(review): train() lives in models.utils; whether the two return values are
# per-epoch histories or final scalars is not visible here — confirm.
# In the recorded run the validation loss bottoms out at epoch 18 (0.58586) and
# creeps up afterwards while the training loss keeps falling.
train_loss, valid_loss = train(args, model, train_loader, valid_loader, optimizer, criterion)

Training...: 100%|██████████| 1414/1414 [00:08<00:00, 163.33it/s]
Evaluating...: 100%|██████████| 468/468 [00:01<00:00, 310.35it/s]


Epoch: [1/20]
Train Loss: 0.62108
Valid Loss: 0.60446


Training...: 100%|██████████| 1414/1414 [00:08<00:00, 173.78it/s]
Evaluating...: 100%|██████████| 468/468 [00:01<00:00, 280.37it/s]


Epoch: [2/20]
Train Loss: 0.60071
Valid Loss: 0.60047


Training...: 100%|██████████| 1414/1414 [00:08<00:00, 175.30it/s]
Evaluating...: 100%|██████████| 468/468 [00:01<00:00, 307.13it/s]


Epoch: [3/20]
Train Loss: 0.59832
Valid Loss: 0.59936


Training...: 100%|██████████| 1414/1414 [00:08<00:00, 176.06it/s]
Evaluating...: 100%|██████████| 468/468 [00:01<00:00, 293.48it/s]


Epoch: [4/20]
Train Loss: 0.59678
Valid Loss: 0.59842


Training...: 100%|██████████| 1414/1414 [00:07<00:00, 178.12it/s]
Evaluating...: 100%|██████████| 468/468 [00:01<00:00, 317.00it/s]


Epoch: [5/20]
Train Loss: 0.59489
Valid Loss: 0.59732


Training...: 100%|██████████| 1414/1414 [00:07<00:00, 176.98it/s]
Evaluating...: 100%|██████████| 468/468 [00:01<00:00, 307.06it/s]


Epoch: [6/20]
Train Loss: 0.59243
Valid Loss: 0.59603


Training...: 100%|██████████| 1414/1414 [00:07<00:00, 178.56it/s]
Evaluating...: 100%|██████████| 468/468 [00:01<00:00, 312.12it/s]


Epoch: [7/20]
Train Loss: 0.58931
Valid Loss: 0.59457


Training...: 100%|██████████| 1414/1414 [00:07<00:00, 178.68it/s]
Evaluating...: 100%|██████████| 468/468 [00:01<00:00, 313.83it/s]


Epoch: [8/20]
Train Loss: 0.58551
Valid Loss: 0.59303


Training...: 100%|██████████| 1414/1414 [00:08<00:00, 176.70it/s]
Evaluating...: 100%|██████████| 468/468 [00:01<00:00, 313.86it/s]


Epoch: [9/20]
Train Loss: 0.58110
Valid Loss: 0.59155


Training...: 100%|██████████| 1414/1414 [00:08<00:00, 174.00it/s]
Evaluating...: 100%|██████████| 468/468 [00:01<00:00, 299.77it/s]


Epoch: [10/20]
Train Loss: 0.57627
Valid Loss: 0.59026


Training...: 100%|██████████| 1414/1414 [00:08<00:00, 172.85it/s]
Evaluating...: 100%|██████████| 468/468 [00:01<00:00, 315.28it/s]


Epoch: [11/20]
Train Loss: 0.57123
Valid Loss: 0.58919


Training...: 100%|██████████| 1414/1414 [00:08<00:00, 173.79it/s]
Evaluating...: 100%|██████████| 468/468 [00:01<00:00, 309.55it/s]


Epoch: [12/20]
Train Loss: 0.56612
Valid Loss: 0.58832


Training...: 100%|██████████| 1414/1414 [00:08<00:00, 176.55it/s]
Evaluating...: 100%|██████████| 468/468 [00:01<00:00, 302.59it/s]


Epoch: [13/20]
Train Loss: 0.56101
Valid Loss: 0.58759


Training...: 100%|██████████| 1414/1414 [00:08<00:00, 176.05it/s]
Evaluating...: 100%|██████████| 468/468 [00:01<00:00, 323.71it/s]


Epoch: [14/20]
Train Loss: 0.55591
Valid Loss: 0.58699


Training...: 100%|██████████| 1414/1414 [00:08<00:00, 174.22it/s]
Evaluating...: 100%|██████████| 468/468 [00:01<00:00, 310.30it/s]


Epoch: [15/20]
Train Loss: 0.55080
Valid Loss: 0.58652


Training...: 100%|██████████| 1414/1414 [00:08<00:00, 173.15it/s]
Evaluating...: 100%|██████████| 468/468 [00:01<00:00, 309.51it/s]


Epoch: [16/20]
Train Loss: 0.54569
Valid Loss: 0.58617


Training...: 100%|██████████| 1414/1414 [00:08<00:00, 175.73it/s]
Evaluating...: 100%|██████████| 468/468 [00:01<00:00, 305.78it/s]


Epoch: [17/20]
Train Loss: 0.54058
Valid Loss: 0.58595


Training...: 100%|██████████| 1414/1414 [00:08<00:00, 175.68it/s]
Evaluating...: 100%|██████████| 468/468 [00:01<00:00, 285.54it/s]


Epoch: [18/20]
Train Loss: 0.53546
Valid Loss: 0.58586


Training...: 100%|██████████| 1414/1414 [00:08<00:00, 173.85it/s]
Evaluating...: 100%|██████████| 468/468 [00:01<00:00, 308.18it/s]


Epoch: [19/20]
Train Loss: 0.53035
Valid Loss: 0.58593


Training...: 100%|██████████| 1414/1414 [00:08<00:00, 175.77it/s]
Evaluating...: 100%|██████████| 468/468 [00:01<00:00, 303.76it/s]

Epoch: [20/20]
Train Loss: 0.52524
Valid Loss: 0.58614





## Inference

In [10]:
from __init__ import * 
from models.MF import *
from models.utils import *  
from models.metrics import * 

import pandas as pd

In [13]:
class args:
    """Settings used to rebuild the trained model for inference."""

    # Reproducibility and hardware selection (falls back to CPU without CUDA).
    seed = 42
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Loader / optimiser settings carried over from the training configuration.
    batch_size = 512
    learning_rate = 1e-4

    # Model size. The counts are hardcoded rather than derived from data; they
    # must match the values the checkpoint was trained with, otherwise loading
    # the saved parameters will not line up with the model.
    latent_dim = 32
    num_users = 25369
    num_items = 44553

In [14]:
# Rebuild the NeuMF architecture on the selected device, then restore the
# trained parameters saved under SAVE_PATH.
model = NeuMF(args).to(args.device)
checkpoint_path = os.path.join(SAVE_PATH, 'NeuMF_parameters.pt')
# map_location remaps the stored tensors onto args.device, so a checkpoint saved
# from a CUDA run still loads when this notebook falls back to 'cpu'.
state_dict = torch.load(checkpoint_path, map_location=args.device)
# Kept as the last expression so the cell still displays the key-matching report.
model.load_state_dict(state_dict)

<All keys matched successfully>

In [15]:
# Same objective as in training. BCELoss holds no parameters here, so moving it
# to the device is effectively a no-op.
criterion = nn.BCELoss().to(args.device)

# Load the held-out split and convert its ratings with the same
# sentiment_score transform used for the train/valid splits.
test_path = os.path.join(YELP_DIR, 'test.csv')
testset = pd.read_csv(test_path, encoding='utf-8-sig')
testset.loc[:, 'stars'] = testset.loc[:, 'stars'].apply(sentiment_score)

test_loader = get_dataloader(args, testset, num_workers=4)

In [16]:
# Score the restored model on the test loader with the BCE criterion.
# NOTE(review): evaluate() is defined outside this notebook and test_loss is not
# displayed by this cell — show or log it if the value is meant to be reported.
test_loss = evaluate(args, model, test_loader, criterion)

Evaluating...: 100%|██████████| 469/469 [00:01<00:00, 270.05it/s]


In [19]:
# Ranking-quality metrics for the restored model on the test set.
# NOTE(review): [10, 20] are presumably the top-k cutoffs (the recorded run shows
# one pass over 25303 entries per list element) — confirm against models.metrics.
results = cf_metrics(args, testset, model, [10, 20])

evaluating...: 100%|██████████| 25303/25303 [01:14<00:00, 338.05it/s]
evaluating...: 100%|██████████| 25303/25303 [01:12<00:00, 348.70it/s]


In [20]:
def lstm_evaluate(args, model, test_loader, criterion):
    """Evaluate ``model`` on ``test_loader`` with gradients disabled.

    Args:
        args: Settings object; only ``args.device`` is read here.
        model: Module invoked as ``model(reviews)``; switched to eval mode.
        test_loader: Sized iterable of batches. Element 0 of each batch is fed
            to the model and element 1 is used as the labels.
        criterion: Callable ``criterion(pred_y, labels)`` whose result exposes
            ``.item()``.

    Returns:
        tuple: ``(valid_acc, valid_loss)``. Each is the mean over batches of the
        per-batch ``calc_accuracy(...) / len(pred_y)`` and
        ``loss.item() / len(pred_y)`` values respectively.

    Raises:
        ZeroDivisionError: If ``test_loader`` yields no batches.
    """
    valid_acc, valid_loss = 0, 0

    model.eval()
    with torch.no_grad():
        for batch in tqdm.tqdm(test_loader, desc='Evaluating..'):
            batch = tuple(t.to(args.device) for t in batch)
            reviews = batch[0]
            labels = batch[1]

            pred_y = model(reviews).squeeze()
            # A one-sample batch with a single score is squeezed down to a 0-d
            # tensor, which no longer lines up with `labels` and has no len().
            # Restore the batch axis so such a batch is scored like any other.
            if pred_y.dim() == 0:
                pred_y = pred_y.unsqueeze(0)
            loss = criterion(pred_y, labels)

            batch_len = len(pred_y)
            # NOTE(review): calc_accuracy is defined elsewhere; dividing by the
            # batch length presumes it returns a count of correct predictions.
            valid_acc += calc_accuracy(pred_y, labels) / batch_len
            # NOTE(review): if `criterion` already averages over the batch (the
            # default reduction of torch losses), this divides by the batch
            # length a second time — confirm that is intended.
            valid_loss += loss.item() / batch_len

    num_batches = len(test_loader)
    valid_acc /= num_batches
    valid_loss /= num_batches

    return valid_acc, valid_loss

Unnamed: 0,precision,recall,f1_score,ndcg
0,0.48874,0.881324,0.567297,0.86208
1,0.298309,0.954832,0.411871,0.862757
