In [2]:
import sys
import os
import torch
import cornac
import papermill as pm
import scrapbook as sb
import pandas as pd
from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_random_split
from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.models.cornac.cornac_utils import predict_ranking
from recommenders.utils.timer import Timer
from recommenders.utils.constants import SEED

print("System version: {}".format(sys.version))
print("PyTorch version: {}".format(torch.__version__))
print("Cornac version: {}".format(cornac.__version__))

System version: 3.8.13 (default, Mar 28 2022, 11:38:47) 
[GCC 7.5.0]
PyTorch version: 1.7.1
Cornac version: 1.14.2


In [11]:
# Select MovieLens data size: 100k, 1m, 10m, or 20m
MOVIELENS_DATA_SIZE = '100k'

# top k items to recommend
TOP_K = 10

# Model parameters
LATENT_DIM = 50
ENCODER_DIMS = [100]
ACT_FUNC = "tanh"
LIKELIHOOD = "pois"
NUM_EPOCHS = 500
BATCH_SIZE = 128
LEARNING_RATE = 0.001

In [21]:
train = pd.read_csv("/opt/ml/input/data/train/train_ratings.csv")

In [22]:
train = train[['user', 'item']]

In [23]:
train.columns = ['userID', 'itemID']

In [25]:
train['rating'] = 1
train.head()

Unnamed: 0,userID,itemID,rating
0,11,4643,1
1,11,170,1
2,11,531,1
3,11,616,1
4,11,2140,1


In [8]:
train, test = python_random_split(data, 0.75)

In [26]:
train_set = cornac.data.Dataset.from_uir(train.itertuples(index=False), seed=SEED)

print('Number of users: {}'.format(train_set.num_users))
print('Number of items: {}'.format(train_set.num_items))

Number of users: 31360
Number of items: 6807


In [27]:
bivae = cornac.models.BiVAECF(
    k=LATENT_DIM,
    encoder_structure=ENCODER_DIMS,
    act_fn=ACT_FUNC,
    likelihood=LIKELIHOOD,
    n_epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    seed=SEED,
    use_gpu=torch.cuda.is_available(),
    verbose=True
)

with Timer() as t:
    bivae.fit(train_set)
print("Took {} seconds for training.".format(t))

  0%|          | 0/500 [00:00<?, ?it/s]

Took 3434.8445 seconds for training.


In [28]:
with Timer() as t:
    all_predictions = predict_ranking(bivae, train, usercol='userID', itemcol='itemID', remove_seen=True)
print("Took {} seconds for prediction.".format(t))

Took 242.6125 seconds for prediction.


In [14]:
eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)

print("MAP:\t%f" % eval_map,
      "NDCG:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')

MAP:	0.000000
NDCG:	0.000000
Precision@K:	0.000000
Recall@K:	0.000000


In [16]:
all_predictions[all_predictions['userID'] == 69029]

Unnamed: 0,userID,itemID,prediction
1288618,69029,5902,0.473038
1288619,69029,4896,0.158540
1288620,69029,253,0.157383
1288621,69029,924,0.595679
1288622,69029,33166,0.285728
...,...,...,...
1295362,69029,49276,0.000008
1295363,69029,2170,0.000018
1295364,69029,43,0.000241
1295365,69029,27744,0.000087


In [31]:
users = list(train['userID'].unique())

In [38]:
submission = pd.DataFrame()

for user in users:
    interaction = all_predictions[all_predictions['userID']==user]
    interaction = interaction.sort_values(by=['prediction'], ascending=False).iloc[:10]
    submission = pd.concat([submission, interaction])
    
    

print(submission)

KeyboardInterrupt: 

In [36]:
data = all_predictions.values.tolist()

In [38]:
data.sort()

In [39]:
data

[[11.0, 2.0, 0.8541661500930786],
 [11.0, 3.0, 0.10261920839548111],
 [11.0, 4.0, 0.0011345482198521495],
 [11.0, 5.0, 0.09183979034423828],
 [11.0, 6.0, 0.08217637240886688],
 [11.0, 7.0, 0.02674957364797592],
 [11.0, 8.0, 0.002078825840726495],
 [11.0, 9.0, 0.005010365042835474],
 [11.0, 10.0, 0.2797224819660187],
 [11.0, 11.0, 0.03425038978457451],
 [11.0, 12.0, 0.037443216890096664],
 [11.0, 13.0, 0.009658555500209332],
 [11.0, 14.0, 0.001498099765740335],
 [11.0, 15.0, 0.0205389317125082],
 [11.0, 16.0, 0.07583043724298477],
 [11.0, 17.0, 0.016698572784662247],
 [11.0, 18.0, 0.053634148091077805],
 [11.0, 20.0, 0.02168428897857666],
 [11.0, 21.0, 0.021613428369164467],
 [11.0, 22.0, 0.05926218628883362],
 [11.0, 23.0, 0.021898331120610237],
 [11.0, 24.0, 0.28816530108451843],
 [11.0, 25.0, 0.03770565614104271],
 [11.0, 26.0, 0.022710494697093964],
 [11.0, 27.0, 0.009626016952097416],
 [11.0, 28.0, 0.0018993897829204798],
 [11.0, 29.0, 0.12293393909931183],
 [11.0, 30.0, 0.00018559

In [40]:
users.sort()

In [41]:
users

[11,
 14,
 18,
 25,
 31,
 35,
 43,
 50,
 58,
 60,
 61,
 65,
 72,
 77,
 82,
 85,
 90,
 91,
 96,
 98,
 99,
 102,
 116,
 121,
 124,
 129,
 132,
 133,
 135,
 136,
 147,
 152,
 154,
 155,
 162,
 163,
 168,
 175,
 182,
 189,
 190,
 201,
 204,
 205,
 206,
 208,
 209,
 211,
 213,
 215,
 218,
 220,
 232,
 237,
 239,
 241,
 248,
 252,
 254,
 258,
 264,
 266,
 271,
 279,
 284,
 285,
 294,
 304,
 312,
 313,
 316,
 317,
 318,
 337,
 340,
 342,
 348,
 351,
 359,
 361,
 367,
 370,
 372,
 375,
 379,
 383,
 387,
 388,
 394,
 395,
 398,
 407,
 409,
 413,
 419,
 421,
 422,
 425,
 427,
 430,
 431,
 436,
 440,
 442,
 448,
 451,
 455,
 457,
 459,
 462,
 466,
 469,
 471,
 482,
 485,
 486,
 492,
 500,
 503,
 504,
 505,
 508,
 512,
 520,
 521,
 532,
 534,
 535,
 540,
 546,
 548,
 557,
 563,
 571,
 572,
 573,
 577,
 578,
 586,
 588,
 598,
 604,
 609,
 612,
 614,
 617,
 619,
 626,
 631,
 633,
 637,
 649,
 650,
 661,
 662,
 664,
 672,
 689,
 692,
 693,
 694,
 700,
 710,
 724,
 729,
 735,
 737,
 738,
 739,
 741,
 

In [47]:
submission = []
tmp = []
for user in users:
    while data_:
        if data_[0][0] != user:
            break
        else:
            interaction = data_.popleft()
            tmp.append(interaction)
    tmp.sort(key=lambda x: -x[2])
    submission += tmp[:10]
    tmp = []
    
print(len(submission))

313600


In [42]:
from collections import deque

In [43]:
data_ = deque(data)

In [44]:
data_

deque([[11.0, 2.0, 0.8541661500930786],
       [11.0, 3.0, 0.10261920839548111],
       [11.0, 4.0, 0.0011345482198521495],
       [11.0, 5.0, 0.09183979034423828],
       [11.0, 6.0, 0.08217637240886688],
       [11.0, 7.0, 0.02674957364797592],
       [11.0, 8.0, 0.002078825840726495],
       [11.0, 9.0, 0.005010365042835474],
       [11.0, 10.0, 0.2797224819660187],
       [11.0, 11.0, 0.03425038978457451],
       [11.0, 12.0, 0.037443216890096664],
       [11.0, 13.0, 0.009658555500209332],
       [11.0, 14.0, 0.001498099765740335],
       [11.0, 15.0, 0.0205389317125082],
       [11.0, 16.0, 0.07583043724298477],
       [11.0, 17.0, 0.016698572784662247],
       [11.0, 18.0, 0.053634148091077805],
       [11.0, 20.0, 0.02168428897857666],
       [11.0, 21.0, 0.021613428369164467],
       [11.0, 22.0, 0.05926218628883362],
       [11.0, 23.0, 0.021898331120610237],
       [11.0, 24.0, 0.28816530108451843],
       [11.0, 25.0, 0.03770565614104271],
       [11.0, 26.0, 0.022710494697

In [49]:
len(users)

31360

In [52]:
submission_bivae = pd.DataFrame(submission)

submission_bivae


Unnamed: 0,0,1,2
0,11.0,2.0,0.854166
1,11.0,32587.0,0.762114
2,11.0,4886.0,0.740395
3,11.0,8360.0,0.737080
4,11.0,7438.0,0.734858
...,...,...,...
313595,138493.0,4308.0,0.773831
313596,138493.0,110.0,0.762027
313597,138493.0,1270.0,0.732594
313598,138493.0,5349.0,0.722768


In [53]:
submission_bivae.columns = ['user', 'item', 'rating']

In [54]:
submission_bivae

Unnamed: 0,user,item,rating
0,11.0,2.0,0.854166
1,11.0,32587.0,0.762114
2,11.0,4886.0,0.740395
3,11.0,8360.0,0.737080
4,11.0,7438.0,0.734858
...,...,...,...
313595,138493.0,4308.0,0.773831
313596,138493.0,110.0,0.762027
313597,138493.0,1270.0,0.732594
313598,138493.0,5349.0,0.722768


In [56]:
submission_bivae = submission_bivae[['user', 'item']]

In [61]:
submission_bivae.to_csv('/opt/ml/level2-movie-recommendation-level2-recsys-15/임경연/CODE/Recommenders/BiVAE/submission_bivae.csv', index=False)