In [1]:
import sys
import os
import torch
import cornac
import papermill as pm
import scrapbook as sb
import pandas as pd
from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_random_split
from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.models.cornac.cornac_utils import predict_ranking
from recommenders.utils.timer import Timer
from recommenders.utils.constants import SEED

print("System version: {}".format(sys.version))
print("PyTorch version: {}".format(torch.__version__))
print("Cornac version: {}".format(cornac.__version__))

System version: 3.8.13 (default, Mar 28 2022, 11:38:47) 
[GCC 7.5.0]
PyTorch version: 1.7.1
Cornac version: 1.14.2


In [2]:
# Select MovieLens data size: 100k, 1m, 10m, or 20m
MOVIELENS_DATA_SIZE = '100k'

# top k items to recommend
TOP_K = 10

# Model parameters
LATENT_DIM = 200
ENCODER_DIMS = [100]
ACT_FUNC = "tanh"
LIKELIHOOD = "pois"
NUM_EPOCHS = 700
BATCH_SIZE = 32
LEARNING_RATE = 0.0001

In [3]:
train = pd.read_csv("/opt/ml/input/data/train/train_ratings.csv")

In [4]:
train = train[['user', 'item']]

In [5]:
train.columns = ['userID', 'itemID']

In [6]:
train['rating'] = 1
train.head()

Unnamed: 0,userID,itemID,rating
0,11,4643,1
1,11,170,1
2,11,531,1
3,11,616,1
4,11,2140,1


In [7]:
# train, test = python_random_split(data, 0.75)

In [8]:
train_set = cornac.data.Dataset.from_uir(train.itertuples(index=False), seed=SEED)

print('Number of users: {}'.format(train_set.num_users))
print('Number of items: {}'.format(train_set.num_items))

Number of users: 31360
Number of items: 6807


In [9]:
bivae = cornac.models.BiVAECF(
    k=LATENT_DIM,
    encoder_structure=ENCODER_DIMS,
    act_fn=ACT_FUNC,
    likelihood=LIKELIHOOD,
    n_epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    seed=SEED,
    use_gpu=torch.cuda.is_available(),
    verbose=True
)

with Timer() as t:
    bivae.fit(train_set)
print("Took {} seconds for training.".format(t))

  0%|          | 0/700 [00:00<?, ?it/s]

Took 9972.9541 seconds for training.


In [10]:
with Timer() as t:
    all_predictions = predict_ranking(bivae, train, usercol='userID', itemcol='itemID', remove_seen=True)
print("Took {} seconds for prediction.".format(t))

Took 236.6117 seconds for prediction.


In [14]:
eval_map = map_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_ndcg = ndcg_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_precision = precision_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)
eval_recall = recall_at_k(test, all_predictions, col_prediction='prediction', k=TOP_K)

print("MAP:\t%f" % eval_map,
      "NDCG:\t%f" % eval_ndcg,
      "Precision@K:\t%f" % eval_precision,
      "Recall@K:\t%f" % eval_recall, sep='\n')

MAP:	0.000000
NDCG:	0.000000
Precision@K:	0.000000
Recall@K:	0.000000


In [16]:
all_predictions[all_predictions['userID'] == 69029]

Unnamed: 0,userID,itemID,prediction
1288618,69029,5902,0.473038
1288619,69029,4896,0.158540
1288620,69029,253,0.157383
1288621,69029,924,0.595679
1288622,69029,33166,0.285728
...,...,...,...
1295362,69029,49276,0.000008
1295363,69029,2170,0.000018
1295364,69029,43,0.000241
1295365,69029,27744,0.000087


In [11]:
users = list(train['userID'].unique())

In [12]:
data = all_predictions.values.tolist()

In [13]:
data.sort()

In [14]:
data

[[11.0, 2.0, 0.5212566256523132],
 [11.0, 3.0, 0.010587792843580246],
 [11.0, 4.0, 1.7008114809868857e-05],
 [11.0, 5.0, 0.019548503682017326],
 [11.0, 6.0, 0.11548358201980591],
 [11.0, 7.0, 0.007277555298060179],
 [11.0, 8.0, 0.001257253112271428],
 [11.0, 9.0, 0.0005171067896299064],
 [11.0, 10.0, 0.18717628717422485],
 [11.0, 11.0, 0.015407354570925236],
 [11.0, 12.0, 0.014151708222925663],
 [11.0, 13.0, 0.03760424256324768],
 [11.0, 14.0, 0.0009793441276997328],
 [11.0, 15.0, 0.015152473002672195],
 [11.0, 16.0, 0.06264356523752213],
 [11.0, 17.0, 0.013868962414562702],
 [11.0, 18.0, 0.04707712680101395],
 [11.0, 20.0, 0.007337118498980999],
 [11.0, 21.0, 0.023000363260507584],
 [11.0, 22.0, 0.03503836318850517],
 [11.0, 23.0, 0.01876095123589039],
 [11.0, 24.0, 0.42295077443122864],
 [11.0, 25.0, 0.024736598134040833],
 [11.0, 26.0, 0.0024009221233427525],
 [11.0, 27.0, 0.00042748430860228837],
 [11.0, 28.0, 6.123909406596795e-05],
 [11.0, 29.0, 0.06951075792312622],
 [11.0, 30.0

In [15]:
users.sort()

In [16]:
users

[11,
 14,
 18,
 25,
 31,
 35,
 43,
 50,
 58,
 60,
 61,
 65,
 72,
 77,
 82,
 85,
 90,
 91,
 96,
 98,
 99,
 102,
 116,
 121,
 124,
 129,
 132,
 133,
 135,
 136,
 147,
 152,
 154,
 155,
 162,
 163,
 168,
 175,
 182,
 189,
 190,
 201,
 204,
 205,
 206,
 208,
 209,
 211,
 213,
 215,
 218,
 220,
 232,
 237,
 239,
 241,
 248,
 252,
 254,
 258,
 264,
 266,
 271,
 279,
 284,
 285,
 294,
 304,
 312,
 313,
 316,
 317,
 318,
 337,
 340,
 342,
 348,
 351,
 359,
 361,
 367,
 370,
 372,
 375,
 379,
 383,
 387,
 388,
 394,
 395,
 398,
 407,
 409,
 413,
 419,
 421,
 422,
 425,
 427,
 430,
 431,
 436,
 440,
 442,
 448,
 451,
 455,
 457,
 459,
 462,
 466,
 469,
 471,
 482,
 485,
 486,
 492,
 500,
 503,
 504,
 505,
 508,
 512,
 520,
 521,
 532,
 534,
 535,
 540,
 546,
 548,
 557,
 563,
 571,
 572,
 573,
 577,
 578,
 586,
 588,
 598,
 604,
 609,
 612,
 614,
 617,
 619,
 626,
 631,
 633,
 637,
 649,
 650,
 661,
 662,
 664,
 672,
 689,
 692,
 693,
 694,
 700,
 710,
 724,
 729,
 735,
 737,
 738,
 739,
 741,
 

In [17]:
from collections import deque

In [18]:
data_ = deque(data)

In [19]:
submission = []
tmp = []
for user in users:
    while data_:
        if data_[0][0] != user:
            break
        else:
            interaction = data_.popleft()
            tmp.append(interaction)
    tmp.sort(key=lambda x: -x[2])
    submission += tmp[:10]
    tmp = []
    
print(len(submission))

313600


In [49]:
len(users)

31360

In [20]:
submission_bivae = pd.DataFrame(submission)

submission_bivae


Unnamed: 0,0,1,2
0,11.0,37386.0,0.892994
1,11.0,48780.0,0.805969
2,11.0,3156.0,0.793221
3,11.0,8861.0,0.786100
4,11.0,58025.0,0.778887
...,...,...,...
313595,138493.0,33615.0,0.658024
313596,138493.0,2712.0,0.644426
313597,138493.0,4370.0,0.638131
313598,138493.0,6934.0,0.635799


In [21]:
submission_bivae.columns = ['user', 'item', 'rating']

In [22]:
submission_bivae

Unnamed: 0,user,item,rating
0,11.0,37386.0,0.892994
1,11.0,48780.0,0.805969
2,11.0,3156.0,0.793221
3,11.0,8861.0,0.786100
4,11.0,58025.0,0.778887
...,...,...,...
313595,138493.0,33615.0,0.658024
313596,138493.0,2712.0,0.644426
313597,138493.0,4370.0,0.638131
313598,138493.0,6934.0,0.635799


In [23]:
submission_bivae = submission_bivae[['user', 'item']]

In [24]:
submission_bivae.to_csv('/opt/ml/level2-movie-recommendation-level2-recsys-15/임경연/CODE/Recommenders/BiVAE/submission_bivae_4.csv', index=False)