In [38]:
import sys
import os
import torch
import cornac
import papermill as pm
import scrapbook as sb
import pandas as pd
from recommenders.datasets import movielens
from recommenders.datasets.python_splitters import python_random_split
from recommenders.evaluation.python_evaluation import map_at_k, ndcg_at_k, precision_at_k, recall_at_k
from recommenders.models.cornac.cornac_utils import predict_ranking
from recommenders.utils.timer import Timer
from recommenders.utils.constants import SEED


# Report the interpreter and key library versions so a run can be reproduced.
version_report = (
    ("System version: {}", sys.version),
    ("PyTorch version: {}", torch.__version__),
    ("Cornac version: {}", cornac.__version__),
)
for template, version in version_report:
    print(template.format(version))

System version: 3.8.5 (default, Sep  4 2020, 07:30:14) 
[GCC 7.3.0]
PyTorch version: 1.10.2+cu102
Cornac version: 1.14.2


In [79]:
# Random seed. This rebinds the SEED name imported from
# recommenders.utils.constants in the import cell.
SEED = 0

# Number of top-ranked items to recommend per user.
TOP_K = 10

# Model parameters.
# NOTE(review): the cell that constructs the VAECF model passes its own
# literal hyperparameters and does not read these constants -- confirm which
# set of values is the intended one.
LATENT_DIM = 300
ENCODER_DIMS = [300]
ACT_FUNC = "tanh"
LIKELIHOOD = "pois"
NUM_EPOCHS = 100
BATCH_SIZE = 512
LEARNING_RATE = 5e-4

In [73]:
# Load the raw interaction log into a DataFrame.
train_ratings_csv = '/opt/ml/input/data/train/train_ratings.csv'
train = pd.read_csv(filepath_or_buffer=train_ratings_csv)

In [74]:
# Treat every logged interaction as implicit positive feedback (rating = 1).
train['rating'] = 1

# `DataFrame.drop` returns a new frame, so the result must be assigned back.
# Without the assignment `time` stays in `train`, and the tuples later handed
# to `Dataset.from_uir` carry the timestamp in the position read as the rating.
# `errors='ignore'` keeps this cell safe to re-run once `time` is already gone.
train = train.drop(columns='time', errors='ignore')

# Display the prepared frame (pandas truncates the rendering).
train

Unnamed: 0,user,item,rating
0,11,4643,1
1,11,170,1
2,11,531,1
3,11,616,1
4,11,2140,1
...,...,...,...
5154466,138493,44022,1
5154467,138493,4958,1
5154468,138493,68319,1
5154469,138493,40819,1


In [75]:
# `Dataset.from_uir` consumes positional (user, item, rating) triplets, so the
# three columns are selected explicitly and in that order. This keeps any other
# column still present in `train` (e.g. `time`) from being read as the rating.
train_set = cornac.data.Dataset.from_uir(
    train[['user', 'item', 'rating']].itertuples(index=False),
    seed=SEED,
)

In [76]:
# Sanity-check the size of the user and item vocabularies built by Cornac.
for stat_template, stat_value in (
    ('Number of users: {}', train_set.num_users),
    ('Number of items: {}', train_set.num_items),
):
    print(stat_template.format(stat_value))

Number of users: 31360
Number of items: 6807


In [None]:
# Hyperparameters for the Cornac VAECF recommender, collected in one place so
# they are easy to read and tweak.
# NOTE(review): these literals differ from LATENT_DIM / ENCODER_DIMS / ACT_FUNC /
# LIKELIHOOD / NUM_EPOCHS / BATCH_SIZE / LEARNING_RATE defined in the
# configuration cell, which are not used here -- confirm which are intended.
vaecf_params = dict(
    name='VAECF',
    k=100,
    autoencoder_structure=[400],
    act_fn='sigmoid',
    likelihood='mult',
    n_epochs=50,
    batch_size=20000,
    learning_rate=0.001,
    beta=1.0,
    trainable=True,
    verbose=True,
    seed=SEED,
    use_gpu=True,
)
model = cornac.models.vaecf.recom_vaecf.VAECF(**vaecf_params)

In [None]:
# Train the model on the full interaction dataset (no validation set is passed).
model.fit(train_set=train_set)

In [124]:
# Score items for every user with the fitted model and time the call.
# `remove_seen=True` asks `predict_ranking` to leave out (user, item) pairs
# that already appear in `train`.
with Timer() as t:
    all_predictions = predict_ranking(
        model,
        train,
        usercol='user',
        itemcol='item',
        remove_seen=True,
    )

# Formatting the timer inserts the elapsed time in seconds.
print("Took {} seconds for prediction.".format(t))

Took 251.6347 seconds for prediction.


In [125]:
# Keep the TOP_K highest-scoring items per user: sort all rows by score
# (descending), then take the first TOP_K rows of each user group.
# TOP_K comes from the configuration cell instead of a repeated literal, so the
# cut-off is controlled from one place.
pre_top10 = (
    all_predictions
    .sort_values(by='prediction', ascending=False)
    .groupby("user")
    .head(TOP_K)
)

In [126]:
# Order the rows by user (ascending) and, within each user, by score from
# highest to lowest.
pre_sort_top10 = pre_top10.sort_values(
    by=['user', 'prediction'],
    ascending=[True, False],
)

In [127]:
# Replace the index left over from sorting with a fresh 0..n-1 index.
pre_sort_top10 = pre_sort_top10.reset_index(drop=True)

# The submission keeps only the user and item columns, in that order.
submission = pre_sort_top10[['user', 'item']].copy()

In [128]:
# Write the (user, item) recommendations to disk without the index column.
# The output directory is created first so the write does not fail when the
# directory is missing.
submission_path = '/opt/ml/input/submission/bie.csv'
os.makedirs(os.path.dirname(submission_path), exist_ok=True)
submission.to_csv(submission_path, index=False)