In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from dataset import extract_users_movies_ratings_lists, TripletDataset, save_predictions
import torch 

#Useful constants
number_of_users, number_of_movies = (10000, 1000)
RANDOM_STATE = 58
DATA_DIR = '../data'
EXPERIMENT_NAME = 'SVDpp'
N_TRIALS = 10

data_pd = pd.read_csv(DATA_DIR+'/data_train.csv')
train_pd, val_pd = train_test_split(data_pd, train_size=0.9, random_state=RANDOM_STATE)


import time
import pandas as pd
from libreco.data import split_by_ratio_chrono, DatasetPure
from libreco.algorithms import SVDpp
# remove unnecessary tensorflow logging
import os
import tensorflow as tf
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
os.environ["KMP_WARNINGS"] = "FALSE"
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)


users_train, movies_train, ratings_train = extract_users_movies_ratings_lists(train_pd)
users_val, movies_val, ratings_val = extract_users_movies_ratings_lists(val_pd)

test_pd = pd.read_csv(DATA_DIR+'/sampleSubmission.csv')
users_test, movies_test, ratings_test = extract_users_movies_ratings_lists(test_pd)

train = pd.DataFrame({'user': users_train, 'item': movies_train, 'label': ratings_train})
val = pd.DataFrame({'user': users_val, 'item': movies_val, 'label': ratings_val})

train_data, data_info = DatasetPure.build_trainset(train)
eval_data = DatasetPure.build_evalset(val)



def prepate_model(emb_size):
    with tf.compat.v1.variable_scope(f'model_{emb_size}'):
        svdpp = SVDpp(task="rating", data_info=data_info, embed_size=emb_size,
                        n_epochs=1, lr=0.001, reg=None, batch_size=256)
        svdpp.fit(train_data, verbose=2, eval_data=eval_data,
                    metrics=["rmse", "mae", "r2"])
        yhat = svdpp.predict(user=users_val, item=movies_val)
        return svdpp, yhat

start = 5
end = 6

models = []
val_yhat = []
for i in range(start, end+1):
    model, yhat = prepate_model(i)
    models.append(model)
    val_yhat.append(yhat)


2022-07-15 15:35:56.574144: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2022-07-15 15:35:56.574172: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


Instructions for updating:
non-resource variables are not supported in the long term


2022-07-15 15:36:08.146866: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2022-07-15 15:36:08.148417: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-07-15 15:36:08.148474: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-07-15 15:36:08.148505: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (fedora): /proc/driver/nvidia/version does not exist


Training start time: [35m2022-07-15 15:36:08[0m


2022-07-15 15:36:08.894473: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:354] MLIR V1 optimization pass is not enabled
train: 100%|██████████| 4138/4138 [07:42<00:00,  8.96it/s]


Epoch 1 elapsed: 462.356s
	 [32mtrain_loss: 1.0385[0m


eval_pred: 100%|██████████| 15/15 [00:00<00:00, 563.69it/s]


	 eval rmse: 0.9999
	 eval mae: 0.8072
	 eval r2: 0.1994
Training start time: [35m2022-07-15 15:43:52[0m


train: 100%|██████████| 4138/4138 [08:11<00:00,  8.41it/s]


Epoch 1 elapsed: 492.196s
	 [32mtrain_loss: 1.038[0m


eval_pred: 100%|██████████| 15/15 [00:00<00:00, 615.22it/s]


	 eval rmse: 0.9994
	 eval mae: 0.8056
	 eval r2: 0.2002


In [17]:

val_base_model = np.column_stack(val_yhat)
import scipy
def combine_models(mu, sigma, yhat):
    coeff = np.linspace(start, end, num=(end-start+1))
    coeff = scipy.stats.norm.pdf(coeff, loc=mu, scale=sigma)
    coeff = coeff / coeff.sum()

    return np.matmul(yhat, coeff)


def run_trial(trial):
    mu = trial.suggest_float('mu', start, end)
    sigma = trial.suggest_float('sigma', 1e-5, 1000)
    yhat = combine_models(mu, sigma, val_base_model)
    print(yhat)
    return np.sqrt(np.mean((yhat-ratings_val)**2))



from optuna_single_gpu import run_optuna
best_params = run_optuna(run_trial, EXPERIMENT_NAME, N_TRIALS)


save_predictions(f'{EXPERIMENT_NAME}-predictedSubmission.csv', yhat)

[32m[I 2022-07-15 15:57:27,304][0m A new study created in memory with name: SVDpp[0m
[32m[I 2022-07-15 15:57:27,318][0m Trial 0 finished with value: 0.9984683365160986 and parameters: {'mu': 5.89048638622441, 'sigma': 491.25267091210765}. Best is trial 0 with value: 0.9984683365160986.[0m
[32m[I 2022-07-15 15:57:27,324][0m Trial 1 finished with value: 0.9984682693814392 and parameters: {'mu': 5.759589004970345, 'sigma': 21.568542029723456}. Best is trial 1 with value: 0.9984682693814392.[0m
[32m[I 2022-07-15 15:57:27,328][0m Trial 2 finished with value: 0.99846833655756 and parameters: {'mu': 5.951834660233731, 'sigma': 595.3182084320346}. Best is trial 1 with value: 0.9984682693814392.[0m
[32m[I 2022-07-15 15:57:27,332][0m Trial 3 finished with value: 0.9984683367654725 and parameters: {'mu': 5.181331518830458, 'sigma': 845.4362208777652}. Best is trial 1 with value: 0.9984682693814392.[0m
[32m[I 2022-07-15 15:57:27,337][0m Trial 4 finished with value: 0.9984683379728

[3.2137366  3.42360348 3.95772928 ... 3.67357856 3.89950097 4.06745813]
[3.21371922 3.42358406 3.95770774 ... 3.67355669 3.89950234 4.06745009]
[3.21373661 3.42360349 3.95772929 ... 3.67357857 3.89950097 4.06745813]
[3.21373667 3.42360355 3.95772936 ... 3.67357864 3.89950096 4.06745816]
[3.21373698 3.4236039  3.95772974 ... 3.67357903 3.89950094 4.0674583 ]
[3.21373663 3.42360351 3.95772931 ... 3.67357859 3.89950097 4.06745814]
[3.21373677 3.42360366 3.95772948 ... 3.67357876 3.89950096 4.0674582 ]
[3.21373664 3.42360352 3.95772932 ... 3.6735786  3.89950097 4.06745815]
[3.21373663 3.42360351 3.95772932 ... 3.6735786  3.89950097 4.06745814]
[3.21373667 3.42360355 3.95772936 ... 3.67357864 3.89950097 4.06745816]


FileNotFoundError: [Errno 2] No such file or directory: '/cluster/scratch/piattigi/CIL/res_optuna/SVDpp/SVDpp-study.pkl'