In [3]:
import os

import pandas as pd
import pytorch_lightning as pl
import torch
from neptune.new.types import File
from pyro import param
from pytorch_lightning.loggers import NeptuneLogger
from sklearn.model_selection import train_test_split

from dataset import extract_users_movies_ratings_lists, TripletDataset, save_predictions

# --- Global configuration -------------------------------------------------
number_of_users = 10000
number_of_movies = 1000
RANDOM_STATE = 42
BATCH_SIZE = 256
DATA_DIR = '../data'

# --- Labelled data: read once, then split 90% / 10% into train / validation
data_pd = pd.read_csv(f'{DATA_DIR}/data_train.csv')
train_pd, val_pd = train_test_split(
    data_pd,
    train_size=0.9,
    random_state=RANDOM_STATE,
)

# Training loader: shuffled, and the last incomplete batch is dropped.
users_train, movies_train, ratings_train = extract_users_movies_ratings_lists(train_pd)
d_train = TripletDataset(users_train, movies_train, ratings_train)
train_dataloader = torch.utils.data.DataLoader(
    d_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
    drop_last=True,
)

# Validation loader: fixed order, every sample kept.
users_val, movies_val, ratings_val = extract_users_movies_ratings_lists(val_pd)
d_val = TripletDataset(users_val, movies_val, ratings_val)
val_dataloader = torch.utils.data.DataLoader(
    d_val,
    batch_size=BATCH_SIZE,
    shuffle=False,
    drop_last=False,
)

# Test loader: built from the sample submission file, fixed order so the
# predictions can be written back row by row.
test_pd = pd.read_csv(f'{DATA_DIR}/sampleSubmission.csv')
users_test, movies_test, ratings_test = extract_users_movies_ratings_lists(test_pd)
d_test = TripletDataset(users_test, movies_test, ratings_test, is_test_dataset=True)
test_dataloader = torch.utils.data.DataLoader(
    d_test,
    batch_size=BATCH_SIZE,
    shuffle=False,
    drop_last=False,
)



EXPERIMENT_NAME = 'NCF_dist_exp'
DEBUG = False

# Neptune traffic is routed through the proxy at proxy.ethz.ch:3128.
proxies = {
    'http': 'http://proxy.ethz.ch:3128',
    'https': 'http://proxy.ethz.ch:3128',
}

# SECURITY: the API token must never be stored in the notebook. It is read
# from the NEPTUNE_API_TOKEN environment variable instead; when the variable
# is unset, `None` is passed and Neptune falls back to its own token lookup.
# The token that was previously committed here has to be treated as leaked
# and should be revoked in the Neptune account settings.
neptune_logger = NeptuneLogger(
    project="TiCinesi/CIL-project",
    api_key=os.environ.get('NEPTUNE_API_TOKEN'),
    mode='debug' if DEBUG else 'async',  # 'debug' is selected when DEBUG is True
    name=EXPERIMENT_NAME,
    tags=[],  # optional
    proxies=proxies,
)

from model import NCFDistribution

# Hyperparameters forwarded as keyword arguments when restoring the model.
# (presumably the result of a hyperparameter search - confirm their origin)
params = dict(
    embedding_size=39,
    hidden_size=14,
    alpha=0.1812479548064849,
    sigma_prior=0.2286523513862455,
    distance_0_to_3=0.33728622361587846,
    distance_3_to_2=0.986891143302744,
    distance_2_to_1=0.6932965943259499,
    distance_0_to_4=0.9201001618073893,
    distance_4_to_5=1.4031427821242537,
    p_dropout=0.18573958557776177,
    scaling=2.687106607498346,
)

# Restore the model from the checkpoint file in the current directory.
model = NCFDistribution.load_from_checkpoint(
    './epoch=19-step=82740.ckpt',
    **params,
)



In [4]:
# Decide on the accelerator up front: "gpu" when CUDA is usable, otherwise
# None so that Lightning applies its own default.
if torch.cuda.is_available():
    trainer_accelerator = "gpu"
else:
    trainer_accelerator = None

# NOTE(review): no `logger=` argument is passed, so this Trainer does not use
# `neptune_logger` - confirm that this is intended.
trainer = pl.Trainer(
    accelerator=trainer_accelerator,
    devices=1,
    max_epochs=20,
    log_every_n_steps=1,
    track_grad_norm=2,
    detect_anomaly=True,
)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


In [5]:
# Run inference over the test dataloader with the checkpoint-restored model.
# The result is concatenated with `torch.concat` in a later cell before it is
# written to the submission file.
predictions = trainer.predict(model, dataloaders=test_dataloader)

Missing logger folder: /home/gio/kDrive/ETH/ETH_code/CIL-project/models/NCF_distribution_exp/lightning_logs
  rank_zero_warn(


Predicting DataLoader 0: 100%|██████████| 4598/4598 [00:42<00:00, 107.55it/s]


In [7]:
def save_predictions(res_path, predictions, data_dir=None):
    """Fill the sample-submission template with `predictions` and save it as CSV.

    The template ``sampleSubmission.csv`` is read from `data_dir`, its
    ``Prediction`` column is cast to float and overwritten with `predictions`,
    and the frame is written to `res_path` without the index, formatting
    floats with three decimals.

    NOTE(review): this notebook-level definition shadows the
    `save_predictions` imported from `dataset` at the top of the notebook -
    confirm which one is meant to be used and drop the other.

    Args:
        res_path: Destination path of the CSV file.
        predictions: Values assigned to the ``Prediction`` column, in the
            row order of the template.
        data_dir: Directory that contains ``sampleSubmission.csv``. When
            omitted, the notebook-level ``DATA_DIR`` is used.

    Raises:
        KeyError: If the template has no ``Prediction`` column.
    """
    if data_dir is None:
        data_dir = DATA_DIR

    submission = pd.read_csv(f'{data_dir}/sampleSubmission.csv')
    submission = submission.astype({'Prediction': 'float'})

    # Locate the target column by name instead of assuming it sits at
    # position 1, so the column that was cast above is the one overwritten.
    prediction_col = submission.columns.get_loc('Prediction')
    submission.iloc[:, prediction_col] = predictions

    submission.to_csv(res_path, index=False, float_format='%.3f')


In [8]:
# Training and prediction are not re-run in this cell; the calls are kept
# commented out for reference and `predictions` comes from the predict cell.
#trainer.fit(model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)
#predictions = trainer.predict(model, dataloaders=test_dataloader)

# Join the sequence of prediction tensors into a single tensor.
yhat = torch.cat(predictions)

# Write the submission file, then attach the same file to the Neptune run.
submission_path = f'{EXPERIMENT_NAME}-predictedSubmission.csv'
save_predictions(submission_path, yhat)
neptune_logger.experiment['results/end_model'].upload(File(submission_path))

https://app.neptune.ai/TiCinesi/CIL-project/e/CIL1-14


Info (NVML): NVML Shared Library Not Found. GPU usage metrics may not be reported. For more information, see https://docs.neptune.ai/you-should-know/what-can-you-log-and-display#hardware-consumption


Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#.stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.
