# Preparation stuff

## Connect to Drive

In [None]:
# Set to True when running on Google Colab: it enables mounting Google Drive,
# installing the required packages, and using the Drive-based project folder.
connect_to_drive = False

In [None]:
#Run command and authorize by popup --> other window
#Mounts Google Drive at /content/gdrive; only executed when connect_to_drive is True
if connect_to_drive:
    from google.colab import drive
    drive.mount('/content/gdrive', force_remount=True)

## Install packages

In [None]:
if connect_to_drive:
    #Install FS code: easy_lightning from the "fedsic" branch of its GitHub repository
    #(upgrade, skip dependencies, force reinstall)
    !pip install  --upgrade --no-deps --force-reinstall git+https://github.com/federicosiciliano/easy_lightning.git@fedsic

    #pytorch_lightning is installed explicitly (note: the install above uses --no-deps)
    !pip install pytorch_lightning

## IMPORTS

In [None]:
#Put all imports here
import numpy as np
import matplotlib.pyplot as plt
from copy import deepcopy
#import pickle
import os
import sys
#import cv2
import csv
import torch

## Define paths

In [None]:
# All paths below are built from the project folder:
# a relative "../" by default, or a folder on the mounted Drive when connect_to_drive is set.
project_folder = (
    "/content/gdrive/Shareddrives/<SharedDriveName>"  # Name of SharedDrive folder
    if connect_to_drive
    else "../"
)
# Alternative for a personal Drive: "/content/gdrive/MyDrive/<MyDriveName>"

# cfg/ -> hyperparameter configurations
cfg_folder = os.path.join(project_folder, "cfg")

# data/ -> raw data (data/raw) and preprocessed data (data/processed)
data_folder = os.path.join(project_folder, "data")
raw_data_folder, processed_data_folder = (
    os.path.join(data_folder, sub_dir) for sub_dir in ("raw", "processed")
)

# src/ -> all the (essential) source code
source_folder = os.path.join(project_folder, "src")

# out/ -> every output: models, results, plots, etc.; images go in out/img
out_folder = os.path.join(project_folder, "out")
img_folder = os.path.join(out_folder, "img")

## Import own code

In [None]:
#To import from src:

#put the project folder (the parent of the src directory) at the start of sys.path,
#so that `from src import ...` can be resolved
sys.path.insert(0, project_folder)

#import from src directory
# from src import ??? as additional_module
#additional_module is where the `losses` and `metrics` namespaces are looked up later on
import easy_rec as additional_module #REMOVE THIS LINE IF IMPORTING OWN ADDITIONAL MODULE

import easy_exp, easy_rec, easy_torch #easy_data

# MAIN

## Train

### Data

In [None]:
#Load the experiment configuration named "config_rec"
# NOTE(review): where the name is resolved (presumably a cfg folder) is decided by easy_exp — confirm
cfg = easy_exp.cfg.load_configuration("config_rec")

In [None]:
#cfg["data_params"]["test_sizes"] = [cfg["data_params.dataset_params.out_seq_len.val"],cfg["data_params.dataset_params.out_seq_len.test"]]

#Copy the data parameters and add the raw data folder to the copy
# NOTE(review): presumably copied so that the local path does not end up in cfg — confirm that .copy() is enough for that
data_params = cfg["data_params"].copy()
data_params["data_folder"] = raw_data_folder

#Preprocess the dataset; returns the data and the id maps (maps['uid'] for users, maps['sid'] for items)
data, maps = easy_rec.data_generation_utils.preprocess_dataset(**data_params)

In [None]:
#Save user and item mappings as CSV files, one (key, value) row per map entry
# TODO: check
#Create the output folder if it does not exist yet, otherwise open() fails
os.makedirs(processed_data_folder, exist_ok=True)

for map_key, file_name in (("uid", "user_map.csv"), ("sid", "item_map.csv")):
    #newline="" leaves line endings to the csv module; without it, extra blank
    #rows appear on platforms that translate "\n" into "\r\n"
    with open(os.path.join(processed_data_folder, file_name), "w", newline="") as f_map:
        csv.writer(f_map).writerows(maps[map_key].items())

In [None]:
#Build the datasets from the preprocessed data, using the dataset parameters of the configuration
#(the result is later indexed by split name, e.g. datasets["train"])
datasets = easy_rec.rec_torch.prepare_rec_datasets(data,**cfg["data_params"]["dataset_params"])

In [None]:
#Work on a copy of the collator parameters: the following cells add/replace entries in it
collator_params = cfg["data_params"]["collator_params"].copy()

In [None]:
#The number of items is taken as the largest value in the item map
# NOTE(review): assumes mapped item ids are contiguous and start at 1 — confirm against preprocess_dataset
collator_params["num_items"] = np.max(list(maps["sid"].values()))

In [None]:
# Resolve the optional "negatives_distribution" setting:
#   missing / None -> left untouched
#   "popularity"   -> replaced by the item popularity computed on the training dataset
#   "uniform"      -> left untouched
#   anything else  -> rejected
app = collator_params.get("negatives_distribution")
if app == "popularity":
    collator_params["negatives_distribution"] = easy_rec.data_generation_utils.get_popularity_items(
        datasets["train"], collator_params["num_items"]
    )
elif app is not None and app not in ("uniform",):
    raise ValueError("Invalid negatives distribution")

In [None]:
#Build the collators from the data and the completed collator parameters
collators = easy_rec.rec_torch.prepare_rec_collators(data, **collator_params)

In [None]:
#Build the data loaders from the datasets, using the loader parameters of the configuration
#and the collators as collate_fn
loaders = easy_rec.rec_torch.prepare_rec_data_loaders(datasets, **cfg["model"]["loader_params"], collate_fn=collators)

In [None]:
# Start from a copy of the configured recommender-model parameters, then add the
# values that depend on the data.
rec_model_params = cfg["model"]["rec_model"].copy()

# num_items / num_users: largest value found in the item ("sid") / user ("uid") map
for param_name, map_key in (("num_items", "sid"), ("num_users", "uid")):
    rec_model_params[param_name] = np.max(list(maps[map_key].values()))

# lookback is shared with the collator configuration
rec_model_params["lookback"] = cfg["data_params"]["collator_params"]["lookback"]

In [None]:
#Create the recommender model (the main module later wrapped by easy_torch) from rec_model_params
main_module = easy_rec.rec_torch.create_rec_model(**rec_model_params)

In [None]:
#Get the id of the experiment described by cfg; exp_found tells whether it was already present
# NOTE(review): the name get_set_experiment_id suggests the id is also stored into cfg — confirm against easy_exp
exp_found, experiment_id = easy_exp.exp.get_set_experiment_id(cfg)
print("Experiment already found:", exp_found, "----> The experiment id is:", experiment_id)

In [None]:
# # Find "original" implementation:
# # ...

# keys_to_change = {"model.rec_model.seed": 42}
# orig_cfg = deepcopy(cfg)
# for k,v in keys_to_change.items():
#     orig_cfg[k] = v

# orig_exp_found, orig_experiment_id = easy_exp.exp.get_experiment_id(orig_cfg)
# print("Experiment already found:", orig_exp_found, "----> The experiment id is:", orig_experiment_id)

# Load the original model (last or best) and make a prediction...
# Put the prediction inside the RLS metric... --> next cells

In [None]:
#if exp_found: exit() #TODO: make the notebook/script stop here if the experiment is already found

In [None]:
# Derive the trainer parameters for this run from the configured ones and the experiment id
trainer_params = easy_torch.preparation.prepare_experiment_id(cfg["model"]["trainer_params"], experiment_id)

# Prepare callbacks and logger using the prepared trainer_params
# (each call replaces the corresponding entry of trainer_params with its result)
trainer_params["callbacks"] = easy_torch.preparation.prepare_callbacks(trainer_params)
trainer_params["logger"] = easy_torch.preparation.prepare_logger(trainer_params)

# Prepare the trainer using the prepared trainer_params
trainer = easy_torch.preparation.prepare_trainer(**trainer_params)

# Start from a copy of the whole "model" section of the configuration;
# the loss, optimizer and metrics entries are replaced below by their prepared versions
model_params = cfg["model"].copy()

# Prepare the loss using configuration from cfg; additional_module.losses is passed as a second argument
model_params["loss"] = easy_torch.preparation.prepare_loss(cfg["model"]["loss"], additional_module.losses)

# Prepare the optimizer using configuration from cfg
model_params["optimizer"] = easy_torch.preparation.prepare_optimizer(**cfg["model"]["optimizer"])

# Prepare the metrics using configuration from cfg; additional_module.metrics is passed as a second argument
model_params["metrics"] = easy_torch.preparation.prepare_metrics(cfg["model"]["metrics"], additional_module.metrics)

# Create the model using main_module and every entry of model_params
# NOTE(review): model_params still holds the other keys of cfg["model"] (e.g. trainer_params, loader_params) — confirm create_model accepts them
model = easy_torch.process.create_model(main_module, **model_params)

In [None]:
# Prepare the emission tracker using configuration from cfg
#tracker = easy_torch.preparation.prepare_emission_tracker(**cfg["model"]["emission_tracker"], experiment_id=experiment_id)

In [None]:
# Prepare the flops profiler using configuration from cfg
#profiler = easy_torch.preparation.prepare_flops_profiler(model=model, **cfg["model"]["flops_profiler"], experiment_id=experiment_id)

### Train

In [None]:
# Evaluate the model on the train, val and test loaders before training starts
easy_torch.process.test_model(trainer, model, loaders, test_key=["train","val","test"]) #, tracker=tracker, profiler=profiler)

In [None]:
# Train the model using the prepared trainer, model, and data loaders
# Both the "val" and the "test" loaders are passed as validation keys
easy_torch.process.train_model(trainer, model, loaders, val_key=["val","test"]) #tracker=tracker, profiler=profiler, 

In [None]:
# Evaluate the model after training (test_key is left at the default of test_model)
easy_torch.process.test_model(trainer, model, loaders) #, tracker=tracker, profiler=profiler)

In [None]:
# Save the experiment described by cfg
#save_experiment_and_print_config(cfg)
easy_exp.exp.save_experiment(cfg)

# Print completion message, followed by a separator line and an empty line
print("Execution completed.")
print("######################################################################")
print()