# Preparation stuff

## Connect to Drive

In [17]:
connect_to_drive = False

In [18]:
#Run command and authorize by popup --> other window
if connect_to_drive:
    from google.colab import drive
    drive.mount('/content/gdrive', force_remount=True)

## Install packages

In [19]:
if connect_to_drive:
    #Install FS code
    #!pip install  --upgrade --no-deps --force-reinstall git+https://github.com/federicosiciliano/easy_lightning.git@fedsic
    !pip install  --upgrade --no-deps --force-reinstall git+https://github.com/PokeResearchLab/easy_lightning.git

    !pip install pytorch_lightning

## IMPORTS

In [4]:
#Put all imports here
import numpy as np
import matplotlib.pyplot as plt
#from copy import deepcopy
#import pickle
import os
import sys
#import cv2
import torch
import csv

## Define paths

In [5]:
#every path should start from the project folder:
project_folder = "../"
if connect_to_drive:
    project_folder = "/content/gdrive/Shareddrives/<SharedDriveName>" #Name of SharedDrive folder
    #project_folder = "/content/gdrive/MyDrive/<MyDriveName>" #Name of MyDrive folder

#Config folder should contain hyperparameters configurations
cfg_folder = os.path.join(project_folder,"cfg")

#Data folder should contain raw and preprocessed data
data_folder = os.path.join(project_folder,"data")
raw_data_folder = os.path.join(data_folder,"raw")
processed_data_folder = os.path.join(data_folder,"processed")

#Source folder should contain all the (essential) source code
source_folder = os.path.join(project_folder,"src")

#The out folder should contain all outputs: models, results, plots, etc.
out_folder = os.path.join(project_folder,"out")
img_folder = os.path.join(out_folder,"img")

## Import own code

In [6]:
#To import from src:

#attach the source folder to the start of sys.path
sys.path.insert(0, project_folder)

#import from src directory
from src.module import *
from src.pruning import *

import easy_exp, easy_rec, easy_torch #easy_data

# MAIN

## Train

### Data

In [7]:
cfg = easy_exp.cfg.load_configuration("config_rec")

In [8]:
cfg["data_params"]["data_folder"] = raw_data_folder

In [9]:
#cfg["data_params"]["test_sizes"] = [cfg["data_params.dataset_params.out_seq_len.val"],cfg["data_params.dataset_params.out_seq_len.test"]]

data, maps = easy_rec.data_generation_utils.preprocess_dataset(**cfg["data_params"])

Ratings data already exists. Skip pre-processing
Filtering by minimum number of users per item: 5
Filtering by minimum number of items per user: 5
Densifying index
Splitting: leave_n_out


In [10]:
datasets = easy_rec.rec_torch.prepare_rec_datasets(data,**cfg["data_params"]["dataset_params"])

In [11]:
cfg["data_params"]["collator_params"]["num_items"] = np.max(list(maps["sid"].values()))

In [12]:
collators = easy_rec.rec_torch.prepare_rec_collators(**cfg["data_params"]["collator_params"])

In [13]:
loaders = easy_rec.rec_torch.prepare_rec_data_loaders(datasets, **cfg["model"]["loader_params"], collate_fn=collators)

### MODEL

In [14]:
cfg["model"]["rec_model"]["num_items"] = np.max(list(maps["sid"].values()))
cfg["model"]["rec_model"]["num_users"] = np.max(list(maps["uid"].values()))
cfg["model"]["rec_model"]["lookback"] = cfg["data_params"]["collator_params"]["lookback"]

In [15]:
#load the default SASRec module with the specified parameters
main_module = easy_rec.rec_torch.create_rec_model(**cfg["model"]["rec_model"])
#print(main_module)

Seed set to 42


#### TRAINING PROCESS

In [16]:
exp_found, experiment_id = easy_exp.exp.get_set_experiment_id(cfg)
print("Experiment already found:", exp_found, "----> The experiment id is:", experiment_id)

Experiment already found: True ----> The experiment id is: zfmgG7mdl83IVwwD


In [20]:
trainer_params = easy_torch.preparation.prepare_experiment_id(cfg["model"]["trainer_params"], experiment_id)

# Prepare callbacks and logger using the prepared trainer_params
trainer_params["callbacks"] = easy_torch.preparation.prepare_callbacks(trainer_params)
trainer_params["logger"] = easy_torch.preparation.prepare_logger(trainer_params)

# Prepare the trainer using the prepared trainer_params
trainer = easy_torch.preparation.prepare_trainer(**trainer_params)

model_params = cfg["model"].copy()

model_params["loss"] = easy_torch.preparation.prepare_loss(cfg["model"]["loss"], easy_rec.losses)

# Prepare the optimizer using configuration from cfg
model_params["optimizer"] = easy_torch.preparation.prepare_optimizer(**cfg["model"]["optimizer"])

# Prepare the metrics using configuration from cfg
model_params["metrics"] = easy_torch.preparation.prepare_metrics(cfg["model"]["metrics"], easy_rec.metrics)

# Create the model using main_module, loss, and optimizer
model = easy_torch.process.create_model(main_module, **model_params)

Seed set to 42
Seed set to 42
Seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42


### PRUNING

In [21]:
# Load checkpoint of experiment with emb_size = 64

models_path = os.path.join(out_folder, "models/amz_beauty")
best_model_path = os.path.join(models_path, experiment_id+'/best.ckpt')

checkpoint = torch.load(best_model_path, map_location=torch.device('cpu'))
state_dict = checkpoint['state_dict']
model.load_state_dict(state_dict)
model.eval()

#print(main_module.item_emb.weight.data)

  checkpoint = torch.load(best_model_path, map_location=torch.device('cpu'))


BaseNN(
  (main_module): SASRec(
    (item_emb): Embedding(1077, 64, padding_idx=0)
    (pos_emb): Embedding(200, 64)
    (dropout): Dropout(p=0.2, inplace=False)
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0): TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=64, out_features=64, bias=True)
          )
          (linear1): Linear(in_features=64, out_features=256, bias=True)
          (dropout): Dropout(p=0.2, inplace=False)
          (linear2): Linear(in_features=256, out_features=64, bias=True)
          (norm1): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.2, inplace=False)
          (dropout2): Dropout(p=0.2, inplace=False)
          (activation): GELU(approximate='none')
        )
      )
    )
    (last_layernorm): LayerNorm((64,), eps=1e-08, elementwise_a

In [None]:
#print("Before pruning there are ",main_module.item_emb.weight.data[main_module.item_emb.weight.data == 0].shape[0], " zero elements.")

In [None]:
pruning = "no"

pruning_percentage = 50

if pruning == "weight_pruning":
    weight_pruning(main_module.item_emb, pruning_percentage)

elif pruning == "neuron_pruning":
    pruned_neurons = neuron_pruning(main_module.item_emb, pruning_percentage)
    print(sorted(pruned_neurons))

elif pruning == "lazy_neuron_pruning":
    lazy_pruned_neurons = lazy_neuron_pruning(main_module.item_emb, pruning_percentage)
    print("Lazy pruned neurons: ", lazy_pruned_neurons)

else:
    print("Incorrect pruning type!")

#print("After pruning there are ",main_module.item_emb.weight.data[main_module.item_emb.weight.data == 0].shape[0], " zero elements.")
#print(main_module.item_emb.weight.data)

### TEST

In [22]:
easy_torch.process.test_model(trainer, model, loaders)

Seed set to 42
You are using a CUDA device ('NVIDIA GeForce RTX 4090') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]


Testing: |          | 0/? [00:00<?, ?it/s]

In [None]:
# Save experiment and print the current configuration
#save_experiment_and_print_config(cfg)
easy_exp.exp.save_experiment(cfg)

# Print completion message
print("Execution completed.")
print("######################################################################")
print()