# Preparation

## Connect to Drive

In [54]:
# Switch for running on Google Colab: when True, later cells mount Google Drive,
# install the required packages and use the Drive folder as the project folder.
connect_to_drive = False

In [55]:
# Mount Google Drive when running on Colab.
# Running this cell opens an authorization pop-up in another window.
if connect_to_drive:
    from google.colab import drive
    drive.mount('/content/gdrive', force_remount=True)

## Install packages

In [56]:
if connect_to_drive:
    #Install FS code
    #!pip install  --upgrade --no-deps --force-reinstall git+https://github.com/federicosiciliano/easy_lightning.git@fedsic
    !pip install  --upgrade --no-deps --force-reinstall git+https://github.com/PokeResearchLab/easy_lightning.git

    !pip install pytorch_lightning

## IMPORTS

In [57]:
#Put all imports here
import numpy as np
import matplotlib.pyplot as plt
#from copy import deepcopy
#import pickle
import os
import sys
#import cv2
import torch
import csv

## Define paths

In [58]:
# Every path is derived from the project folder: the parent of the working
# directory by default, or the folder on the mounted Drive when connect_to_drive is set.
if connect_to_drive:
    project_folder = "/content/gdrive/Shareddrives/<SharedDriveName>"  # name of the shared-drive folder
    # For a personal drive use "/content/gdrive/MyDrive/<MyDriveName>" instead
else:
    project_folder = "../"

# Hyperparameter configurations
cfg_folder = os.path.join(project_folder, "cfg")

# Raw and preprocessed data
data_folder = os.path.join(project_folder, "data")
raw_data_folder, processed_data_folder = (
    os.path.join(data_folder, subdir) for subdir in ("raw", "processed")
)

# All the (essential) source code
source_folder = os.path.join(project_folder, "src")

# Every output: models, results, plots, etc.
out_folder = os.path.join(project_folder, "out")
img_folder = os.path.join(out_folder, "img")

## Import own code

In [59]:
# Make the project's own code importable.

# Put the project folder (not the src folder itself) at the start of sys.path,
# so that the `src` directory inside it can be imported as a package
sys.path.insert(0, project_folder)

# Import everything exported by the project modules.
# NOTE(review): the star imports hide where names used later in the notebook
# (e.g. the pruning functions) come from; consider importing them explicitly.
from src.module import *
from src.pruning import *

import easy_exp, easy_rec, easy_torch #easy_data

# MAIN

## Train

### Data

In [60]:
# Load the experiment configuration named "config_rec"
cfg = easy_exp.cfg.load_configuration("config_rec")

In [61]:
# Point the data parameters at the raw data folder defined in the paths cell
cfg["data_params"]["data_folder"] = raw_data_folder

In [62]:
# Disabled override of the test sizes from the out_seq_len settings:
#cfg["data_params"]["test_sizes"] = [cfg["data_params.dataset_params.out_seq_len.val"],cfg["data_params.dataset_params.out_seq_len.test"]]

# Preprocess the dataset with the configured data parameters.
# `maps` is indexed below with the keys "sid" and "uid".
data, maps = easy_rec.data_generation_utils.preprocess_dataset(**cfg["data_params"])

Ratings data already exists. Skip pre-processing
Filtering by minimum number of users per item: 5
Filtering by minimum number of items per user: 5
Densifying index
Splitting: leave_n_out


In [63]:
# Build the datasets from the preprocessed data and the configured dataset parameters
datasets = easy_rec.rec_torch.prepare_rec_datasets(data,**cfg["data_params"]["dataset_params"])

In [64]:
# The collator's num_items is the largest value stored in maps["sid"]
cfg["data_params"]["collator_params"]["num_items"] = np.max(list(maps["sid"].values()))

In [65]:
# Build the collators from the data and the configured collator parameters
collators = easy_rec.rec_torch.prepare_rec_collators(data, **cfg["data_params"]["collator_params"])

In [66]:
# Build the data loaders from the datasets, the configured loader parameters and the collators
loaders = easy_rec.rec_torch.prepare_rec_data_loaders(datasets, **cfg["model"]["loader_params"], collate_fn=collators)

### MODEL

In [67]:
# Size the recommender from the preprocessed data: the largest values in
# maps["sid"] / maps["uid"], and the same lookback as the collator.
rec_model_cfg = cfg["model"]["rec_model"]
rec_model_cfg["num_items"] = np.max(list(maps["sid"].values()))
rec_model_cfg["num_users"] = np.max(list(maps["uid"].values()))
rec_model_cfg["lookback"] = cfg["data_params"]["collator_params"]["lookback"]

In [68]:
# Create the recommender module from the rec_model configuration
# (the recorded output below shows a SASRec module)
main_module = easy_rec.rec_torch.create_rec_model(**cfg["model"]["rec_model"])
#print(main_module)

Seed set to 42


In [69]:
# Show the architecture of the created module
print(main_module)

SASRec(
  (item_emb): Embedding(3417, 128, padding_idx=0)
  (pos_emb): Embedding(200, 128)
  (dropout): Dropout(p=0.2, inplace=False)
  (encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
        )
        (linear1): Linear(in_features=128, out_features=512, bias=True)
        (dropout): Dropout(p=0.2, inplace=False)
        (linear2): Linear(in_features=512, out_features=128, bias=True)
        (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
        (dropout1): Dropout(p=0.2, inplace=False)
        (dropout2): Dropout(p=0.2, inplace=False)
        (activation): GELU(approximate='none')
      )
    )
  )
  (last_layernorm): LayerNorm((128,), eps=1e-08, elementwise_affine=True)
)


In [70]:
# Show the item embedding weights of the freshly created module (before any checkpoint is loaded)
print(main_module.item_emb.weight.data)

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 1.9312,  1.0119, -1.4364,  ...,  0.5655,  0.5058,  0.2225],
        [-0.6855,  0.5636, -1.5072,  ...,  0.8541, -0.4901, -0.3595],
        ...,
        [-0.7120,  1.0650, -0.3429,  ...,  0.3673,  0.8504,  0.9029],
        [-0.6463,  0.3462, -2.2223,  ...,  1.0076, -1.2280,  0.0237],
        [ 0.4105, -1.2189,  0.2137,  ...,  0.4162, -0.4763,  1.5021]])


#### TRAINING PROCESS

In [71]:
# Leftover manual experiment id (disabled); note that the next cell assigns experiment_id again.
#experiment_id = 'UPZT6uHteJMJ4bKt'

In [72]:
# Look up (or set) the experiment id for this configuration; exp_found reports
# whether the experiment was already found.
exp_found, experiment_id = easy_exp.exp.get_set_experiment_id(cfg)
print("Experiment already found:", exp_found, "----> The experiment id is:", experiment_id)

Experiment already found: True ----> The experiment id is: ylwoWR1P52bcZhdw


In [73]:
# Show the experiment id on its own (it is used below to locate the checkpoint)
print(experiment_id)

ylwoWR1P52bcZhdw


In [None]:
# TODO: make the notebook/script stop here if the experiment is already found.
# Disabled for now:
#if exp_found: exit()

In [74]:
# Build the trainer parameters from the configured ones and the experiment id
trainer_params = easy_torch.preparation.prepare_experiment_id(cfg["model"]["trainer_params"], experiment_id)

# Prepare callbacks and logger using the prepared trainer_params
trainer_params["callbacks"] = easy_torch.preparation.prepare_callbacks(trainer_params)
trainer_params["logger"] = easy_torch.preparation.prepare_logger(trainer_params)

# Prepare the trainer using the prepared trainer_params
trainer = easy_torch.preparation.prepare_trainer(**trainer_params)

# Start from a copy of the model configuration; its loss, optimizer and
# metrics entries are replaced below by the prepared objects
model_params = cfg["model"].copy()

# Prepare the loss from its configuration, passing easy_rec.losses as the second argument
model_params["loss"] = easy_torch.preparation.prepare_loss(cfg["model"]["loss"], easy_rec.losses)

# Prepare the optimizer using configuration from cfg
model_params["optimizer"] = easy_torch.preparation.prepare_optimizer(**cfg["model"]["optimizer"])

# Prepare the metrics using configuration from cfg, passing easy_rec.metrics as the second argument
model_params["metrics"] = easy_torch.preparation.prepare_metrics(cfg["model"]["metrics"], easy_rec.metrics)

# Create the model from main_module and the prepared model_params
model = easy_torch.process.create_model(main_module, **model_params)

Seed set to 42
Seed set to 42
Seed set to 42
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
Seed set to 42
Seed set to 42
Seed set to 42
Seed set to 42


In [75]:
# Prepare the emission tracker using configuration from cfg and the experiment id;
# it is passed to the test run below
tracker = easy_torch.preparation.prepare_emission_tracker(**cfg["model"]["emission_tracker"], experiment_id=experiment_id)

### PRUNING

In [76]:
# Load the best checkpoint of the experiment with emb_size = 128 (no cutoff)
# into the model, so that the trained weights can be pruned afterwards.

# Build the paths from separate components instead of embedding "/" in the
# strings, so the separator is always the one of the running platform
models_path = os.path.join(out_folder, "models", "july_22")
best_model_path = os.path.join(models_path, experiment_id, "best.ckpt")

# NOTE(review): torch.load unpickles the file; only load checkpoints from a trusted source.
checkpoint = torch.load(best_model_path, map_location=torch.device('cpu'))
state_dict = checkpoint['state_dict']
model.load_state_dict(state_dict)
model.eval()

# Show the item embedding weights again, now that the checkpoint has been loaded
print(main_module.item_emb.weight.data)

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0823,  0.1081,  0.0531,  ...,  0.2140,  0.5176, -0.3505],
        [-0.1262,  0.0024,  0.0386,  ...,  1.2408,  0.5861, -1.2238],
        ...,
        [-0.0541,  0.0687, -0.1329,  ..., -0.0157,  0.3472,  0.9099],
        [ 0.0047,  0.0033,  0.2578,  ...,  0.1306, -1.4768,  0.9026],
        [ 0.0158,  0.0577,  0.0502,  ..., -0.3675, -1.4877,  1.5648]])


In [77]:
# Count the entries of the item embedding matrix that are exactly zero before pruning
item_emb_weights = main_module.item_emb.weight.data
zeros_before_pruning = (item_emb_weights == 0).sum().item()
print("Before pruning there are ", zeros_before_pruning, " zero elements.")

Before pruning there are  128  zero elements.


In [78]:
# Pruning strategy applied to the item embedding layer; one of
# "weight_pruning", "neuron_pruning" or "lazy_neuron_pruning".
pruning = "lazy_neuron_pruning"

# Second argument of the pruning functions
# (presumably the percentage to prune; TODO confirm against their definitions)
pruning_percentage = 50

if pruning == "weight_pruning":
    weight_pruning(main_module.item_emb, pruning_percentage)

elif pruning == "neuron_pruning":
    pruned_neurons = neuron_pruning(main_module.item_emb, pruning_percentage)
    print(sorted(pruned_neurons))

elif pruning == "lazy_neuron_pruning":
    lazy_pruned_neurons = lazy_neuron_pruning(main_module.item_emb, pruning_percentage)
    print("Lazy pruned neurons: ", lazy_pruned_neurons)

else:
    # Fail loudly instead of only printing a message: otherwise the cells below
    # would test and save an unpruned model as if it had been pruned
    raise ValueError(f"Incorrect pruning type: {pruning!r}")

# Report how many embedding entries are exactly zero after pruning and show the weights
print("After pruning there are ",main_module.item_emb.weight.data[main_module.item_emb.weight.data == 0].shape[0], " zero elements.")
print(main_module.item_emb.weight.data)

Lazy pruned neurons:  [64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127]
After pruning there are  218752  zero elements.
tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [-0.0823,  0.1081,  0.0531,  ...,  0.0000,  0.0000,  0.0000],
        [-0.1262,  0.0024,  0.0386,  ...,  0.0000,  0.0000,  0.0000],
        ...,
        [-0.0541,  0.0687, -0.1329,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0047,  0.0033,  0.2578,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0158,  0.0577,  0.0502,  ...,  0.0000,  0.0000,  0.0000]])


### TEST

In [79]:
# Run the test with the prepared trainer, model, loaders and emission tracker
# (the model's item embedding has been pruned in the cell above)
easy_torch.process.test_model(trainer, model, loaders, tracker=tracker)

Seed set to 42


Testing DataLoader 0: 100%|██████████| 48/48 [00:51<00:00,  0.93it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       test_F1_@10          0.0899791270494461
       test_F1_@20          0.06519614160060883
       test_F1_@5           0.11358654499053955
      test_MAP_@10          0.06697871536016464
      test_MAP_@20          0.0531315952539444
       test_MAP_@5          0.08047350496053696
      test_MRR_@10          0.1991126835346222
      test_MRR_@20          0.2124582827091217
       test_MRR_@5          0.17889349162578583
      test_NDCG_@10          0.264716774225235
      test_NDCG_@20         0.31321293115615845
      test_NDCG_@5          0.21516376733779907
   test_Precision_@10      0.048046354204416275
   test_Precision_@20    

In [80]:
# Save the experiment described by cfg.
# NOTE(review): the pruning settings chosen above are plain notebook variables
# and are never written into cfg in this notebook; confirm that the saved
# experiment is meant to be recorded without them.
easy_exp.exp.save_experiment(cfg)

# Closing banner: completion message, separator line and an empty line
print(
    "Execution completed.",
    "######################################################################",
    "",
    sep="\n",
)

Execution completed.
######################################################################

