# Training

In [1]:
import sys, os
# Make the `ara` package importable: take the absolute path of the parent of the
# current working directory, then put *its* parent at the front of the import path.
_parent_of_cwd = os.path.abspath('..')
sys.path.insert(0, os.path.dirname(_parent_of_cwd))

In [2]:
import numpy as np
import torch
import pytorch_lightning as pl
import pandas as pd

from torch import nn

from ara.src.datasets import RatingDataset
from ara.src.models import AGREELikeWeighingStaticEmbeds, AGREELikeWighingWithEncoder
from ara.src.models import MoSANLikeWeighing

# Use the first CUDA GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")
print(device)

  from .autonotebook import tqdm as notebook_tqdm


cpu


## Synthetic Movie Dataset

In [3]:
# Location of the synthetic movie dataset and the batch size shared by all three splits.
SYNTHETIC_DIR = '../data/synthetic_dataset'
BATCH_SIZE = 1024

# One DataLoader per split, each wrapping a RatingDataset built from the split's CSV file.
train_dataloader = torch.utils.data.DataLoader(
    RatingDataset(SYNTHETIC_DIR + '/train.csv'),
    batch_size=BATCH_SIZE,
)
val_dataloader = torch.utils.data.DataLoader(
    RatingDataset(SYNTHETIC_DIR + '/val.csv'),
    batch_size=BATCH_SIZE,
)
test_dataloader = torch.utils.data.DataLoader(
    RatingDataset(SYNTHETIC_DIR + '/test.csv'),
    batch_size=BATCH_SIZE,
)

# Raw side information handed to the models: the pickled group table (its 'users'
# column is used below) and the user rating array, converted to a float32 tensor.
group_infos = pd.read_pickle(SYNTHETIC_DIR + '/raw/group_info.pkl')
user_ratings_array = np.load(SYNTHETIC_DIR + '/raw/user_ratings.npy')
user_ratings = torch.tensor(user_ratings_array).float()

In [4]:
def train(model, name, version, train_dataloader, val_dataloader, *,
          max_epochs=5, val_check_interval=800, patience=2, min_delta=0.001):
    """Fit ``model`` with PyTorch Lightning using checkpointing, early stopping and TensorBoard logging.

    Args:
        model: Lightning module to train. It must log a ``val_loss`` metric, because
            both the checkpoint and the early-stopping callbacks monitor it.
        name: Run name. Used as the sub-directory of ``../logs/saved_models/`` for
            checkpoints and as the experiment name under ``../logs/tensorboard``.
        version: Version identifier passed to the TensorBoard logger.
        train_dataloader: Dataloader passed to ``Trainer.fit`` as ``train_dataloaders``.
        val_dataloader: Dataloader passed to ``Trainer.fit`` as ``val_dataloaders``.
        max_epochs: Upper bound on the number of training epochs (default 5).
        val_check_interval: Forwarded to ``pl.Trainer`` (default 800).
        patience: Early-stopping patience on ``val_loss`` (default 2).
        min_delta: Minimum ``val_loss`` change counted as an improvement by early
            stopping (default 0.001).

    Returns:
        None. Checkpoints and logs are written to disk as a side effect.
    """
    # Save checkpoints under the run's own directory; the file name embeds val_loss.
    checkpoint = pl.callbacks.ModelCheckpoint(
        dirpath='../logs/saved_models/' + name,
        filename='{val_loss:.5f}',
        monitor='val_loss',
        mode='min',  # stated explicitly so it visibly matches the early-stopping direction
    )

    early_stopping = pl.callbacks.early_stopping.EarlyStopping(
        monitor='val_loss',
        mode="min",
        patience=patience,
        min_delta=min_delta,
    )

    lr_monitor = pl.callbacks.LearningRateMonitor()

    logger = pl.loggers.tensorboard.TensorBoardLogger("../logs/tensorboard", version=version, name=name)

    trainer = pl.Trainer(
        logger=logger,
        max_epochs=max_epochs,
        val_check_interval=val_check_interval,
        callbacks=[checkpoint, lr_monitor, early_stopping]
    )

    trainer.fit(model=model, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)

### AGREE-like weighing with static embeddings

In [13]:
# Hyper-parameters for the static-embeddings AGREE-like model. The `lr_reduction_*`
# entries are passed through to the model via `config`.
config = dict(
    user_embed_dim=64,
    item_embed_dim=64,
    hidden_dim=64,
    weight_decay=0,
    lr=1e-2,
    lr_reduction_patience=1000,
    lr_reduction_factor=0.5,
    lr_reduction_threshold=1e-3,
    lr_reduction_cooldown=200,
    device=device,
)

# Alternative to training from scratch: resume from a previously saved checkpoint.
# model = AGREELikeWeighingStaticEmbeds.load_from_checkpoint('../logs/saved_models/agree/size=64, val_loss=0.00258.ckpt', config=config, user_ratings=user_ratings, users_by_group=group_infos['users'].values)
users_by_group = group_infos['users'].values
model = AGREELikeWeighingStaticEmbeds(config, user_ratings, users_by_group)

# The run is logged under 'agree/static' with version 64.
train(model, 'agree/static', 64, train_dataloader, val_dataloader)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")

  | Name        | Type           | Params
-----------------------------------------------
0 | user_embeds | Embedding      | 64.0 K
1 | item_embeds | Embedding      | 320 K 
2 | attention   | AttentionLayer | 8.3 K 
-----------------------------------------------
392 K     Trainable params
0         Non-trainable params
392 K     Total params
1.571     Total estimated model params size (MB)


Sanity Checking DataLoader 0:  50%|█████     | 1/2 [00:00<00:00,  6.49it/s]



                                                                           



Epoch 2:  93%|█████████▎| 2040/2198 [16:08<01:14,  2.11it/s, loss=0.00279, v_num=64, train_loss=0.00286, val_loss=0.00282]


### AGREE-like weighing with Encoder

In [5]:
# Hyper-parameters for the encoder-based AGREE-like model. The `lr_reduction_*`
# entries are passed through to the model via `config`.
config = dict(
    user_embed_dim=64,
    item_embed_dim=64,
    hidden_dim=64,
    weight_decay=0,
    lr=1e-2,
    lr_reduction_patience=100,
    lr_reduction_factor=0.5,
    lr_reduction_threshold=1e-4,
    lr_reduction_cooldown=20,
    device=device,
)

# NOTE(review): the commented-out line below loads AGREELikeWeighingStaticEmbeds, not
# the encoder model trained in this cell -- it looks like a copy-paste leftover.
# model = AGREELikeWeighingStaticEmbeds.load_from_checkpoint('../logs/saved_models/agree/size=64, val_loss=0.00258.ckpt', config=config, user_ratings=user_ratings, users_by_group=group_infos['users'].values)
users_by_group = group_infos['users'].values
model = AGREELikeWighingWithEncoder(config, user_ratings, users_by_group)

# The run is logged under 'agree/encoder' with version 64.
train(model, 'agree/encoder', 64, train_dataloader, val_dataloader)

  f"Attribute {k!r} is an instance of `nn.Module` and is already saved during checkpointing."
  f"Attribute {k!r} is an instance of `nn.Module` and is already saved during checkpointing."
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")

  | Name        | Type           | Params
-----------------------------------------------
0 | user_embeds | Encoder        | 5.4 M 
1 | item_embeds | Encoder        | 1.3 M 
2 | attention   | AttentionLayer | 8.3 K 
-----------------------------------------------
6.7 M     Trainable params
0         Non-trainable params
6.7 M     Total params
26.850    Total estimated model params size (MB)


Sanity Checking DataLoader 0:   0%|          | 0/2 [00:00<?, ?it/s]



                                                                           



Epoch 0:   7%|▋         | 155/2198 [42:16<9:17:09, 16.36s/it, loss=0.0506, v_num=64, train_loss=0.048] 

  rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")


### MoSAN-like weighing

In [None]:
# Hyper-parameters for the MoSAN-like model. Unlike the AGREE-like configs it also
# carries a `context_embed_dim` entry.
config = {
    'context_embed_dim': 64,
    'user_embed_dim': 64,
    'item_embed_dim': 64,
    'hidden_dim': 64,
    'weight_decay': 0,
    'lr': 1e-2,
    'lr_reduction_patience': 100,
    'lr_reduction_factor': 0.5,
    'lr_reduction_threshold': 1e-3,
    'lr_reduction_cooldown': 50,
    'device': device
}

model = MoSANLikeWeighing(config, user_ratings, group_infos['users'].values)
# `train` takes (model, name, version, train_dataloader, val_dataloader). The original
# call omitted `version`, so the train dataloader was bound to `version` and the call
# raised a TypeError for the missing `train_dataloader` argument. Version 64 follows
# the convention used by the other training cells.
train(model, 'mosan', 64, train_dataloader, val_dataloader)

### Test

In [5]:
# Restore the encoder-based AGREE-like model from a saved checkpoint and evaluate it
# on the held-out test split.
# NOTE(review): the keyword here is `users_by_groups`, while the commented-out
# load_from_checkpoint call in the training cells spells it `users_by_group` --
# confirm against the model's __init__ signature.
checkpoint = "../logs/saved_models/agree/encoder/size=32, val_loss=0.00971.ckpt"
model = AGREELikeWighingWithEncoder.load_from_checkpoint(
    checkpoint,
    user_ratings=user_ratings,
    users_by_groups=group_infos['users'].values,
)

# Kept as the last expression so the cell displays the returned metrics.
test_trainer = pl.Trainer()
test_trainer.test(model, test_dataloader)

  f"Attribute {k!r} is an instance of `nn.Module` and is already saved during checkpointing."
  f"Attribute {k!r} is an instance of `nn.Module` and is already saved during checkpointing."
GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Testing DataLoader 0: 100%|██████████| 220/220 [05:18<00:00,  1.45s/it]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss          0.009736476466059685
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────


[{'test_loss': 0.009736476466059685}]

In [None]:
# Restore the MoSAN-like model from a saved checkpoint and evaluate it on the
# held-out test split.
# NOTE(review): the keyword here is `users_by_groups`, while the commented-out
# load_from_checkpoint call in the training cells spells it `users_by_group` --
# confirm against the model's __init__ signature.
checkpoint = "../logs/saved_models/mosan/size=16, val_loss=0.00648.ckpt"
model = MoSANLikeWeighing.load_from_checkpoint(
    checkpoint,
    user_ratings=user_ratings,
    users_by_groups=group_infos['users'].values,
)

# Kept as the last expression so the cell displays the returned metrics.
test_trainer = pl.Trainer()
test_trainer.test(model, test_dataloader)