# Packages

In [1]:
import pandas as pd
import torch

In [2]:
# Report whether this PyTorch build includes the MPS backend and whether it is available here.
print(f"MPS built: {torch.backends.mps.is_built()}")
print(f"MPS available: {torch.backends.mps.is_available()}")

MPS built: True
MPS available: True


# Read Data

In [3]:
# Read the train / validation / test split frames, in that order.
train_df, val_df, test_df = (
    pd.read_parquet(split_path)
    for split_path in (
        '../artifacts/movielens_splits/df_train.parquet',
        '../artifacts/movielens_splits/df_val.parquet',
        '../artifacts/movielens_splits/df_test.parquet',
    )
)

In [4]:
# Preview the first rows of the training split.
train_df.head()

Unnamed: 0,user_id,seq_actions,seq_times,target,hist_len
0,4671,"[movie_2634, movie_110, movie_3702, movie_1961...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",movie_165,144
1,53,"[movie_902, movie_694, movie_1961, movie_1193,...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",movie_2148,400
2,5377,"[movie_3480, movie_1198, movie_1097, movie_140...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",movie_1036,25
3,2565,"[movie_587, movie_2072, movie_2085, movie_19, ...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",movie_542,180
4,123,"[movie_2762, movie_356, movie_1320, movie_1268...","[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...",movie_1304,65


In [5]:
from src.utils.general import flatten_list_of_lists

# Turn the seq_actions and seq_times columns into plain Python lists of lists.
train_flows_list = [list(flow) for flow in train_df.seq_actions.tolist()]
train_times_flows_list = [list(times_flow) for times_flow in train_df.seq_times.tolist()]

# Sort the distinct tokens: the iteration order of a set of strings changes between
# interpreter sessions (string hash randomisation), so an unsorted list would give every
# kernel restart a different token order, and different ids in any vocabulary built from it.
# NOTE(review): confirm the stored label tensors were encoded with a matching token order.
distinct_new_tokens_list = sorted(set(flatten_list_of_lists(train_flows_list)))
len(train_flows_list), len(distinct_new_tokens_list)

(695951, 3599)

In [6]:
# Map every distinct token to its position in distinct_new_tokens_list.
# A comprehension keeps the loop variables out of the notebook namespace.
movies_vocab = {token: index for index, token in enumerate(distinct_new_tokens_list)}

len(movies_vocab)

3599

In [7]:
from src.utils.tokenizer import MultiAppBehaviorTokenizer

# Instantiate the project tokenizer with the movies_vocab token -> index mapping.
tokenizer = MultiAppBehaviorTokenizer(vocab=movies_vocab)

In [8]:
# Locations of the stored sequence embeddings for each split.
train_seq_embeddings_path = '../artifacts/movielens_embeddings/x_train_embeddings.pt'
val_seq_embeddings_path = '../artifacts/movielens_embeddings/x_val_embeddings.pt'
test_seq_embeddings_path = '../artifacts/movielens_embeddings/x_test_embeddings.pt'


def _load_float32(path):
    """Load the object stored at ``path`` and return it as a float32 tensor.

    ``torch.as_tensor`` is used instead of ``torch.tensor`` so that an object that is
    already a tensor is not copy-constructed (PyTorch warns about that pattern, and it
    duplicates the data in memory); array-like objects are still converted.
    The result is detached so the features never carry autograd history, matching
    what ``torch.tensor`` produced.
    """
    return torch.as_tensor(torch.load(path), dtype=torch.float32).detach()


x_train = _load_float32(train_seq_embeddings_path)
x_val = _load_float32(val_seq_embeddings_path)
x_test = _load_float32(test_seq_embeddings_path)

In [9]:
# Locations of the stored targets for each split.
y_train_path = '../artifacts/movielens_embeddings/y_train.pt'
y_val_path = '../artifacts/movielens_embeddings/y_val.pt'
y_test_path = '../artifacts/movielens_embeddings/y_test.pt'

# Deserialize the targets in train / val / test order.
y_train, y_val, y_test = (
    torch.load(targets_path)
    for targets_path in (y_train_path, y_val_path, y_test_path)
)

In [10]:
# Peek at the first ten test targets.
y_test[:10]

tensor([1358, 1181, 2883, 1574, 2222,   58, 2591, 2141, 1086,  407])

In [11]:
# Shapes of the feature tensors followed by the target tensors, each in train / val / test order.
tuple(
    split_tensor.shape
    for split_tensor in (x_train, x_val, x_test, y_train, y_val, y_test)
)

(torch.Size([695951, 512]),
 torch.Size([301, 512]),
 torch.Size([603, 512]),
 torch.Size([695951]),
 torch.Size([301]),
 torch.Size([603]))

# Create loaders

In [12]:
# Batch sizes: the first is used by the training loader, the second by the validation and test loaders.
train_batch_size, val_batch_size = 256, 128

In [13]:
from torch.utils.data import DataLoader, TensorDataset

# Seeded generator handed to the train and test loaders.
# NOTE(review): both of those loaders are built with shuffle=False, so this generator does
# not reorder any samples; confirm whether the training loader was meant to shuffle.
g = torch.Generator()
g.manual_seed(42)

# Pair each split's features with its targets.
train_dataset = TensorDataset(x_train, y_train)
val_dataset = TensorDataset(x_val, y_val)
test_dataset = TensorDataset(x_test, y_test)

# Sequential (unshuffled) loaders; validation and test share val_batch_size.
train_loader = DataLoader(
    train_dataset,
    batch_size=train_batch_size,
    shuffle=False,
    generator=g,
)
val_loader = DataLoader(
    val_dataset,
    batch_size=val_batch_size,
    shuffle=False,
)
test_loader = DataLoader(
    test_dataset,
    batch_size=val_batch_size,
    shuffle=False,
    generator=g,
)


In [14]:
from src.models.mlp_architecture import MLPNextItemPrediction
from src.utils.general import count_model_parameters, set_seed

# set_seed is called before the model is constructed (presumably so the weight
# initialisation is reproducible; confirm against the helper).
set_seed(42)
# NOTE(review): the output size is the vocabulary size plus one; presumably one extra index
# is reserved (e.g. padding / unknown). Confirm against the model definition.
rec_model = MLPNextItemPrediction(
    recs_size=1 + len(movies_vocab),
)
# Prints the total / trainable / non-trainable parameter counts and returns them as a tuple.
count_model_parameters(rec_model)


    Total model parameters: 456655
    Total trainable parameters: 456655
    Total non-trainable parameters: 0
    


(456655, 456655, 0)

# Train

In [15]:
from src.utils.training_functions import train_model

# Training hyper-parameters. The batch size is intentionally not set here: it is fixed by
# the DataLoader objects (train_loader / val_loader) that are passed to train_model.
learning_rate = 3e-4
epochs = 30

# Re-seed and rebuild the model so that re-running this cell starts from a fresh model
# instead of continuing from an already trained one.
set_seed(42)
rec_model = MLPNextItemPrediction(recs_size=len(movies_vocab) + 1)

# NOTE(review): checkpoints_dir_path is a Colab-style absolute path; with
# save_checkpoints=False it is presumably unused. Confirm before enabling checkpointing
# on another machine.
best_val = train_model(
    model=rec_model,
    train_loader=train_loader,
    val_loader=val_loader,
    learning_rate=learning_rate,
    epochs=epochs,
    checkpoints_dir_path='/content',
    save_checkpoints=False,
    apply_scheduler=True,
)



**Epoch #0


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000300
checking loss on validation
Train Epoch 1/30 Stats:'
        *Train Loss: 3.6043, Train Top10Hit: 0.5273, Val NDCG@10:0.2899;'
        *Val Loss: 3.8400,     Val Top10Hit: 0.4286,     Val NDCG@10:0.2314;'
        


**Epoch #1


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000299
checking loss on validation
Train Epoch 2/30 Stats:'
        *Train Loss: 3.4346, Train Top10Hit: 0.5717, Val NDCG@10:0.3245;'
        *Val Loss: 3.6834,     Val Top10Hit: 0.4983,     Val NDCG@10:0.2769;'
        


**Epoch #2


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000298
checking loss on validation
Train Epoch 3/30 Stats:'
        *Train Loss: 3.3458, Train Top10Hit: 0.5924, Val NDCG@10:0.3423;'
        *Val Loss: 3.5685,     Val Top10Hit: 0.5183,     Val NDCG@10:0.3047;'
        


**Epoch #3


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000297
checking loss on validation
Train Epoch 4/30 Stats:'
        *Train Loss: 3.2887, Train Top10Hit: 0.6064, Val NDCG@10:0.3538;'
        *Val Loss: 3.5118,     Val Top10Hit: 0.5183,     Val NDCG@10:0.3074;'
        


**Epoch #4


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000295
checking loss on validation
Train Epoch 5/30 Stats:'
        *Train Loss: 3.2473, Train Top10Hit: 0.6162, Val NDCG@10:0.3619;'
        *Val Loss: 3.5084,     Val Top10Hit: 0.5216,     Val NDCG@10:0.3155;'
        


**Epoch #5


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000293
checking loss on validation
Train Epoch 6/30 Stats:'
        *Train Loss: 3.2165, Train Top10Hit: 0.6216, Val NDCG@10:0.3680;'
        *Val Loss: 3.4666,     Val Top10Hit: 0.5681,     Val NDCG@10:0.3239;'
        


**Epoch #6


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000290
checking loss on validation
Train Epoch 7/30 Stats:'
        *Train Loss: 3.1955, Train Top10Hit: 0.6275, Val NDCG@10:0.3721;'
        *Val Loss: 3.4467,     Val Top10Hit: 0.5615,     Val NDCG@10:0.3266;'
        


**Epoch #7


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000287
checking loss on validation
Train Epoch 8/30 Stats:'
        *Train Loss: 3.1706, Train Top10Hit: 0.6322, Val NDCG@10:0.3773;'
        *Val Loss: 3.4740,     Val Top10Hit: 0.5349,     Val NDCG@10:0.3208;'
        


**Epoch #8


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000284
checking loss on validation
Train Epoch 9/30 Stats:'
        *Train Loss: 3.1637, Train Top10Hit: 0.6341, Val NDCG@10:0.3793;'
        *Val Loss: 3.4669,     Val Top10Hit: 0.5615,     Val NDCG@10:0.3212;'
        


**Epoch #9


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000281
checking loss on validation
Train Epoch 10/30 Stats:'
        *Train Loss: 3.1419, Train Top10Hit: 0.6391, Val NDCG@10:0.3829;'
        *Val Loss: 3.4719,     Val Top10Hit: 0.5482,     Val NDCG@10:0.3237;'
        


**Epoch #10


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000277
checking loss on validation
Train Epoch 11/30 Stats:'
        *Train Loss: 3.1263, Train Top10Hit: 0.6440, Val NDCG@10:0.3869;'
        *Val Loss: 3.4870,     Val Top10Hit: 0.5349,     Val NDCG@10:0.3162;'
        


**Epoch #11


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000272
checking loss on validation
Train Epoch 12/30 Stats:'
        *Train Loss: 3.1111, Train Top10Hit: 0.6464, Val NDCG@10:0.3890;'
        *Val Loss: 3.4533,     Val Top10Hit: 0.5449,     Val NDCG@10:0.3335;'
        


**Epoch #12


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000268
checking loss on validation
Train Epoch 13/30 Stats:'
        *Train Loss: 3.1037, Train Top10Hit: 0.6479, Val NDCG@10:0.3907;'
        *Val Loss: 3.4752,     Val Top10Hit: 0.5648,     Val NDCG@10:0.3211;'
        


**Epoch #13


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000263
checking loss on validation
Train Epoch 14/30 Stats:'
        *Train Loss: 3.0887, Train Top10Hit: 0.6521, Val NDCG@10:0.3934;'
        *Val Loss: 3.5007,     Val Top10Hit: 0.5781,     Val NDCG@10:0.3289;'
        


**Epoch #14


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000258
checking loss on validation
Train Epoch 15/30 Stats:'
        *Train Loss: 3.0813, Train Top10Hit: 0.6535, Val NDCG@10:0.3956;'
        *Val Loss: 3.4974,     Val Top10Hit: 0.5349,     Val NDCG@10:0.3201;'
        


**Epoch #15


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000252
checking loss on validation
Train Epoch 16/30 Stats:'
        *Train Loss: 3.0769, Train Top10Hit: 0.6543, Val NDCG@10:0.3965;'
        *Val Loss: 3.4757,     Val Top10Hit: 0.5548,     Val NDCG@10:0.3177;'
        


**Epoch #16


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000246
checking loss on validation
Train Epoch 17/30 Stats:'
        *Train Loss: 3.0616, Train Top10Hit: 0.6586, Val NDCG@10:0.3994;'
        *Val Loss: 3.5063,     Val Top10Hit: 0.5316,     Val NDCG@10:0.3173;'
        


**Epoch #17


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000240
checking loss on validation
Train Epoch 18/30 Stats:'
        *Train Loss: 3.0558, Train Top10Hit: 0.6590, Val NDCG@10:0.4000;'
        *Val Loss: 3.5090,     Val Top10Hit: 0.5648,     Val NDCG@10:0.3235;'
        


**Epoch #18


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000234
checking loss on validation
Train Epoch 19/30 Stats:'
        *Train Loss: 3.0506, Train Top10Hit: 0.6594, Val NDCG@10:0.4010;'
        *Val Loss: 3.5189,     Val Top10Hit: 0.5415,     Val NDCG@10:0.3165;'
        


**Epoch #19


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000228
checking loss on validation
Train Epoch 20/30 Stats:'
        *Train Loss: 3.0398, Train Top10Hit: 0.6636, Val NDCG@10:0.4041;'
        *Val Loss: 3.5222,     Val Top10Hit: 0.5382,     Val NDCG@10:0.3112;'
        


**Epoch #20


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000221
checking loss on validation
Train Epoch 21/30 Stats:'
        *Train Loss: 3.0361, Train Top10Hit: 0.6647, Val NDCG@10:0.4051;'
        *Val Loss: 3.4955,     Val Top10Hit: 0.5183,     Val NDCG@10:0.3040;'
        


**Epoch #21


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000214
checking loss on validation
Train Epoch 22/30 Stats:'
        *Train Loss: 3.0300, Train Top10Hit: 0.6653, Val NDCG@10:0.4056;'
        *Val Loss: 3.5026,     Val Top10Hit: 0.5316,     Val NDCG@10:0.3177;'
        


**Epoch #22


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000207
checking loss on validation
Train Epoch 23/30 Stats:'
        *Train Loss: 3.0208, Train Top10Hit: 0.6668, Val NDCG@10:0.4077;'
        *Val Loss: 3.5203,     Val Top10Hit: 0.5216,     Val NDCG@10:0.2999;'
        


**Epoch #23


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000200
checking loss on validation
Train Epoch 24/30 Stats:'
        *Train Loss: 3.0164, Train Top10Hit: 0.6688, Val NDCG@10:0.4089;'
        *Val Loss: 3.5194,     Val Top10Hit: 0.5249,     Val NDCG@10:0.3113;'
        


**Epoch #24


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000193
checking loss on validation
Train Epoch 25/30 Stats:'
        *Train Loss: 3.0110, Train Top10Hit: 0.6688, Val NDCG@10:0.4091;'
        *Val Loss: 3.5213,     Val Top10Hit: 0.5216,     Val NDCG@10:0.3055;'
        


**Epoch #25


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000185
checking loss on validation
Train Epoch 26/30 Stats:'
        *Train Loss: 3.0005, Train Top10Hit: 0.6727, Val NDCG@10:0.4116;'
        *Val Loss: 3.5439,     Val Top10Hit: 0.5615,     Val NDCG@10:0.3162;'
        


**Epoch #26


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000178
checking loss on validation
Train Epoch 27/30 Stats:'
        *Train Loss: 3.0039, Train Top10Hit: 0.6712, Val NDCG@10:0.4111;'
        *Val Loss: 3.5353,     Val Top10Hit: 0.5282,     Val NDCG@10:0.3061;'
        


**Epoch #27


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000170
checking loss on validation
Train Epoch 28/30 Stats:'
        *Train Loss: 3.0010, Train Top10Hit: 0.6707, Val NDCG@10:0.4113;'
        *Val Loss: 3.5493,     Val Top10Hit: 0.5216,     Val NDCG@10:0.3025;'
        


**Epoch #28


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000163
checking loss on validation
Train Epoch 29/30 Stats:'
        *Train Loss: 2.9998, Train Top10Hit: 0.6721, Val NDCG@10:0.4116;'
        *Val Loss: 3.5834,     Val Top10Hit: 0.5349,     Val NDCG@10:0.3094;'
        


**Epoch #29


  0%|          | 0/2719 [00:00<?, ?it/s]

finished_train_step, checking loss on train
lr now → 0.000155
checking loss on validation
Train Epoch 30/30 Stats:'
        *Train Loss: 2.9911, Train Top10Hit: 0.6745, Val NDCG@10:0.4135;'
        *Val Loss: 3.5253,     Val Top10Hit: 0.5282,     Val NDCG@10:0.3073;'
        
Training complete!


In [44]:
from src.utils.evaluation_functions import evaluate_recommender
# Loss module and device handed to the evaluation helper (CUDA when available, else CPU).
loss_fn = torch.nn.BCEWithLogitsLoss()
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Re-seed before evaluating, as is done before the other seeded steps in this notebook,
# so that re-running this cell reproduces the same metrics.
# NOTE(review): this assumes evaluate_recommender draws random candidates
# (n_sampling_eval); confirm against its implementation.
set_seed(42)

# Evaluate the trained model on the test loader: loss, hit-rate@10 and NDCG@10.
loss, hr10, ndcg10 = evaluate_recommender(
    model=rec_model,
    device=device,
    k=10,
    data_loader=test_loader,
    loss_fn=loss_fn,
    n_sampling_eval=100,
)
loss, hr10, ndcg10

(3.4570388865115036, 0.6069651776878395, 0.3439032772484901)

In [None]:
# The chosen model is the one with the best validation HR@10 after the first 15 epochs, which in this run is epoch #17.
# Note: results may differ depending on the machine used; this notebook was run on CPU (M3), while the paper's results were obtained on an L4 GPU (CUDA).