In [1]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

total_df = pd.read_pickle("../../data/processed/rating_engage.pkl")
rating_df = pd.read_pickle("../../data/processed/rating_session.pkl")
rating_df

Unnamed: 0,user_id,date,item_id,rating
0,0,2014-05-15,"[15400, 8141, 26820, 39007]","[5.0, 5.0, 5.0, 5.0]"
1,0,2014-05-15,"[8141, 26820, 39007, 4646]","[5.0, 5.0, 5.0, 3.0]"
2,0,2014-12-06,"[18665, 21455, 23236, 21297]","[5.0, 5.0, 5.0, 5.0]"
3,0,2015-02-11,"[25341, 50734, 59076, 12715]","[4.0, 5.0, 2.0, 2.0]"
4,0,2015-02-11,"[50734, 59076, 12715, 13616]","[5.0, 2.0, 2.0, 5.0]"
...,...,...,...,...
37260,1996,2020-01-28,"[31772, 76693, 49373, 47628]","[5.0, 2.0, 5.0, 5.0]"
37261,1998,2014-12-03,"[27463, 5156, 28049, 24243]","[5.0, 5.0, 4.0, 5.0]"
37262,1998,2018-12-26,"[57897, 83702, 54822, 55331]","[5.0, 5.0, 5.0, 5.0]"
37263,1998,2019-02-10,"[55864, 57289, 55227, 55751]","[4.0, 4.0, 3.0, 5.0]"


In [2]:
total_df

Unnamed: 0,user_id,item_id,rating,item_len
0,0,556,2.0,93
0,0,843,2.0,93
0,0,1039,5.0,93
0,0,3865,5.0,93
0,0,4646,3.0,93
...,...,...,...,...
1999,1999,60601,0,106
1999,1999,65713,0,106
1999,1999,16802,0,106
1999,1999,24396,0,106


In [3]:
len(total_df["item_id"].unique()), total_df["item_id"].max()

(97718, 97717)

In [4]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, Dataset

item_sequences = rating_df['item_id'].tolist()  # item_id를 리스트로 변환
user_ids = rating_df['user_id'].tolist()


class SessionDataset(Dataset):
    def __init__(self, sequences):
        self.sequences = sequences

    def __len__(self):
        return len(self.sequences)

    def __getitem__(self, idx):
        sequence = self.sequences[idx]
        return torch.tensor(sequence[:-1]), torch.tensor(sequence[-1])
    
train_data, test_data = train_test_split(item_sequences, test_size=0.2, random_state=42)

# Hyperparameters
embedding_dim = 128
hidden_dim = 128
num_epochs = 1000
learning_rate = 5e-5
batch_size = 512
# 512 1e-4 0.4369
# 512 53-5 0.4404

train_loader = DataLoader(SessionDataset(train_data), batch_size=batch_size, shuffle=True)
test_loader = DataLoader(SessionDataset(test_data), batch_size=batch_size, shuffle=False)

In [5]:
import torch.optim as optim
import sys
sys.path.append("../")
from model import GRURecommender

num_items = 368228

model = GRURecommender(num_items, embedding_dim, hidden_dim).cuda()

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [6]:
from sklearn.metrics import precision_score, recall_score
from tqdm import tqdm
from copy import deepcopy
import warnings
warnings.filterwarnings('always')

model_parameters = deepcopy(model.state_dict())

def precision_at_k(preds, target, k=20):
    top_k_preds = preds.topk(k, dim=1).indices.cpu().numpy()
    target = target.cpu().numpy()
    
    y_true = np.isin(top_k_preds, target[:, None]) 
    y_pred = np.ones_like(y_true)  
    
    precision_scores = []
    for true, pred in zip(y_true, y_pred):
        if np.sum(pred) == 0:  
            precision_scores.append(0)
        else:
            precision_scores.append(precision_score(true, pred, zero_division=0))
    return np.mean(precision_scores)

def train_and_evaluate(model, train_loader, val_loader, criterion, optimizer, num_epochs, early_stopping_patience=5):
    global model_parameters
    best_val_precision = -float('inf')
    patience_counter = 0
    
    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        for inputs, target in tqdm(train_loader):
            optimizer.zero_grad()
            outputs = model(inputs.cuda())
            loss = criterion(outputs, target.cuda())

            loss.backward()
            optimizer.step()
            total_loss += loss.detach().cpu().item()

        val_loss, val_precision = evaluate(model, val_loader, criterion)
        
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}, '
              f'Val Loss: {val_loss:.4f}, Precision@20: {val_precision:.4f}')
        
        if val_precision > best_val_precision:
            best_val_precision = val_precision
            patience_counter = 0
            model_parameters = deepcopy(model.state_dict())
        else:
            patience_counter += 1
            if patience_counter >= early_stopping_patience:
                print("Early stopping triggered")
                break

def evaluate(model, data_loader, criterion):
    model.eval()
    total_loss = 0
    total_precision = 0
    total_samples = 0
    
    with torch.no_grad():
        for inputs, target in tqdm(data_loader):
            outputs = model(inputs.cuda())
            loss = criterion(outputs, target.cuda())
            total_loss += loss.detach().cpu().item()
            
            total_precision += precision_at_k(outputs, target, k=20) * inputs.size(0)
            total_samples += inputs.size(0)
            
    avg_loss = total_loss / len(data_loader)
    avg_precision = total_precision / total_samples
    return avg_loss, avg_precision

train_and_evaluate(model, train_loader, test_loader, criterion, optimizer, num_epochs)


  0%|          | 0/59 [00:00<?, ?it/s]

100%|██████████| 59/59 [00:03<00:00, 15.05it/s]
100%|██████████| 15/15 [00:10<00:00,  1.42it/s]


Epoch [1/1000], Loss: 12.8200, Val Loss: 12.8141, Precision@20: 0.0026


100%|██████████| 59/59 [00:03<00:00, 15.68it/s]
100%|██████████| 15/15 [00:10<00:00,  1.45it/s]


Epoch [2/1000], Loss: 12.7674, Val Loss: 12.7977, Precision@20: 0.0074


100%|██████████| 59/59 [00:03<00:00, 15.69it/s]
100%|██████████| 15/15 [00:10<00:00,  1.45it/s]


Epoch [3/1000], Loss: 12.7124, Val Loss: 12.7802, Precision@20: 0.0192


100%|██████████| 59/59 [00:03<00:00, 15.65it/s]
100%|██████████| 15/15 [00:10<00:00,  1.44it/s]


Epoch [4/1000], Loss: 12.6515, Val Loss: 12.7609, Precision@20: 0.0378


100%|██████████| 59/59 [00:03<00:00, 15.18it/s]
100%|██████████| 15/15 [00:10<00:00,  1.45it/s]


Epoch [5/1000], Loss: 12.5835, Val Loss: 12.7388, Precision@20: 0.0554


100%|██████████| 59/59 [00:03<00:00, 15.70it/s]
100%|██████████| 15/15 [00:10<00:00,  1.45it/s]


Epoch [6/1000], Loss: 12.5051, Val Loss: 12.7130, Precision@20: 0.0710


100%|██████████| 59/59 [00:03<00:00, 15.67it/s]
100%|██████████| 15/15 [00:10<00:00,  1.45it/s]


Epoch [7/1000], Loss: 12.4121, Val Loss: 12.6816, Precision@20: 0.0853


100%|██████████| 59/59 [00:03<00:00, 15.48it/s]
100%|██████████| 15/15 [00:10<00:00,  1.43it/s]


Epoch [8/1000], Loss: 12.2978, Val Loss: 12.6415, Precision@20: 0.0996


100%|██████████| 59/59 [00:03<00:00, 15.66it/s]
100%|██████████| 15/15 [00:10<00:00,  1.44it/s]


Epoch [9/1000], Loss: 12.1480, Val Loss: 12.5863, Precision@20: 0.1139


100%|██████████| 59/59 [00:03<00:00, 15.63it/s]
100%|██████████| 15/15 [00:10<00:00,  1.44it/s]


Epoch [10/1000], Loss: 11.9366, Val Loss: 12.5028, Precision@20: 0.1304


100%|██████████| 59/59 [00:03<00:00, 15.16it/s]
100%|██████████| 15/15 [00:10<00:00,  1.46it/s]


Epoch [11/1000], Loss: 11.6133, Val Loss: 12.3669, Precision@20: 0.1491


100%|██████████| 59/59 [00:03<00:00, 15.60it/s]
100%|██████████| 15/15 [00:10<00:00,  1.46it/s]


Epoch [12/1000], Loss: 11.1107, Val Loss: 12.1739, Precision@20: 0.1761


100%|██████████| 59/59 [00:03<00:00, 15.60it/s]
100%|██████████| 15/15 [00:10<00:00,  1.45it/s]


Epoch [13/1000], Loss: 10.4970, Val Loss: 12.0595, Precision@20: 0.2148


100%|██████████| 59/59 [00:03<00:00, 15.36it/s]
100%|██████████| 15/15 [00:10<00:00,  1.39it/s]


Epoch [14/1000], Loss: 10.0463, Val Loss: 12.1073, Precision@20: 0.2555


100%|██████████| 59/59 [00:03<00:00, 15.55it/s]
100%|██████████| 15/15 [00:10<00:00,  1.47it/s]


Epoch [15/1000], Loss: 9.8256, Val Loss: 12.1900, Precision@20: 0.2839


100%|██████████| 59/59 [00:03<00:00, 15.62it/s]
100%|██████████| 15/15 [00:10<00:00,  1.43it/s]


Epoch [16/1000], Loss: 9.7065, Val Loss: 12.2484, Precision@20: 0.2900


100%|██████████| 59/59 [00:03<00:00, 15.68it/s]
100%|██████████| 15/15 [00:10<00:00,  1.47it/s]


Epoch [17/1000], Loss: 9.6188, Val Loss: 12.2924, Precision@20: 0.2871


100%|██████████| 59/59 [00:03<00:00, 15.60it/s]
100%|██████████| 15/15 [00:10<00:00,  1.48it/s]


Epoch [18/1000], Loss: 9.5460, Val Loss: 12.3296, Precision@20: 0.2859


100%|██████████| 59/59 [00:03<00:00, 15.94it/s]
100%|██████████| 15/15 [00:10<00:00,  1.43it/s]


Epoch [19/1000], Loss: 9.4849, Val Loss: 12.3653, Precision@20: 0.2854


100%|██████████| 59/59 [00:03<00:00, 15.46it/s]
100%|██████████| 15/15 [00:10<00:00,  1.44it/s]


Epoch [20/1000], Loss: 9.4272, Val Loss: 12.4015, Precision@20: 0.2843


100%|██████████| 59/59 [00:03<00:00, 15.49it/s]
100%|██████████| 15/15 [00:10<00:00,  1.46it/s]

Epoch [21/1000], Loss: 9.3788, Val Loss: 12.4354, Precision@20: 0.2834
Early stopping triggered





In [7]:
model.load_state_dict(model_parameters)

avg_loss, avg_precision = evaluate(model, test_loader, criterion)
print(avg_loss, avg_precision)

100%|██████████| 15/15 [00:10<00:00,  1.47it/s]

12.248351796468098 0.29000402522474167





In [8]:
torch.save({
    "state_dict": model.cpu().state_dict(),
    "num_items": num_items,
    "embedding_dim":embedding_dim,
    "hidden_dim": hidden_dim
}, "../parameters/session.pth")