# Libraries

In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
import warnings
import os
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt
from datetime import datetime
from transformers import get_linear_schedule_with_warmup
warnings.simplefilter(action='ignore', category=pd.errors.SettingWithCopyWarning)

2023-11-21 22:47:43.875882: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-11-21 22:47:43.875930: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-11-21 22:47:43.877295: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


# Hyper-Parameters

In [2]:
class HyperParameters:
    def __init__(self, batch_size=512, 
                 learning_rate=0.0001, 
                 epochs=10, # 1 for testing
                 hidden_size = 256,
                 embedding_dim = 128,
                 lstm_num_layers = 3,
                 train_size = 0.8,
                 device = torch.device("cuda" if torch.cuda.is_available() else "cpu"),
                 save_dir = "../model/encoder-decoder/"
                 ):
        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.hidden_size = hidden_size
        self.embedding_dim = embedding_dim
        self.lstm_num_layers = lstm_num_layers
        self.train_size = train_size
        self.device = device
        self.save_dir = save_dir

    def get_batch_size(self):
        return self.batch_size

    def get_learning_rate(self):
        return self.learning_rate

    def get_epochs(self):
        return self.epochs

    def get_hidden_size(self):
        return self.hidden_size
    
    def get_embedding_dim(self):
        return self.embedding_dim
    
    def get_lstm_num_layers(self):
        return self.lstm_num_layers
    
    def get_train_size(self):
        return self.train_size
    
    def get_device(self):
        return self.device
    
    def get_save_dir(self):
        return self.save_dir
    
hyperparams = HyperParameters()

# Load Data

In [3]:
# Read the Parquet file and drop duplicates based on "song_id" column
df = pd.read_parquet("../data/meta_song.parquet").drop_duplicates("song_id")
# Count the number of missing values (NaNs) in the DataFrame
na_count = df.isna().sum()
print("Number of missing values (NaNs) in the dataset: meta_song.parquet")
print(na_count)

Number of missing values (NaNs) in the dataset: meta_song.parquet
song_id             0
artist_id      128866
song_length    128866
album_id       323497
language_id    323497
album_month    323523
dtype: int64


In [4]:
def merge_feacture_dataset(df:pd.DataFrame)->pd.DataFrame:
    shape = df.shape[0]
    df1 = pd.read_parquet("../data/meta_song_composer.parquet").drop_duplicates("song_id")
    print(len(set(df['song_id'].unique())-set(df1['song_id'].unique())))
    df = pd.merge(df, df1, how='left') 
    assert df.shape[0] == shape, f"origin shape: {shape}, merge after shape: {df.shape[0]}"
    df2 = pd.read_parquet("../data/meta_song_genre.parquet").drop_duplicates("song_id")
    print(len(set(df['song_id'].unique())-set(df2['song_id'].unique())))
    df = pd.merge(df, df2, how='left') 
    assert df.shape[0] == shape, f"origin shape: {shape}, merge after shape: {df.shape[0]}"
    df3 = pd.read_parquet("../data/meta_song_lyricist.parquet").drop_duplicates("song_id")
    print(len(set(df['song_id'].unique())-set(df3['song_id'].unique())))
    df = pd.merge(df, df3, how='left') 
    assert df.shape[0] == shape, f"origin shape: {shape}, merge after shape: {df.shape[0]}"
    df4 = pd.read_parquet("../data/meta_song_producer.parquet").drop_duplicates("song_id")
    print(len(set(df['song_id'].unique())-set(df4['song_id'].unique())))
    df = pd.merge(df, df4, how='left') 
    assert df.shape[0] == shape, f"origin shape: {shape}, merge after shape: {df.shape[0]}"
    df5 = pd.read_parquet("../data/meta_song_titletext.parquet").drop_duplicates("song_id")
    print(len(set(df['song_id'].unique())-set(df5['song_id'].unique())))
    df = pd.merge(df, df5, how='left') 
    assert df.shape[0] == shape, f"origin shape: {shape}, merge after shape: {df.shape[0]}"
    df6 = pd.read_parquet("../data/meta_song.parquet").drop_duplicates("song_id")
    print(len(set(df['song_id'].unique())-set(df6['song_id'].unique())))
    df = pd.merge(df, df6, how='left') 
    assert df.shape[0] == shape, f"origin shape: {shape}, merge after shape: {df.shape[0]}"
    print(f"Merge finish!, now shape is : {df.shape}")
    return df

# load from directory
train_source = pd.read_parquet("../data/label_train_source.parquet")
train_target = pd.read_parquet("../data/label_train_target.parquet")
test_source  = pd.read_parquet("../data/label_test_source.parquet")
# sort the data by session_id, listening_order
train_source = train_source.sort_values(by=['session_id', 'listening_order'], ascending=[True, True])
train_target = train_target.sort_values(by=['session_id', 'listening_order'], ascending=[True, True])
test_source  = test_source.sort_values( by=['session_id', 'listening_order'], ascending=[True, True])

#train_source = merge_feacture_dataset(train_source)
#train_target = merge_feacture_dataset(train_target)
#test_source  = merge_feacture_dataset(test_source)

# Encode the feactures

In [5]:
def fill_NaNs(df:pd.DataFrame, numerical_columns:list=None, string_columns:list=None)->pd.DataFrame:
    for column in numerical_columns:
        df[column].fillna(df[column].mean(), inplace=True)
    for column in string_columns:
        df[column].fillna(0)
    return df

def encode_unix_time(df:pd.DataFrame, sin_cos = False):
    # Convert 'unix_played_at' to a datetime column
    df['played_at_datetime'] = pd.to_datetime(df['unix_played_at'], unit='s')
    if sin_cos:
        df['hour_sin'] = np.sin(2 * np.pi * df['played_at_datetime'].dt.hour / 24)
        df['hour_cos'] = np.cos(2 * np.pi * df['played_at_datetime'].dt.hour / 24)
        df['minute_sin'] = np.sin(2 * np.pi * df['played_at_datetime'].dt.minute / 60)
        df['minute_cos'] = np.cos(2 * np.pi * df['played_at_datetime'].dt.minute / 60)
        df['second_sin'] = np.sin(2 * np.pi * df['played_at_datetime'].dt.second / 60)
        df['second_cos'] = np.cos(2 * np.pi * df['played_at_datetime'].dt.second / 60)
        df['month_sin'] = np.sin(2 * np.pi * df['played_at_datetime'].dt.month / 12)
        df['month_cos'] = np.cos(2 * np.pi * df['played_at_datetime'].dt.month / 12)
        df['year_sin'] = np.sin(2 * np.pi * df['played_at_datetime'].dt.year / 2023)
        df['year_cos'] = np.cos(2 * np.pi * df['played_at_datetime'].dt.year / 2023)
    else:
        df['hour_of_day'] = df['played_at_datetime'].dt.hour / 24
        df['minute_of_hour'] = df['played_at_datetime'].dt.minute / 60
        df['second_of_minute'] = df['played_at_datetime'].dt.second / 60
        df['month'] = df['played_at_datetime'].dt.month / 12
        df['year'] = df['played_at_datetime'].dt.year / 2023
    # Drop the specified columns from the DataFrame
    df.drop(columns=['unix_played_at', 'played_at_datetime'], inplace=True)
    return df

def get_song_ID_encode_dict(train:pd.DataFrame, test:pd.DataFrame)->dict:
    unique_song_ids = set(train['song_id'].tolist()+test['song_id'].tolist())
    ID_IDX = {song_id:i+1 for i,song_id in enumerate(unique_song_ids)}
    ID_IDX["SOS"]=0
    return ID_IDX

def encode_song_id(id2idx:dict, source_df:pd.DataFrame, target_df:pd.DataFrame=None):
    source_df['song_id'] = source_df['song_id'].map(id2idx)
    if target_df is not None:
        target_df['song_id'] = target_df['song_id'].map(id2idx)
        return source_df, target_df
    else:
        return source_df, None

def convert_per_N(df:pd.DataFrame, n:int, label = False):
    data = []
    pre_session_id = int(df['session_id'].iloc[0])
    if label:
        row = [0]
        for i in tqdm(range(df.shape[0])):
            # next session id
            if pre_session_id != int(df['session_id'].iloc[i]):
                data.append((pre_session_id, np.array(row).reshape(-1, 6)))
                pre_session_id, row = int(df['session_id'].iloc[i]), [0]
            # append 5 values
            row.append(df['song_id'].iloc[i])
        # append last session id
        data.append((df['session_id'].iloc[-1], np.array(row).reshape(-1, 6))) #last one
    else:
        row = []
        song_id = []
        for i in tqdm(range(df.shape[0])):
            # next session id
            if pre_session_id != int(df['session_id'].iloc[i]):
                data.append((pre_session_id, 
                             np.array(row).reshape(-1, 20), 
                             np.array(song_id).reshape(-1, 20)))
                pre_session_id, row,song_id = int(df['session_id'].iloc[i]), [], []
            # append 20 values
            song_id.append(df['song_id'].iloc[i])
            row.append([df['play_status'].iloc[i],
                        df['login_type'].iloc[i],
                        df['second_of_minute'].iloc[i],
                        df['minute_of_hour'].iloc[i],
                        df['hour_of_day'].iloc[i],
                        df['month'].iloc[i], 
                        df['year'].iloc[i]])
        # append last session id
        data.append((df['session_id'].iloc[-1], 
                     np.array(row).reshape(-1, 20),
                     np.array(song_id).reshape(-1, 20)))
    return data

In [6]:
# data preprocessing
ID_IDX = get_song_ID_encode_dict(train_source, test_source)
# encode song_id
train_source, train_target = encode_song_id(ID_IDX, train_source, train_target)
test_source, _ =  encode_song_id(ID_IDX, test_source, None)
# encode unix_time_at
train_source = encode_unix_time(train_source)
train_target = encode_unix_time(train_target)
test_source  = encode_unix_time(test_source)
# covert to sequential data, top 20 songs for input, last 5 for output
train_source_data  = convert_per_N(train_source, 20)
train_source_label = convert_per_N(train_target, 5, label=True)
test_source_data   = convert_per_N(test_source, 20)

100%|██████████| 11445180/11445180 [08:44<00:00, 21837.33it/s]
100%|██████████| 2861295/2861295 [00:32<00:00, 87361.13it/s]
100%|██████████| 2861280/2861280 [02:10<00:00, 21857.78it/s]


# Convert to Dataset and Dataloader with batch size

In [7]:
class RankingDatset(Dataset):
    def __init__(self, data, label=None, train = True):
        self.session_id = [session_id for session_id,_,_ in data]
        self.feature    = [feature    for _,feature,_    in data]
        self.song_id    = [song_id    for _,_,song_id    in data]
        if train:
            self.label  = [label for _,label in label]
        else:
            self.label  = [0 for _ in data]
    def __len__(self):
        return len(self.session_id)
    def __getitem__(self, idx):
        session_id = self.session_id[idx]
        feature = torch.tensor(self.feature[idx], dtype=torch.long)
        song_id = torch.tensor(self.song_id[idx], dtype=torch.long)
        label = torch.tensor(self.label[idx], dtype=torch.long)
        return {'session_id': session_id, 'feature': feature, 'song_id': song_id, 'label': label}

In [8]:
train_dataset = RankingDatset(train_source_data, train_source_label)
train_size = int(hyperparams.get_train_size() * len(train_dataset))
val_size = len(train_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(train_dataset, [train_size, val_size])
test_dataset  = RankingDatset(test_source_data, train=False)
train_dataset[0]

{'session_id': 96605,
 'feature': tensor([[ 1, 49,  0,  0,  0,  0,  0,  1, 49,  0,  0,  0,  0,  0,  1, 49,  0,  0,
           0,  0],
         [ 0,  1, 49,  0,  0,  0,  0,  0,  1, 49,  0,  0,  0,  0,  0,  1, 49,  0,
           0,  0],
         [ 0,  0,  1, 49,  0,  0,  0,  0,  0,  1, 49,  0,  0,  0,  0,  0,  1, 49,
           0,  0],
         [ 0,  0,  0,  1, 49,  0,  0,  0,  0,  0,  1, 49,  0,  0,  0,  0,  0,  1,
          49,  0],
         [ 0,  0,  0,  0,  1, 49,  0,  0,  0,  0,  0,  1, 49,  0,  0,  0,  0,  0,
           1, 49],
         [ 0,  0,  0,  0,  0,  1, 49,  0,  0,  0,  0,  0,  1, 49,  0,  0,  0,  0,
           0,  1],
         [49,  0,  0,  0,  0,  0,  1, 49,  0,  0,  0,  0,  0,  1, 49,  0,  0,  0,
           0,  0]]),
 'song_id': tensor([[ 82580, 606893, 208757, 527901, 232236,  62746, 177114, 211785, 481046,
          213249, 624855, 222640, 168755, 292540, 592736,  74856, 221164, 298480,
          335012, 277550]]),
 'label': tensor([[     0, 572953, 239671, 477962, 685

In [9]:
train_dataloader = DataLoader(train_dataset, batch_size=hyperparams.get_batch_size(), shuffle=True)
val_dataloader   = DataLoader(val_dataset, batch_size=hyperparams.get_batch_size())
test_dataloader  = DataLoader(test_dataset, batch_size=hyperparams.get_batch_size())

# Model

In [38]:
# Define the Encoder
class Encoder(nn.Module):
    def __init__(self, num_songs, embedding_dim, hidden_size, num_feature, num_layers=1):
        super(Encoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        self.embedding = nn.Embedding(num_songs, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim+num_feature, hidden_size, num_layers, batch_first=True)

    def forward(self, song_ids, feature):
        # song_ids: torch.Size([batch size, 1, 20])
        # feature : torch.Size([batch size, 7, 20])
        embedded = self.embedding(song_ids) .squeeze(1)
        # input feature cat with song embed
        lstm_input = torch.cat((embedded, feature.transpose(1,2)), dim=2)  # Concatenate along the feature dimension
        # Forward propagate LSTM
        out, hidden = self.lstm(lstm_input)  # out: tensor of shape (batch_size, seq_length, hidden_size)
        return out, hidden

# Define the Decoder
class Decoder(nn.Module):
    def __init__(self, num_songs, embedding_dim, enc_hidden_size, hidden_size, num_layers=1):
        super(Decoder, self).__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        self.embedding = nn.Embedding(num_songs, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim+enc_hidden_size, hidden_size, num_layers, batch_first=True)
        self.fc1 = nn.Linear(20, 1)
        self.fc2 = nn.Linear(hidden_size, num_songs)

    def forward(self, decode_song_ids, last_hidden, encoder_hidden):
        # Forward propagate LSTM
        embedded = self.embedding(decode_song_ids).squeeze(1) # torch.Size([64, 1, 128])
        # encoder_hidden: torch.Size([batch size, 20, 256])
        encoder_hidden = self.fc1(encoder_hidden.transpose(1,2)) # torch.Size([64, 256, 1])
        lstm_input = torch.cat((embedded, encoder_hidden.transpose(1,2)), dim=2) # torch.Size([64, 1, 386])
        out, lstm_hidden = self.lstm(lstm_input, last_hidden)
        out = self.fc2(out.squeeze(1)) # torch.Size([64, 716557])
        return out, lstm_hidden

# Seq2Seq model
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, song_ids, features, target_song_ids=None, use_teacher_forcing=True):
        encoder_output, hidden = self.encoder(song_ids, features)
        batch_size = song_ids.shape[0]  # 64
        target_song_ids = torch.zeros(batch_size, 6).long().to(hyperparams.get_device()) if target_song_ids is None else target_song_ids.squeeze(1)
        target_len = target_song_ids.size(-1) # 6
        target_song_size = self.decoder.fc2.out_features
        
        outputs = torch.zeros(batch_size, target_len, target_song_size) # torch.Size([64, 6, 716557])
        decoder_input = target_song_ids[:, 0].unsqueeze(1)  # SOS token as the first input, torch.Size([64, 1])

        for t in range(1, target_len):
            decoder_output, hidden = self.decoder(decoder_input.unsqueeze(1), hidden, encoder_output)
            outputs[:, t, :] = decoder_output
            # using teacher forcing
            if use_teacher_forcing:
                assert target_song_ids is not None, "Teacher forcing must have a target; it shouldn't be None."
                decoder_input = target_song_ids[:, t].unsqueeze(1)
            else:
                decoder_input = torch.argmax(decoder_output.squeeze(0),dim = 1).unsqueeze(1)

        return torch.argmax(outputs, dim = 2)

In [39]:
encoder = Encoder(len(list(ID_IDX.values())), 
                  hyperparams.get_embedding_dim(), 
                  hyperparams.get_hidden_size(), 
                  train_dataset[0]['feature'].shape[0], 
                  hyperparams.get_lstm_num_layers()
                  )
decoder = Decoder(len(list(ID_IDX.values())), 
                  hyperparams.get_embedding_dim(), 
                  hyperparams.get_hidden_size(), 
                  hyperparams.get_hidden_size(), 
                  hyperparams.get_lstm_num_layers()
                  )
model = Seq2Seq(encoder, decoder).to(hyperparams.get_device())

In [40]:
def testing_model(model=model, train_dataloader=train_dataloader, inference=False):
    # last batch
    batch_sample = None
    for idx, data in enumerate(train_dataloader):
        if idx == len(train_dataloader)-1:
            batch_sample = data
    #batch_sample = next(iter(train_dataloader))
    song_ids = batch_sample['song_id'].to(hyperparams.get_device())
    print("song_ids: ", song_ids.shape)
    features = batch_sample['feature'].to(hyperparams.get_device())
    print("features: ", features.shape)
    target   = batch_sample['label'].to(hyperparams.get_device())
    print("target: ", target.shape)
    # Set the model to evaluation mode
    with torch.no_grad():
        if inference:
            output = model(song_ids, features, None, False)
        else:
            output = model(song_ids, features, target)
        return output.shape #, output
testing_model(inference=True)

torch.Size([79, 1, 20])
torch.Size([79, 7, 20])
torch.Size([79, 1, 6])


torch.Size([79, 6])

# Loss function, Optimizer, Scheduler

In [26]:
class NDCG_Score(torch.nn.Module):
    def __init__(self, sigma=1.0):
        super(NDCG_Score, self).__init__()
        self.sigma = sigma
    def forward(self, predictions, labels):
        # Calculate nDCG loss
        # Compare predictions and labels element-wise
        gain = (predictions != labels).float()
        weightage = torch.tensor([1.0, 0.63, 0.5, 0.43, 0.38], requires_grad=True).float()
        return sum(gain @ weightage)

class listNetLoss(torch.nn.Module):
    def __init__(self, eps=1e-10, padded_value_indicator=-1):
        super(listNetLoss, self).__init__()
        self.eps = eps
        self.padded_value_indicator = padded_value_indicator
        self.softmax = torch.nn.Softmax(dim=1)
    def forward(self, y_pred, y_true):
        """
        ListNet loss introduced in "Learning to Rank: From Pairwise Approach to Listwise Approach".
        :param y_pred: predictions from the model, shape [batch_size, slate_length]
        :param y_true: ground truth labels, shape [batch_size, slate_length]
        :param eps: epsilon value, used for numerical stability
        :param padded_value_indicator: an indicator of the y_true index containing a padded item, e.g. -1
        :return: loss value, a torch.Tensor
        """
        y_pred = y_pred.float()
        y_true = y_true.float()
        y_pred = y_pred.detach().requires_grad_(True)
        y_pred,_ = torch.sort(y_pred, dim=1)
        y_true,_ = torch.sort(y_true, dim=1)
        preds_smax = self.softmax(y_pred) + self.eps
        true_smax  = self.softmax(y_true)
        preds_smax = preds_smax 
        preds_log = torch.log(preds_smax)
        return torch.mean(-torch.sum(true_smax * preds_log, dim=1))

def test_Loss(loss_fn=listNetLoss()):
    # Example usage:
    predictions = torch.tensor(
        [[     0, 627674, 217020, 131695, 131695, 131695],
         [     0,  43503, 502994, 472149, 639739, 585053],
         [     0,  43503, 169674, 169674, 217020, 585053],
         [     0, 696212, 231735, 231735, 272798, 272798],
         [     0,  43503, 512256, 512256, 667592, 137733],
         [     0,  43503, 231735, 169674, 169674, 169674],
         [     0,  43503, 144857, 667592, 667592, 137733],
         [     0,  43503, 217020, 585053, 667592, 585053],
         [     0,  43503, 231735, 217020, 667592, 634661],
         [     0,  43503, 144857, 355345, 137733, 137733],
         [     0,  43503, 169674, 169674, 169674, 169674],
         [     0,  43503, 231735, 512256, 639739, 169674],
         [     0, 696212, 231735, 231735, 231735, 231232],
         [     0,  43503, 231735, 217020, 169674, 639739],
         [     0,  43503, 562567, 169674, 634661, 634661],
         [     0,  43503, 217020, 667592, 667592, 634661]]).float() # Example predicted scores
    labels = torch.tensor(
        [[     0, 627674, 217020, 131695, 131695, 131695],
         [     0,  43503, 502994, 472149, 639739, 585053],
         [     0,  43503, 169674, 169674, 217020, 585053],
         [     0, 696212, 231735, 231735, 272798, 272798],
         [     0,  43503, 512256, 512256, 667592, 137733],
         [     0,  43503, 2, 3, 169674, 169674],
         [     0,  43503, 144857, 667592, 667592, 137733],
         [     0,  43503, 217020, 585053, 667592, 585053],
         [     0,  43503, 231735, 217020, 667592, 634661],
         [     0,  43503, 144857, 355345, 137733, 137733],
         [     0,  43503, 169674, 169674, 5, 169674],
         [     0,  43503, 231735, 512256, 639739, 169674],
         [     0, 696212, 231735, 4, 231735, 231232],
         [     0,  43503, 231735, 217020, 169674, 639739],
         [     0,  43503, 562567, 169674, 634661, 634661],
         [     0,  43503, 217020, 667592, 667592, 634661]]).float() # Example true relevance scores 
    # Define nDCG loss criterion
    criterion = loss_fn
    # Calculate nDCG loss
    loss = criterion(predictions[:,1:].detach().requires_grad_(True), labels[:,1:])
    loss.backward()
    print(f"Loss: {loss.item()}")
    
test_Loss()

Loss: 0.9361662864685059


In [27]:
loss_fn = listNetLoss()
# loss_fn = nn.MSELoss()
# loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.AdamW(model.parameters(), lr=hyperparams.get_learning_rate())
total_steps = len(train_dataloader) * hyperparams.get_epochs()
scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

# Training

In [29]:
def train(model, data_loader, optimizer, scheduler, device, num_classes=None):
    model.train()
    total_loss = 0.0
    for batch in tqdm(data_loader):
        optimizer.zero_grad()
        song_ids = batch['song_id'].to(device)
        features = batch['feature'].to(device)
        target   = batch['label'].squeeze(1).to(device)
        outputs = model(song_ids, features, target)
        loss = loss_fn(outputs[:,1:].cpu(), target[:,1:].cpu())
        # loss = loss_fn(
        #      outputs.cpu(),
        #      torch.tensor(torch.eye(num_classes)[target.cpu()]).transpose(1,2)
        #     )
        total_loss+=loss.item()
        loss.backward()
        optimizer.step()
        scheduler.step()
    return total_loss

def evaluate(model, data_loader, device, num_classes=None):
    model.eval()
    total_loss = 0.0
    with torch.no_grad():
        for batch in tqdm(data_loader):
            song_ids = batch['song_id'].to(device)
            features = batch['feature'].to(device)
            target   = batch['label'].squeeze(1).to(device)
            outputs = model(song_ids, features, target)
            loss = loss_fn(outputs[:,1:], target[:,1:])
            # loss = loss_fn(
            #      outputs.cpu(),
            #      torch.tensor(torch.eye(num_classes)[target.cpu()]).transpose(1,2)
            # )
            total_loss+=loss.item()
    return total_loss

def training_Process():
    train_losses, valid_losses = [], []
    num_class = len(ID_IDX)
    print("\nStart Training:")
    for epoch in range(hyperparams.get_epochs()):
        print(f"Epoch {epoch + 1}/{hyperparams.get_epochs()}")
        train_loss = train(model, train_dataloader, optimizer, scheduler, hyperparams.get_device(), num_class)
        valid_loss = evaluate(model, val_dataloader, hyperparams.get_device(), num_class)
        train_losses.append(train_loss)
        valid_losses.append(valid_loss)
        print(f"Training loss   : {train_loss:.4f}")
        print(f"Validation loss : {valid_loss:.4f}")
    return train_losses, valid_losses

train_losses, valid_losses = training_Process()


Start Training:
Epoch 1/1


100%|█████████▉| 894/895 [1:19:00<00:05,  5.30s/it]


RuntimeError: shape '[512, 20, -1]' is invalid for input of size 11060

In [None]:
def save_model(model, folder_path, valid_losses):
    # Create the folder if it doesn't exist
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)
    # Save the model state dict
    mean_valid_loss = sum(valid_losses) / len(valid_losses)
    torch.save(model.state_dict(), os.path.join(folder_path, f'model_loss_{mean_valid_loss:.4f}.pth'))

def load_model(model, model_path):
    # Load the model state dict
    model.load_state_dict(torch.load(model_path))
    # Set the model to evaluation mode after loading
    model.eval()  
    return model

# save model
save_model(model, hyperparams.get_save_dir(), valid_losses)

# Plot the training loss

In [None]:
 # Plotting
plt.figure(figsize=(8, 6))
plt.plot(train_losses, label='Training Loss')
plt.plot(valid_losses, label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training and Validation Losses')
plt.legend()
plt.show()

# Generate Submittion

In [None]:
# compute coverage
# torch.unique(torch.tensor([[0, 1, 2, 3, 4, 5],[6, 7, 8, 9, 10, 11]]))  
# --> tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [None]:
def generate_prediction(model, test_dataloader, device, batch=False):
    if batch:
        batch = next(iter(test_dataloader))
        session_id = batch['session_id']
        song_ids = batch['song_id'].to(device)
        features = batch['feature'].to(device)
        outputs = model(song_ids, features, target_song_ids=None, use_teacher_forcing=False)
        return session_id, outputs[:,1:]
    else:
        total_session_ids = torch.zeros(len(test_dataloader)*hyperparams.get_batch_size())
        total_outputs = torch.zeros(len(test_dataloader)*hyperparams.get_batch_size(), 5)
        # batch = next(iter(test_dataloader))
        for idx, batch in tqdm(enumerate(test_dataloader)):
            session_id = batch['session_id']
            song_ids = batch['song_id'].to(device)
            features = batch['feature'].to(device)
            outputs = model(song_ids, features, target_song_ids=None, use_teacher_forcing=False, prediction=True)
            total_session_ids[hyperparams.get_batch_size()*idx:hyperparams.get_batch_size()*(idx+1)] = session_id
            total_outputs[hyperparams.get_batch_size()*idx:hyperparams.get_batch_size()*(idx+1), :] = outputs[:,1:]
        return total_session_ids, total_outputs

total_session_ids, total_outputs = generate_prediction(model, test_dataloader, hyperparams.get_device(), batch=True)

In [None]:
final_submittion_df = pd.DataFrame({
    "session_id":total_session_ids.numpy(),
    "top1":total_outputs[:,0].numpy(),
    "top2":total_outputs[:,1].numpy(),
    "top3":total_outputs[:,2].numpy(),
    "top4":total_outputs[:,3].numpy(),
    "top5":total_outputs[:,4].numpy()
})
# convert IDs back 
IDX_ID = {v: k for k, v in ID_IDX.items()}
final_submittion_df['top1'] = final_submittion_df['top1'].map(IDX_ID)
final_submittion_df['top2'] = final_submittion_df['top2'].map(IDX_ID)
final_submittion_df['top3'] = final_submittion_df['top3'].map(IDX_ID)
final_submittion_df['top4'] = final_submittion_df['top4'].map(IDX_ID)
final_submittion_df['top5'] = final_submittion_df['top5'].map(IDX_ID)
# save the final submission
# Get today's date
current_time = datetime.now().strftime("%Y-%m-%d-%H-%M")
save_folder = "../submission/"
file_name = f'{current_time}.csv'
final_submittion_df.to_csv(save_folder+file_name, index=False)