In [182]:
# Standard libraries
import math
import os
import urllib.request
from functools import partial
from urllib.error import HTTPError

# Plotting
import matplotlib
import matplotlib.pyplot as plt
import matplotlib_inline.backend_inline
import numpy as np

# PyTorch Lightning
import pytorch_lightning as pl
import seaborn as sns

# PyTorch
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.utils.data as data

# Torchvision
import torchvision
from pytorch_lightning.callbacks import ModelCheckpoint
from torchvision import transforms
from torchvision.datasets import CIFAR100
from tqdm.notebook import tqdm

plt.set_cmap("cividis")
%matplotlib inline
matplotlib_inline.backend_inline.set_matplotlib_formats("svg", "pdf")  # For export
matplotlib.rcParams["lines.linewidth"] = 2.0
sns.reset_orig()

# Setting the seed
pl.seed_everything(42)

# Ensure that all operations are deterministic on GPU (if used) for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

device = None
if torch.cuda.is_available():
    device = torch.device("cuda:0")
elif torch.mps.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")
    
print("Device:", device)

Seed set to 42


Device: mps


<Figure size 640x480 with 0 Axes>

In [183]:
class PositionalEncoding(nn.Module):
    def __init__(self, d_model: int, seq_len: int, dropout: float) -> None:
        super().__init__()

        self.d_model = d_model
        self.seq_len = seq_len
        self.dropout = nn.Dropout(dropout)

        pe = torch.zeros(seq_len, d_model) # (seq_len, d_model)
        position = torch.arange(0, seq_len, dtype=torch.float).unsqueeze(1) # (seq_len, 1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) # (d_model / 2)
        pe[:, 0::2] = torch.sin(position * div_term) # sin(position * (10000 ** (2i / d_model)) # (seq_len, d_model)
        pe[:, 1::2] = torch.cos(position * div_term) # cos(position * (10000 ** (2i / d_model)) # (seq_len, d_model)
        pe = pe.unsqueeze(0) # (1, seq_len, d_model)
        self.register_buffer('pe', pe, persistent=False)

    def forward(self, x:torch.Tensor):
        x = x + (self.pe[:, :x.shape[1], :]).requires_grad_(False) # (batch, seq_len, d_model)   
        return self.dropout(x)

In [184]:
def attention(q:torch.Tensor, k:torch.Tensor, v:torch.Tensor, mask=None):
    d_k = q.size()[-1] # q,k,v : (batch, head, seq_len, embed_size_per_head)
    attn_logits = torch.matmul(q, k.transpose(-2, -1)) # (batch, head, seq_len, seq_len)
    attn_logits = attn_logits / math.sqrt(d_k)
    if mask is not None:
        attn_logits = attn_logits.masked_fill(mask == 0, -9e15)
    attention = F.softmax(attn_logits, dim=-1)
    values = torch.matmul(attention, v) # (batch, head, seq_len, embed_size_per_head)
    return values, attention

def init_weights(x:nn.Linear):
    with torch.no_grad():
        nn.init.xavier_uniform_(x.weight)
        x.bias.data.fill_(0)

class MultiHeadAttentionBlock(nn.Module):
    def __init__(self, input_dim:int, d_model: int, h: int) -> None:
        super().__init__()
        self.d_model = d_model
        self.h = h

        assert d_model % h == 0, "d_model is not divisible by h"

        self.d_k = d_model // h

        self.w_q = nn.Linear(input_dim, d_model) # Wq
        self.w_k = nn.Linear(input_dim, d_model) # Wk
        self.w_v = nn.Linear(input_dim, d_model) # Wv
        self.w_o = nn.Linear(d_model, d_model) # Wo

        init_weights(self.w_q)
        init_weights(self.w_k)
        init_weights(self.w_v)
        init_weights(self.w_o)

    def forward(self, q_x:torch.Tensor, k_x:torch.Tensor, v_x:torch.Tensor, mask=None):
        q:torch.Tensor = self.w_q(q_x) # (batch, seq_len, d_model)
        k:torch.Tensor = self.w_k(k_x) # (batch, seq_len, d_model)
        v:torch.Tensor = self.w_v(v_x) # (batch, seq_len, d_model)

        q_h = q.reshape(q.shape[0], q.shape[1], self.h, self.d_k).transpose(1, 2) # (batch, head, seq_len, d_k)
        k_h = k.reshape(k.shape[0], k.shape[1], self.h, self.d_k).transpose(1, 2) # (batch, head, seq_len, d_k)
        v_h = v.reshape(v.shape[0], v.shape[1], self.h, self.d_k).transpose(1, 2) # (batch, head, seq_len, d_k)

        attn_out, _ = attention(q_h, k_h, v_h, mask) # (batch, head, seq_len, embed_size_per_head)
        attn_out = attn_out.transpose(1, 2) # (batch, seq_len, head, embed_size_per_head)
        attn_out = attn_out.reshape(attn_out.shape[0], attn_out.shape[1], attn_out.shape[2]*attn_out.shape[3]) # (batch, seq_len, d_model)

        return self.w_o(attn_out) # (batch, seq_len, d_model)

In [185]:
class EncoderBlock(nn.Module):
    def __init__(self, input_dim, num_heads, dim_feedforward, dropout=0.0):
        super().__init__()

        self.self_attn = MultiHeadAttentionBlock(input_dim, input_dim, num_heads)

        self.ffn_1 = nn.Linear(input_dim, dim_feedforward)
        self.ffn_2 = nn.Linear(dim_feedforward, input_dim)

        init_weights(self.ffn_1)
        init_weights(self.ffn_2)

        self.ffn = nn.Sequential(
            self.ffn_1,
            nn.Dropout(dropout),
            nn.GELU(),
            self.ffn_2,
        )

        self.norm1 = nn.LayerNorm(input_dim)
        self.norm2 = nn.LayerNorm(input_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, mask=None):
        attn_out = self.self_attn(x, x, x, mask=mask) # (batch, seq_len, input_dim)
        x = x + self.dropout(attn_out) # (batch, seq_len, input_dim)
        x = self.norm1(x) # (batch, seq_len, input_dim)

        ffn_out = self.ffn(x) # (batch, seq_len, input_dim)
        x = x + self.dropout(ffn_out) # (batch, seq_len, input_dim)
        x = self.norm2(x) # (batch, seq_len, input_dim)

        return x

In [186]:
class Encoder(nn.Module):
    def __init__(self, num_layers, d_model, num_heads, dim_feedforward, dropout=0.0):
        super().__init__()
        self.layers = nn.ModuleList([EncoderBlock(d_model, num_heads, dim_feedforward, dropout) for _ in range(num_layers)])

    def forward(self, x, mask=None):
        for layer in self.layers:
            x = layer(x, mask=mask)
        return x

In [187]:
class DecoderBlock(nn.Module):
    def __init__(self, input_dim, num_heads, dim_feedforward, dropout=0.0)->None:
        super().__init__()

        self.self_attn = MultiHeadAttentionBlock(input_dim, input_dim, num_heads)
        self.crss_attn = MultiHeadAttentionBlock(input_dim, input_dim, num_heads)

        self.ffn_1 = nn.Linear(input_dim, dim_feedforward)
        self.ffn_2 = nn.Linear(dim_feedforward, input_dim)

        init_weights(self.ffn_1)
        init_weights(self.ffn_2)
        
        self.ffn = nn.Sequential(
            self.ffn_1,
            nn.Dropout(dropout),
            nn.GELU(),
            self.ffn_2,
        )

        self.norm1 = nn.LayerNorm(input_dim)
        self.norm2 = nn.LayerNorm(input_dim)
        self.norm3 = nn.LayerNorm(input_dim)
        
        self.dropout = nn.Dropout(dropout)

    def forward(self, x, encoder_output, pred_mask, pad_mask):
        self_attn_out = self.self_attn(x, x, x, mask=pred_mask) # (batch, seq_len, input_dim)
        x = x + self.dropout(self_attn_out) # (batch, seq_len, input_dim)
        x = self.norm1(x) # (batch, seq_len, input_dim)

        crss_attn_out = self.crss_attn(x, encoder_output, encoder_output, mask=pad_mask) # (batch, seq_len, input_dim)
        x = x + self.dropout(crss_attn_out) # (batch, seq_len, input_dim)
        x = self.norm2(x) # (batch, seq_len, input_dim)

        ffn_out = self.ffn(x) # (batch, seq_len, input_dim)
        x = x + self.dropout(ffn_out) # (batch, seq_len, input_dim)
        x = self.norm3(x) # (batch, seq_len, input_dim)

        return x

In [188]:
class Decoder(nn.Module):
    def __init__(self, num_layers, d_model, num_heads, dim_feedforward, dropout):
        super().__init__()
        self.layers = nn.ModuleList([DecoderBlock(d_model, num_heads, dim_feedforward, dropout) for _ in range(num_layers)])

    def forward(self, x, encoder_output, pred_mask=None, pad_mask=None):
        for layer in self.layers:
            x = layer(x, encoder_output, pred_mask, pad_mask)
        return x

In [254]:
class MovieEncoder(nn.Module):
    def __init__(
            self, 
            movie_vocab_size, 
            genres_vocab_size, 
            years_vocab_size, 
            embedding_size, 
            dropout=0.0
        ) -> None:
        
        super(MovieEncoder, self).__init__()
        
        self.movie_embedding_layer = nn.Embedding(movie_vocab_size, embedding_size)
        self.years_embedding_layer = nn.Embedding(years_vocab_size, 32)

        self.genres_encoder_layer = nn.Linear(genres_vocab_size, 4)
        init_weights(self.genres_encoder_layer)

        self.fc_concat = nn.Linear(embedding_size + 36, embedding_size)
        init_weights(self.fc_concat)

        self.fc = nn.Sequential(
            self.fc_concat,
            nn.GELU()
        )

        self.dropout = nn.Dropout(dropout)

    def forward(self, movies, genres, years):
        movie_embedding = self.movie_embedding_layer(movies)
        genres_embedding = self.genres_encoder_layer(genres)
        years_embedding = self.years_embedding_layer(years)

        movie_embedding = torch.concat([movie_embedding, genres_embedding, years_embedding], dim=-1)
        movie_embedding = self.fc(movie_embedding)
        movie_embedding = self.dropout(movie_embedding)

        return movie_embedding

In [255]:
class UserEncoder(nn.Module):
    def __init__(
            self, 
            user_vocab_size, 
            movie_vocab_size, 
            genres_vocab_size, 
            years_vocab_size, 
            embedding_size, 
            movie_seq_len, 
            num_encoder_layers, 
            num_heads, 
            dim_ff,
            dropout=0.0
        ) -> None:

        super(UserEncoder, self).__init__()
        
        self.embedding_size = embedding_size
        self.movie_seq_len = movie_seq_len
        self.num_encoder_layers = num_encoder_layers
        self.num_heads = num_heads
        self.dim_ff = dim_ff

        self.user_embedding_layer = nn.Embedding(user_vocab_size, embedding_size)
        self.movie_encoder = MovieEncoder(movie_vocab_size, genres_vocab_size, years_vocab_size, embedding_size, dropout)

        self.positional_encoding = PositionalEncoding(embedding_size, movie_seq_len, dropout)
        self.encoder_block = Encoder(num_encoder_layers, embedding_size, num_heads, dim_ff, dropout)

        self.fc_concat = nn.Linear(2*embedding_size, embedding_size)
        init_weights(self.fc_concat)

        self.fc = nn.Sequential(
            self.fc_concat,
            nn.GELU()
        )

        self.dropout = nn.Dropout(dropout)


    def forward(
            self, 
            user_ids, 
            rated_movie_ids, 
            rated_movie_genres, 
            rated_movie_years, 
            rated_movie_ratings):
        
        user_embedding = self.user_embedding_layer(user_ids)
        
        movie_embeddings = self.movie_encoder(rated_movie_ids, rated_movie_genres, rated_movie_years)
        movie_embeddings = self.positional_encoding(movie_embeddings)
        movie_embeddings = self.encoder_block(movie_embeddings, None)

        rated_movie_ratings = F.softmax(rated_movie_ratings, dim=-1)
        rated_movie_ratings = rated_movie_ratings.unsqueeze(1)
        movie_embeddings = torch.matmul(rated_movie_ratings, movie_embeddings)

        user_embedding = torch.concat([user_embedding, movie_embeddings], dim=-1)
        user_embedding = self.fc(user_embedding)
        user_embedding = self.dropout(user_embedding)

        return user_embedding

In [347]:
class RecommenderSystem(nn.Module):
    def __init__(
            self, 
            user_vocab_size, 
            movie_vocab_size, 
            genres_vocab_size, 
            years_vocab_size, 
            embedding_size, 
            movie_seq_len, 
            num_encoder_layers, 
            num_heads, 
            dim_ff,
            dropout=0.0
        ) -> None:

        super(RecommenderSystem, self).__init__()

        self.embedding_size = embedding_size
        self.movie_seq_len = movie_seq_len
        self.num_encoder_layers = num_encoder_layers
        self.num_heads = num_heads
        self.dim_ff = dim_ff

        self.movie_encoder = \
            MovieEncoder\
            (
                movie_vocab_size, 
                genres_vocab_size, 
                years_vocab_size, 
                embedding_size, 
                dropout
            )
        
        self.user_encoder = \
            UserEncoder\
            (
                user_vocab_size, 
                movie_vocab_size, 
                genres_vocab_size, 
                years_vocab_size, 
                embedding_size, 
                movie_seq_len, 
                num_encoder_layers, 
                num_heads, 
                dim_ff,
                dropout
            )

        self.fc_concat = nn.Linear(2*embedding_size, embedding_size)
        init_weights(self.fc_concat)

        self.fc = nn.Sequential(
            self.fc_concat,
            nn.GELU()
        )

        self.fc_ratings_linear = nn.Linear(embedding_size, 1)
        init_weights(self.fc_ratings_linear)

        self.fc_ratings = nn.Sequential(
            self.fc_ratings_linear,
            nn.ReLU(inplace=True)
        )

        self.dropout = nn.Dropout(dropout)


    def forward(
            self, 
            user_ids, 
            movie_ids, 
            rated_movie_ids, 
            rated_movie_genres, 
            rated_movie_years, 
            rated_movie_ratings, 
            movie_genres, 
            movie_years):
        
        movie_embeddings = \
            self.movie_encoder(movie_ids, movie_genres, movie_years)
        
        user_embeddings = \
            self.user_encoder\
                (
                    user_ids, 
                    rated_movie_ids, 
                    rated_movie_genres, 
                    rated_movie_years, 
                    rated_movie_ratings
                )

        encoded = torch.concat([user_embeddings, movie_embeddings], dim=-1)
        encoded = self.fc(encoded)
        encoded = self.dropout(encoded)
        output = self.fc_ratings(encoded)
        output = torch.clamp(output, min=0.0, max=5.0).squeeze(1)

        return output

In [123]:
user_vocab_size = 100
movie_vocab_size = 200
genres_vocab_size = 10
years_vocab_size = 50
embedding_size = 128
movie_seq_len = 20
num_encoder_layers = 1
num_heads = 1
dropout = 0.0
batch_size = 128

rec = RecommenderSystem(user_vocab_size, movie_vocab_size, genres_vocab_size, years_vocab_size, embedding_size, movie_seq_len, num_encoder_layers, num_heads, dropout)

user_ids = torch.randint(0, user_vocab_size-1, (batch_size, 1))
movie_ids = torch.randint(0, movie_vocab_size-1, (batch_size, 1))
rated_movie_ids = torch.randint(0, movie_vocab_size-1, (batch_size, movie_seq_len))
rated_movie_genres = torch.randint(0, 1, (batch_size, movie_seq_len, genres_vocab_size)).to(dtype=torch.float32)
rated_movie_years = torch.randint(0, years_vocab_size-1, (batch_size, movie_seq_len))
rated_movie_ratings = torch.randn(batch_size, movie_seq_len)
movie_genres = torch.randint(0, 1, (batch_size, 1, genres_vocab_size)).to(dtype=torch.float32)
movie_years = torch.randint(0, years_vocab_size-1, (batch_size, 1))

out = rec(user_ids, movie_ids, rated_movie_ids, rated_movie_genres, rated_movie_years, rated_movie_ratings, movie_genres, movie_years)


torch.Size([128, 1, 128])
torch.Size([128, 1, 128])


In [None]:
class Transformer(nn.Module):
    def __init__(self, user_vocab_size, movie_vocab_size, genres_vocab_size, years_vocab_size, src_seq_len, tgt_seq_len, d_model, num_heads, dim_feedforward, num_encoder_layers, num_decoder_layers, dropout=0.0) -> None:
        super(Transformer, self).__init__()

        self.user_embedding = nn.Embedding(user_vocab_size, d_model)
        self.years_embedding = nn.Embedding(years_vocab_size, d_model)
        self.movie_embedding = nn.Embedding(movie_vocab_size, d_model)

        self.src_positional_encoding = PositionalEncoding(d_model, src_seq_len, dropout)
        self.tgt_positional_encoding = PositionalEncoding(d_model, tgt_seq_len, dropout)

        self.encoder_block = Encoder(num_encoder_layers, d_model, num_heads, dim_feedforward, dropout)
        self.decoder_block = Decoder(num_decoder_layers, d_model, num_heads, dim_feedforward, dropout)

        self.genres_encoder = nn.Linear(genres_vocab_size, d_model)
        init_weights(self.genres_encoder)

        self.fc_hidden = nn.Linear(4*d_model, d_model)
        init_weights(self.fc_hidden)

        self.fc = nn.Linear(d_model, movie_vocab_size)
        init_weights(self.fc)

        self.dropout = nn.Dropout(dropout)
        self.softmax = nn.Softmax(dim=-1)       


    def generate_mask(self, src:torch.Tensor, tgt:torch.Tensor):
        src_mask = (src != 0).unsqueeze(1).unsqueeze(2) # (batch, 1, 1, seq_len)
        tgt_mask = (tgt != 0).unsqueeze(1).unsqueeze(3) # (batch, 1, seq_len, 1)
        seq_length = tgt.size(1)
        nopeak_mask = (1 - torch.triu(torch.ones(1, seq_length, seq_length), diagonal=1)).bool().to(device=device) # (1, seq_len, seq_len)
        tgt_mask = tgt_mask & nopeak_mask # (batch, 1, seq_len, seq_len)
        return src_mask, tgt_mask
    

    def forward(
            self, 
            user_ids_src:torch.Tensor, 
            user_ids_tgt:torch.Tensor, 
            genres_src:torch.Tensor, 
            genres_tgt:torch.Tensor, 
            years_src:torch.Tensor,
            years_tgt:torch.Tensor,
            ratings_src:torch.Tensor, 
            ratings_tgt:torch.Tensor,
            src_movie_ids:torch.Tensor, 
            tgt_movie_ids:torch.Tensor):
        
        src_mask, tgt_mask = self.generate_mask(src_movie_ids, tgt_movie_ids)


        src_movie_embedding = self.movie_embedding(src_movie_ids) # (batch, seq_len, d_model)
        

        src_movie_embedding = self.movie_embedding(src_movie_ids) # (batch, seq_len, d_model)
        tgt_movie_embedding = self.movie_embedding(tgt_movie_ids) # (batch, seq_len, d_model)

        src_genres_embedding = self.genres_encoder(genres_src)
        tgt_genres_embedding = self.genres_encoder(genres_tgt)

        src_years_embedding = self.years_embedding(years_src)
        tgt_years_embedding = self.years_embedding(years_tgt)

        src_user_embedding = self.user_embedding(user_ids_src) # (batch, seq_len, d_model)
        tgt_user_embedding = self.user_embedding(user_ids_tgt) # (batch, seq_len, d_model)

        src_movie_embedding = torch.concat([src_movie_embedding, src_user_embedding, src_years_embedding, src_genres_embedding], dim=2) # (batch, seq_len, 5*d_model)
        src_movie_embedding = self.fc_hidden(src_movie_embedding) # (batch, seq_len, d_model)

        tgt_movie_embedding = torch.concat([tgt_movie_embedding, tgt_user_embedding, tgt_years_embedding, tgt_genres_embedding], dim=2) # (batch, seq_len, 5*d_model)
        tgt_movie_embedding = self.fc_hidden(tgt_movie_embedding) # (batch, seq_len, d_model)

        src_movie_embedding = self.src_positional_encoding(src_movie_embedding) # (batch, seq_len, d_model)
        tgt_movie_embedding = self.tgt_positional_encoding(tgt_movie_embedding) # (batch, seq_len, d_model)

        src_movie_embedding = self.dropout(src_movie_embedding) # (batch, seq_len, d_model)
        tgt_movie_embedding = self.dropout(tgt_movie_embedding) # (batch, seq_len, d_model)

        enc_output = self.encoder_block(src_movie_embedding, ratings_src, src_mask) # (batch, seq_len, d_model)
        dec_output = self.decoder_block(tgt_movie_embedding, ratings_src, ratings_tgt, enc_output, tgt_mask, src_mask) # (batch, seq_len,d_model)

        return self.fc(dec_output) # (batch, seq_len, tgt_vocab_size)

In [325]:
class CosineWarmupScheduler(optim.lr_scheduler._LRScheduler):
    def __init__(self, optimizer, warmup, max_iters):
        self.warmup = warmup
        self.max_num_iters = max_iters
        super().__init__(optimizer)

    def get_lr(self):
        lr_factor = self.get_lr_factor(epoch=self.last_epoch)
        return [base_lr * lr_factor for base_lr in self.base_lrs]

    def get_lr_factor(self, epoch):
        lr_factor = 0.5 * (1 + np.cos(np.pi * epoch / self.max_num_iters))
        if epoch <= self.warmup:
            lr_factor *= epoch * 1.0 / self.warmup
        return lr_factor

In [3]:
a = torch.randint(0, 99, (5, 4))

In [11]:
a

tensor([[ 6, 95, 97, 58],
        [90, 65, 25, 77],
        [85,  2, 67, 76],
        [80, 81, 90, 22],
        [92, 58, 39, 31]])

In [5]:
b = torch.zeros(5, 4, 100)

In [13]:
b = F.one_hot(a, num_classes=100)

In [14]:
b[0]

tensor([[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
         0, 0, 0, 0],
        [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
         0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,

In [19]:
y_pred = torch.randn(128, 20, 100)
y_true = torch.randint(0, 99, (128, 20))
ratings = torch.randint(0, 49, (128, 20))/10.0

In [20]:
y_pred = F.softmax(y_pred, dim=-1)
y_pred_mask = (y_pred > 0)
y_pred = y_pred.where(y_pred_mask, 1.0e-5)

In [22]:
y_true = F.one_hot(y_true, num_classes=y_pred.shape[2])

In [23]:
ratings = F.softmax(ratings, dim=-1)

In [None]:
(((-y_true*torch.log(y_pred)).sum(dim=-1)/y_pred.shape[2])*ratings).sum()/y_pred.shape[0]

tensor(5.1400)

In [312]:
class CustomLoss(nn.Module):
    def __init__(self):
        super(CustomLoss, self).__init__()

    def forward(self, y_pred:torch.Tensor, y_true:torch.Tensor):
        y_pred = F.softmax(y_pred, dim=-1)
        y_pred_mask = (y_pred > 0)
        y_pred = y_pred.where(y_pred_mask, 1.0e-15)

        y_true = F.one_hot(y_true, num_classes=y_pred.shape[2])

        # ratings = F.softmax(ratings, dim=-1)
        # return ((-y_true*torch.log(y_pred)).sum(dim=-1)*ratings).sum()/y_pred.shape[0]
        return (-y_true*torch.log(y_pred)).sum()/(y_pred.shape[0]*y_pred.shape[1])

In [None]:
class CustomMSELoss(nn.Module):
    def __init__(self):
        super(CustomLoss, self).__init__()

    def forward(self, y_pred:torch.Tensor, y_true:torch.Tensor, ratings:torch.Tensor):
        y_pred = F.softmax(y_pred, dim=-1)
        y_pred_mask = (y_pred > 0)
        y_pred = y_pred.where(y_pred_mask, 1.0e-15)

        y_true = F.one_hot(y_true, num_classes=y_pred.shape[2])

        ratings = F.softmax(ratings, dim=-1)
        return ((-y_true*torch.log(y_pred)).sum(dim=-1)*ratings).sum()/y_pred.shape[0]
        # return (-y_true*torch.log(y_pred)).sum()/(y_pred.shape[0]*y_pred.shape[1])

In [326]:
import pandas as pd

ratings_path = '/Users/amondal/recsys/datasets/ml-32m/ratings.csv'
genres_path = '/Users/amondal/recsys/datasets/ml-32m/movies.csv'

rating_column_names = ['userId', 'movieId', 'rating', 'timestamp']
genres_column_names = ['movieId', 'title', 'genres']

df_rating = pd.read_csv(ratings_path, sep=',', names=rating_column_names, dtype={'userId':'int32', 'movieId':'int32', 'rating':float, 'timestamp':'int64'}, header=0)
df_genres = pd.read_csv(genres_path, sep=',', names=genres_column_names, dtype={'movieId':'int32', 'title':'object', 'genres':'object'}, header=0)

df_rating.dropna(inplace=True, subset=['userId', 'movieId', 'rating'])
df_genres.dropna(inplace=True, subset=['movieId', 'title', 'genres'])

df_genres['genres'] = df_genres['genres'].apply(lambda x: x.split('|'))
df_genres['movie_year'] = df_genres['title'].str.extract(r'\((\d{4})\)').fillna("2025").astype('int')
df_genres.drop(columns=['title'], inplace=True)

df = df_rating.merge(df_genres, on=['movieId'], how='left')

In [284]:
df

Unnamed: 0,userId,movieId,rating,timestamp,rating_class,genres,movie_year
0,1,17,4.0,944249077,8,"[Drama, Romance]",1995
1,1,25,1.0,944250228,2,"[Drama, Romance]",1995
2,1,29,2.0,943230976,4,"[Adventure, Drama, Fantasy, Mystery, Sci-Fi]",1995
3,1,30,5.0,944249077,10,"[Crime, Drama]",1995
4,1,32,5.0,943228858,10,"[Mystery, Sci-Fi, Thriller]",1995
...,...,...,...,...,...,...,...
32000199,200948,79702,4.5,1294412589,9,"[Action, Comedy, Fantasy, Musical, Romance]",2010
32000200,200948,79796,1.0,1287216292,2,"[Action, Adventure, Drama, Thriller, War]",2010
32000201,200948,80350,0.5,1294412671,1,[Comedy],2010
32000202,200948,80463,3.5,1350423800,7,[Drama],2010


In [327]:
all_genres = df['genres'].tolist()

genres_set = set()
for x in all_genres:
    genres_set.update(set(x))

genres_set = list(genres_set)
inv_idx = {genres_set[i]:i for i in range(len(genres_set))}

genres_mh = []
for x in all_genres:
    h = [0]*len(genres_set)
    for y in x:
        h[inv_idx[y]] = 1
    genres_mh += [h]

df['genres_mh'] = genres_mh
df.drop(columns=['genres'], inplace=True)

In [208]:
df

Unnamed: 0,userId,movieId,rating,timestamp,movie_year,genres_mh
0,1,17,4.0,944249077,1995,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
1,1,25,1.0,944250228,1995,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
2,1,29,2.0,943230976,1995,"[0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, ..."
3,1,30,5.0,944249077,1995,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
4,1,32,5.0,943228858,1995,"[0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
...,...,...,...,...,...,...
32000199,200948,79702,4.5,1294412589,2010,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, ..."
32000200,200948,79796,1.0,1287216292,2010,"[0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, ..."
32000201,200948,80350,0.5,1294412671,2010,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."
32000202,200948,80463,3.5,1350423800,2010,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."


In [209]:
# import pandas as pd
# import os

# path = '/Users/amondal/recsys/datasets/ml-1m/ratings.dat'
# user_ids, movie_ids, ratings, timestamps = [], [], [], []
# with open(path) as f:
#     data = f.readlines()
#     data = [x.rstrip().split('::') for x in data]
#     for x in data:
#         user_ids += [int(x[0])]
#         movie_ids += [int(x[1])]
#         ratings += [float(x[2])]
#         timestamps += [int(x[3])]

# df = pd.DataFrame(data={'userId':user_ids, 'movieId':movie_ids, 'rating':ratings, 'timestamp':timestamps})

In [328]:
df = df.sort_values(by='timestamp')
df2 = df[["userId", "movieId"]].groupby(by=["userId"]).agg(list).reset_index()
df2 = df2[df2.movieId.apply(len) > 10]
df = df.merge(df2, on=["userId"], how="inner", suffixes=("", "_right"))
df.drop(columns=['movieId_right'], inplace=True)

n = df.shape[0]
m = int(0.8*n)

df_train = df[:m]
df_test = df[m:]

In [287]:
df_train

Unnamed: 0,userId,movieId,rating,timestamp,rating_class,movie_year,genres_mh
0,25062,1176,4.0,789652004,8,1991,"[1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ..."
1,30917,1079,3.0,789652009,6,1988,"[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."
2,30917,47,5.0,789652009,10,1995,"[0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
3,30917,21,3.0,789652009,6,1995,"[0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, ..."
4,38835,2,4.0,822873600,8,1995,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, ..."
...,...,...,...,...,...,...,...
25600158,52251,46974,2.0,1538551214,4,2006,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ..."
25600159,52251,61465,2.0,1538551232,4,2008,"[0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, ..."
25600160,47557,3300,4.0,1538551239,8,2000,"[0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, ..."
25600161,52251,2469,3.0,1538551295,6,1986,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, ..."


In [148]:
len(genres_set)

20

In [149]:
len(df['genres_mh'][0])

20

In [None]:
torch.randint(0, 99, (10,))

tensor([53])

In [87]:
a = torch.randint(0, 99, (5,))
b = a.reshape(5, 1).repeat(1, 2)

In [88]:
a

tensor([65, 59, 57, 84, 14])

In [89]:
b

tensor([[65, 65],
        [59, 59],
        [57, 57],
        [84, 84],
        [14, 14]])

In [295]:
df['rating_class'].unique()

array([ 8,  6, 10,  2,  4,  7,  9,  3,  1,  5])

In [329]:
import random

prev_seq_len = 10

def get_movies_data(df:pd.DataFrame):
    df2 = df.groupby(by=["userId"]).agg(list).reset_index()

    user_ids, movie_ids, genres, years, ratings = [], [], [], [], []
    prev_movie_ids = []
    prev_movie_genres = []
    prev_movie_years = []
    prev_movie_ratings = []

    for i in range(df2.shape[0]):
        movie_ids_seq = df2.loc[i, 'movieId']
        user_id = df2.loc[i, 'userId']
        genres_seq = df2.loc[i, 'genres_mh']
        ratings_seq = df2.loc[i, 'rating']
        years_seq = df2.loc[i, 'movie_year']

        m = len(movie_ids_seq)-prev_seq_len
        if m > 0:
            indices = random.sample(range(prev_seq_len, len(movie_ids_seq)), k=min(m, 20))

            for j in indices:
                rated_movie_ids = movie_ids_seq[max(0, j-prev_seq_len):j]
                rated_movie_genres = genres_seq[max(0, j-prev_seq_len):j]
                rated_movie_years = years_seq[max(0, j-prev_seq_len):j]
                rated_movie_ratings = ratings_seq[max(0, j-prev_seq_len):j]

                user_ids += [user_id]
                movie_ids += [movie_ids_seq[j]]
                genres += [genres_seq[j]]
                years += [years_seq[j]]
                ratings += [ratings_seq[j]]

                prev_movie_ids += [rated_movie_ids]
                prev_movie_genres += [rated_movie_genres]
                prev_movie_years += [rated_movie_years]
                prev_movie_ratings += [rated_movie_ratings]
    
    user_ids = torch.tensor(user_ids, dtype=torch.int32)
    movie_ids = torch.tensor(movie_ids, dtype=torch.int32)
    genres = torch.tensor(genres, dtype=torch.int8)
    years = torch.tensor(years, dtype=torch.int32)
    ratings = torch.tensor(ratings, dtype=torch.float32)

    prev_movie_ids = torch.tensor(prev_movie_ids, dtype=torch.int32)
    prev_movie_genres = torch.tensor(prev_movie_genres, dtype=torch.int8)
    prev_movie_years = torch.tensor(prev_movie_years, dtype=torch.int32)
    prev_movie_ratings = torch.tensor(prev_movie_ratings, dtype=torch.float32)

    return user_ids, movie_ids, genres, years, ratings, prev_movie_ids, prev_movie_genres, prev_movie_years, prev_movie_ratings

In [330]:
user_id_vocab_size = int(df["userId"].max()+1)
movie_id_vocab_size = int(df["movieId"].max()+1)
genres_vocab_size = len(genres_set)
years_vocab_size = int(df["movie_year"].max()+1)

In [331]:
user_ids_train, movie_ids_train, genres_train, years_train, ratings_train, prev_movie_ids_train, prev_movie_genres_train, prev_movie_years_train, prev_movie_ratings_train = get_movies_data(df_train)

In [332]:
user_ids_test, movie_ids_test, genres_test, years_test, ratings_test, prev_movie_ids_test, prev_movie_genres_test, prev_movie_years_test, prev_movie_ratings_test = get_movies_data(df_test)

In [354]:
embedding_size = 256
movie_seq_len = 10
num_encoder_layers = 4
num_heads = 4
dropout = 0.0
dff = 32
batch_size = 128

rec = RecommenderSystem(user_id_vocab_size, movie_id_vocab_size, genres_vocab_size, years_vocab_size, embedding_size, movie_seq_len, num_encoder_layers, num_heads, dff, dropout).to(device=device)

In [355]:
n_epochs = 10    # number of epochs to run
batch_size = 128  # size of each batch
batches_per_epoch = user_ids_train.shape[0] // batch_size

criterion = nn.MSELoss()
optimizer = optim.Adam(rec.parameters(), lr=0.001)
lr_scheduler = CosineWarmupScheduler(optimizer, warmup=50, max_iters=batches_per_epoch*n_epochs)

rec.train()

for epoch in range(n_epochs):
    indices = torch.randperm(user_ids_train.shape[0])

    for i in range(batches_per_epoch):
        optimizer.zero_grad()
        start = i * batch_size
        batch_indices = indices[start:start+batch_size]

        user_ids_batch = user_ids_train[batch_indices].unsqueeze(1).to(device=device)
        movie_ids_batch = movie_ids_train[batch_indices].unsqueeze(1).to(device=device)
        genres_batch = genres_train[batch_indices].unsqueeze(1).to(dtype=torch.float32).to(device=device)
        years_batch = years_train[batch_indices].unsqueeze(1).to(device=device)
        ratings_batch = ratings_train[batch_indices].unsqueeze(1).to(device=device)

        prev_movie_ids_batch = prev_movie_ids_train[batch_indices].to(device=device)
        prev_movie_genres_batch = prev_movie_genres_train[batch_indices].to(dtype=torch.float32).to(device=device)
        prev_movie_ids_years = prev_movie_years_train[batch_indices].to(device=device)
        prev_movie_ids_ratings = prev_movie_ratings_train[batch_indices].to(device=device)

        output:torch.Tensor = \
            rec(
                user_ids_batch, 
                movie_ids_batch, 
                prev_movie_ids_batch, 
                prev_movie_genres_batch, 
                prev_movie_ids_years, 
                prev_movie_ids_ratings, 
                genres_batch, 
                years_batch
            )
        
        loss:torch.Tensor = criterion(output.contiguous(), ratings_batch.contiguous())

        loss.backward()
        optimizer.step()
        lr_scheduler.step()
        
        print(f"Epoch: {epoch+1}, Batch: {i+1}, Loss: {loss.item()}")

Epoch: 1, Batch: 1, Loss: 11.454397201538086
Epoch: 1, Batch: 2, Loss: 10.663384437561035
Epoch: 1, Batch: 3, Loss: 11.048120498657227
Epoch: 1, Batch: 4, Loss: 9.489564895629883
Epoch: 1, Batch: 5, Loss: 7.159118175506592
Epoch: 1, Batch: 6, Loss: 4.362736701965332
Epoch: 1, Batch: 7, Loss: 3.5247654914855957
Epoch: 1, Batch: 8, Loss: 1.7789807319641113
Epoch: 1, Batch: 9, Loss: 1.952982783317566
Epoch: 1, Batch: 10, Loss: 1.942875623703003
Epoch: 1, Batch: 11, Loss: 1.651390790939331
Epoch: 1, Batch: 12, Loss: 2.44199538230896
Epoch: 1, Batch: 13, Loss: 2.6236963272094727
Epoch: 1, Batch: 14, Loss: 1.9092648029327393
Epoch: 1, Batch: 15, Loss: 1.320547342300415
Epoch: 1, Batch: 16, Loss: 1.6867759227752686
Epoch: 1, Batch: 17, Loss: 1.3643767833709717
Epoch: 1, Batch: 18, Loss: 1.743270754814148
Epoch: 1, Batch: 19, Loss: 1.6397294998168945
Epoch: 1, Batch: 20, Loss: 1.7455768585205078
Epoch: 1, Batch: 21, Loss: 1.5866683721542358
Epoch: 1, Batch: 22, Loss: 1.5606889724731445
Epoch: 

KeyboardInterrupt: 

In [320]:
df['rating_class'].unique()

array([ 8,  6, 10,  2,  4,  7,  9,  3,  1,  5])

In [306]:
ratings_batch.shape

torch.Size([128, 1])

In [360]:
rec.eval()

batch_size = 10000
res = torch.empty(user_ids_test.shape[0], 1).to(device=device)

for i in range(user_ids_test.shape[0]):
    print(i)
    batch_indices = list(range(i, i+batch_size))

    user_ids_batch = user_ids_test[batch_indices].unsqueeze(1).to(device=device)
    movie_ids_batch = movie_ids_test[batch_indices].unsqueeze(1).to(device=device)
    genres_batch = genres_test[batch_indices].unsqueeze(1).to(dtype=torch.float32).to(device=device)
    years_batch = years_test[batch_indices].unsqueeze(1).to(device=device)
    ratings_batch = ratings_test[batch_indices].unsqueeze(1).to(device=device)

    prev_movie_ids_batch = prev_movie_ids_test[batch_indices].to(device=device)
    prev_movie_genres_batch = prev_movie_genres_test[batch_indices].to(dtype=torch.float32).to(device=device)
    prev_movie_ids_years = prev_movie_years_test[batch_indices].to(device=device)
    prev_movie_ids_ratings = prev_movie_ratings_test[batch_indices].to(device=device)

    output:torch.Tensor = \
        rec(
            user_ids_batch, 
            movie_ids_batch, 
            prev_movie_ids_batch, 
            prev_movie_genres_batch, 
            prev_movie_ids_years, 
            prev_movie_ids_ratings, 
            genres_batch, 
            years_batch
        )
    
    res[batch_indices] = output

0
1
2
3
4
5
6
7
8


: 

In [358]:
output.shape

torch.Size([20, 1])

In [357]:
print(ratings_batch)

tensor([[5.0000],
        [4.5000],
        [4.0000],
        [4.5000],
        [4.5000],
        [4.5000],
        [4.0000],
        [5.0000],
        [5.0000],
        [4.0000],
        [4.5000],
        [4.0000],
        [4.0000],
        [3.5000],
        [4.5000],
        [4.5000],
        [5.0000],
        [3.5000],
        [5.0000],
        [4.5000]], device='mps:0')


In [None]:
def get_before_end_token(seq, end_token):
    out = []
    for i in range(len(seq)):
        h = []
        for j in range(len(seq[i])):
            if seq[i][j] != end_token:
                h += [seq[i][j]]
            else:
                break
        out += [h]
    return out

def check_lis(pred, actual):
    if len(pred) != len(actual):
        return False
    for i in range(1, len(pred)):
        if pred[i] <= pred[i-1]:
            return False
    return True

def predict(model:nn.Module, n=100):
    model.eval()
    with torch.no_grad():
        preds:torch.Tensor = model(data_src_test[:n,:].to(device=device), data_tgt_test[:n, :-1].to(device=device))
        preds = preds.argmax(dim=-1)
        return preds
    
def evaluate(model:nn.Module, n=100):
    preds:torch.Tensor = predict(model, n)
    preds = preds.tolist()
    actuals = data_tgt_test[:n, 1:].tolist()
    preds = get_before_end_token(preds, tgt_vocab_size-1)
    actuals = get_before_end_token(actuals, tgt_vocab_size-1)
    s = 0
    u = 0
    for i in range(len(preds)):
        s += 1 if check_lis(preds[i], actuals[i]) else 0
        u += 1
    return s/u

In [None]:
df_train = df.groupby(by=["userId"]).agg(list).reset_index()

In [None]:
3900

In [32]:
df2[df2.movieId.apply(len) > 100]

Unnamed: 0,userId,movieId,rating,timestamp
0,1,"[2997, 2966, 2890, 3078, 2882, 541, 838, 1136,...","[4.0, 1.0, 4.0, 2.0, 1.0, 5.0, 5.0, 1.0, 5.0, ...","[943226846, 943226846, 943226916, 943226986, 9..."
2,3,"[2012, 466, 2268, 168, 1544, 4306, 1485, 2617,...","[3.0, 1.0, 4.0, 3.5, 4.0, 3.5, 4.0, 4.0, 3.5, ...","[1084484354, 1084484362, 1084484382, 108448438..."
9,10,"[48, 2006, 1954, 1909, 1690, 5218, 858, 733, 4...","[3.5, 2.0, 3.5, 2.5, 2.0, 4.0, 5.0, 4.5, 3.5, ...","[1169260535, 1169260570, 1169260574, 116926059..."
15,16,"[1196, 2571, 7153, 2353, 3994, 2006, 1198, 499...","[0.5, 2.0, 4.0, 4.5, 3.5, 1.0, 3.5, 4.0, 4.0, ...","[1517020327, 1517020360, 1517020362, 151702040..."
17,18,"[3252, 1894, 2467, 3159, 4823, 4681, 64839, 48...","[4.0, 0.5, 4.0, 4.5, 1.0, 4.0, 3.5, 3.0, 2.5, ...","[1251917373, 1251917516, 1251917545, 125191760..."
...,...,...,...,...
200939,200940,"[2020, 2915, 2064, 830, 637, 743, 2116, 3704, ...","[4.0, 3.5, 3.5, 2.5, 2.0, 2.0, 0.5, 1.0, 4.5, ...","[1194106282, 1194106296, 1194106299, 119410632..."
200942,200943,"[1957, 4321, 2478, 2686, 1779, 2046, 2528, 309...","[3.0, 2.0, 1.0, 4.5, 2.0, 1.5, 2.0, 2.5, 2.0, ...","[1225217623, 1225217626, 1225217651, 122521766..."
200943,200944,"[260, 1196, 318, 2571, 1291, 7153, 1210, 13413...","[4.0, 3.5, 5.0, 5.0, 3.5, 5.0, 4.0, 5.0, 3.0, ...","[1454247309, 1454247312, 1454247318, 145424732..."
200944,200945,"[318, 8874, 2762, 92259, 79132, 593, 1246, 168...","[5.0, 2.5, 4.0, 5.0, 5.0, 4.0, 3.5, 4.0, 4.0, ...","[1517070023, 1517070056, 1517070090, 151707009..."


In [22]:
2**32

4294967296

In [25]:
user_ids = torch.tensor(df2['userId'].tolist(), dtype=torch.uint32).unsqueeze(1)

In [40]:
len(df2.loc[1, 'movieId'][-100:])

52

In [47]:
movie_ids_src, movie_ids_tgt = [], []
for i in range(df2.shape[0]):
    h = df2.loc[i, 'movieId'][-100:]
    h = h + [0]*(100-len(h))
    movie_ids_src += [h[:50]]
    movie_ids_tgt += [h[50:]]
    
movie_ids_src = torch.tensor(movie_ids_src, dtype=torch.uint32)
movie_ids_tgt = torch.tensor(movie_ids_tgt, dtype=torch.uint32)

In [48]:
ts_src, ts_tgt = [], []
for i in range(df2.shape[0]):
    h = df2.loc[i, 'timestamp'][-100:]
    for j in range(len(h)-1, 0, -1):
        h[j] = h[j]-h[j-1]+1
    h[0] = 1
    h = h + [0]*(100-len(h))
    ts_src += [h[:50]]
    ts_tgt += [h[50:]]
    
ts_src = torch.tensor(ts_src, dtype=torch.uint64)
ts_tgt = torch.tensor(ts_tgt, dtype=torch.uint64)

In [None]:
n = df2.shape[0]
m = int(0.8*n)

user_ids

tensor([  1,   2,   1,  36,   1,   1,   1,  30,   1,   1,  19,  25,   1,   1,
        122,   1,  17,   1,   1,  25,  44,   1,  91,   1,  23,  54, 194,   1,
          1,  17,   1,   1,  17,   1,  20,   1,  15,  25,  12,  15,   1,  15,
          1,  13,  18,   1,  53,   1,  13,  19,  17, 117,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,   0,
          0,   0], dtype=torch.uint64)

In [None]:
class UserNetwork(nn.Module):
    def __init__(self, user_vocab_size, user_d_model, user_ffd, dropout=0.0) -> None:
        super(Transformer, self).__init__()

        self.user_embedding = nn.Embedding(user_vocab_size, user_d_model)

        self.ffn_1 = nn.Linear(user_d_model, user_ffd)
        self.ffn_2 = nn.Linear(user_ffd, user_d_model)

        init_weights(self.ffn_1)
        init_weights(self.ffn_2)

        self.ffn = \
            nn.Sequential(
                self.ffn_1,
                nn.Dropout(dropout),
                nn.ReLU(inplace=True),
                self.ffn_2
            ) 

    def forward(self, x:torch.Tensor):
        user_embed = self.user_embedding(x)
        return self.ffn(user_embed)

In [None]:
user_id, predict next sequence of movies to watch
user_id, predict next sequence of movies to watch (attention scores weighted by ratings)
user_id  predict rating for next movie based on watch history



In [None]:
longest increasing subsequence

In [48]:
def longest_increasing_subsequence(arr):
    f = [float("inf")]*len(arr) # f[i] - smallest value corresponding to last element for i+1 length increasing subsequence
    g = [0]*len(arr)

    max_p = 0
    for i in range(len(arr)):
        u = arr[i]
        left, right = 0, len(f)-1
        p = -1
        while left <= right:
            mid = int((left+right)/2)
            if f[mid] <= u:
                p = mid
                left = mid+1
            else:
                right = mid-1

        f[p+1] = min(f[p+1], u)
        max_p = max(max_p, p+2)
        g[i] = p+2

    out = []
    h = max_p
    for i in range(len(arr)-1, -1, -1):
        if g[i] == h and (len(out) == 0 or arr[i] <= out[-1]):
            out += [arr[i]]
            h -= 1

    return out[::-1]

In [49]:
import random
h = random.sample(range(1, 1000), k=100)
print(h)

[501, 925, 159, 179, 292, 610, 643, 234, 553, 793, 742, 132, 842, 250, 770, 348, 758, 624, 236, 650, 435, 815, 790, 558, 51, 823, 923, 35, 153, 708, 178, 623, 930, 756, 182, 666, 781, 693, 652, 773, 894, 996, 319, 711, 822, 133, 748, 575, 354, 998, 45, 792, 49, 492, 52, 131, 297, 549, 255, 601, 215, 352, 785, 281, 266, 999, 599, 715, 527, 683, 72, 130, 302, 224, 543, 873, 759, 936, 197, 468, 430, 926, 96, 37, 412, 136, 315, 256, 583, 422, 491, 434, 723, 617, 864, 476, 544, 698, 561, 363]


In [50]:
longest_increasing_subsequence(h)

[159, 179, 234, 250, 348, 435, 558, 623, 666, 693, 711, 748, 785, 873, 926]

In [None]:
df_train = pd.read_csv("datasets/avazu-ctr-prediction/train.gz", compression='gzip')
df_test = pd.read_csv("datasets/avazu-ctr-prediction/test.gz", compression='gzip')

In [141]:
df

Unnamed: 0,id,click,hour,C1,banner_pos,site_id,site_domain,site_category,app_id,app_domain,...,device_type,device_conn_type,C14,C15,C16,C17,C18,C19,C20,C21
0,1.000009e+18,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,...,1,2,15706,320,50,1722,0,35,-1,79
1,1.000017e+19,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,...,1,0,15704,320,50,1722,0,35,100084,79
2,1.000037e+19,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,...,1,0,15704,320,50,1722,0,35,100084,79
3,1.000064e+19,0,14102100,1005,0,1fbe01fe,f3845767,28905ebd,ecad2386,7801e8d9,...,1,0,15706,320,50,1722,0,35,100084,79
4,1.000068e+19,0,14102100,1005,1,fe8cc448,9166c161,0569f928,ecad2386,7801e8d9,...,1,0,18993,320,50,2161,0,35,-1,157
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
40428962,9.998753e+18,1,14103023,1005,1,e151e245,7e091613,f028772b,ecad2386,7801e8d9,...,1,0,17262,320,50,1872,3,39,100173,23
40428963,9.999038e+18,0,14103023,1005,0,85f751fd,c4e18dd6,50e219e0,9c13b419,2347f47a,...,1,2,23160,320,50,2667,0,47,-1,221
40428964,9.999585e+18,0,14103023,1005,1,f61eaaae,6b59f079,f028772b,ecad2386,7801e8d9,...,1,0,20969,320,50,2372,0,813,-1,46
40428965,9.999636e+18,1,14103023,1005,0,85f751fd,c4e18dd6,50e219e0,3c4b944d,2347f47a,...,1,0,16859,320,50,1887,3,39,100194,23


In [145]:
len(df.id.unique())

40428967

In [143]:
df.dtypes

id                  float64
click                 int64
hour                  int64
C1                    int64
banner_pos            int64
site_id              object
site_domain          object
site_category        object
app_id               object
app_domain           object
app_category         object
device_id            object
device_ip            object
device_model         object
device_type           int64
device_conn_type      int64
C14                   int64
C15                   int64
C16                   int64
C17                   int64
C18                   int64
C19                   int64
C20                   int64
C21                   int64
dtype: object

In [None]:
user_id, movie_id, rating

user_id, past N movies ratings weighted embedding (genre, year)

movie_id, genre, year

output - rating