In [1]:
import pandas as pd
import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
rating_df = pd.read_csv("data/ml-25m/ratings.csv")
rating_df.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,296,5.0,1147880044
1,1,306,3.5,1147868817
2,1,307,5.0,1147868828
3,1,665,5.0,1147878820
4,1,899,3.5,1147868510


In [3]:
movies_df = pd.read_csv("data/ml-25m/movies.csv")

In [4]:
movies_df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [5]:
no_items = movies_df['movieId'].max()

In [6]:
rating_df.shape

(25000095, 4)

In [7]:
no_users = rating_df['userId'].max()

In [8]:
# sort dataset by id and timestamp
rating_df = rating_df.sort_values(['userId', 'timestamp'])

In [9]:
# split dataset in train and eval set

In [10]:
def func1(df):
    n_rows = df.shape[0]
    df['percentile'] = np.linspace(0, 100, n_rows)
    return df

rating_df = rating_df.groupby('userId').apply(func1).reset_index(drop=True)

In [11]:
train_size = 80 # percent
train_data = rating_df[rating_df['percentile'] <= train_size]
test_data = rating_df[rating_df['percentile'] > train_size]

In [12]:
train_data.shape, test_data.shape

((19968907, 5), (5031188, 5))

In [13]:
del train_data['percentile'], test_data['percentile']

In [14]:
train_data.head()

Unnamed: 0,userId,movieId,rating,timestamp
0,1,5952,4.0,1147868053
1,1,2012,2.5,1147868068
2,1,2011,2.5,1147868079
3,1,1653,4.0,1147868097
4,1,1250,4.0,1147868414


In [15]:
train_data['rating'].value_counts()

rating
4.0    5346555
3.0    3892905
5.0    3006138
3.5    2476949
4.5    1743999
2.0    1297363
2.5     976141
1.0     611705
1.5     310786
0.5     306366
Name: count, dtype: int64

In [16]:
# assuming rating more than 3 means user like the movie else he/she didn't
rating_threshold = 3
from data import Data
from tqdm import tqdm

In [17]:
trainds = Data(train_data)
testds = Data(test_data)

In [18]:
batch_size = 2**14
n_workers = 4

traindl = torch.utils.data.DataLoader(trainds, batch_size=batch_size, shuffle=True, num_workers=1)

testdl = torch.utils.data.DataLoader(testds, batch_size=batch_size, shuffle=True, num_workers=1)
print(batch_size)

16384


In [19]:
item_feature_name = "movieId"
user_feature_name = "userId"

class ItemTower(nn.Module):
    def __init__(self, config):
        super().__init__()
        self.features = config['item']['features']
        if item_feature_name in self.features:
            self.item_module = nn.Embedding(config['item']['n_items'], config['item']['emb'])
        else:
            self.item_module = nn.Identity()
        self.mlp = nn.Sequential(
            nn.Linear(config['item']['emb'], config['item']['emb']),
            nn.GELU(),
            nn.LayerNorm(config['item']['emb'])
        )
        
    def forward(self, batch):
        item_emb = self.item_module(batch[item_feature_name].long())
        return self.mlp(item_emb)


class UserTower(nn.Module):
    def __init__(self,config):
        super().__init__()
        self.features = config['user']['features']
        if user_feature_name in self.features:
            self.user_module = nn.Embedding(config['user']['n_users'], config['user']['emb'])
        else:
            self.item_module = nn.Identity()
        self.mlp = nn.Sequential(
            nn.Linear(config['user']['emb'], config['user']['emb']),
            nn.GELU(),
            nn.LayerNorm(config['user']['emb'])
        )
        
    def forward(self, batch):
        user_emb = self.user_module(batch[user_feature_name].long())
        return self.mlp(user_emb)

class CombinerTower(nn.Module):
    def __init__(self,config):
        super().__init__()
        self.item_tower = ItemTower(config)
        self.user_tower = UserTower(config)
        user_dim, item_dim = config['user']['emb'], config['item']['emb']
        self.mlp = nn.Sequential(
            nn.Linear(user_dim+item_dim, config['emb']),
            nn.GELU(),
            nn.LayerNorm(config['emb'])
        )
        self.tasks = nn.ModuleDict({
            task_name: nn.Linear(config['emb'], out) for task_name, out in config['tasks']
        })
        
    def forward(self, batch):
        item_emb = self.item_tower(batch)
        user_emb = self.user_tower(batch)
        x = torch.concat([user_emb, item_emb], axis=1)
        x = self.mlp(x)
        res = {}
        for task_name, mod in self.tasks.items():
            res[task_name] = mod(x)
        return res

config = {
    'user':{
        'features':[
            'userId'
        ],
        'n_users':no_users + 2**10,
        'emb': 64
    },
    'item':{
        'features':[
            'movieId'
        ],
        'n_items':no_items + 2**10,
        'emb': 64
    },
    'emb':64,
    'tasks':[
        ['rating', 1]
    ]
}

In [20]:
model = CombinerTower(config)

In [21]:
import torchmetrics
from torchmetrics.classification import average_precision, ROC, Recall, Accuracy

In [22]:
class Metric:
    def __init__(self, device=torch.device("cpu"), task="binary"):
        device = device
        self.counter = 0
        self._metric = {
            "mse": torchmetrics.functional.mean_squared_error,
            "r2_score": torchmetrics.functional.r2_score
            
        }
        self._accelerator = {
            "mse": 0,
            "r2_score": 0
        }
        
    def __call__(self, output, actual) -> dict:
        out = {}
        for metric_name, mod in self._metric.items():
            out[metric_name] = mod(output, actual)
            self._accelerator[metric_name] += out[metric_name]
        self.counter += 1
        return out
    
    def get(self):
        return {
            k:v/self.counter for k,v in self._accelerator.items()
        }

    def to(self, device):
        for i in self._metric:
            if hasattr(self._metric[i], "to"):
                self._metric[i].to(device)
        return self

In [23]:
metric = Metric()
output, actual = torch.randn(20, 1).view(-1), torch.randint(0, 2, size=(20, 1)).view(-1)
print(actual, output)
print(metric(output, actual))
metric.get()

tensor([1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1]) tensor([ 0.2168,  0.6964,  0.2561,  1.2451, -1.1166,  0.8830, -0.1210, -0.5632,
         0.7163,  0.7436,  1.4557,  0.9875,  1.1642, -0.6338,  1.2480,  0.9197,
        -0.0825,  0.5866, -1.5255, -0.3285])
{'mse': tensor(0.9646), 'r2_score': tensor(-2.8973)}


{'mse': tensor(0.9646), 'r2_score': tensor(-2.8973)}

In [27]:
class Trainer:
    def __init__(self,
                model: torch.nn.Module,
                train_dataloader: torch.utils.data.DataLoader,
                eval_dataloader: torch.utils.data.DataLoader = None,
                loss_fn = torch.nn.functional.mse_loss,
                epochs: int = 5,
                optimizer_clz = torch.optim.SGD,
                optim_params: dict = {'lr':1e-2},
                device: torch.device = torch.device("cpu"),
                metric_collection = None,
                verbose = 1,
                log_step = 500,
                *args, **kwargs
                ):
        self.log_step = log_step
        self.verbose = verbose
        self.model = model.to(device)
        self.train_dl = train_dataloader
        self.eval_dl = eval_dataloader
        self.loss_fn = loss_fn
        self.optimizer = optimizer_clz(self.model.parameters(), **optim_params)
        self.epochs = epochs
        self.device = device
        self.metric_collection = metric_collection.to(device) if metric_collection else None

    def compute_loss(self, batch, output):
        loss = {}
        for k in output:
            if k in batch:
                loss[k] = self.loss_fn(output[k].view(-1).float(), batch[k].view(-1).float())
        return loss

    def compute_metric(self, batch, output, show_tensor=False):
        res = {}
        for k in output:
            if k in batch:
                y = (batch[k] >= rating_threshold).long()
                if show_tensor:
                    print(batch[k].sum(), batch[k].shape[0])
                res[k] = self.metric_collection(output[k].view(-1), y.view(-1))
        return res
    
    def set_output(self, _iter, loss, avg_loss, train=True):
        _iter.set_description(
            "{}: step loss: {:.3f}, avg_loss: {:.3f}".format(
                "Train" if train else "Eval",
                loss,
                avg_loss
            )
        )
    def train_epoch(self):
        self.model = self.model.train()
        
        _iter = tqdm(self.train_dl)
        n_batches = len(self.train_dl)
        total_loss = 0
        for _i, batch in enumerate(_iter):
            self.optimizer.zero_grad()
            output = self.model(batch)
            loss = self.compute_loss(batch, output)
            # multiple task we need better way to gives weights, ryt now it is uniform
            loss = sum([loss[l] for l in loss])/(len(loss) + 1e-6)
            
            loss.backward()
            self.optimizer.step()
            total_loss += loss.cpu().item()
            
            self.set_output(_iter, loss.item(), total_loss/(_i+1), train=True)
            show_tensor = False
            if self.verbose > 0 \
                and _i % self.log_step == 0 and _i != 0:
                print(self.metric_collection.get())
                show_tensor = True
            with torch.no_grad():
                self.compute_metric(batch, output, show_tensor)
        return {
            'loss': total_loss/(n_batches),
            'mode': 'Train'
        }
    
    @torch.no_grad()
    def eval_epoch(self):
        self.model = self.model.eval()
        
        _iter = tqdm(self.eval_dl)
        n_batches = len(self.eval_dl)
        total_loss = 0
        for _i, batch in enumerate(_iter):
            output = self.model(batch)
            loss = self.compute_loss(batch, output)
            # multiple task we need better way to gives weights, ryt now it is uniform
            loss = sum([loss[l] for l in loss])/(len(loss) + 1e-6)
            
            total_loss += loss.cpu().item()
            self.set_output(_iter, loss.item(), total_loss/(_i+1), train=False)
            
            show_tensor = False
            if self.verbose > 0 \
                and _i % self.log_step == 0 and _i != 0:
                print(self.metric_collection.get())
                show_tensor = True
            self.compute_metric(batch, output, show_tensor=show_tensor)
                
        return {
            'loss': total_loss/(n_batches),
            'mode': 'Eval'
        }

    def fit(self):
        for epoch in range(1, self.epochs+1):
            print("EPOCH:", epoch)
            self.train_epoch()
            eval_output = self.eval_epoch()
            print(eval_output)

In [28]:
trainer = Trainer(
    model=model, train_dataloader=traindl, eval_dataloader=testdl, 
    loss_fn=F.mse_loss,
    metric_collection = Metric(),
    log_step = 100
)

In [29]:
trainer.fit()

EPOCH: 1


Train: step loss: 1.196, avg_loss: 1.745:   8%|▊         | 101/1219 [01:57<20:24,  1.10s/it]  

{'mse': tensor(7.0783), 'r2_score': tensor(-47.8666)}
tensor(58193., dtype=torch.float64) 16384


Train: step loss: 1.166, avg_loss: 1.464:  16%|█▋        | 201/1219 [03:48<18:46,  1.11s/it]

{'mse': tensor(7.3138), 'r2_score': tensor(-49.5219)}
tensor(58097., dtype=torch.float64) 16384


Train: step loss: 1.130, avg_loss: 1.361:  25%|██▍       | 301/1219 [05:39<17:12,  1.12s/it]

{'mse': tensor(7.3951), 'r2_score': tensor(-50.0997)}
tensor(58301.5000, dtype=torch.float64) 16384


Train: step loss: 1.115, avg_loss: 1.305:  33%|███▎      | 401/1219 [07:30<15:04,  1.11s/it]

{'mse': tensor(7.4394), 'r2_score': tensor(-50.4454)}
tensor(58160., dtype=torch.float64) 16384


Train: step loss: 1.149, avg_loss: 1.270:  41%|████      | 501/1219 [09:21<13:19,  1.11s/it]

{'mse': tensor(7.4647), 'r2_score': tensor(-50.6014)}
tensor(58035.5000, dtype=torch.float64) 16384


Train: step loss: 1.124, avg_loss: 1.245:  49%|████▉     | 601/1219 [11:12<11:36,  1.13s/it]

{'mse': tensor(7.4824), 'r2_score': tensor(-50.7331)}
tensor(58401.5000, dtype=torch.float64) 16384


Train: step loss: 1.103, avg_loss: 1.227:  58%|█████▊    | 701/1219 [13:03<09:31,  1.10s/it]

{'mse': tensor(7.4957), 'r2_score': tensor(-50.8221)}
tensor(58018.5000, dtype=torch.float64) 16384


Train: step loss: 1.120, avg_loss: 1.212:  66%|██████▌   | 801/1219 [14:54<07:42,  1.11s/it]

{'mse': tensor(7.5053), 'r2_score': tensor(-50.8946)}
tensor(58080.5000, dtype=torch.float64) 16384


Train: step loss: 1.095, avg_loss: 1.200:  74%|███████▍  | 901/1219 [16:45<05:54,  1.11s/it]

{'mse': tensor(7.5139), 'r2_score': tensor(-50.9561)}
tensor(58217.5000, dtype=torch.float64) 16384


Train: step loss: 1.099, avg_loss: 1.190:  82%|████████▏ | 1001/1219 [18:36<04:03,  1.12s/it]

{'mse': tensor(7.5211), 'r2_score': tensor(-51.0101)}
tensor(58089., dtype=torch.float64) 16384


Train: step loss: 1.082, avg_loss: 1.181:  90%|█████████ | 1101/1219 [20:28<02:12,  1.12s/it]

{'mse': tensor(7.5267), 'r2_score': tensor(-51.0456)}
tensor(58118., dtype=torch.float64) 16384


Train: step loss: 1.077, avg_loss: 1.174:  99%|█████████▊| 1201/1219 [22:22<00:20,  1.15s/it]

{'mse': tensor(7.5317), 'r2_score': tensor(-51.0836)}
tensor(58250.5000, dtype=torch.float64) 16384


Train: step loss: 1.095, avg_loss: 1.172: 100%|██████████| 1219/1219 [22:49<00:00,  1.12s/it]
Eval: step loss: 1.087, avg_loss: 1.107:  33%|███▎      | 101/308 [01:58<04:02,  1.17s/it]

{'mse': tensor(7.5364), 'r2_score': tensor(-50.8364)}
tensor(56874.5000, dtype=torch.float64) 16384


Eval: step loss: 1.106, avg_loss: 1.107:  65%|██████▌   | 201/308 [03:59<01:56,  1.09s/it]

{'mse': tensor(7.5397), 'r2_score': tensor(-50.6178)}
tensor(56958., dtype=torch.float64) 16384


Eval: step loss: 1.118, avg_loss: 1.108:  98%|█████████▊| 301/308 [06:01<00:08,  1.24s/it]

{'mse': tensor(7.5429), 'r2_score': tensor(-50.4252)}
tensor(56822., dtype=torch.float64) 16384


Eval: step loss: 1.156, avg_loss: 1.108: 100%|██████████| 308/308 [06:13<00:00,  1.21s/it]


{'loss': 1.1081227544066194, 'mode': 'Eval'}
EPOCH: 2


Train: step loss: 1.085, avg_loss: 1.088:   8%|▊         | 101/1219 [02:17<22:41,  1.22s/it]

{'mse': tensor(7.5457), 'r2_score': tensor(-50.4717)}
tensor(58209.5000, dtype=torch.float64) 16384


Train: step loss: 1.070, avg_loss: 1.087:  16%|█▋        | 201/1219 [04:25<21:11,  1.25s/it]

{'mse': tensor(7.5479), 'r2_score': tensor(-50.5253)}
tensor(58238.5000, dtype=torch.float64) 16384


Train: step loss: 1.067, avg_loss: 1.085:  25%|██▍       | 301/1219 [06:32<19:10,  1.25s/it]

{'mse': tensor(7.5505), 'r2_score': tensor(-50.5843)}
tensor(58388.5000, dtype=torch.float64) 16384


Train: step loss: 1.072, avg_loss: 1.083:  33%|███▎      | 401/1219 [08:37<17:24,  1.28s/it]

{'mse': tensor(7.5528), 'r2_score': tensor(-50.6373)}
tensor(58010.5000, dtype=torch.float64) 16384


Train: step loss: 1.108, avg_loss: 1.081:  41%|████      | 501/1219 [10:39<13:25,  1.12s/it]

{'mse': tensor(7.5547), 'r2_score': tensor(-50.6749)}
tensor(57747.5000, dtype=torch.float64) 16384


Train: step loss: 1.060, avg_loss: 1.080:  49%|████▉     | 601/1219 [14:28<24:24,  2.37s/it]

{'mse': tensor(7.5567), 'r2_score': tensor(-50.7140)}
tensor(58230.5000, dtype=torch.float64) 16384


Train: step loss: 1.049, avg_loss: 1.079:  58%|█████▊    | 701/1219 [18:19<19:56,  2.31s/it]

{'mse': tensor(7.5585), 'r2_score': tensor(-50.7495)}
tensor(58039.5000, dtype=torch.float64) 16384


Train: step loss: 1.061, avg_loss: 1.077:  66%|██████▌   | 801/1219 [20:11<07:42,  1.11s/it]

{'mse': tensor(7.5600), 'r2_score': tensor(-50.7831)}
tensor(58312., dtype=torch.float64) 16384


Train: step loss: 1.064, avg_loss: 1.076:  74%|███████▍  | 901/1219 [22:01<05:51,  1.11s/it]

{'mse': tensor(7.5615), 'r2_score': tensor(-50.8139)}
tensor(57938.5000, dtype=torch.float64) 16384


Train: step loss: 1.057, avg_loss: 1.075:  82%|████████▏ | 1001/1219 [23:50<03:59,  1.10s/it]

{'mse': tensor(7.5630), 'r2_score': tensor(-50.8413)}
tensor(58183., dtype=torch.float64) 16384


Train: step loss: 1.076, avg_loss: 1.074:  90%|█████████ | 1101/1219 [25:47<02:11,  1.12s/it]

{'mse': tensor(7.5646), 'r2_score': tensor(-50.8709)}
tensor(57838.5000, dtype=torch.float64) 16384


Train: step loss: 1.080, avg_loss: 1.073:  99%|█████████▊| 1201/1219 [27:49<00:21,  1.22s/it]

{'mse': tensor(7.5661), 'r2_score': tensor(-50.9014)}
tensor(57956.5000, dtype=torch.float64) 16384


Train: step loss: 1.039, avg_loss: 1.072: 100%|██████████| 1219/1219 [28:16<00:00,  1.39s/it]
Eval: step loss: 1.065, avg_loss: 1.082:  33%|███▎      | 101/308 [02:10<04:31,  1.31s/it]

{'mse': tensor(7.5653), 'r2_score': tensor(-50.7823)}
tensor(56884.5000, dtype=torch.float64) 16384


Eval: step loss: 1.075, avg_loss: 1.082:  65%|██████▌   | 201/308 [04:20<02:19,  1.30s/it]

{'mse': tensor(7.5644), 'r2_score': tensor(-50.6667)}
tensor(57075., dtype=torch.float64) 16384


Eval: step loss: 1.093, avg_loss: 1.082:  98%|█████████▊| 301/308 [06:39<00:09,  1.38s/it]

{'mse': tensor(7.5634), 'r2_score': tensor(-50.5623)}
tensor(56647.5000, dtype=torch.float64) 16384


Eval: step loss: 1.186, avg_loss: 1.082: 100%|██████████| 308/308 [06:52<00:00,  1.34s/it]


{'loss': 1.082247995323949, 'mode': 'Eval'}
EPOCH: 3


Train: step loss: 1.087, avg_loss: 1.056:   8%|▊         | 101/1219 [02:30<27:11,  1.46s/it]

{'mse': tensor(7.5647), 'r2_score': tensor(-50.5902)}
tensor(57935., dtype=torch.float64) 16384


Train: step loss: 1.049, avg_loss: 1.055:  16%|█▋        | 201/1219 [04:49<23:10,  1.37s/it]

{'mse': tensor(7.5660), 'r2_score': tensor(-50.6194)}
tensor(58223., dtype=torch.float64) 16384


Train: step loss: 1.061, avg_loss: 1.055:  25%|██▍       | 301/1219 [07:08<20:56,  1.37s/it]

{'mse': tensor(7.5671), 'r2_score': tensor(-50.6473)}
tensor(58056., dtype=torch.float64) 16384


Train: step loss: 1.042, avg_loss: 1.054:  33%|███▎      | 401/1219 [09:28<19:09,  1.40s/it]

{'mse': tensor(7.5682), 'r2_score': tensor(-50.6759)}
tensor(58189., dtype=torch.float64) 16384


Train: step loss: 1.043, avg_loss: 1.053:  41%|████      | 501/1219 [11:46<16:50,  1.41s/it]

{'mse': tensor(7.5693), 'r2_score': tensor(-50.7027)}
tensor(58203., dtype=torch.float64) 16384


Train: step loss: 1.041, avg_loss: 1.052:  49%|████▉     | 601/1219 [14:03<14:01,  1.36s/it]

{'mse': tensor(7.5703), 'r2_score': tensor(-50.7285)}
tensor(57725.5000, dtype=torch.float64) 16384


Train: step loss: 1.048, avg_loss: 1.052:  58%|█████▊    | 701/1219 [16:22<12:05,  1.40s/it]

{'mse': tensor(7.5713), 'r2_score': tensor(-50.7507)}
tensor(58327., dtype=torch.float64) 16384


Train: step loss: 1.052, avg_loss: 1.051:  66%|██████▌   | 801/1219 [18:41<09:07,  1.31s/it]

{'mse': tensor(7.5723), 'r2_score': tensor(-50.7757)}
tensor(58085.5000, dtype=torch.float64) 16384


Train: step loss: 1.063, avg_loss: 1.050:  74%|███████▍  | 901/1219 [20:53<07:40,  1.45s/it]

{'mse': tensor(7.5732), 'r2_score': tensor(-50.7934)}
tensor(58009., dtype=torch.float64) 16384


Train: step loss: 1.027, avg_loss: 1.050:  82%|████████▏ | 1001/1219 [23:06<04:44,  1.30s/it]

{'mse': tensor(7.5742), 'r2_score': tensor(-50.8140)}
tensor(57979., dtype=torch.float64) 16384


Train: step loss: 1.031, avg_loss: 1.049:  90%|█████████ | 1101/1219 [25:17<02:33,  1.30s/it]

{'mse': tensor(7.5751), 'r2_score': tensor(-50.8333)}
tensor(57851., dtype=torch.float64) 16384


Train: step loss: 1.045, avg_loss: 1.048:  99%|█████████▊| 1201/1219 [27:20<00:21,  1.22s/it]

{'mse': tensor(7.5760), 'r2_score': tensor(-50.8531)}
tensor(58244.5000, dtype=torch.float64) 16384


Train: step loss: 1.032, avg_loss: 1.048: 100%|██████████| 1219/1219 [27:46<00:00,  1.37s/it]
Eval: step loss: 1.067, avg_loss: 1.067:  33%|███▎      | 101/308 [01:52<03:51,  1.12s/it]

{'mse': tensor(7.5761), 'r2_score': tensor(-50.7844)}
tensor(56835.5000, dtype=torch.float64) 16384


Eval: step loss: 1.074, avg_loss: 1.067:  65%|██████▌   | 201/308 [03:40<02:01,  1.14s/it]

{'mse': tensor(7.5760), 'r2_score': tensor(-50.7130)}
tensor(56894.5000, dtype=torch.float64) 16384


Eval: step loss: 1.055, avg_loss: 1.066:  98%|█████████▊| 301/308 [05:26<00:07,  1.12s/it]

{'mse': tensor(7.5759), 'r2_score': tensor(-50.6461)}
tensor(57063., dtype=torch.float64) 16384


Eval: step loss: 1.029, avg_loss: 1.066: 100%|██████████| 308/308 [05:38<00:00,  1.10s/it]


{'loss': 1.0656904818175674, 'mode': 'Eval'}
EPOCH: 4


Train: step loss: 1.056, avg_loss: 1.038:   8%|▊         | 101/1219 [01:55<21:14,  1.14s/it]

{'mse': tensor(7.5767), 'r2_score': tensor(-50.6621)}
tensor(58061., dtype=torch.float64) 16384


Train: step loss: 1.011, avg_loss: 1.037:  16%|█▋        | 201/1219 [03:47<19:42,  1.16s/it]

{'mse': tensor(7.5777), 'r2_score': tensor(-50.6838)}
tensor(58283., dtype=torch.float64) 16384


Train: step loss: 1.057, avg_loss: 1.037:  25%|██▍       | 301/1219 [05:38<17:38,  1.15s/it]

{'mse': tensor(7.5784), 'r2_score': tensor(-50.7030)}
tensor(58000., dtype=torch.float64) 16384


Train: step loss: 1.036, avg_loss: 1.036:  33%|███▎      | 401/1219 [07:28<15:43,  1.15s/it]

{'mse': tensor(7.5792), 'r2_score': tensor(-50.7225)}
tensor(58130., dtype=torch.float64) 16384


Train: step loss: 1.022, avg_loss: 1.035:  41%|████      | 501/1219 [09:19<13:06,  1.10s/it]

{'mse': tensor(7.5799), 'r2_score': tensor(-50.7405)}
tensor(58085.5000, dtype=torch.float64) 16384


Train: step loss: 1.028, avg_loss: 1.035:  49%|████▉     | 601/1219 [11:09<10:51,  1.05s/it]

{'mse': tensor(7.5806), 'r2_score': tensor(-50.7575)}
tensor(58350., dtype=torch.float64) 16384


Train: step loss: 1.051, avg_loss: 1.034:  58%|█████▊    | 701/1219 [13:01<10:02,  1.16s/it]

{'mse': tensor(7.5813), 'r2_score': tensor(-50.7754)}
tensor(58086.5000, dtype=torch.float64) 16384


Train: step loss: 1.020, avg_loss: 1.034:  66%|██████▌   | 801/1219 [14:52<08:09,  1.17s/it]

{'mse': tensor(7.5820), 'r2_score': tensor(-50.7918)}
tensor(58189., dtype=torch.float64) 16384


Train: step loss: 1.052, avg_loss: 1.033:  74%|███████▍  | 901/1219 [16:43<05:59,  1.13s/it]

{'mse': tensor(7.5826), 'r2_score': tensor(-50.8091)}
tensor(58047., dtype=torch.float64) 16384


Train: step loss: 1.029, avg_loss: 1.033:  82%|████████▏ | 1001/1219 [18:34<04:11,  1.15s/it]

{'mse': tensor(7.5832), 'r2_score': tensor(-50.8245)}
tensor(58149., dtype=torch.float64) 16384


Train: step loss: 1.017, avg_loss: 1.032:  90%|█████████ | 1101/1219 [20:26<02:17,  1.17s/it]

{'mse': tensor(7.5838), 'r2_score': tensor(-50.8388)}
tensor(58235., dtype=torch.float64) 16384


Train: step loss: 1.012, avg_loss: 1.031:  99%|█████████▊| 1201/1219 [22:18<00:21,  1.18s/it]

{'mse': tensor(7.5845), 'r2_score': tensor(-50.8561)}
tensor(58249.5000, dtype=torch.float64) 16384


Train: step loss: 1.008, avg_loss: 1.031: 100%|██████████| 1219/1219 [22:43<00:00,  1.12s/it]
Eval: step loss: 1.035, avg_loss: 1.053:  33%|███▎      | 101/308 [01:50<03:36,  1.05s/it]

{'mse': tensor(7.5830), 'r2_score': tensor(-50.7957)}
tensor(56894.5000, dtype=torch.float64) 16384


Eval: step loss: 1.044, avg_loss: 1.051:  65%|██████▌   | 201/308 [03:38<01:51,  1.04s/it]

{'mse': tensor(7.5815), 'r2_score': tensor(-50.7342)}
tensor(56901.5000, dtype=torch.float64) 16384


Eval: step loss: 1.053, avg_loss: 1.052:  98%|█████████▊| 301/308 [05:26<00:07,  1.14s/it]

{'mse': tensor(7.5801), 'r2_score': tensor(-50.6755)}
tensor(56823., dtype=torch.float64) 16384


Eval: step loss: 1.008, avg_loss: 1.051: 100%|██████████| 308/308 [05:37<00:00,  1.10s/it]


{'loss': 1.0514305605516805, 'mode': 'Eval'}
EPOCH: 5


Train: step loss: 1.029, avg_loss: 1.023:   8%|▊         | 101/1219 [01:56<21:53,  1.18s/it]

{'mse': tensor(7.5807), 'r2_score': tensor(-50.6887)}
tensor(58309., dtype=torch.float64) 16384


Train: step loss: 1.038, avg_loss: 1.023:  16%|█▋        | 201/1219 [03:47<19:11,  1.13s/it]

{'mse': tensor(7.5813), 'r2_score': tensor(-50.7053)}
tensor(58179., dtype=torch.float64) 16384


Train: step loss: 0.998, avg_loss: 1.023:  25%|██▍       | 301/1219 [05:38<16:42,  1.09s/it]

{'mse': tensor(7.5819), 'r2_score': tensor(-50.7198)}
tensor(58329.5000, dtype=torch.float64) 16384


Train: step loss: 1.014, avg_loss: 1.022:  33%|███▎      | 401/1219 [07:29<15:37,  1.15s/it]

{'mse': tensor(7.5825), 'r2_score': tensor(-50.7360)}
tensor(58229.5000, dtype=torch.float64) 16384


Train: step loss: 1.022, avg_loss: 1.022:  41%|████      | 501/1219 [09:20<13:59,  1.17s/it]

{'mse': tensor(7.5832), 'r2_score': tensor(-50.7511)}
tensor(58236., dtype=torch.float64) 16384


Train: step loss: 1.019, avg_loss: 1.021:  49%|████▉     | 601/1219 [11:10<11:24,  1.11s/it]

{'mse': tensor(7.5837), 'r2_score': tensor(-50.7657)}
tensor(58145.5000, dtype=torch.float64) 16384


Train: step loss: 1.029, avg_loss: 1.021:  58%|█████▊    | 701/1219 [13:00<09:32,  1.11s/it]

{'mse': tensor(7.5842), 'r2_score': tensor(-50.7773)}
tensor(57981., dtype=torch.float64) 16384


Train: step loss: 1.008, avg_loss: 1.020:  66%|██████▌   | 801/1219 [14:50<07:51,  1.13s/it]

{'mse': tensor(7.5848), 'r2_score': tensor(-50.7913)}
tensor(58048., dtype=torch.float64) 16384


Train: step loss: 1.050, avg_loss: 1.020:  74%|███████▍  | 901/1219 [16:39<05:58,  1.13s/it]

{'mse': tensor(7.5854), 'r2_score': tensor(-50.8046)}
tensor(57985.5000, dtype=torch.float64) 16384


Train: step loss: 1.002, avg_loss: 1.020:  82%|████████▏ | 1001/1219 [18:29<04:15,  1.17s/it]

{'mse': tensor(7.5860), 'r2_score': tensor(-50.8168)}
tensor(58183.5000, dtype=torch.float64) 16384


Train: step loss: 1.016, avg_loss: 1.019:  90%|█████████ | 1101/1219 [20:31<02:26,  1.24s/it]

{'mse': tensor(7.5865), 'r2_score': tensor(-50.8311)}
tensor(58010., dtype=torch.float64) 16384


Train: step loss: 0.996, avg_loss: 1.019:  99%|█████████▊| 1201/1219 [22:30<00:21,  1.19s/it]

{'mse': tensor(7.5871), 'r2_score': tensor(-50.8440)}
tensor(58099.5000, dtype=torch.float64) 16384


Train: step loss: 1.000, avg_loss: 1.019: 100%|██████████| 1219/1219 [22:58<00:00,  1.13s/it]
Eval: step loss: 1.054, avg_loss: 1.041:  33%|███▎      | 101/308 [02:01<03:54,  1.13s/it]

{'mse': tensor(7.5850), 'r2_score': tensor(-50.7909)}
tensor(56552.5000, dtype=torch.float64) 16384


Eval: step loss: 1.040, avg_loss: 1.040:  65%|██████▌   | 201/308 [04:00<02:02,  1.15s/it]

{'mse': tensor(7.5830), 'r2_score': tensor(-50.7362)}
tensor(57068.5000, dtype=torch.float64) 16384


Eval: step loss: 1.032, avg_loss: 1.040:  98%|█████████▊| 301/308 [05:56<00:08,  1.15s/it]

{'mse': tensor(7.5810), 'r2_score': tensor(-50.6842)}
tensor(56946.5000, dtype=torch.float64) 16384


Eval: step loss: 0.964, avg_loss: 1.040: 100%|██████████| 308/308 [06:08<00:00,  1.20s/it]

{'loss': 1.0402555423123496, 'mode': 'Eval'}





#### Evaluation Metric

`{'map': tensor(0.8403), 'accuracy': tensor(0.8199), 'recall': tensor(0.9982)}`

In [30]:
torch.save(model.state_dict(), "./checkpoint/nb_regression_rating_chk_v1.pt")

In [31]:
model.item_tower({"movieId":torch.tensor([100]).view(1,1)})

tensor([[[-0.3723, -0.6123, -0.3078,  2.0979, -0.5481, -0.0403,  0.1010,
          -0.7463, -0.9075, -0.8572,  3.5413, -0.5817,  0.1744, -0.8414,
          -0.8148,  1.6080,  1.0704, -0.4259,  0.8676,  1.5981, -0.7697,
          -0.0850, -0.7220, -0.3151, -0.9655,  0.5732, -0.6461, -0.8815,
          -0.5222,  0.1153,  1.7526, -0.0661,  2.0812, -0.0909, -0.0560,
          -0.5846,  2.7871,  0.6511, -0.0607, -0.5849, -0.9713, -0.4278,
          -0.9372, -0.6656, -0.0122, -0.3765,  1.2492, -0.7099, -0.2800,
          -0.9229, -1.0000,  1.8982, -0.7793, -0.9426,  1.2269, -0.9466,
          -0.4407, -0.8189, -0.2898,  0.7152,  1.0276,  0.8229, -0.7092,
          -0.9390]]], grad_fn=<NativeLayerNormBackward0>)

#### Generate embeddings

In [32]:
import torch
import pandas as pd

In [33]:
model = CombinerTower(config)

model.load_state_dict(torch.load("./checkpoint/nb_regression_rating_chk_v1.pt"))

<All keys matched successfully>

In [34]:
movie_df = pd.read_csv("./data/ml-25m/movies.csv")

In [35]:
movie_df.head()

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


In [36]:
movie_df.shape

(62423, 3)

In [37]:
emb_dim = 64
with torch.no_grad():
    movie_df['embedding'] = movie_df['movieId'].map(lambda x: model.item_tower({"movieId": torch.tensor([x])}).view(emb_dim))

In [38]:
emb = torch.stack(movie_df['embedding'].values.tolist(), axis=0)

In [39]:
emb = emb/(emb.norm(dim=1).view(-1,1) + 1e-7)

In [40]:
movie_df['norm_embedding'] = emb.unbind(dim=0)

In [41]:
movie_df.head()

Unnamed: 0,movieId,title,genres,embedding,norm_embedding
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,"[tensor(2.7775), tensor(-0.9256), tensor(0.829...","[tensor(0.3366), tensor(-0.1122), tensor(0.100..."
1,2,Jumanji (1995),Adventure|Children|Fantasy,"[tensor(2.1974), tensor(-0.2702), tensor(0.746...","[tensor(0.2667), tensor(-0.0328), tensor(0.090..."
2,3,Grumpier Old Men (1995),Comedy|Romance,"[tensor(-0.8682), tensor(-0.9068), tensor(-0.5...","[tensor(-0.1061), tensor(-0.1109), tensor(-0.0..."
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,"[tensor(0.6428), tensor(0.7264), tensor(-0.700...","[tensor(0.0783), tensor(0.0885), tensor(-0.085..."
4,5,Father of the Bride Part II (1995),Comedy,"[tensor(-0.7625), tensor(-0.5361), tensor(-0.3...","[tensor(-0.0918), tensor(-0.0645), tensor(-0.0..."


In [43]:
def get_similar(movie_df, movie_id=None, k=20):
    if movie_id is None:
        movie_id = np.random.randint(1, movie_df.shape[0])        
    query = movie_df.iloc[movie_id]
    query_emb = query['norm_embedding']
    sim_scores = (emb @ query_emb.view(1,-1).T)
    sorted_idx = torch.argsort(sim_scores.view(-1),descending=True)
    top_k_idx = sorted_idx[1:k+1]
    res = movie_df.iloc[top_k_idx]
    titles = res['title'].values
    genres = res['genres'].values
    scores = sim_scores[top_k_idx]
    return {
        "query": query[['movieId', 'title', 'genres']].to_dict(),
        'result': [[t,g,s] for t,g,s in zip(titles, genres, scores)]
    }

In [78]:
get_similar(movie_df, k=10)

{'query': {'movieId': 8477,
  'title': 'Jetée, La (1962)',
  'genres': 'Romance|Sci-Fi'},
 'result': [['Gappa: The Triphibian Monsters (AKA Monster from a Prehistoric Planet) (Daikyojû Gappa) (1967)',
   'Sci-Fi',
   tensor([0.7777])],
  ['And Now... Ladies and Gentlemen... (2002)',
   'Romance|Thriller',
   tensor([0.7185])],
  ['Death Trance (2005)', 'Action|Sci-Fi|Thriller', tensor([0.6945])],
  ['The Outlaws Is Coming (1965)', 'Comedy|Western', tensor([0.6753])],
  ['Nastazja (1994)', 'Drama', tensor([0.6747])],
  ['Original Kings of Comedy, The (2000)',
   'Comedy|Documentary',
   tensor([0.6724])],
  ['Adventures of Don Juan (1948)', 'Adventure|Romance', tensor([0.6546])],
  ['One and Only, The (2002)', 'Comedy|Romance', tensor([0.6543])],
  ['Halvdan Viking (2018)', 'Children', tensor([0.6533])],
  ['100 Days (1991)', 'Horror|Romance|Thriller', tensor([0.6481])]]}

In [61]:
rating_df.head()

Unnamed: 0,userId,movieId,rating,timestamp,percentile
0,1,5952,4.0,1147868053,0.0
1,1,2012,2.5,1147868068,1.449275
2,1,2011,2.5,1147868079,2.898551
3,1,1653,4.0,1147868097,4.347826
4,1,1250,4.0,1147868414,5.797101


In [62]:
test_data.head()

Unnamed: 0,userId,movieId,rating,timestamp
56,1,5912,3.0,1147878698
57,1,5767,5.0,1147878729
58,1,665,5.0,1147878820
59,1,2573,4.0,1147878923
60,1,27266,4.5,1147879365


In [66]:
model({"userId":torch.tensor([1]), 'movieId':torch.tensor([27266])})

{'rating': tensor([[3.6266]], grad_fn=<AddmmBackward0>)}