# Modelos: Feature Weighted Linear Stacking

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import os

# Project layout, expressed relative to this notebook's directory.
BASE_PATH     = '../..'
LIB_PATH      = f'{BASE_PATH}/lib'
DATASET_PATH  = f'{BASE_PATH}/datasets'
WEIGHTS_PATH  = f'{BASE_PATH}/weights'
STACKING_PATH = f'{DATASET_PATH}/stacking'

METRICS_PATH  = f'{BASE_PATH}/metrics/stacking'

# Names of the base models whose prediction files are stacked.
MODELS        = ['knn_user', 'knn_item', 'knn_ensemble', 'gmf', 'biased_gmf', 'nnmf', 'deep_fm']

# Storage URL handed to optuna.create_study(storage=...).
# Set OPTUNA_STORAGE_URL to point at another database without editing (or committing)
# credentials in the notebook; the fallback is the original local development database.
DATABASE_PATH = os.environ.get('OPTUNA_STORAGE_URL', 'mysql://root:1234@localhost/example')

In [3]:
import sys
sys.path.append(LIB_PATH)

import torch
from torch.optim import Adam
from torch.utils.data import DataLoader

from torchmetrics import R2Score
from torch.nn import MSELoss

import pytorch_common.util as pu
from pytorch_common.callbacks.output import Logger

import data.dataset as ds

import metric as mt

import pandas as pd

import model as ml
import data.plot as pl

import util as ut

from bunch import Bunch

import logging

from abc import ABC

import optuna

2023-10-03 19:23:54.275672: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-10-03 19:23:55.514989: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:996] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-10-03 19:23:55.602734: W tensorflow/core/common_runtime/gpu/gpu_device.cc:1956] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your 

<Figure size 640x480 with 0 Axes>

## Helper classes

Predictor based on a static data table:

In [4]:
class Predictor(ABC):
    """Base class for predictors.

    ``predict`` is a placeholder here: it performs no work and returns ``None``.
    """

    def predict(self, x):
        """Placeholder implementation; ignores ``x`` and returns ``None``."""
        return None

class StaticPredictor(Predictor):
    """Predictor that serves values looked up from a precomputed table.

    The table is wrapped in a ``ut.ValueIndex`` built from ``df``, ``rating_col`` and the
    ``[user_id_col, item_id_col]`` key columns.
    NOTE(review): presumably the index maps each (user, item) pair to its ``rating_col``
    value - confirm against ``ut.ValueIndex``.

    Instances can be used either through ``predict(batch)`` (the ``Predictor`` interface)
    or by calling them directly; both return the same tensor.
    """

    def __init__(
        self, 
        df,
        rating_col,
        user_id_col ='user_id',
        item_id_col ='movie_id'
    ):  
        """
        Args:
            df: table holding the key columns and ``rating_col``.
            rating_col: name of the column whose values are served.
            user_id_col: name of the user key column.
            item_id_col: name of the item key column.
        """
        self.index = ut.ValueIndex(df, rating_col, [user_id_col, item_id_col])

    def predict(self, x):
        """Look up the batch ``x`` in the index and return the result as a tensor.

        ``x`` must expose ``tolist()`` (e.g. a tensor of ``[user_id, item_id]`` rows);
        the resulting list is used to index ``self.index``.
        """
        return torch.tensor(self.index[x.tolist()])

    def __call__(self, user_item_batch): 
        """Callable form of :meth:`predict`, kept so existing callers keep working."""
        return self.predict(user_item_batch)

    def __repr__(self): return str(self.index)
    def __str__(self): return  str(self.index)

## Model

In [5]:
def load_dataset(path, models, post_fix):
    """Load the prediction file of every requested model into one long DataFrame.

    For each name in ``models`` the file ``{path}/{model}_{post_fix}.json`` is read with
    ``ut.load_df`` and a ``model`` column holding that name is added.

    Args:
        path: directory containing the per-model files.
        models: iterable of model names to load.
        post_fix: file-name suffix placed after the model name (e.g. ``'train'``).

    Returns:
        The per-model frames concatenated with ``pd.concat`` (row indices are not reset).
    """
    frames = []
    # Iterate the `models` argument, not the notebook-level MODELS constant, so the
    # function loads exactly what the caller asked for.
    for model in models:
        # Named `model_df` to avoid shadowing the `ds` (data.dataset) module alias.
        model_df = ut.load_df(f'{path}/{model}_{post_fix}.json')
        model_df['model'] = model
        frames.append(model_df)

    return pd.concat(frames)


def build_dataset(df):
    """Reshape long per-model predictions into one row per (user_id, movie_id, rating).

    ``df`` needs the columns ``user_id``, ``movie_id``, ``rating``, ``model`` and
    ``prediction``. The result has the three key columns followed by one column per
    distinct ``model`` value holding that model's ``prediction``; rows containing any
    missing value are dropped.
    """
    key_cols = ['user_id', 'movie_id', 'rating']

    wide = df.pivot(index=key_cols, columns='model', values=['prediction'])

    return (
        wide
        .droplevel(0, axis=1)          # discard the outer 'prediction' column level
        .reset_index(names=key_cols)   # turn the key index levels back into columns
        .dropna()
    )

In [6]:
def create_data_loader(df, batch_size=256, num_workers=24, pin_memory=True, shuffle=False):
    """Wrap ``df`` in a ``ds.BasicDataset`` and return a ``DataLoader`` over it.

    The dataset uses ``user_id`` and ``movie_id`` as feature columns (typed as
    ``torch.long``) and ``rating`` as the target column.

    Args:
        df: table with ``user_id``, ``movie_id`` and ``rating`` columns.
        batch_size: number of samples per batch.
        num_workers: number of loader worker processes; lower it on machines with
            fewer cores (``0`` loads in the main process).
        pin_memory: forwarded to ``DataLoader``.
        shuffle: forwarded to ``DataLoader``.

    Returns:
        The configured ``torch.utils.data.DataLoader``.
    """
    data_set = ds.BasicDataset(
        df, 
        feature_cols = ['user_id', 'movie_id'], 
        target_col   = 'rating', 
        feat_type    = torch.long
    )

    return DataLoader(
        data_set,
        batch_size  = batch_size,
        num_workers = num_workers,
        pin_memory  = pin_memory,
        shuffle     = shuffle
    )

## Setup

In [7]:
pu.LoggerBuilder().on_console().build()

<RootLogger root (INFO)>

In [8]:
import pytorch_common
pytorch_common.__version__

'0.3.8'

In [9]:
torch.__version__

'2.0.1+cu118'

In [10]:
pu.set_device_name('gpu')

pu.get_device(), torch.cuda.is_available()

(device(type='cuda', index=0), True)

In [11]:
train_df = build_dataset(load_dataset(STACKING_PATH, MODELS, 'train'))
test_df  = build_dataset(load_dataset(STACKING_PATH, MODELS, 'test'))

In [12]:
test_df.head(2)

model,user_id,movie_id,rating,biased_gmf,deep_fm,gmf,knn_ensemble,knn_item,knn_user,nnmf
0,647,1968,4,3.276734,3.663881,3.571568,3.776731,3.623924,3.929539,3.467732
1,647,5903,4,3.717464,3.544173,3.405541,3.701526,3.669733,3.733319,3.49981


In [13]:
test_df.shape

(4475, 10)

In [14]:
# Sanity check: gather the (user_id, movie_id) pairs of each split and display their
# intersection. An empty set means no pair occurs in both train_df and test_df.
a = set(train_df[['user_id', 'movie_id']].itertuples(index=False))
b = set(test_df[['user_id', 'movie_id']].itertuples(index=False))

a.intersection(b)

set()

In [15]:
fc = ml.FeatureWeightFunction(ml.Range(end=3), ml.Range(begin=3)).to(pu.get_device())

In [16]:
user_item = torch.randint(0, 5, (5, 2))
ratings = torch.randint(1, 5, (5, 1))

user_item, ratings

(tensor([[3, 1],
         [2, 4],
         [3, 2],
         [1, 0],
         [1, 0]]),
 tensor([[2],
         [4],
         [1],
         [4],
         [4]]))

In [18]:
fc(user_item)

tensor([0.6232, 0.6232, 0.6232, 0.6232, 0.6232], device='cuda:0',
       grad_fn=<StackBackward0>)

In [19]:
predictor_names = train_df.columns[3:]
predictor_names

Index(['biased_gmf', 'deep_fm', 'gmf', 'knn_ensemble', 'knn_item', 'knn_user',
       'nnmf'],
      dtype='object', name='model')

In [22]:
# One StaticPredictor per base-model column, each backed by the train + test rows.
# The combined table is built once rather than re-concatenated for every predictor.
stacking_df = pd.concat([train_df, test_df])
predictors  = [StaticPredictor(stacking_df, name) for name in predictor_names]

In [23]:
input_ = torch.tensor([[91, 110433], [647, 316]])
input_

tensor([[    91, 110433],
        [   647,    316]])

In [24]:
predictors[0](input_)

tensor([2.8406, 3.5948], dtype=torch.float64)

In [25]:
train_dl = create_data_loader(train_df)
test_dl  = create_data_loader(test_df)

In [29]:
class FWLSParamsSampler(ml.HyperParamsSampler):
    """Hyper-parameter search space for the feature-weighted linear stacking model."""

    def sample(self, trial):
        """Ask ``trial`` for one value of each hyper-parameter and return them as a Bunch.

        Keys: ``conditions`` (int in 1..3), ``epochs`` (int in 1..10) and
        ``lr`` (float in 0.001..0.1, sampled on a log scale).
        """
        n_conditions  = trial.suggest_int('conditions', 1, 3)
        n_epochs      = trial.suggest_int('epochs', 1, 10)
        learning_rate = trial.suggest_float('lr', 0.001, 0.1, log=True)

        return Bunch({
            'conditions' : n_conditions,
            'epochs'     : n_epochs,
            'lr'         : learning_rate
        })

# Lookup from the sampled 'conditions' hyper-parameter (1, 2 or 3) to the list of
# ml.Range objects that train_fn unpacks into ml.FeatureWeightLinearStacking.
# NOTE(review): consecutive ranges end/start at 10/11, 20/21 and 30/31; whether every
# value is covered depends on how ml.Range treats its bounds - confirm in ml.Range.
conditions_cases = {
    1: [ml.Range(1, 10), ml.Range(11)],
    2: [ml.Range(1, 10), ml.Range(11, 20), ml.Range(21)],   
    3: [ml.Range(1, 10), ml.Range(11, 20), ml.Range(21, 30), ml.Range(31)]
}

def train_fn(df, hyper_params):
    """Build a FeatureWeightLinearStacking model and fit it with the given hyper-parameters.

    Args:
        df: training data forwarded unchanged to ``model.fit``. Despite the name, this
            notebook passes a DataLoader (``train_dl``) here.
        hyper_params: object exposing ``conditions`` (key into ``conditions_cases``),
            ``epochs`` and ``lr``.

    Returns:
        The fitted model.
    """
    condition_ranges = conditions_cases[hyper_params.conditions]
    stacker = ml.FeatureWeightLinearStacking(predictors, *condition_ranges)

    loss_fn   = ml.MSELossFn()
    optimizer = Adam(params=stacker.parameters(), lr=hyper_params.lr)
    progress  = Logger(['time', 'epoch', 'train_loss'])

    stacker.fit(
        df,
        epochs    = hyper_params.epochs,
        loss_fn   = loss_fn,
        optimizer = optimizer,
        callbacks = [progress]
    )

    return stacker

In [34]:
# Study that minimises ml.MSELossFn, with test_dl supplied as the evaluation loader.
# It is stored in DATABASE_PATH and resumed if a study with this name already exists.
study1 = optuna.create_study(
    direction      = 'minimize',
    storage        = DATABASE_PATH,
    study_name     = 'fwls_loss_optimization',
    load_if_exists = True
)

loss_objective = ml.OptunaTrainer(
    params_sampler = FWLSParamsSampler(),
    train_fn       = train_fn,
    train_dl       = train_dl,
    eval_dl        = test_dl,
    eval_metric    = ml.MSELossFn()
)

# Run a single additional trial on top of whatever the stored study already holds.
study1.optimize(loss_objective, n_trials=1)

[32m[I 2023-10-02 21:21:56,725][0m Using an existing study with name 'fwls_loss_optimization' instead of creating a new one.[0m
2023-10-02 21:22:02,840 - INFO - {'time': '0:00:06.06', 'epoch': 1, 'train_loss': 13.725671107278151}
2023-10-02 21:22:09,039 - INFO - {'time': '0:00:06.20', 'epoch': 2, 'train_loss': 1.1596347882467157}
2023-10-02 21:22:14,986 - INFO - {'time': '0:00:05.95', 'epoch': 3, 'train_loss': 0.8204599347184686}
2023-10-02 21:22:21,145 - INFO - {'time': '0:00:06.16', 'epoch': 4, 'train_loss': 0.814571647959597}
2023-10-02 21:22:27,047 - INFO - {'time': '0:00:05.90', 'epoch': 5, 'train_loss': 0.813648027532241}
2023-10-02 21:22:33,060 - INFO - {'time': '0:00:06.01', 'epoch': 6, 'train_loss': 0.8116900044329026}
2023-10-02 21:22:38,985 - INFO - {'time': '0:00:05.92', 'epoch': 7, 'train_loss': 0.8096745759248734}
2023-10-02 21:22:44,870 - INFO - {'time': '0:00:05.88', 'epoch': 8, 'train_loss': 0.807580977678299}
[32m[I 2023-10-02 21:22:46,992][0m Trial 46 finished w

In [35]:
# Retrain with the best hyper-parameters of the loss study, then report
# R2 on the training loader and MSE on the test loader.
model = train_fn(train_dl, Bunch(study1.best_params))

train_r2 = R2Score()(*model.evaluate(train_dl))
test_mse = ml.MSELossFn()(*model.evaluate(test_dl))

train_r2, test_mse

2023-10-02 21:22:59,393 - INFO - {'time': '0:00:05.90', 'epoch': 1, 'train_loss': 5.49488182628856}
2023-10-02 21:23:05,341 - INFO - {'time': '0:00:05.95', 'epoch': 2, 'train_loss': 1.080047871260082}
2023-10-02 21:23:11,367 - INFO - {'time': '0:00:06.03', 'epoch': 3, 'train_loss': 0.8699855646666359}
2023-10-02 21:23:17,134 - INFO - {'time': '0:00:05.77', 'epoch': 4, 'train_loss': 0.8314020835301456}
2023-10-02 21:23:22,931 - INFO - {'time': '0:00:05.80', 'epoch': 5, 'train_loss': 0.8156573456876418}
2023-10-02 21:23:28,935 - INFO - {'time': '0:00:06.00', 'epoch': 6, 'train_loss': 0.8084085890475441}
2023-10-02 21:23:34,931 - INFO - {'time': '0:00:06.00', 'epoch': 7, 'train_loss': 0.8052807117209715}


(tensor(0.2444), tensor(0.7211))

In [44]:
# Study that maximises R2Score; stored in DATABASE_PATH and resumed if it already exists.
# NOTE(review): trials are scored on train_dl, whereas the loss study uses test_dl.
# Selecting hyper-parameters on the data the model was fitted on favours overfitting -
# confirm this is intended. If the evaluation set is changed, use a new study name:
# load_if_exists=True would otherwise mix incomparable trial values.
study2 = optuna.create_study(
    direction      = 'maximize',
    storage        = DATABASE_PATH,
    study_name     = 'fwls_r2_optimization',
    load_if_exists = True
)

r2_objective = ml.OptunaTrainer(
    params_sampler = FWLSParamsSampler(),
    train_fn       = train_fn,
    train_dl       = train_dl,
    eval_dl        = train_dl,
    eval_metric    = R2Score()
)

study2.optimize(r2_objective, n_trials=20)

[32m[I 2023-10-02 21:34:54,019][0m Using an existing study with name 'fwls_r2_optimization' instead of creating a new one.[0m
2023-10-02 21:35:01,540 - INFO - {'time': '0:00:07.50', 'epoch': 1, 'train_loss': 41.394580602645874}
2023-10-02 21:35:09,643 - INFO - {'time': '0:00:08.10', 'epoch': 2, 'train_loss': 14.890405793400372}
[32m[I 2023-10-02 21:35:13,986][0m Trial 10 pruned. [0m
2023-10-02 21:35:20,878 - INFO - {'time': '0:00:06.87', 'epoch': 1, 'train_loss': 56.238348105374506}
2023-10-02 21:35:27,683 - INFO - {'time': '0:00:06.80', 'epoch': 2, 'train_loss': 3.7531288897289947}
2023-10-02 21:35:34,689 - INFO - {'time': '0:00:07.01', 'epoch': 3, 'train_loss': 1.2755100069677128}
2023-10-02 21:35:41,524 - INFO - {'time': '0:00:06.83', 'epoch': 4, 'train_loss': 1.166888615664314}
2023-10-02 21:35:48,830 - INFO - {'time': '0:00:07.30', 'epoch': 5, 'train_loss': 1.1292648087529575}
2023-10-02 21:35:55,574 - INFO - {'time': '0:00:06.74', 'epoch': 6, 'train_loss': 1.092990805121029

In [45]:
# Retrain with the best hyper-parameters of the R2 study, then report
# R2 on the training loader and MSE on the test loader.
model = train_fn(train_dl, Bunch(study2.best_params))

train_r2 = R2Score()(*model.evaluate(train_dl))
test_mse = ml.MSELossFn()(*model.evaluate(test_dl))

train_r2, test_mse

2023-10-02 21:49:29,269 - INFO - {'time': '0:00:06.12', 'epoch': 1, 'train_loss': 7.195241689682007}
2023-10-02 21:49:35,093 - INFO - {'time': '0:00:05.82', 'epoch': 2, 'train_loss': 1.049347599639612}
2023-10-02 21:49:41,195 - INFO - {'time': '0:00:06.10', 'epoch': 3, 'train_loss': 0.8584769666194916}
2023-10-02 21:49:47,399 - INFO - {'time': '0:00:06.20', 'epoch': 4, 'train_loss': 0.8241643511197146}
2023-10-02 21:49:53,277 - INFO - {'time': '0:00:05.88', 'epoch': 5, 'train_loss': 0.8099194239167606}
2023-10-02 21:49:59,441 - INFO - {'time': '0:00:06.16', 'epoch': 6, 'train_loss': 0.8012905795784557}
2023-10-02 21:50:12,193 - INFO - {'time': '0:00:12.75', 'epoch': 7, 'train_loss': 0.7958983822780497}
2023-10-02 21:50:19,814 - INFO - {'time': '0:00:07.62', 'epoch': 8, 'train_loss': 0.7926713447360432}
2023-10-02 21:50:25,926 - INFO - {'time': '0:00:06.11', 'epoch': 9, 'train_loss': 0.79094678514144}


(tensor(0.2581), tensor(0.7034))

In [46]:
study1.best_params

{'conditions': 1, 'epochs': 7, 'lr': 0.03214481771820007}

In [47]:
study2.best_params

{'conditions': 1, 'epochs': 9, 'lr': 0.04003188647319603}