### This notebook describes how to stack a simple PyTorch model for binary classification of tabular data.

It is part of my solution to the Tabular Playground Series (TPS) September 2021 competition. It trains the same model on different data splits and then combines the
out-of-fold predictions (oofs) in another level using a simple logistic regression.

This notebook is built around four main components: **experiment**, **model**, **level**, and **stack**. These are modeled by classes, as described in this notebook.

In [None]:
# TPU setup: download the PyTorch/XLA environment setup script, then run it
# requesting version 1.7 together with the libomp5 and libopenblas-dev apt packages
!curl https://raw.githubusercontent.com/pytorch/xla/master/contrib/scripts/env-setup.py -o pytorch-xla-env-setup.py
!python3 pytorch-xla-env-setup.py --version 1.7 --apt-packages libomp5 libopenblas-dev


In [None]:
# Familiar imports
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display

import copy
import gc
import glob
import os
import random
from datetime import datetime
from pathlib import Path



# helpers
from sklearn.preprocessing import OrdinalEncoder, LabelEncoder, OneHotEncoder, PowerTransformer, StandardScaler, \
                                  MinMaxScaler, RobustScaler, PolynomialFeatures, QuantileTransformer,  KBinsDiscretizer

from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer
from sklearn.model_selection import train_test_split, KFold, cross_val_score, StratifiedKFold
from sklearn.pipeline import make_pipeline, Pipeline



# Models
from sklearn.linear_model import LogisticRegression


# torch
import torch   
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from torchmetrics import AUROC

# for TPU
import torch_xla
import torch_xla.core.xla_model as xm

# base
from sklearn.base import clone

# scoring
from sklearn import metrics

In [None]:
# notebook options
# show up to 100 columns when a DataFrame is displayed
pd.set_option("display.max_columns", 100)
# folder holding the competition data and the names of the two CSV files in it
path = "../input/tabular-playground-series-sep-2021/"
train_file = "train.csv"
test_file = "test.csv"

In [None]:
# Load the training data
# Read both competition files; the first CSV column becomes the index
train = pd.read_csv(os.path.join(path, train_file), index_col=0)
test = pd.read_csv(os.path.join(path, test_file), index_col=0)

In [None]:
# memory used by the column data of each frame (index excluded), in bytes divided by 2**30
print(f'Train set size {train.memory_usage(index=False).sum()/(2**30)}')
print(f'Test set size {test.memory_usage(index=False).sum()/(2**30)}')

In [None]:
# Separate target from features
# Features are every column except 'claim'; 'claim' itself is the target
X = train.drop(columns=['claim'])
y = train['claim']


### Feature Engineering

In [None]:

# identify columns
numerical_cols = list(X.select_dtypes(include=np.number).columns)
non_numeric_cols = list(X.select_dtypes(include=['object', 'bool']).columns)

print(f'We have {len(numerical_cols)} numeric and {len(non_numeric_cols)} non-numeric features')


# work on copies so the raw frames stay untouched and this cell can be re-run
X_train = X.copy()
X_test = test.copy()


# all original features
features = non_numeric_cols + numerical_cols

# new row-wise features, computed BEFORE imputation so they still see the NaNs
# https://www.kaggle.com/hiro5299834/tps-sep-2021-single-lgbm
X_train['n_missing'] = X_train[features].isna().sum(axis=1)
X_test['n_missing'] = X_test[features].isna().sum(axis=1)

X_train['std'] = X_train[features].std(axis=1)
X_test['std'] = X_test[features].std(axis=1)

features += ['n_missing', 'std']

# imputation: fill both sets with the TRAIN column means, so that the test set
# goes through exactly the same preprocessing as the data the scaler is fit on
train_means = X_train[features].mean()
X_train[features] = X_train[features].fillna(train_means)
X_test[features] = X_test[features].fillna(train_means)

# standardise with statistics learned from the training set only
scaler = StandardScaler()
X_train[features] = scaler.fit_transform(X_train[features])
X_test[features] = scaler.transform(X_test[features])


# positional indexes of the feature columns, useful for column transformers
numerical_ix = X_train.columns.get_indexer(features)

### Model-dev helper functions

These helper functions seed the random generators, save predictions, build fold indexes, and score predictions. They can be wrapped in a class for better modeling, or kept as they are since they are independent of the project setting.


In [None]:

## helper functions
def seed_everything(seed=42):
    """Seed every random number generator used in this notebook.

    Sets the ``PYTHONHASHSEED`` environment variable and seeds Python's
    ``random`` module, NumPy and PyTorch. When CUDA is available the CUDA
    generators are seeded as well, cuDNN is put in deterministic mode and its
    benchmark mode is switched off.

    seed: integer seed shared by all generators.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed, torch.manual_seed):
        seeder(seed)

    # nothing else to do on a machine without CUDA
    if not torch.cuda.is_available():
        return

    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    cudnn = torch.backends.cudnn
    cudnn.deterministic = True
    cudnn.benchmark = False

        
def to_file(data, output_folder, idxs=None, suffix='.csv'):
    """Print ``data`` and write it as a CSV file inside ``output_folder``.

    data: anything ``pd.DataFrame`` accepts.
    output_folder: existing folder that receives the file.
    idxs: accepted but not used by this function.
    suffix: used as the complete file name inside ``output_folder``.

    NOTE(review): the file name consists of ``suffix`` only, so with the
    default value the file is called '.csv' — confirm this is intended.
    """
    print(data)
    target_path = f'{output_folder}{os.sep}{suffix}'
    pd.DataFrame(data).to_csv(target_path, index=True)
        
    
def calc_folds_indexes(X, y, n_folds=5, shuffle=True, sampler=KFold, seeds=[42]):
    """
    Create one set of cross-validation folds per seed.

    X, y: dataset and target handed to the sampler's ``split`` method.
    n_folds: number of splits requested from the sampler.
    shuffle: whether the sampler shuffles the rows before splitting.
    sampler: can be KFold, StratifiedKFold, or any sampling class that takes
             ``n_splits``, ``shuffle`` and ``random_state`` and offers ``split``.
    seeds: iterable of random seeds; it is only read, never modified.

    return a list of dictionaries {'seed': seed, 'idxs': [(train_idxs, test_idxs), ...]}
    """
    folds_idxs_list = []
    for seed in seeds:
        # scikit-learn splitters raise an error when a random_state is given
        # together with shuffle=False, so the seed is only forwarded when it is used
        folds = sampler(n_splits=n_folds,
                        shuffle=shuffle,
                        random_state=seed if shuffle else None)

        folds_idxs_list.append({'seed': seed, 'idxs': list(folds.split(X, y))})

    return folds_idxs_list

def score(y, target, average=False, score_func=None, score_func_params=None):
    """
    Score one prediction vector, or a collection of them, against ``target``.

    y: a single prediction vector, or a collection of prediction vectors
       (a list, or a numpy array with more than one dimension whose rows are
       the individual vectors).
    target: the ground truth passed as second argument to the scoring function.
    average: for a collection, return the mean of the scores instead of the list.
    score_func: callable ``score_func(y, target, **params)``. When omitted, a
                module-level ``score_func`` is used if one is defined, otherwise
                ``metrics.roc_auc_score``.
    score_func_params: extra keyword arguments for ``score_func``. When omitted,
                a module-level ``score_func_params`` is used if defined, else {}.

    return a single score, a list of scores, or their mean.
    """
    # explicit arguments win; module-level names are kept as a fallback
    if score_func is None:
        score_func = globals().get('score_func')
    if score_func is None:
        score_func = metrics.roc_auc_score
    if score_func_params is None:
        score_func_params = globals().get('score_func_params') or {}

    # a 1-D array is ONE prediction vector, it must not be scored element-wise
    is_collection = isinstance(y, list) or (isinstance(y, np.ndarray) and y.ndim > 1)
    if is_collection:
        scores = [score_func(y_i, target, **score_func_params) for y_i in y]
        return np.mean(scores) if average else scores

    return score_func(y, target, **score_func_params)


In [None]:
# initialize things

# seed
# global seed of the notebook, applied to all random generators right away
seed = 42
seed_everything(seed)

### ModelWrapper 
The main role of this class is to avoid coding a separate class for each model (or model type). Models can be grouped into categories, where some accept more parameters than others. For instance, xgboost can use an evaluation set to determine the stopping round, while Lasso has no such extra parameters.

Thanks to the flexibility of Python and the design of the base models, we can wrap the model and develop a `wrapper` that does what the model should do. In fact, we can easily extend this class to support sklearn pipelines or any framework we are using. The idea is, again, to separate code from data and to generalize.


In [None]:
class ModelWrapper():
    """
    Uniform description of a model: how to build it and how to fit it.

    model: class (or any callable) that builds the estimator.
    name: display name of the model.
    main_params: keyword arguments passed to ``model`` when it is built.
    uses_eval_set: whether ``fit`` should receive an ``eval_set`` argument.
    fit_params: extra keyword arguments for the ``fit`` function.
    """
    def __init__(self,
                 model,
                 name,
                 main_params,
                 uses_eval_set=False,
                 fit_params=None):

        self.model = model
        self.name = name
        self.main_params = main_params
        self.uses_eval_set = uses_eval_set
        # a fresh dict per wrapper; a literal {} default would be shared by all instances
        self.fit_params = {} if fit_params is None else fit_params

    def create_model(self, random_state=None):
        """
        Build a new, independent model from ``main_params``.

        The parameters are deep-copied first. Stateful objects stored in them
        (for instance a neural network instance) are therefore never shared
        between the models created for different folds, which would otherwise
        let one fold continue training the network of the previous fold.

        random_state: when given, it is assigned to the model's ``random_state``
                      attribute, or to ``random_seed`` if only that one exists.
        """
        model = self.model(**copy.deepcopy(self.main_params or {}))
        if random_state is not None:
            if hasattr(model, 'random_state'):
                model.random_state = random_state
            elif hasattr(model, 'random_seed'):
                model.random_seed = random_state

        return model
    
        

### ModelTrainer
The role of this class is to train a model and to calculate the oofs and the test predictions (meta-features). That is, to cross-validate.


In [None]:
class ModelTrainer():
    """Cross-validates the model described by a ModelWrapper."""
    def __init__(self,
                  model_wrapper: ModelWrapper):

        self.modelwrapper = model_wrapper

    def cross_validate(self,
                  X, y,
                  X_test,
                  folds_idxs,
                  transformer=None,
                  fit_transform_on_test_set=False,
                  verbose=False,
                  use_different_random_states=True,
                  score_function=metrics.roc_auc_score,
                  score_function_params=None):
        """
        Return the oofs predictions and the meta features (test predictions)

        X, y: training data and target; both are indexed with the index arrays
              stored in ``folds_idxs``.
        X_test: test data, predicted by the model of every fold.
        folds_idxs: list of (train_indexes, valid_indexes) pairs, one per fold.
        transformer: optional object with fit_transform/transform, refit on the
                     training part of each fold.
        fit_transform_on_test_set: if True the transformer is refit on the
                     validation and on the test data instead of only applied.
        verbose: print the score of each fold and the average score.
        use_different_random_states: build the model of fold i with random_state=i.
        score_function: callable(y_true, y_pred, **params) scoring each fold.
        score_function_params: extra keyword arguments for ``score_function``.

        return (oof_predictions, test_predictions); the test predictions are
        the average over the folds.
        """
        score_kwargs = {} if score_function_params is None else score_function_params
        n_splits = len(folds_idxs)

        test_predictions = 0
        oof_predictions = np.zeros_like(np.array(y), dtype=np.float64)
        fold_scores = []
        for fold, (train_ix, valid_ix) in enumerate(folds_idxs): # folds are precomputed so every model sees the same splits
            X_train, X_valid = X[train_ix], X[valid_ix]
            y_train, y_valid = y[train_ix], y[valid_ix]

            # transform input
            if transformer is not None:
                X_train = transformer.fit_transform(X_train)
                if fit_transform_on_test_set:
                    X_valid = transformer.fit_transform(X_valid)
                    X_test_ = transformer.fit_transform(X_test)
                else:
                    X_test_ = transformer.transform(X_test)
                    X_valid = transformer.transform(X_valid)
            else:
                X_test_ = X_test

            # check if we train each fold on a differently initialized model
            if use_different_random_states:
                model = self.modelwrapper.create_model(random_state=fold)
            else:
                model = self.modelwrapper.create_model()

            # fit the model
            if self.modelwrapper.uses_eval_set:
                model.fit(X_train, y_train, eval_set=[(X_valid, y_valid)], **self.modelwrapper.fit_params)
            else:
                model.fit(X_train, y_train, **self.modelwrapper.fit_params)

            ## predictions (last column of predict_proba)
            # on the validation set
            valid_predictions = model.predict_proba(X_valid)[:, -1]
            fold_score = score_function(y_valid, valid_predictions, **score_kwargs)
            fold_scores.append(fold_score)
            oof_predictions[valid_ix] = valid_predictions

            # on the test set: every fold contributes 1/n_splits of the final prediction
            test_predictions += model.predict_proba(X_test_)[:, -1] / n_splits

            if verbose:
                print('Model:{} Fold:{} score:{:.4f}'.format(self.modelwrapper.name, fold + 1, fold_score))

        if verbose:
            print('Average score:{:.4f} ({:.4f})'.format(np.mean(fold_scores), np.std(fold_scores) ))

        return oof_predictions, test_predictions

### Level
The level class glues all components in a given layer

In [None]:
class Level():
    """
    One layer of the stack: its models, its transformer and its CV settings.

    level_id: identifier of the level.
    models: list of model names (keys of the model zoo), or ['all'].
    folder: folder name associated with the level.
    transformer: transformer applied to the level's input, or None.
    n_folds: number of cross-validation folds.
    seeds: seeds used to create the folds.
    frozen: True if the level is already trained and must not be retrained.
    fit_transform_on_test_set: see ModelTrainer.cross_validate.
    use_different_random_states: see ModelTrainer.cross_validate.
    """
    def __init__(self,
                level_id,
                models,
                folder,
                transformer,
                n_folds=5,
                seeds=[42],
                frozen=False,
                fit_transform_on_test_set = False,
                use_different_random_states=True):

        self.level_id = level_id
        self.models = models
        self.folder = folder
        self.transformer = transformer
        self.n_folds = n_folds
        self.seeds = seeds
        # keep the caller's choice (it used to be overwritten with False)
        self.frozen = frozen
        self.fit_transform_on_test_set = fit_transform_on_test_set
        self.use_different_random_states = use_different_random_states

    def create(self, model_zoo):
        """
         Create a level, i.e. one ModelWrapper per model of the level.
         model_zoo: a dictionary of all available models. Each entry is a
                    dictionary with the key 'model' and the optional keys
                    'main_params', 'fit_kwargs' and 'app_params'.
        """
        self.model_wrappers = []

        # get models
        # if models is set to 'all' use all models
        if self.models and self.models[0].lower() == 'all':
            level_models_names = list(model_zoo)
        else:
            level_models_names = self.models

        for model_name in level_models_names:
            # get parameters; everything except the model itself is optional
            model_dict = model_zoo[model_name]
            model = model_dict['model']
            fit_kwargs = model_dict.get('fit_kwargs')
            app_params = model_dict.get('app_params')
            main_params = model_dict.get('main_params') or {}

            model_wrapper = ModelWrapper(model=model, name=model_name, main_params=main_params)
            if fit_kwargs is not None:
                model_wrapper.fit_params = fit_kwargs
            if app_params is not None:
                model_wrapper.uses_eval_set = app_params.get('uses_eval_set', False)
            self.model_wrappers.append(model_wrapper)

#### Level Trainer
Trains all models in a given level.

In [None]:
class LevelTrainer():
    """
    Trains every model of a level on every set of folds.

    level: a created Level (its ``model_wrappers`` are used).
    seeds_folds_idxs_list: list of {'seed': seed, 'idxs': folds} dictionaries.
    """
    def __init__(self,
                level,
                seeds_folds_idxs_list):
        self.level = level
        self.seeds_folds_idxs_list = seeds_folds_idxs_list

    def train(self, X_train, y, X_test, verbose=True, agg_func=None):
        """
        train the level and return the oofs and meta-features for each model in the level.
        If the level has many seeds it will either use the agg_func to combine predictions
        or will just return everything, it depends on agg_func

        agg_func: can be None, np.mean, or any other numpy reduction function.
                  It is called as agg_func(matrix, axis=1) on a matrix holding
                  one column per seed, and must return one value per row.

        return two DataFrames (oofs, test predictions). Without agg_func there
        is one column per model and seed, named '<model>_seed_<seed>'; with
        agg_func there is one column per model.
        """

        level_oof_preds, level_test_preds = {}, {}
        for model_wrapper in self.level.model_wrappers:
            if verbose:
                print('-'*30)
                print(f'Model:{model_wrapper.name}')
                print('-'*30)

            # train each model as many times as the length of seeds_folds_idxs_list
            model_oof_preds, model_test_preds = [], []

            for seeds_folds_idxs in self.seeds_folds_idxs_list:
                seed, folds_idxs = seeds_folds_idxs['seed'], seeds_folds_idxs['idxs']
                if verbose:
                    print('-'*30)
                    print(f'Seed:{seed}')
                    print('-'*30)

                trainer = ModelTrainer(model_wrapper)
                oof_preds, test_preds = trainer.cross_validate(X_train,
                                                          y,
                                                          X_test,
                                                          transformer=self.level.transformer,
                                                          folds_idxs=folds_idxs,
                                                          verbose=verbose,
                                                          fit_transform_on_test_set=self.level.fit_transform_on_test_set,
                                                          use_different_random_states=self.level.use_different_random_states)
                if agg_func is None:
                    level_oof_preds[f'{model_wrapper.name}_seed_{seed}'] =  oof_preds
                    level_test_preds[f'{model_wrapper.name}_seed_{seed}'] =  test_preds
                else: # collect them in order to aggregate them with the agg_func function
                    model_oof_preds.append(oof_preds)
                    model_test_preds.append(test_preds)

            # aggregate the results of THIS model over its seeds (row-wise),
            # inside the model loop so that every model gets its own column
            if agg_func is not None and model_oof_preds:
                level_oof_preds[f'{model_wrapper.name}'] = agg_func(np.column_stack(model_oof_preds), axis=1)
                level_test_preds[f'{model_wrapper.name}'] = agg_func(np.column_stack(model_test_preds), axis=1)

        if verbose:
            print('-'*30)

        return pd.DataFrame(level_oof_preds), pd.DataFrame(level_test_preds)

### Experiment 
Since in many cases everything boils down to stacking, the experiment class handles the organization of the files resulting from a run: the test and oof predictions. Assuming the project has the following structure, with a folder called **experiments**, we can save our runs in this folder; this is what this class does. It is the entry point for any run (experiment) in the project: it reads the input and the settings and produces the output.

```
    TPS_project
    │   README.md
    │
    └───notebooks
    │   ...
    │
    └───experiments
    │   │   
    │   │
    │   └───experiment_1   
    │   │   level_1_oofs.csv
    │   │   level_1_test.csv
    │   │   level_2_oofs.csv
    │   │   level_2_test.csv
    │   │   ...
    │   │   meta_level_oofs.csv
    │   │   meta_level_test.csv
    │   └───experiment_...
```


>The code that generated the results is important to save too, but that can be done easily by creating a new version of the notebook or by copying the notebook together with the CV/LB results. If we are running on a local machine without notebooks, we can create a small function that copies the code files to the experiment folders. In other words, we save the code and the results of each experiment for easier lookup.

>This class is especially important when running notebooks on our own computers. Kaggle has a nice notebook management system that saves the outputs as well.



In [None]:
class Experiment():
    """
    Entry point of a run: trains the stack level by level and stores the results.

    title: title of the experiment, used to name time-stamped output folders.
    description: free text describing the experiment.
    stack: list of dictionaries, each holding the arguments of one Level.
    model_zoo: dictionary of all available models (see Level.create).
    main_folder: folder that contains the output folders of all experiments;
                 it is created if it does not exist.
    """
    def __init__(self,
                 title,
                 description,
                 stack,
                 model_zoo,
                 main_folder=os.getcwd()):

        self.title = title
        self.main_folder = main_folder
        self.stack = stack
        self.model_zoo = model_zoo
        self.description = description
        # set by join_folder; None until an output folder has been joined
        self.output_folder = None
        # create the main folder if it does not exist
        os.makedirs(f'{self.main_folder}', exist_ok=True)

    def join_folder(self, folder=None):
        """
        Join a folder and output where results will be saved.
        If 'folder' is None, it will create a folder
        with a time stamp.
        """
        if folder is not None: # if folder is specified
            self.output_folder = folder
        else: # name the folder after the title and the current time stamp
            time_stamp = datetime.now().isoformat(' ', 'seconds')
            self.output_folder = self.title + ' ' + time_stamp.replace(':', '-')
        # create the folder if it does not exist
        Path(f'{self.main_folder}{os.sep}{self.output_folder}').mkdir(parents=True, exist_ok=True)

    def _level_file(self, level_id, kind):
        """Path of the CSV file of a level; kind is 'oofs' or 'test'."""
        if self.output_folder is None:
            # no folder joined yet: fall back to a time-stamped one
            self.join_folder()
        return f'{self.main_folder}{os.sep}{self.output_folder}{os.sep}{level_id}_{kind}.csv'

    def _load_level_predictions(self, level_id):
        """Load the stored (oofs, test) prediction frames of a trained level."""
        # the files are written with their index, so the first column is the index
        level_oof_preds = pd.read_csv(self._level_file(level_id, 'oofs'), index_col=0)
        level_test_preds = pd.read_csv(self._level_file(level_id, 'test'), index_col=0)
        return level_oof_preds, level_test_preds

    def run(self, X_train, y, X_test,
            train_idxs,
            test_idxs,
            verbose=True, store=True):
        """
        Train every level of the stack; the predictions of one level are the
        input of the next one.

        X_train, y, X_test: input of the first level.
        train_idxs, test_idxs: accepted but not used by this method.
        verbose: print the training progress and display the first rows of
                 each level's predictions.
        store: write the oofs and test predictions of each trained level to
               '<level_id>_oofs.csv' and '<level_id>_test.csv' in the output folder.

        return the test predictions of the last level (None for an empty stack).
        """
        level_oof_preds, level_test_preds = None, None

        # run the stack
        for level_params in self.stack:
            # create all models in the level
            level = Level(**level_params)
            level.create(self.model_zoo)

            print('-'*50)
            print(f'Current Level: {level.level_id}')
            print('-'*50)

            if not level.frozen:  # skip any trained level
                # create folds indexes for the level
                seeds_folds_idxs_list = calc_folds_indexes(X=X_train,
                                                           y=y,
                                                           n_folds=level.n_folds,
                                                           sampler=StratifiedKFold,
                                                           seeds=level.seeds)

                # train the level
                level_trainer = LevelTrainer(level=level,
                                             seeds_folds_idxs_list=seeds_folds_idxs_list)

                level_oof_preds, level_test_preds = level_trainer.train(X_train=X_train,
                                                                        y=y,
                                                                        X_test=X_test,
                                                                        verbose=verbose)
                # store predictions?
                if store:
                    level_oof_preds.to_csv(self._level_file(level.level_id, 'oofs'))
                    level_test_preds.to_csv(self._level_file(level.level_id, 'test'))
            else:
                print('This level is already trained')
                # reuse the predictions stored by a previous run of this level
                level_oof_preds, level_test_preds = self._load_level_predictions(level.level_id)

            # the predictions of this level are the features of the next one
            X_train, X_test = level_oof_preds.values, level_test_preds.values

            if verbose:
                display(level_oof_preds.head(10))
                display(level_test_preds.head(10))

        # return the last output from the last level
        return level_test_preds

### DNN Models

In [None]:
# My other nets here: https://github.com/fmstam/MLS/blob/master/ceot_drl/core/DNN.py
class Net(nn.Module):
    """
    Embedding layer followed by three dense layers and a sigmoid output.

    input_size: number of input features per sample.
    output_size: number of output units.
    embedding_layer: layer class called as embedding_layer(embedding_size, feature_size),
                     e.g. nn.Embedding, nn.Linear, nn.Conv1d, ...
    embedding_size: first argument of the embedding layer.
    feature_size: second argument of the embedding layer and width of the
                  first dense layer; the next two layers use 1/2 and 1/4 of it.
    """
    def __init__(self,
                  input_size,
                  output_size=1,
                  embedding_layer=nn.Embedding,
                  embedding_size=64,
                  feature_size=16
                 ):
        super().__init__()

        self.input_size = input_size
        self.embedding_size = embedding_size
        self.output_shape = output_size
        self.feature_size = feature_size

        # widths of the dense part of the net
        flat_width = input_size * feature_size
        half_width = int(feature_size / 2)
        quarter_width = int(feature_size / 4)

        # layers, created in forward order
        self.embedding_layer = embedding_layer(embedding_size, feature_size)
        self.fc_1 = nn.Linear(in_features=flat_width, out_features=feature_size)
        self.dropout_1 = nn.Dropout(0.6)
        self.fc_2 = nn.Linear(in_features=feature_size, out_features=half_width)
        self.dropout_2 = nn.Dropout(0.4)
        self.fc_3 = nn.Linear(in_features=half_width, out_features=quarter_width)
        self.dropout_3 = nn.Dropout(0.2)
        self.fc_out = nn.Linear(in_features=quarter_width, out_features=output_size)

    def forward(self, x):
        """Return the sigmoid output for a batch; non-tensor input is converted first."""
        if not isinstance(x, torch.Tensor):
            x = torch.Tensor(x)

        # embed every (integer-cast) input value, then flatten per sample
        embedded = self.embedding_layer(x.long())
        hidden = embedded.view(embedded.shape[0], self.input_size * self.feature_size)

        # dense -> silu (swish) -> dropout, three times
        dense_blocks = ((self.fc_1, self.dropout_1),
                        (self.fc_2, self.dropout_2),
                        (self.fc_3, self.dropout_3))
        for dense, dropout in dense_blocks:
            hidden = dropout(F.silu(dense(hidden)))

        return torch.sigmoid(self.fc_out(hidden))


class CompDataSet(Dataset):
    """
    Dataset pairing each row of X with the matching entry of y.

    X, y: indexable containers of the same length.
    transformer: callable applied to each x when it is fetched; a falsy value
                 (e.g. None) returns x unchanged.
    """
    def __init__(self, X, y, transformer):
        self.X, self.y, self.transformer = X, y, transformer

    def __len__(self):
        return len(self.X)

    def __getitem__(self, index):
        features, label = self.X[index], self.y[index]
        if not self.transformer:
            return features, label
        return self.transformer(features), label
    

    
class NetModel():
    """
    Wrapper giving a torch net the fit / predict_proba interface used by the stack.

    net: the torch module to train.
    optimizer: optimizer class, called as optimizer(net.parameters(), lr).
    lr: learning rate passed to the optimizer.
    loss: loss callable(prediction, target).
    metric: metric object with reset(), __call__(preds, target) and compute();
            a new AUROC() is created when it is None.
    batch_size: batch size of the data loaders.
    scheduler: LR scheduler class, built at the start of every fit as
               scheduler(optimizer, max_lr=..., epochs=..., steps_per_epoch=...).
    max_lr: maximum learning rate passed to the scheduler.
    device: 'gpu' (falls back to the CPU without CUDA), 'tpu', anything else is the CPU.
    """
    def __init__(self,
                 net,
                 optimizer=torch.optim.Adam,
                 lr=2.5e-4,
                 loss=torch.nn.BCELoss(),
                 metric=None,
                 batch_size=1024,
                 scheduler=torch.optim.lr_scheduler.OneCycleLR,
                 max_lr=1e-3,
                 device='gpu'
                 ):

        self.net = net
        self.optimizer = optimizer(self.net.parameters(), lr)
        self.lr = lr
        self.max_lr = max_lr
        self.loss = loss
        # one metric object per model, so that models never share metric state
        self.metric = AUROC() if metric is None else metric
        self.batch_size = batch_size
        # keep the scheduler class: fit needs it again to build a new schedule
        self._scheduler_factory = scheduler
        self.scheduler = scheduler

        # strings are compared with ==; 'is' only tests object identity
        self.device = 'cpu'
        if device == 'gpu':
            if torch.cuda.is_available():
                self.device = 'cuda:0'
        elif device == 'tpu':
            self.device = xm.xla_device()

        self.best_model = None
        self.train_losses = []

    def _snapshot_weights(self):
        """Return an independent copy of the current weights of the net."""
        # state_dict() returns references to the live tensors; without cloning
        # the 'best' weights would silently follow every later update
        return {name: tensor.detach().clone() for name, tensor in self.net.state_dict().items()}

    def fit(self, X, y, eval_set=None, epochs=1, verbose=1, plot_loss=True):
        """
        Train the net and keep the weights of its best epoch in ``best_model``.

        X, y: training data and target.
        eval_set: optional list of (X, y) pairs; the last one is used for
                  validation. With a validation set the best epoch is chosen
                  by the validation score, otherwise by the training score.
        epochs: number of passes over the training data.
        verbose: print progress when greater than 0.
        plot_loss: accepted but not used by this method.

        return self
        """
        train_loader = DataLoader(CompDataSet(X, y, transformer=torch.tensor),
                                  batch_size=self.batch_size)

        valid_loader = None
        if eval_set is not None:
            # use the last item in the eval_set for validation
            X_valid, y_valid = eval_set[-1][0], eval_set[-1][1]
            valid_loader = DataLoader(CompDataSet(X_valid, y_valid, transformer=torch.tensor),
                                      batch_size=self.batch_size)

        best_train_score = - float('inf')
        best_valid_score = - float('inf')
        self.train_losses = []

        ## actual training loop
        # put model to the selected device
        self.net.to(self.device)
        gc.collect() # clean up before the loop
        n_batches = len(train_loader) # number of batches (steps)
        # build the schedule of this run from the number of steps it will take
        self.scheduler = self._scheduler_factory(self.optimizer, max_lr=self.max_lr, epochs=epochs, steps_per_epoch=n_batches)

        if verbose > 0:
            print('Start of training ...')

        for epoch in range(epochs):
            # put model in training mode and reset the metric
            self.net.train()
            self.metric.reset()
            epoch_mean_loss = 0

            for batch_X, batch_y in train_loader:
                # forward step
                y_predict = self.net(batch_X.to(self.device))
                batch_y = batch_y.float().unsqueeze(1).to(self.device)

                # backward step
                self.optimizer.zero_grad() # gradients accumulate otherwise
                loss = self.loss(y_predict, batch_y)
                loss.backward() # backpropagate error
                self.optimizer.step() # update weights
                self.scheduler.step() # update the LR scheduler
                epoch_mean_loss += (loss.detach().cpu().item()) / n_batches
                self.metric(y_predict, batch_y.int()) # the metric target has to be int

            train_score = self.metric.compute()
            if train_score > best_train_score:
                best_train_score = train_score
                # checkpoint on the training score only when there is no eval set
                if valid_loader is None:
                    self.best_model = self._snapshot_weights()

            if valid_loader is not None:
                # put model in validation mode and reset the metric
                self.net.eval()
                self.metric.reset()

                with torch.no_grad(): # stop autograd engine
                    for batch_X, batch_y in valid_loader:
                        y_predict = self.net(batch_X.to(self.device))
                        batch_y = batch_y.float().unsqueeze(1).to(self.device)
                        self.metric(y_predict, batch_y.int())

                valid_score = self.metric.compute()
                if valid_score > best_valid_score:
                    best_valid_score = valid_score
                    # time to checkpoint the model based on the eval set
                    self.best_model = self._snapshot_weights()

                if verbose > 0:
                    print(f'Epoch {epoch}/{epochs}: loss {epoch_mean_loss} train_score {train_score} best_score {best_train_score} valid_score {valid_score} best_score {best_valid_score}')

            elif verbose > 0: # no eval-set was provided use the training set only
                print(f'Epoch {epoch}/{epochs}: loss {epoch_mean_loss} train_score {train_score} best_score {best_train_score}')

            self.train_losses.append(epoch_mean_loss)

        # no epoch ever counted as an improvement (e.g. undefined scores): keep the final weights
        if self.best_model is None:
            self.best_model = self._snapshot_weights()

        # clean up
        del train_loader, valid_loader
        gc.collect()

        return self

    def predict_proba(self, X):
        """
        Predict with the best weights found by fit.

        X: tensor or anything torch.Tensor accepts.
        return a numpy array holding the output of the net for every row of X.
        """
        assert self.best_model is not None, "The model is not trained yet"

        if not isinstance(X, torch.Tensor):
            X = torch.Tensor(X)
        X = X.to(self.device)

        # use the best model for prediction
        self.net.to(self.device)
        self.net.load_state_dict(self.best_model)
        # evaluation mode switches dropout off; no gradients are needed here
        self.net.eval()
        with torch.no_grad():
            return self.net(X).detach().cpu().numpy()
        
        
        

### Hyperparameters

Here are the parameters of each model. They could also be stored in an external JSON file.


In [None]:
# constructor arguments of Net
net_params = {
    # one network input per column of the engineered training frame
    'input_size': len(X_train.columns),
    # first argument of the embedding layer; also reused as the number of bins
    # when the level-1 transformer discretizes the features
    'embedding_size': 100,
    # second argument of the embedding layer and width of the first dense layer
    'feature_size': 64
}

In [None]:
gc.collect()

### These are model/task dependent parameters

In [None]:
# external hyperparameters

### fit function hyperparameters
# some models require special parameters like early stopping in xgboost and lgbm
fit_params = {'early_stopping_rounds': 300,
                  'verbose': 1000}

# extra keyword arguments for NetModel.fit
net_fit_params = {'epochs': 5}

### application/implementation parameters
# These parameters are implementation dependent:
# 'uses_eval_set' tells the trainer to pass the validation fold as eval_set to fit
app_params = {'uses_eval_set':True}



### Models

In [None]:
# compile all settings in one dictionary,
# we can then store it to / load it from a JSON file
# each entry maps a model name to:
#   "model":       the class used to build the model
#   "main_params": keyword arguments for that class
#   "fit_kwargs":  extra keyword arguments for fit (or None)
#   "app_params":  implementation settings such as 'uses_eval_set' (or None)
model_zoo = {
          'LogisticRegression': {"model": LogisticRegression, "main_params":{}, "fit_kwargs":None, "app_params": None},
          'NetModel': {"model": NetModel, "main_params":{"net": Net(**net_params), "device":"tpu"}, "fit_kwargs":net_fit_params, "app_params": app_params},
          # NN models
          #'TabNetClassifier': {"model": tabnet_clf, "fit_kwargs":tabnet_fit_params, "app_params": app_params},
          # we can add any number of models here 
        }
list(model_zoo.keys())

### Stacking

Here goes the actual stacking procedure. 
   - We first define the architecture and set up a session.
   - Define the stack. That is, the models and transformers in the levels

In [None]:
# settings: experiment and stacking architecture

# the first level is fed with the engineered train/test frames
X_train_, X_test_ = X_train, X_test

# level-1 preprocessing: map the features to a normal distribution, then cut
# them into as many ordinal bins as the net's embedding size
transform_pipeline = Pipeline(steps=[
    ("scaler", QuantileTransformer(output_distribution='normal')),
    ("biner", KBinsDiscretizer(n_bins=net_params['embedding_size'],
                               encode='ordinal',
                               strategy='uniform')),
])

# the pipeline is applied to the feature columns only
level_1_transformers = [('num', transform_pipeline, numerical_ix)]
level_1_transform = ColumnTransformer(transformers=level_1_transformers)

# the actual stack: one dictionary of Level arguments per level,
# any number of levels can be added between the first and the meta level
stack = [
    dict(level_id="level-1",
         models=['NetModel'],
         n_folds=5,
         seeds=[42],  # more seeds can be listed here: 43, 44, ...
         folder="level_1",
         transformer=level_1_transform,
         fit_transform_on_test_set=False,
         frozen=False),  # set to True to freeze the level if already trained

    dict(level_id="meta_level",
         models=['LogisticRegression'],
         n_folds=5,
         seeds=[42],
         folder="meta_level",
         transformer=None,
         fit_transform_on_test_set=False,
         frozen=False),
]
         

- Loop through each level in the stack

In [None]:
# create experiment
# all experiments live in this folder, created under the current working directory
experiments_folder = "Experiments"
experiment_folder = 'experiement_1' # if None a folder with time stamp will be created
experiment_description = "Simple net model with multiple seeds"

TPS921_experiment = Experiment(title='TPS-9-21',
                             description=experiment_description,
                             stack=stack,
                             model_zoo=model_zoo,
                             main_folder=f'{os.getcwd()}{os.sep}{experiments_folder}')

# select (and create if needed) the folder receiving the results of this run
TPS921_experiment.join_folder(experiment_folder)

# train the stack on plain numpy arrays; the result holds the test
# predictions of the last level
results = TPS921_experiment.run(X_train=X_train_.values,
                     y=y.values, 
                     X_test=X_test_.values,
                     train_idxs = X_train_.index,
                     test_idxs = X_test_.index)

In [None]:
# final results
results.head(10)

### Submit the results

In [None]:
# the last column of the last level's test predictions is submitted
predictions = results.iloc[:, -1].values

In [None]:
# Save the predictions to a CSV file.
# The prediction column carries the name of the competition target, 'claim'
# (the same column that is used as target in the training data).
output = pd.DataFrame({'id': X_test.index,
                       'claim': predictions})
output.to_csv('submission.csv', index=False)

In [None]:
# results 
output.head(20)