## Skorch randomized hill climbing (RHC) network classification example

In [None]:
import subprocess
import sys

# Installation on Google Colab
try:
    # Imported only to detect the Colab runtime; the module itself is not used.
    import google.colab  # noqa: F401
except ImportError:
    # Not running on Colab: assume the dependencies are already installed.
    pass
else:
    # Use the interpreter that runs this kernel (sys.executable) so the packages
    # are installed into the same environment the notebook imports from.
    # pyperch is included because the import cell below needs it.
    # check=True surfaces a failed install here rather than as a later ImportError.
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', 'skorch', 'torch', 'pyperch'],
        check=True,
    )

In [1]:
import numpy as np
from sklearn.datasets import make_classification
import torch
from torch import nn
from skorch import NeuralNetClassifier
from skorch import NeuralNet
from pyperch.neural.rhc_nn import RHCModule  
from pyperch.utils.decorators import add_to
from skorch.dataset import unpack_data
import copy

In [2]:
# Synthetic classification data: 1000 samples, 20 features, 10 of them informative.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    random_state=0,
)
# Cast features to float32 and labels to int64 before handing them to the network.
X, y = X.astype(np.float32), y.astype(np.int64)

# Network dimensions: inputs match the 20 features, outputs match the 2 classes.
input_dim, output_dim, num_units = 20, 2, 10

In [3]:
# Seed torch's global RNG so the initial layer weights created below (and other
# torch-based randomness such as training-data shuffling) repeat on a fresh
# Restart & Run All.
# NOTE(review): the RHC step itself may draw from a different RNG -- confirm in pyperch.
torch.manual_seed(0)

# Pass num_units explicitly so the hidden-layer width configured alongside the
# data is actually used instead of silently falling back to the module default.
rhc_module = RHCModule(
    input_dim=input_dim,
    output_dim=output_dim,
    num_units=num_units,
)

# NOTE(review): skorch appears to reuse an already-instantiated module as-is when
# no module__* parameters are set, so repeated fit() calls on this net may
# continue from previously trained weights -- confirm against the skorch docs.
net = NeuralNetClassifier(
    rhc_module,
    max_epochs=20,
    lr=0.1,
    # Shuffle training data on each epoch
    iterator_train__shuffle=True,
)

## `train_step_single` override: register the RHC training step and disable backpropagation

In [4]:
@add_to(NeuralNet)
def train_step_single(self, batch, **fit_params):
    """Run one RHC training step on a single batch, without backpropagation.

    The forward pass (``self.infer``) and the loss (``self.get_loss``) are
    computed for the batch, but ``loss.backward()`` is never called. The
    update is delegated to the module's ``run_rhc_single_step`` instead.
    The ``add_to(NeuralNet)`` decorator is what registers this function on
    ``NeuralNet`` (presumably as a method replacement -- see pyperch).

    Parameters
    ----------
    batch
        A batch that ``unpack_data`` can split into inputs and targets.
    **fit_params
        Extra arguments passed on to ``self.infer`` and to
        ``run_rhc_single_step``.

    Returns
    -------
    dict
        ``'loss'``: the loss computed for this batch (before the RHC step);
        ``'y_pred'``: the predictions that loss was computed from.
    """
    self._set_training(True)
    inputs, targets = unpack_data(batch)
    predictions = self.infer(inputs, **fit_params)
    batch_loss = self.get_loss(predictions, targets, X=inputs, training=True)
    # Backprop is intentionally skipped (no loss.backward()); the custom RHC
    # step receives the net itself, the loss and the batch data.
    self.module_.run_rhc_single_step(self, batch_loss, inputs, targets, **fit_params)
    return dict(loss=batch_loss, y_pred=predictions)

In [5]:
# Train on the full dataset; as the cell's last expression, the net returned by
# fit() is displayed below the training log.
net.fit(X=X, y=y)

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m7.1045[0m       [32m0.5000[0m        [35m7.9712[0m  0.0137
      2        7.9565       0.5000        7.9712  0.0171
      3        7.8915       0.5000        7.9712  0.0156
      4        7.9712       0.5000        7.9712  0.0189
      5        7.9122       0.4950        8.0509  0.0214
      6        8.2142       [32m0.5050[0m        [35m7.8915[0m  0.0139
      7        8.4744       0.5000        7.9712  0.0164
      8        8.3728       [32m0.5100[0m        [35m7.8118[0m  0.0221
      9        8.2205       0.5000        7.9712  0.0214
     10        8.6205       0.4800        8.2900  0.0142
     11        7.9852       [32m0.5150[0m        [35m7.7292[0m  0.0191
     12        8.1353       0.4500        8.7683  0.0161
     13        7.9666       [32m0.5400[0m        [35m7.3335[0m  0.0133
     14        8.2472       0.4450        8.84

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=RHCModule(
    (dense0): Linear(in_features=20, out_features=10, bias=True)
    (nonlin): ReLU()
    (dropout): Dropout(p=0.5, inplace=False)
    (dense1): Linear(in_features=10, out_features=10, bias=True)
    (output): Linear(in_features=10, out_features=2, bias=True)
    (softmax): Softmax(dim=-1)
  ),
)

## Using an sklearn pipeline with randomized optimization (RO)

In [6]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Standardize the features first, then pass them to the skorch net.
pipe = Pipeline(steps=[
    ('scale', StandardScaler()),
    ('net', net),
])

# Pipeline.fit returns the pipeline itself, so fitting and predicting are chained.
y_proba = pipe.fit(X, y).predict_proba(X)

Re-initializing module.
Re-initializing criterion.
Re-initializing optimizer.
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m8.3720[0m       [32m0.4900[0m        [35m8.1306[0m  0.0160
      2        [36m8.1358[0m       [32m0.5050[0m        [35m7.8334[0m  0.0197
      3        [36m7.9327[0m       0.5050        7.8915  0.0206
      4        8.1904       [32m0.5100[0m        [35m7.8118[0m  0.0175
      5        8.3299       0.4850        8.2103  0.0146
      6        7.9521       0.4800        8.2900  0.0148
      7        8.1039       0.4650        8.5292  0.0177
      8        8.4495       0.4850        8.2103  0.0190
      9        [36m7.6125[0m       0.4950        8.0509  0.0133
     10        7.6523       0.5050        7.8915  0.0174
     11        8.0110       0.4500        8.7683  0.0184
     12        7.9513       0.4400        8.9277  0.0178
     13        8.1306       0.4550

## Using sklearn grid search with randomized optimization (RO)

In [7]:
from sklearn.model_selection import GridSearchCV

# deactivate skorch-internal train-valid split and verbose logging
net.set_params(train_split=False, verbose=0)

# Candidate values for each hyperparameter; the `module__` prefix addresses
# parameters of the wrapped module.
# can add additional RHC specific params for grid_search here
params = dict(
    lr=[0.01, 0.02],
    max_epochs=[10, 20],
    module__num_units=[10, 20],
    module__input_dim=[20],
    module__output_dim=[2],
)

# refit=False: only the cross-validated scores are needed, not a final refit model.
gs = GridSearchCV(
    estimator=net,
    param_grid=params,
    scoring='accuracy',
    cv=3,
    refit=False,
    verbose=2,
)
gs.fit(X, y)

print("best score: {:.3f}, best params: {}".format(gs.best_score_, gs.best_params_))

Fitting 3 folds for each of 8 candidates, totalling 24 fits
[CV] END lr=0.01, max_epochs=10, module__input_dim=20, module__num_units=10, module__output_dim=2; total time=   0.1s
[CV] END lr=0.01, max_epochs=10, module__input_dim=20, module__num_units=10, module__output_dim=2; total time=   0.1s
[CV] END lr=0.01, max_epochs=10, module__input_dim=20, module__num_units=10, module__output_dim=2; total time=   0.1s
[CV] END lr=0.01, max_epochs=10, module__input_dim=20, module__num_units=20, module__output_dim=2; total time=   0.1s
[CV] END lr=0.01, max_epochs=10, module__input_dim=20, module__num_units=20, module__output_dim=2; total time=   0.1s
[CV] END lr=0.01, max_epochs=10, module__input_dim=20, module__num_units=20, module__output_dim=2; total time=   0.1s
[CV] END lr=0.01, max_epochs=20, module__input_dim=20, module__num_units=10, module__output_dim=2; total time=   0.2s
[CV] END lr=0.01, max_epochs=20, module__input_dim=20, module__num_units=10, module__output_dim=2; total time=   0