## Skorch randomized hill climbing (RHC) network classification example

In [None]:
import subprocess
import sys

# Installation on Google Colab: the import below only succeeds inside Colab,
# so any other environment falls through to the ImportError branch and skips
# the install entirely.
try:
    import google.colab  # imported only to detect the Colab runtime
    # Run pip through the interpreter that is executing this notebook so the
    # packages land in the kernel's environment (a bare 'python' on PATH may be
    # a different interpreter), and raise if pip exits with an error instead of
    # failing later with a confusing import error.
    # NOTE(review): pyperch is imported further down but is not installed
    # here -- confirm whether it should be added to this list.
    subprocess.run(
        [sys.executable, '-m', 'pip', 'install', 'skorch', 'torch'],
        check=True,
    )
except ImportError:
    # Not running on Colab: assume the dependencies are already installed.
    pass

In [1]:
import numpy as np
from sklearn.datasets import make_classification
import torch
from torch import nn
from skorch import NeuralNetClassifier
from skorch import NeuralNet
from pyperch.neural.rhc_nn import RHCModule  
from pyperch.utils.decorators import add_to
from skorch.dataset import unpack_data
import copy

In [2]:
# Problem dimensions shared by the cells that follow.
input_dim, output_dim, num_units = 20, 2, 10

# Synthetic classification data: 1000 samples, 20 features (10 informative),
# generated with a fixed random_state so the dataset is the same on every run.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    random_state=0,
)
# Cast features to float32 and labels to int64 before handing them to the net.
X, y = X.astype(np.float32), y.astype(np.int64)

In [3]:
# Hand skorch the module *class* plus its constructor arguments (module__ prefix)
# rather than an already-built instance. With an instance and no module__
# arguments, skorch reuses that very object on every fit(), so a second fit
# (e.g. inside the pipeline below) would continue from the previously trained
# weights instead of starting from a fresh module.
net = NeuralNetClassifier(
    RHCModule,
    module__input_dim=input_dim,
    module__output_dim=output_dim,
    # Apply the hidden-layer width chosen in the configuration cell.
    module__num_units=num_units,
    max_epochs=20,
    lr=0.1,
    # Shuffle training data on each epoch
    iterator_train__shuffle=True,
)

## `train_step_single` override: add the RHC training step and disable backprop

In [4]:
# Per the section heading, this installs the RHC replacement for skorch's
# train_step_single so training no longer relies on backprop.
# NOTE(review): the patching mechanism lives in pyperch and is not visible
# here -- confirm it affects the already-constructed `net`.
RHCModule.register_rhc_training_step()

In [5]:
# Train on the synthetic data. The per-epoch log includes validation metrics
# because skorch's internal train/valid split is still active at this point.
# Leaving the call as the last expression displays the fitted estimator's repr.
net.fit(X, y)

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m6.4767[0m       [32m0.4200[0m        [35m9.1663[0m  0.0113
      2        7.8079       [32m0.4850[0m        [35m8.2103[0m  0.0150
      3        7.9437       0.3750        9.9640  0.0162
      4        9.1252       0.4200        9.2466  0.0167
      5        9.1091       0.4350        9.0074  0.0182
      6        8.8317       0.4350        9.0074  0.0157
      7        8.8466       0.4200        9.2466  0.0156
      8        8.0310       [32m0.5150[0m        [35m7.7321[0m  0.0150
      9        7.8704       0.5050        7.8915  0.0178
     10        7.2548       [32m0.5550[0m        [35m7.0944[0m  0.0188
     11        7.3335       0.5100        7.8118  0.0182
     12        7.7688       0.5200        7.6523  0.0117
     13        7.2936       0.5200        7.6523  0.0133
     14        7.3282       0.5550        [35m7.0944[0m  0.011

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=RHCModule(
    (dense0): Linear(in_features=20, out_features=10, bias=True)
    (nonlin): ReLU()
    (dropout): Dropout(p=0.5, inplace=False)
    (dense1): Linear(in_features=10, out_features=10, bias=True)
    (output): Linear(in_features=10, out_features=2, bias=True)
    (softmax): Softmax(dim=-1)
  ),
)

## Using an sklearn pipeline with randomized optimization (RO)

In [6]:
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Standardize the features, then feed them to the skorch classifier; the step
# names ('scale', 'net') are what the parameter prefixes would refer to.
pipe = Pipeline(steps=[('scale', StandardScaler()), ('net', net)])

# fit() returns the pipeline itself, so the class probabilities for the
# training data can be requested in the same expression.
y_proba = pipe.fit(X, y).predict_proba(X)

Re-initializing module.
Re-initializing criterion.
Re-initializing optimizer.
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m8.0310[0m       [32m0.4600[0m        [35m8.6089[0m  0.0203
      2        8.4648       [32m0.4850[0m        [35m8.2103[0m  0.0237
      3        8.4893       [32m0.4900[0m        [35m8.1306[0m  0.0184
      4        [36m7.8317[0m       0.4650        8.4777  0.0148
      5        8.0509       0.4900        [35m8.1306[0m  0.0159
      6        8.2669       [32m0.5150[0m        [35m7.7321[0m  0.0160
      7        8.1904       [32m0.5350[0m        [35m7.4132[0m  0.0127
      8        8.2900       0.4900        8.1306  0.0184
      9        8.4122       0.5100        7.8118  0.0252
     10        8.1505       0.4700        8.4495  0.0276
     11        [36m7.7121[0m       0.5000        7.9712  0.0299
     12        8.0310       0.5200        7.6523  0.026

## Using sklearn grid search with randomized optimization (RO)

In [12]:
from sklearn.model_selection import GridSearchCV

# Deactivate the skorch-internal train/valid split and verbose logging:
# GridSearchCV below runs its own 3-fold cross-validation.
net.set_params(train_split=False, verbose=0)

# Module constructor arguments that every candidate needs. They are given as
# single-value lists so they are part of each parameter combination, and they
# reuse input_dim/output_dim from the setup cell instead of repeating the
# literals, so they cannot drift from the shape of X.
default_params = {
    'module__input_dim': [input_dim],
    'module__output_dim': [output_dim],
}

# Hyperparameters that are actually searched (2 x 2 x 2 = 8 candidates).
grid_search_params = {
    'lr': [0.01, 0.02],
    'max_epochs': [10, 20],
    'module__num_units': [10, 20],
    **default_params,
}

# refit=False: only report the best configuration; no final model is retrained
# on the full data.
gs = GridSearchCV(net, grid_search_params, refit=False, cv=3, scoring='accuracy', verbose=2)

gs.fit(X, y)
print("best score: {:.3f}, best params: {}".format(gs.best_score_, gs.best_params_))

Fitting 3 folds for each of 8 candidates, totalling 24 fits
[CV] END lr=0.01, max_epochs=10, module__input_dim=20, module__num_units=10, module__output_dim=2; total time=   0.1s
[CV] END lr=0.01, max_epochs=10, module__input_dim=20, module__num_units=10, module__output_dim=2; total time=   0.1s
[CV] END lr=0.01, max_epochs=10, module__input_dim=20, module__num_units=10, module__output_dim=2; total time=   0.1s
[CV] END lr=0.01, max_epochs=10, module__input_dim=20, module__num_units=20, module__output_dim=2; total time=   0.1s
[CV] END lr=0.01, max_epochs=10, module__input_dim=20, module__num_units=20, module__output_dim=2; total time=   0.1s
[CV] END lr=0.01, max_epochs=10, module__input_dim=20, module__num_units=20, module__output_dim=2; total time=   0.1s
[CV] END lr=0.01, max_epochs=20, module__input_dim=20, module__num_units=10, module__output_dim=2; total time=   0.1s
[CV] END lr=0.01, max_epochs=20, module__input_dim=20, module__num_units=10, module__output_dim=2; total time=   0