In [8]:
import numpy as np
from sklearn.datasets import make_classification

import torch
import torch.nn as nn
import torch.nn.functional as F

In [2]:
# This is a toy dataset for binary classification, 1000 data points with 20 features each
X, y = make_classification(1000, 20, n_informative=10, random_state=0)
X, y = X.astype(np.float32), y.astype(np.int64)

In [3]:
X.shape, y.shape, y.mean()

((1000, 20), (1000,), 0.5)

In [None]:
class MLP:
    def __init__(self, 
                 X_train, X_test, 
                 y_train, y_test, 
                 name, 
                 topology, 
                 activation='relu', 
                 max_iter=100):
        super(MLP, self).__init__()
        
        self.hidden_layer_sizes = hidden_layer_sizes
        self.max_iter = max_iter
        self.activation = activation
        super(MLP, self).__init__(X_train, X_test, y_train, y_test, name)

    def evalAlgo(self):
        # print("MLPRegressor Regressor w/ Random Parameter Search")
        # mlprS = MLPRegressor(hidden_layer_sizes=self.hidden_layer_sizes, max_iter=self.max_iter)
        # param_dists = dict(
        # #activation = ['identity', 'logistic', 'tanh', 'relu'],
        # solver = ['lbfgs', 'sgd', 'adam'],
        # learning_rate = ['constant', 'invscaling', 'adaptive'],
        # momentum = uniform(0,1),
        # nesterovs_momentum = [True, False],
        # validation_fraction = uniform(0,1),
        # beta_1 = uniform(0,0.999),
        # beta_2 = uniform(0,0.999),
        # epsilon = uniform(0,0.999)
        # )
        # bmodel=self.searchAndEvalRegressor(mlprS,param_dists)
        # print('**** Regressor ****')
        # #print('weights: ', bmodel.coefs_)
        # #print('intercepts: ', bmodel.intercepts_)
        # #print('act: ', bmodel.activation)
        # #print('outact: ', bmodel.out_activation_)

        # print("")
        # print("MLPRegressor w/o Random Parameter Search")
        # mlpr0 = MLPRegressor(hidden_layer_sizes=self.hidden_layer_sizes, max_iter=self.max_iter)
        # self.evalRegressor(mlpr0)

        # print("")

        print("MLP w/o Random Parameter Search")
        mlpc0 = MLPClassifier(hidden_layer_sizes=self.hidden_layer_sizes, max_iter=self.max_iter)
        self.evalClassifier(mlpc0)
        print("MLP w/ Random Parameter Search")
        mlpcS = MLPClassifier(hidden_layer_sizes=self.hidden_layer_sizes, max_iter=self.max_iter)
        param_dists = dict(
            # activation = ['identity', 'logistic', 'tanh', 'relu'],
            solver=['lbfgs', 'sgd', 'adam'],
            learning_rate=['constant', 'invscaling', 'adaptive'],
            momentum=uniform(0, 1),
            nesterovs_momentum=[True, False],
            validation_fraction=uniform(0, 1),
            beta_1=uniform(0, 0.999),
            beta_2=uniform(0, 0.999),
            epsilon=uniform(0, 0.999)
        )
        bmodel, params, accuracy = self.searchAndEvalClassifier(mlpcS, param_dists)
        return bmodel, params, accuracy

    def evalClassifier(self, clf):
        np.set_printoptions(threshold=np.inf)
        search = clf.fit(self.X_train, self.y_train)
        pred = clf.predict(self.X_test)

        print("mean squared error: ", mean_squared_error(pred, self.y_test))
        print("accuracy score: ", accuracy_score(pred, self.y_test))
        print(classification_report(self.y_test, pred))
        bmodel = clf
        return pred

    def searchAndEvalClassifier(self, clf, param_dists):
        classifier = RandomizedSearchCV(clf, param_dists, random_state=0, cv=5, n_iter=600, n_jobs=self.njobs)
        # classifier = GridSearchCV(dt, param_dists, scoring='f1_micro', cv=5)

        search = classifier.fit(self.X_train, self.y_train)
        pred = classifier.predict(self.X_test)

        print("best parameters found: ", search.best_params_)
        print("mean squared error: ", mean_squared_error(pred, self.y_test))
        print("accuracy score: ", accuracy_score(pred, self.y_test))
        print(classification_report(self.y_test, pred))

        dummy = DummyClassifier(strategy='most_frequent').fit(self.X_train, self.y_train)
        print("Baseline_Accuracy: {}".format(accuracy_score(dummy.predict(self.X_test), self.y_test)))
        # print('**** Classifier ****')
        bmodel = classifier.best_estimator_

        pred = bmodel.predict(self.X_test)
        accuracy = accuracy_score(pred, self.y_test)
        print("accuracy score: ", accuracy)
        return bmodel, search.best_params_, accuracy

In [9]:
class MLP(nn.Module):
    def __init__(
            self,
            num_units=10,
            nonlin=F.relu,
            dropout=0.5,
    ):
        super(MLP, self).__init__()
        self.num_units = num_units
        self.nonlin = nonlin
        self.dropout = dropout

        self.dense0 = nn.Linear(20, num_units)
        self.nonlin = nonlin
        self.dropout = nn.Dropout(dropout)
        self.dense1 = nn.Linear(num_units, 10)
        self.output = nn.Linear(10, 2)

    def forward(self, X, **kwargs):
        X = self.nonlin(self.dense0(X))
        X = self.dropout(X)
        X = F.relu(self.dense1(X))
        X = F.softmax(self.output(X), dim=-1)
        return X

In [15]:
from skorch import NeuralNetClassifier

In [21]:
# custom optimizer to encapsulate Adam
def make_lookahead(parameters, optimizer_cls, k, alpha, **kwargs):
    optimizer = optimizer_cls(parameters, **kwargs)
    return Lookahead(optimizer=optimizer, k=k, alpha=alpha)

In [30]:
net = NeuralNetClassifier(
    MLP,
    max_epochs=20,
    # lr=0.1,
    criterion=torch.nn.NLLLoss,
    optimizer=torch.optim.Adam,
    optimizer__lr=1e-3,
    optimizer__weight_decay=1e-2,
#     device='cuda',  # uncomment this to train with CUDA
)

In [31]:
# Training the network
net.fit(X, y)

  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m0.6903[0m       [32m0.5250[0m        [35m0.6838[0m  0.0430
      2        [36m0.6831[0m       [32m0.5300[0m        [35m0.6814[0m  0.0387
      3        [36m0.6795[0m       [32m0.5550[0m        [35m0.6788[0m  0.0368
      4        [36m0.6751[0m       [32m0.5800[0m        [35m0.6758[0m  0.0301
      5        [36m0.6723[0m       0.5550        [35m0.6723[0m  0.0324
      6        [36m0.6639[0m       [32m0.6000[0m        [35m0.6684[0m  0.0335
      7        [36m0.6612[0m       [32m0.6100[0m        [35m0.6642[0m  0.0329
      8        [36m0.6534[0m       [32m0.6350[0m        [35m0.6596[0m  0.0389
      9        [36m0.6525[0m       [32m0.6550[0m        [35m0.6545[0m  0.0423
     10        [36m0.6453[0m       [32m0.6650[0m        [35m0.6489[0m  0.0361
     11        0.6481       [32m0.6800[0m        [35

<class 'skorch.classifier.NeuralNetClassifier'>[initialized](
  module_=MLP(
    (dense0): Linear(in_features=20, out_features=10, bias=True)
    (dropout): Dropout(p=0.5, inplace=False)
    (dense1): Linear(in_features=10, out_features=10, bias=True)
    (output): Linear(in_features=10, out_features=2, bias=True)
  ),
)

In [34]:
from sklearn.model_selection import GridSearchCV

In [32]:
params = {
    'lr': [0.05, 0.1],
    'module__num_units': [10, 20],
    'module__dropout': [0, 0.5],
}

In [35]:
gs = GridSearchCV(net, params, refit=False, cv=3, scoring='accuracy', verbose=2)

In [36]:
gs.fit(X, y)

Fitting 3 folds for each of 8 candidates, totalling 24 fits
  epoch    train_loss    valid_acc    valid_loss     dur
-------  ------------  -----------  ------------  ------
      1        [36m0.7032[0m       [32m0.4851[0m        [35m0.7007[0m  0.0319
      2        [36m0.6940[0m       [32m0.4925[0m        [35m0.6939[0m  0.0250
      3        [36m0.6861[0m       [32m0.5299[0m        [35m0.6875[0m  0.0190
      4        [36m0.6789[0m       [32m0.5821[0m        [35m0.6815[0m  0.0288
      5        [36m0.6718[0m       [32m0.6194[0m        [35m0.6756[0m  0.0221
      6        [36m0.6647[0m       [32m0.6269[0m        [35m0.6694[0m  0.0207
      7        [36m0.6576[0m       0.6269        [35m0.6630[0m  0.0243
      8        [36m0.6502[0m       [32m0.6343[0m        [35m0.6564[0m  0.0228
      9        [36m0.6426[0m       0.6343        [35m0.6496[0m  0.0212
     10        [36m0.6346[0m       [32m0.6642[0m        [35m0.6424[0m  0.0266
    

In [37]:
print(gs.best_score_, gs.best_params_)

0.7530134925344506 {'lr': 0.1, 'module__dropout': 0, 'module__num_units': 20}
