In [20]:
import numpy as np
import scipy as sp
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [34]:
from sklearn.model_selection import train_test_split

In [35]:
from theano import tensor as T

from lasagne.layers import InputLayer, DenseLayer, DropoutLayer
from lasagne.nonlinearities import sigmoid
from lasagne.updates import nesterov_momentum
from lasagne.objectives import binary_crossentropy

from nolearn.lasagne import NeuralNet, TrainSplit

In [23]:
def load_fulldataset(path='data_extracted/cs-training-prepared.csv',
                     target_var='SeriousDlqin2yrs'):
    """Read the prepared training CSV and split it into features and target.

    Every column that contains at least one missing value is dropped right
    after loading, so neither returned object contains NaNs.

    Parameters
    ----------
    path : str
        CSV file to read. Defaults to the prepared training set used by
        this notebook.
    target_var : str
        Name of the label column.

    Returns
    -------
    (X, y) : tuple
        ``X`` is a DataFrame holding every remaining column except
        ``target_var``; ``y`` is the ``target_var`` column as a Series.

    Raises
    ------
    KeyError
        If ``target_var`` is not a column of the loaded frame (this includes
        the case where it was dropped for containing missing values).
    """
    # NaN-bearing columns are removed once, up front; any column subset taken
    # afterwards is NaN-free, so no second dropna is needed.
    df = pd.read_csv(path).dropna(axis=1)

    X = df.drop([target_var], axis=1)
    y = df[target_var]

    print('DataFrame shape: %d x %d' % X.shape)

    return (X, y)

In [24]:
def load_dataset(test_size=0.2, val_size=0.2, random_state=None):
    """Load the full dataset and split it into train / validation / test parts.

    The split is done in two steps: the test part is carved out of the whole
    dataset first, then the validation part is carved out of what remains.
    ``val_size`` is therefore a fraction of the post-test remainder, not of
    the whole dataset.

    Parameters
    ----------
    test_size : float
        Passed as ``test_size`` to the first ``train_test_split`` call.
    val_size : float
        Passed as ``test_size`` to the second ``train_test_split`` call,
        which only sees the data left after the test split.
    random_state : int, RandomState instance or None
        Forwarded to both ``train_test_split`` calls. The default ``None``
        keeps the previous unseeded behaviour; pass an int to obtain the
        same partitions on every run.

    Returns
    -------
    tuple
        ``(X_train, X_val, X_test, y_train, y_val, y_test)``
    """
    X, y = load_fulldataset()

    # Step 1: hold out the test set from the complete data.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state)

    # Step 2: hold out the validation set from the remaining training data.
    X_train, X_val, y_train, y_val = train_test_split(
        X_train, y_train, test_size=val_size, random_state=random_state)

    return (X_train, X_val, X_test, y_train, y_val, y_test)

In [25]:
# Manual train/validation/test split, currently disabled:
# X_train, X_val, X_test, y_train, y_val, y_test = load_dataset()
# The whole prepared dataset is loaded instead; the nets fitted below are
# each configured with their own TrainSplit(eval_size=0.4).
X, y = load_fulldataset()

DataFrame shape: 150000 x 10


In [39]:
# Convert to NumPy arrays: float32 features and int32 labels. The labels are
# reshaped to a single column, matching output_num_units=1 and
# y_tensor_type=T.imatrix in the nets defined below.
# NOTE(review): no feature scaling is applied in the cells visible here; the
# first-epoch training losses logged below (e.g. 5.05, 41.89) dwarf those of
# later epochs, which may point to unscaled inputs -- TODO confirm whether the
# prepared CSV is already normalized.
X = np.asarray(X, dtype=np.float32)
y = np.asarray(y, dtype=np.int32).reshape(-1, 1)

In [26]:
# Width of the network input: one unit per feature column of X.
num_features = np.shape(X)[1]

In [41]:
# Baseline: two 80-unit hidden layers, each followed by dropout with p=0.1,
# feeding a single sigmoid output unit trained with binary cross-entropy.
net0 = NeuralNet(
    layers=[
        ('input', InputLayer),
        ('hidden1', DenseLayer),
        ('dropout1', DropoutLayer),
        ('hidden2', DenseLayer),
        ('dropout2', DropoutLayer),
        ('output', DenseLayer),
    ],
    # Per-layer hyper-parameters
    input_shape=(None, num_features),
    hidden1_num_units=80,
    dropout1_p=0.1,
    hidden2_num_units=80,
    dropout2_p=0.1,
    output_num_units=1,
    output_nonlinearity=sigmoid,
    # Parameter updates: Nesterov momentum
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9,
    # Objective: one sigmoid output scored with binary cross-entropy
    regression=True,
    y_tensor_type=T.imatrix,
    objective_loss_function=binary_crossentropy,
    # Training loop: validation data comes from TrainSplit(eval_size=0.4)
    train_split=TrainSplit(eval_size=0.4),
    max_epochs=20,
    verbose=10,
)

net0.fit(X, y)

# Neural Network with 7441 learnable parameters

## Layer information

  #  name        size
---  --------  ------
  0  input         10
  1  hidden1       80
  2  dropout1      80
  3  hidden2       80
  4  dropout2      80
  5  output         1

  epoch    trn loss    val loss    trn/val  dur
-------  ----------  ----------  ---------  -----
      1     [36m5.04915[0m     [32m0.24667[0m   20.46894  4.15s
      2     [36m0.24665[0m     [32m0.24655[0m    1.00040  4.02s
      3     0.24669     [32m0.24649[0m    1.00081  3.44s
      4     [36m0.24656[0m     0.24668    0.99951  3.32s
      5     [36m0.24626[0m     [32m0.24646[0m    0.99919  3.72s
      6     [36m0.24610[0m     0.24670    0.99756  4.17s
      7     0.24622     0.24660    0.99847  4.09s
      8     0.24628     0.24657    0.99884  3.52s
      9     [36m0.24593[0m     [32m0.24624[0m    0.99876  3.43s
     10     0.24623     [32m0.24622[0m    1.00005  3.55s
     11     0.24596     [32m0.24619[0m    0.

NeuralNet(X_tensor_type=None,
     batch_iterator_test=<nolearn.lasagne.base.BatchIterator object at 0x7f3b2f36ea90>,
     batch_iterator_train=<nolearn.lasagne.base.BatchIterator object at 0x7f3b2f48db10>,
     check_input=True, custom_scores=None, dropout1_p=0.1, dropout2_p=0.1,
     hidden1_num_units=80, hidden2_num_units=80, input_shape=(None, 10),
     layers=[('input', <class 'lasagne.layers.input.InputLayer'>), ('hidden1', <class 'lasagne.layers.dense.DenseLayer'>), ('dropout1', <class 'lasagne.layers.noise.DropoutLayer'>), ('hidden2', <class 'lasagne.layers.dense.DenseLayer'>), ('dropout2', <class 'lasagne.layers.noise.DropoutLayer'>), ('output', <class 'lasagne.layers.dense.DenseLayer'>)],
     loss=None, max_epochs=20, more_params={},
     objective=<function objective at 0x7f3b2f2f7cf8>,
     objective_loss_function=<function binary_crossentropy at 0x7f3b2f41ecf8>,
     on_batch_finished=[],
     on_epoch_finished=[<nolearn.lasagne.handlers.PrintLog instance at 0x7f3b29b776c

In [44]:
# Wider variant: two 120-unit hidden layers, each followed by dropout with
# p=0.25; output, optimiser and training settings as in the other nets.
net1 = NeuralNet(
    layers=[
        ('input', InputLayer),
        ('hidden1', DenseLayer),
        ('dropout1', DropoutLayer),
        ('hidden2', DenseLayer),
        ('dropout2', DropoutLayer),
        ('output', DenseLayer),
    ],
    # Per-layer hyper-parameters
    input_shape=(None, num_features),
    hidden1_num_units=120,
    dropout1_p=0.25,
    hidden2_num_units=120,
    dropout2_p=0.25,
    output_num_units=1,
    output_nonlinearity=sigmoid,
    # Parameter updates: Nesterov momentum
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9,
    # Objective: one sigmoid output scored with binary cross-entropy
    regression=True,
    y_tensor_type=T.imatrix,
    objective_loss_function=binary_crossentropy,
    # Training loop: validation data comes from TrainSplit(eval_size=0.4)
    train_split=TrainSplit(eval_size=0.4),
    max_epochs=20,
    verbose=10,
)

net1.fit(X, y)

# Neural Network with 15961 learnable parameters

## Layer information

  #  name        size
---  --------  ------
  0  input         10
  1  hidden1      120
  2  dropout1     120
  3  hidden2      120
  4  dropout2     120
  5  output         1

  epoch    trn loss    val loss    trn/val  dur
-------  ----------  ----------  ---------  -----
      1    [36m41.89387[0m     [32m0.24579[0m  170.44244  7.04s
      2     [36m0.24531[0m     [32m0.24568[0m    0.99848  6.51s
      3     [36m0.24523[0m     [32m0.24566[0m    0.99828  6.43s
      4     [36m0.24523[0m     0.24567    0.99821  8.26s
      5     0.24523     0.24566    0.99823  7.15s
      6     0.24523     0.24567    0.99822  6.95s
      7     0.24524     [32m0.24565[0m    0.99832  6.75s
      8     0.24525     0.24567    0.99831  6.93s
      9     0.24526     0.24566    0.99838  6.60s
     10     0.24525     0.24566    0.99833  7.40s
     11     [36m0.24522[0m     0.24567    0.99819  8.00s
     12     0.24527   

NeuralNet(X_tensor_type=None,
     batch_iterator_test=<nolearn.lasagne.base.BatchIterator object at 0x7f3b2f36ea90>,
     batch_iterator_train=<nolearn.lasagne.base.BatchIterator object at 0x7f3b2f48db10>,
     check_input=True, custom_scores=None, dropout1_p=0.25,
     dropout2_p=0.25, hidden1_num_units=120, hidden2_num_units=120,
     input_shape=(None, 10),
     layers=[('input', <class 'lasagne.layers.input.InputLayer'>), ('hidden1', <class 'lasagne.layers.dense.DenseLayer'>), ('dropout1', <class 'lasagne.layers.noise.DropoutLayer'>), ('hidden2', <class 'lasagne.layers.dense.DenseLayer'>), ('dropout2', <class 'lasagne.layers.noise.DropoutLayer'>), ('output', <class 'lasagne.layers.dense.DenseLayer'>)],
     loss=None, max_epochs=20, more_params={},
     objective=<function objective at 0x7f3b2f2f7cf8>,
     objective_loss_function=<function binary_crossentropy at 0x7f3b2f41ecf8>,
     on_batch_finished=[],
     on_epoch_finished=[<nolearn.lasagne.handlers.PrintLog instance at 0x7f

In [47]:
# Repeat of the net0 configuration (every argument is the same): two 80-unit
# hidden layers, dropout with p=0.1 after each, single sigmoid output.
net2 = NeuralNet(
    layers=[
        ('input', InputLayer),
        ('hidden1', DenseLayer),
        ('dropout1', DropoutLayer),
        ('hidden2', DenseLayer),
        ('dropout2', DropoutLayer),
        ('output', DenseLayer),
    ],
    # Per-layer hyper-parameters
    input_shape=(None, num_features),
    hidden1_num_units=80,
    dropout1_p=0.1,
    hidden2_num_units=80,
    dropout2_p=0.1,
    output_num_units=1,
    output_nonlinearity=sigmoid,
    # Parameter updates: Nesterov momentum
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9,
    # Objective: one sigmoid output scored with binary cross-entropy
    regression=True,
    y_tensor_type=T.imatrix,
    objective_loss_function=binary_crossentropy,
    # Training loop: validation data comes from TrainSplit(eval_size=0.4)
    train_split=TrainSplit(eval_size=0.4),
    max_epochs=20,
    verbose=10,
)

net2.fit(X, y)

# Neural Network with 7441 learnable parameters

## Layer information

  #  name        size
---  --------  ------
  0  input         10
  1  hidden1       80
  2  dropout1      80
  3  hidden2       80
  4  dropout2      80
  5  output         1

  epoch    trn loss    val loss    trn/val  dur
-------  ----------  ----------  ---------  -----
      1     [36m0.99642[0m     [32m0.24622[0m    4.04689  3.38s
      2     [36m0.24764[0m     0.24623    1.00575  3.37s
      3     [36m0.24674[0m     [32m0.24612[0m    1.00253  3.64s
      4     0.24686     0.24641    1.00185  3.37s
      5     0.24690     0.24660    1.00120  3.39s
      6     [36m0.24634[0m     0.24644    0.99957  3.39s
      7     [36m0.24622[0m     [32m0.24593[0m    1.00119  3.41s
      8     [36m0.24601[0m     [32m0.24590[0m    1.00042  3.38s
      9     0.24612     0.24604    1.00032  3.50s
     10     [36m0.24576[0m     0.24592    0.99932  3.52s
     11     0.24585     [32m0.24585[0m    1.00001  3.

NeuralNet(X_tensor_type=None,
     batch_iterator_test=<nolearn.lasagne.base.BatchIterator object at 0x7f3b2f36ea90>,
     batch_iterator_train=<nolearn.lasagne.base.BatchIterator object at 0x7f3b2f48db10>,
     check_input=True, custom_scores=None, dropout1_p=0.1, dropout2_p=0.1,
     hidden1_num_units=80, hidden2_num_units=80, input_shape=(None, 10),
     layers=[('input', <class 'lasagne.layers.input.InputLayer'>), ('hidden1', <class 'lasagne.layers.dense.DenseLayer'>), ('dropout1', <class 'lasagne.layers.noise.DropoutLayer'>), ('hidden2', <class 'lasagne.layers.dense.DenseLayer'>), ('dropout2', <class 'lasagne.layers.noise.DropoutLayer'>), ('output', <class 'lasagne.layers.dense.DenseLayer'>)],
     loss=None, max_epochs=20, more_params={},
     objective=<function objective at 0x7f3b2f2f7cf8>,
     objective_loss_function=<function binary_crossentropy at 0x7f3b2f41ecf8>,
     on_batch_finished=[],
     on_epoch_finished=[<nolearn.lasagne.handlers.PrintLog instance at 0x7f3b24c9b7a

In [48]:
# Deeper variant: three 80-unit hidden layers, each followed by dropout with
# p=0.1, feeding a single sigmoid output unit.
net3 = NeuralNet(
    layers=[
        ('input', InputLayer),
        ('hidden1', DenseLayer),
        ('dropout1', DropoutLayer),
        ('hidden2', DenseLayer),
        ('dropout2', DropoutLayer),
        ('hidden3', DenseLayer),
        ('dropout3', DropoutLayer),
        ('output', DenseLayer),
    ],
    # Per-layer hyper-parameters
    input_shape=(None, num_features),
    hidden1_num_units=80,
    dropout1_p=0.1,
    hidden2_num_units=80,
    dropout2_p=0.1,
    hidden3_num_units=80,
    dropout3_p=0.1,
    output_num_units=1,
    output_nonlinearity=sigmoid,
    # Parameter updates: Nesterov momentum
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9,
    # Objective: one sigmoid output scored with binary cross-entropy
    regression=True,
    y_tensor_type=T.imatrix,
    objective_loss_function=binary_crossentropy,
    # Training loop: validation data comes from TrainSplit(eval_size=0.4)
    train_split=TrainSplit(eval_size=0.4),
    max_epochs=20,
    verbose=10,
)

net3.fit(X, y)

# Neural Network with 13921 learnable parameters

## Layer information

  #  name        size
---  --------  ------
  0  input         10
  1  hidden1       80
  2  dropout1      80
  3  hidden2       80
  4  dropout2      80
  5  hidden3       80
  6  dropout3      80
  7  output         1

  epoch    trn loss    val loss    trn/val  dur
-------  ----------  ----------  ---------  -----
      1     [36m0.78369[0m     [32m0.21831[0m    3.58985  6.32s
      2     [36m0.21713[0m     0.21848    0.99383  7.12s
      3     [36m0.21290[0m     [32m0.20926[0m    1.01740  5.83s
      4     [36m0.20996[0m     [32m0.20532[0m    1.02262  5.86s
      5     0.21073     [32m0.20424[0m    1.03176  5.82s
      6     0.21104     0.20496    1.02967  6.01s
      7     0.21058     0.20433    1.03059  5.69s
      8     0.21023     0.21159    0.99359  6.03s
      9     [36m0.20974[0m     [32m0.20386[0m    1.02885  5.92s
     10     [36m0.20843[0m     [32m0.20381[0m    1.02266  5.98s
 

NeuralNet(X_tensor_type=None,
     batch_iterator_test=<nolearn.lasagne.base.BatchIterator object at 0x7f3b2f36ea90>,
     batch_iterator_train=<nolearn.lasagne.base.BatchIterator object at 0x7f3b2f48db10>,
     check_input=True, custom_scores=None, dropout1_p=0.1, dropout2_p=0.1,
     dropout3_p=0.1, hidden1_num_units=80, hidden2_num_units=80,
     hidden3_num_units=80, input_shape=(None, 10),
     layers=[('input', <class 'lasagne.layers.input.InputLayer'>), ('hidden1', <class 'lasagne.layers.dense.DenseLayer'>), ('dropout1', <class 'lasagne.layers.noise.DropoutLayer'>), ('hidden2', <class 'lasagne.layers.dense.DenseLayer'>), ('dropout2', <class 'lasagne.layers.noise.DropoutLayer'>), ('hidden3', <class 'lasagne.layers.dense.DenseLayer'>), ('dropout3', <class 'lasagne.layers.noise.DropoutLayer'>), ('output', <class 'lasagne.layers.dense.DenseLayer'>)],
     loss=None, max_epochs=20, more_params={},
     objective=<function objective at 0x7f3b2f2f7cf8>,
     objective_loss_function=<fu

In [49]:
# Deepest variant: four 80-unit hidden layers, each followed by dropout with
# p=0.1, feeding a single sigmoid output unit.
net4 = NeuralNet(
    layers=[
        ('input', InputLayer),
        ('hidden1', DenseLayer),
        ('dropout1', DropoutLayer),
        ('hidden2', DenseLayer),
        ('dropout2', DropoutLayer),
        ('hidden3', DenseLayer),
        ('dropout3', DropoutLayer),
        ('hidden4', DenseLayer),
        ('dropout4', DropoutLayer),
        ('output', DenseLayer),
    ],
    # Per-layer hyper-parameters
    input_shape=(None, num_features),
    hidden1_num_units=80,
    dropout1_p=0.1,
    hidden2_num_units=80,
    dropout2_p=0.1,
    hidden3_num_units=80,
    dropout3_p=0.1,
    hidden4_num_units=80,
    dropout4_p=0.1,
    output_num_units=1,
    output_nonlinearity=sigmoid,
    # Parameter updates: Nesterov momentum
    update=nesterov_momentum,
    update_learning_rate=0.01,
    update_momentum=0.9,
    # Objective: one sigmoid output scored with binary cross-entropy
    regression=True,
    y_tensor_type=T.imatrix,
    objective_loss_function=binary_crossentropy,
    # Training loop: validation data comes from TrainSplit(eval_size=0.4)
    train_split=TrainSplit(eval_size=0.4),
    max_epochs=20,
    verbose=10,
)

net4.fit(X, y)

# Neural Network with 20401 learnable parameters

## Layer information

  #  name        size
---  --------  ------
  0  input         10
  1  hidden1       80
  2  dropout1      80
  3  hidden2       80
  4  dropout2      80
  5  hidden3       80
  6  dropout3      80
  7  hidden4       80
  8  dropout4      80
  9  output         1

  epoch    trn loss    val loss    trn/val  dur
-------  ----------  ----------  ---------  -----
      1     [36m0.27081[0m     [32m0.21032[0m    1.28759  9.80s
      2     [36m0.21147[0m     [32m0.20772[0m    1.01803  8.66s
      3     [36m0.21035[0m     0.20818    1.01042  9.82s
      4     0.21318     0.21130    1.00890  8.63s
      5     [36m0.20895[0m     0.20806    1.00431  8.70s
      6     [36m0.20870[0m     [32m0.20756[0m    1.00549  8.65s
      7     0.20903     [32m0.20379[0m    1.02569  8.73s
      8     [36m0.20638[0m     [32m0.20346[0m    1.01434  8.77s
      9     [36m0.20474[0m     0.20386    1.00436  8.64s
     10

NeuralNet(X_tensor_type=None,
     batch_iterator_test=<nolearn.lasagne.base.BatchIterator object at 0x7f3b2f36ea90>,
     batch_iterator_train=<nolearn.lasagne.base.BatchIterator object at 0x7f3b2f48db10>,
     check_input=True, custom_scores=None, dropout1_p=0.1, dropout2_p=0.1,
     dropout3_p=0.1, dropout4_p=0.1, hidden1_num_units=80,
     hidden2_num_units=80, hidden3_num_units=80, hidden4_num_units=80,
     input_shape=(None, 10),
     layers=[('input', <class 'lasagne.layers.input.InputLayer'>), ('hidden1', <class 'lasagne.layers.dense.DenseLayer'>), ('dropout1', <class 'lasagne.layers.noise.DropoutLayer'>), ('hidden2', <class 'lasagne.layers.dense.DenseLayer'>), ('dropout2', <class 'lasagne.layers.noise.DropoutLayer'>), ('hidden...<class 'lasagne.layers.noise.DropoutLayer'>), ('output', <class 'lasagne.layers.dense.DenseLayer'>)],
     loss=None, max_epochs=20, more_params={},
     objective=<function objective at 0x7f3b2f2f7cf8>,
     objective_loss_function=<function binary_c