In [40]:
from lasagne import layers
from lasagne.updates import nesterov_momentum
from lasagne.nonlinearities import softmax
from nolearn.lasagne import NeuralNet

In [99]:
import pandas as pd
import numpy as np

In [100]:
# Load the glass dataset from a CSV file into a DataFrame.
# The path is relative, so it is resolved against the notebook's working directory.
glass_data = pd.read_csv('data/dati/glass.data.csv')

In [101]:
# Show the column labels of the freshly loaded frame.
glass_data.columns

Index([u'Id number', u'RI', u'Na', u'Mg', u'Al', u'Si', u'K', u'Ca', u'Ba',
       u'Fe', u'Type of glass'],
      dtype='object')

In [102]:
# Remove the 'Id number' column from the frame in place.
# Raises KeyError if the column is not present (e.g. when this cell is re-run).
del glass_data['Id number']

In [103]:
def normalize(col, df):
    """Min-max scale the listed columns of ``df`` to the range [0, 1], in place.

    Parameters
    ----------
    col : iterable of column labels
        Columns of ``df`` to rescale. Columns not listed are left untouched.
    df : pandas.DataFrame
        Frame to modify. Each listed column is overwritten with its scaled
        values.

    Returns
    -------
    None
        The frame is mutated; nothing is returned.

    Notes
    -----
    A constant column (maximum equal to minimum) is mapped to 0.0 instead of
    being turned into NaN by a 0/0 division. Missing values stay missing.
    """
    for name in col:
        lowest = df[name].min()
        span = df[name].max() - lowest
        centered = df[name] - lowest
        if span != 0:
            df[name] = centered / span
        else:
            # Every value equals the minimum: emit float zeros while keeping
            # any NaN entries as NaN.
            df[name] = centered * 0.0

In [104]:
# Rescale every column except the class label 'Type of glass'.
normalize(
    col=glass_data.columns.drop('Type of glass'),
    df=glass_data,
)

In [105]:
# Target: the class label column. Features: everything else.
y = glass_data['Type of glass']
X = glass_data.drop(labels='Type of glass', axis=1)

In [106]:
# List the distinct class labels present in the target column.
y.unique()

array([1, 2, 3, 5, 6, 7])

In [107]:
# Re-index the class labels onto the contiguous range 0..5.
# A label missing from the table raises KeyError, so re-running this cell on
# already-converted labels fails instead of silently producing wrong classes.
conversion_y = {label: index for index, label in enumerate([1, 2, 3, 5, 6, 7])}
y = y.map(lambda label: conversion_y[label])

In [108]:
# Check the distinct labels again after the remapping.
y.unique()

array([0, 1, 2, 3, 4, 5])

In [109]:
# size: number of feature columns (width of the network's input layer).
size = X.shape[-1]
# outputs: number of distinct classes (a binary problem would give 2).
outputs = y.unique().size

In [110]:
# Define (but do not yet train) the classifier: an input layer, one hidden
# dense layer and a dense output layer with a softmax non-linearity.
net1 = NeuralNet(
    layers=[  # three layers: one hidden layer
        ('input', layers.InputLayer),
        ('hidden', layers.DenseLayer),
        ('output', layers.DenseLayer),
        ],
    # layer parameters:
    input_shape=(None, size),  # second entry is the number of feature columns; `size` must already be defined
    hidden_num_units=10,  # number of units in hidden layer
    output_nonlinearity=softmax,  # output layer uses softmax, not the identity function
    output_num_units=outputs,  # one output unit per class; `outputs` must already be defined

    # optimization method:
    update=nesterov_momentum,
    update_learning_rate=0.01, 
    update_momentum=0.9, 

    regression=False,  # classification task, so regression mode is switched off
    max_epochs=400,  # number of training epochs to run
    verbose=1, # print the layer summary and the per-epoch training log
    )


In [111]:
# Convert the pandas objects into NumPy arrays before handing them to the net.
# A plain 2-D ndarray copy is used for the features: the np.matrix class is no
# longer recommended by NumPy and behaves differently from ndarray when
# indexed or multiplied.
X = np.array(X)
# Class labels as 32-bit integers.
y = np.array(y, dtype=np.int32)

In [112]:
# Train the network on the full feature matrix and label vector.
# As the last expression in the cell, the value returned by fit() is displayed.
net1.fit(X,y)

# Neural Network with 166 learnable parameters

## Layer information

  #  name      size
---  ------  ------
  0  input        9
  1  hidden      10
  2  output       6

  epoch    trn loss    val loss    trn/val    valid acc  dur
-------  ----------  ----------  ---------  -----------  -----
      1     [36m1.82655[0m     [32m1.80679[0m    1.01094      0.13333  0.00s
      2     [36m1.81989[0m     [32m1.80000[0m    1.01105      0.13333  0.00s
      3     [36m1.81130[0m     [32m1.79213[0m    1.01070      0.13333  0.00s
      4     [36m1.80145[0m     [32m1.78367[0m    1.00997      0.13333  0.00s
      5     [36m1.79077[0m     [32m1.77487[0m    1.00896      0.13333  0.00s
      6     [36m1.77977[0m     [32m1.76621[0m    1.00768      0.13333  0.00s
      7     [36m1.76866[0m     [32m1.75783[0m    1.00616      0.13333  0.00s
      8     [36m1.75764[0m     [32m1.74980[0m    1.00448      0.13333  0.00s
      9     [36m1.74702[0m     [32m1.74214[0m    1.00

NeuralNet(X_tensor_type=None,
     batch_iterator_test=<nolearn.lasagne.base.BatchIterator object at 0x7f12ba2e42d0>,
     batch_iterator_train=<nolearn.lasagne.base.BatchIterator object at 0x7f12ba2e4190>,
     check_input=True, custom_scores=None, hidden_num_units=10,
     input_shape=(None, 9),
     layers=[('input', <class 'lasagne.layers.input.InputLayer'>), ('hidden', <class 'lasagne.layers.dense.DenseLayer'>), ('output', <class 'lasagne.layers.dense.DenseLayer'>)],
     loss=None, max_epochs=400, more_params={},
     objective=<function objective at 0x7f12ba2e7848>,
     objective_loss_function=<function categorical_crossentropy at 0x7f12ba3b75f0>,
     on_batch_finished=[],
     on_epoch_finished=[<nolearn.lasagne.handlers.PrintLog instance at 0x7f12b33992d8>],
     on_training_finished=[],
     on_training_started=[<nolearn.lasagne.handlers.PrintLayerInfo instance at 0x7f12b3399710>],
     output_nonlinearity=<function softmax at 0x7f12ba565de8>,
     output_num_units=6, regre

In [113]:
# Score the trained network on the same X and y that were passed to fit(),
# so this number does not measure performance on unseen data.
# NOTE(review): presumably classification accuracy — confirm against nolearn docs.
net1.score(X,y)

0.5607476635514018

### Softmax (and other output non-linearities)

Softmax is the conventional choice for the output layer of a classifier: it turns the layer's raw scores into non-negative values that sum to 1, so they can be read as class probabilities. There are a host of other non-linearities in lasagne and nolearn, but this one is well studied and works well for classification problems in neural nets. If you want all the mathy details, Wikipedia is your friend.

### Nesterov Momentum and SGD in DNN

Like a lot of optimization problems, training neural nets benefits from a Stochastic Gradient Descent (SGD) approach. Nesterov momentum is just one of several update rules for implementing it, and it is apparently a very popular one across applications. Feel free to experiment (and report back) with the others included in lasagne, such as adagrad and rmsprop.

You may notice a pattern with this: there's a lot of tinkering that can be done with Neural Nets. By all means, tinker, just know what to expect when you mess with things!

### Parameter Tuning and Why Not To Do It

In my experience, messing with the learning rate and momentum just isn't worth it: you're more likely to improve performance if you change the overall structure of the neural net. These two settings (`update_learning_rate` and `update_momentum` above) are passed to the SGD update rule: a small learning rate encourages incremental improvements, while a large momentum helps the optimizer avoid getting stuck in local minima of the error surface. You're free to muck around with them, but be forewarned that if you just twiddle the knobs and hope for the best, you're probably not going to get awesome results.