## Building a multilayer perceptron for regression in Theano

In [1]:
import theano
import numpy as np
import pandas as pd

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
import sklearn
from sklearn import datasets

# Fetch the California housing data through scikit-learn's public entry point.
# The original call went through the `sklearn.datasets.california_housing`
# submodule, which was made private in scikit-learn 0.22 and is not reachable
# under that name in newer releases.
cali_housing = sklearn.datasets.fetch_california_housing()

X = cali_housing.data
y = cali_housing.target
# Append the target as the last column next to the feature columns.
Xy = np.column_stack((X, y))

# Column labels for the combined table: the dataset's feature names plus "target".
features_and_target = list(cali_housing.feature_names) + ["target"]
features_to_retrieve = cali_housing.feature_names
target_feature = ["target"]
Xy_df = pd.DataFrame(Xy, columns=features_and_target)

In [3]:
# Names of the columns of Xy_df used as model inputs.
features = [
    'MedInc',
    'HouseAge',
    'AveRooms',
    'AveBedrms',
    'Population',
    'AveOccup',
    'Latitude',
    'Longitude',
]

# Name of the column to predict, kept as a one-element list so that
# selecting with it yields a 2-D (single-column) result.
target = ['target']

In [123]:
# Extract plain NumPy arrays for training.
# `.values` replaces `DataFrame.as_matrix()`, which was deprecated in
# pandas 0.23 and removed in pandas 1.0; `.values` works on old and new versions.
# Both selections use a list of column names, so both arrays are 2-D.
# NOTE(review): the features are used unscaled; the very large costs recorded
# further down are presumably related — consider standardising the columns.
X_train = Xy_df[features].values
y_train = Xy_df[target].values

In [146]:
import theano
from theano import tensor as T

def floatX(X):
    """Return X as a NumPy array cast to the dtype named by theano.config.floatX."""
    target_dtype = theano.config.floatX
    return np.asarray(X, dtype=target_dtype)

def relu(X):
    """Rectified linear unit: the elementwise maximum of X and zero."""
    zero = 0.
    return T.maximum(X, zero)


def updates_sgd(cost, params, learning_rate=0.01):
    """
    Build the list of symbolic updates for plain stochastic gradient descent.

    Parameters
    ----------
    cost : symbolic expression to minimise; passed as `cost` to
        `theano.tensor.grad`.
    params : list of variables to differentiate with respect to and to update.
    learning_rate : step size applied to every gradient. Defaults to 0.01,
        the value that used to be hard-coded, so existing callers are unaffected.

    Returns
    -------
    A list with one `[param, param - grad * learning_rate]` pair per entry of
    `params`, in the same order, for use as the `updates` argument of
    `theano.function`.
    """
    grads = theano.tensor.grad(cost=cost, wrt=params)
    return [[param, param - grad * learning_rate]
            for param, grad in zip(params, grads)]

In [150]:
# Layer sizes of the one-hidden-layer network.
n_features = X_train.shape[1]
n_hid = 200
n_out = 1

# Dedicated, seeded generator: re-running this cell always produces the same
# initial weights, and the global NumPy random state is left untouched.
init_rng = np.random.RandomState(0)

# Hidden layer: Gaussian weights (std 0.1) divided by sqrt(n_features), zero biases.
sha_W1 = theano.shared(floatX(init_rng.normal(scale=0.1, size=(n_features, n_hid)) / np.sqrt(n_features)))
sha_b1 = theano.shared(floatX(np.zeros(n_hid)))

# Output layer: Gaussian weights (std 0.1) divided by sqrt(n_out), zero biases.
# The bias is sized with n_out (it was a literal 1) so it follows the output width.
sha_W2 = theano.shared(floatX(init_rng.normal(scale=0.1, size=(n_hid, n_out)) / np.sqrt(n_out)))
sha_b2 = theano.shared(floatX(np.zeros(n_out)))

# Shared variables that the SGD updates will modify.
params = [sha_W1, sha_b1, sha_W2, sha_b2]

# Symbolic placeholders for one minibatch of inputs and targets (both matrices).
sym_Xbatch = T.matrix("sym_Xbatch")
sym_Ybatch = T.matrix("sym_Ybatch")

# Forward pass: affine -> relu -> affine; cost is the mean squared error.
sym_hidden = relu(T.dot(sym_Xbatch, sha_W1) + sha_b1)
sym_out = T.dot(sym_hidden, sha_W2) + sha_b2
sym_cost = T.mean((sym_out - sym_Ybatch)**2)
sym_updates = updates_sgd(sym_cost, params)

# Compiled training step: returns the minibatch cost and applies the SGD updates.
# allow_input_downcast lets float64 NumPy inputs be passed when floatX is float32.
tfunc_fitbatch = theano.function(inputs=[sym_Xbatch, sym_Ybatch],
                                 outputs=sym_cost,
                                 updates=sym_updates,
                                 allow_input_downcast=True)



In [154]:
# Run one SGD step on the first 100 rows; the displayed value is the
# minibatch cost returned by tfunc_fitbatch.
# NOTE(review): the recorded cost (~2.5e34) indicates the optimisation has
# diverged — presumably unscaled features combined with the fixed step size; confirm.
tfunc_fitbatch(X_train[0:100], y_train[0:100])

array(2.487101262262859e+34, dtype=float32)

In [127]:
# Display the number of input features currently in scope.
n_features 

8

#### Import the `mlp_regression` module

In [90]:
import mlp_regression

In [91]:
# Number of model inputs, taken from the list of feature column names.
n_features = len(features)

In [92]:
# Constructor arguments for mlp_regression.MLPRegression (defined outside this notebook).
# Presumably `dims` lists the layer widths (input, two hidden layers, output) and
# `activations` names the nonlinearity of each of the three layers — confirm
# against the module.
dims = [n_features, 200, 100, 1]
activations = ["relu", "relu", "identity"]

In [93]:
# Instantiate the regression MLP with learning_rate=1e-4.
model = mlp_regression.MLPRegression(dims, activations, learning_rate=0.0001)

In [94]:
# Predict on the first five rows of the feature columns.
model.predict(Xy_df[features][0:5])

array([[ 0.98005003],
       [ 5.79990864],
       [ 1.40946817],
       [ 1.56663454],
       [ 1.57885885]], dtype=float32)

#### Computing MSE

- Check that the model computes the MSE correctly by comparing it with scikit-learn's `mean_squared_error`

In [95]:
from sklearn import metrics
# Display the function object to confirm what `sklearn.metrics.mean_squared_error` resolves to.
sklearn.metrics.mean_squared_error

<function sklearn.metrics.regression.mean_squared_error>

In [96]:
# Reference MSE from scikit-learn on the first five rows.
yhat = model.predict(Xy_df[features][0:5])
# NOTE: this rebinds `y`, which earlier in the notebook held the full target vector.
y = Xy_df[target][0:5]
# mean_squared_error is documented as (y_true, y_pred); the arguments were
# previously passed the other way round. MSE is symmetric, so the value is unchanged.
sklearn.metrics.mean_squared_error(y, yhat.flatten())

5.748876570565681

In [97]:
# The model's own cost on the same five rows; the recorded value agrees with
# the scikit-learn MSE for these rows to about seven significant digits.
model.compute_cost(Xy_df[features][0:5], Xy_df[target][0:5])

array(5.74887633504458)

In [98]:
# Same inputs passed to compute_sym_cost. The recorded output is just `mean`,
# presumably an unevaluated symbolic expression — confirm in mlp_regression.
model.compute_sym_cost(Xy_df[features][0:5], Xy_df[target][0:5])

mean

#### Gradient 

In [99]:
#model.tfunc_fit_mini_batch()

### Learning: testing a single minibatch fit

In [100]:
# Shape of the weight matrix at index 2, evaluated to concrete numbers with .eval().
model.W[2].shape.eval()

array([100,   1])

In [101]:
# Re-create the model with a 10x larger learning rate (1e-3 instead of 1e-4)
# for the minibatch-fitting experiment below.
model = mlp_regression.MLPRegression(dims, activations, learning_rate=0.001)

In [103]:
# Predictions for the first ten training rows, as a baseline to compare with
# the predictions obtained after fitting.
model.predict(X_train[0:10])

array([[ 0.98005003],
       [ 5.79990864],
       [ 1.40946817],
       [ 1.56663454],
       [ 1.57885885],
       [ 1.20202112],
       [ 2.81579208],
       [ 2.95755386],
       [ 3.05576658],
       [ 3.84980774]], dtype=float32)

In [114]:
# Check the number of dimensions of the target slice passed to the fit call.
y_train[0:10].ndim

2

In [115]:
# Fits the current minibatch (first ten rows) and returns the cost for that minibatch.
# NOTE(review): the recorded cost (~7.7e9) is far from the ~5.7 MSE measured
# earlier on five rows, which suggests the updates are diverging — confirm.
model.tfunc_fit_mini_batch(X_train[0:10], y_train[0:10])

array(7735704064.0, dtype=float32)

In [122]:
# Predictions for the first three rows after fitting.
# NOTE(review): the recorded magnitudes (~1e12-1e13) show the outputs blowing up;
# the execution count is out of sequence, so the exact model state is unclear.
model.predict(X_train[0:3])

array([[  2.06736038e+12],
       [  1.48778485e+13],
       [  3.14199586e+12]], dtype=float32)

In [112]:
# Another single-minibatch fit on the same ten rows; displays the returned cost.
model.tfunc_fit_mini_batch(X_train[0:10], y_train[0:10])

array(1099.1241455078125, dtype=float32)

In [113]:
# Predictions for the first ten rows after the fit call, for comparison with
# the pre-fit predictions of the same rows.
model.predict(X_train[0:10])

array([[ -24709.91796875],
       [-182955.984375  ],
       [ -37911.57421875],
       [ -42626.4765625 ],
       [ -43158.10546875],
       [ -31597.23632812],
       [ -83398.7421875 ],
       [ -88191.3671875 ],
       [ -91952.5234375 ],
       [-118175.3125    ]], dtype=float32)

In [None]:
# Inspect the learning rate stored on the model.
model.learning_rate