## Import relevant libraries 

In [None]:
import pandas as pd
import numpy as np
import theano
import theano.tensor as t


## Import and Split the Data

In [None]:
# Load "auto-mpg.csv": "?" entries are treated as missing values and every
# column is parsed with Theano's configured float type (theano.config.floatX).
dataset = pd.read_csv("auto-mpg.csv", na_values="?", dtype=theano.config.floatX)
# Drop every row that contains at least one missing value.
dataset = dataset.dropna()
# NOTE(review): indexing by 'name' is left disabled; presumably this CSV has no
# 'name' column (all columns are parsed as floats above) -- confirm.
# dataset = dataset.set_index(['name'])
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(dataset.head())


In [None]:
# Inspect the cleaned table: its (rows, columns) shape, then the underlying
# NumPy array. Single-argument print(...) works on Python 2 and Python 3.
print(dataset.shape)
print(dataset.values)

In [None]:
# The first n_train rows are used for training and the remainder for testing
# (rows are taken in file order, without shuffling).
n_train = 300
values = dataset.values
train = values[:n_train, :]
test = values[n_train:, :]

# Column 0 is the regression target; the remaining columns are the features.
train_x, train_y = train[:, 1:], train[:, 0]
test_x, test_y = test[:, 1:], test[:, 0]

# Quick sanity check of the targets and of the feature-matrix shape.
# Single-argument print(...) works on Python 2 and Python 3.
print(train_y[:10])
print(train_x.shape)

# Linear Regression 

In [None]:
## Set up the Theano variables

In [None]:
# Symbolic inputs. t.matrix / t.vector take their dtype from
# theano.config.floatX, which is also the dtype used for the data and for the
# shared parameters below (t.dmatrix / t.dvector are always float64).
x = t.matrix("X")
y = t.vector("y")

# One weight per feature column plus a scalar bias, all initialised to zero.
n_features = train_x.shape[1]
w_value = np.zeros(n_features, dtype=theano.config.floatX)
b_value = np.asarray(0., dtype=theano.config.floatX)

# Shared variables hold the parameter values that the compiled training
# function updates between calls.
w = theano.shared(value=w_value, name="weights")
b = theano.shared(value=b_value, name="bias")
params = [w, b]

## Build our functions 

In [None]:
# Linear model: one prediction per row of x.
lin_hypothesis = t.dot(x, w) + b

# Objective: mean of the squared differences between prediction and target.
mean_sq_error = ((lin_hypothesis - y) ** 2).mean()

# Symbolic gradients of the objective with respect to each parameter.
gw, gb = theano.grad(cost=mean_sq_error, wrt=[w, b])

# Gradient-descent step size.
alpha = 0.001

# One (shared variable, new value) pair per parameter.
updates = tuple(
    (param, param - alpha * step)
    for param, step in ((w, gw), (b, gb))
)


In [None]:
# Prediction only: maps a feature matrix to the model output (as a
# one-element list, because `outputs` is given as a list).
hyp = theano.function([x], [lin_hypothesis])

# Both functions below return [predictions, mean squared error]; only the
# training function is compiled with the gradient-descent updates.
train_function = theano.function(
    inputs=[x, y],
    outputs=[lin_hypothesis, mean_sq_error],
    updates=updates,
    name='train',
)

test_function = theano.function(
    inputs=[x, y],
    outputs=[lin_hypothesis, mean_sq_error],
    name='test',
)

## Train our Model 

In [None]:

# Run five full-batch training steps on the training split, reporting the
# mean squared error returned by each call.
for i in range(5):
    y_hat, error = train_function(train_x, train_y)
    # Single-argument print(...) works on Python 2 and Python 3.
    print(error)



## Test the model

In [None]:
# Evaluate on the held-out rows; test_function is compiled without updates,
# so the parameters are left unchanged.
y_hat, test_error = test_function(test_x, test_y)
print(y_hat)
# The original printed the undefined name `test_err` (NameError).
print(test_error)

# Multi Layer Perceptron  
## Set up Theano Variables  

In [None]:
# Symbolic inputs. t.matrix / t.vector follow theano.config.floatX, matching
# the dtype of the data and of the shared parameters created below
# (t.dmatrix / t.dvector are always float64).
x_mlp = t.matrix("X_mlp")
y_mlp = t.vector("y_mlp")

# Layer sizes: one input unit per feature column, n_nodes hidden units.
fan_in = train_x.shape[1]
n_nodes = 2

# Half-width of the uniform initialisation range, sqrt(6 / (fan_in + fan_out));
# it is scaled by 4 below for the hidden-layer weights.
bound = np.sqrt(6. / (fan_in + n_nodes))

# Hidden layer: uniformly random weights in [-4 * bound, 4 * bound), zero biases.
# NOTE(review): the random generator is not seeded, so runs are not repeatable.
w1_value = np.asarray(
    np.random.uniform(
        low=-4 * bound,
        high=4 * bound,
        size=(fan_in, n_nodes)),
    dtype=theano.config.floatX
)
b1_value = np.zeros(shape=n_nodes, dtype=theano.config.floatX)

# Output layer: one weight per hidden unit and a scalar bias, all zero.
w2_value = np.zeros(n_nodes, dtype=theano.config.floatX)
b2_value = np.asarray(0., dtype=theano.config.floatX)

# Shared variables hold the parameter values that training updates.
w1 = theano.shared(value=w1_value, name="weights1")
b1 = theano.shared(value=b1_value, name="bias1")

w2 = theano.shared(value=w2_value, name="weights2")
b2 = theano.shared(value=b2_value, name="bias2")

# Rebinds `params` (previously the linear-regression parameters) to the MLP's.
params = [w1, b1, w2, b2]

## Define our functions

In [None]:
# Hidden-layer pre-activation: affine map of the inputs.
z = t.dot(x_mlp, w1) + b1
# Non-linear logistic (sigmoid) activation of the hidden units.
# t.nnet.sigmoid computes 1 / (1 + exp(-z)) with a numerically stable
# implementation, instead of relying on the hand-written expression whose
# exp(-z) can overflow for large negative z.
a = t.nnet.sigmoid(z)
# Linear output layer: one prediction per input row.
y_hat_hypothesis = t.dot(a, w2) + b2

# Objective: mean squared error between predictions and targets.
mean_sq_error = t.mean((y_hat_hypothesis - y_mlp)**2)

# Symbolic gradient of the objective with respect to every parameter.
g_params = t.grad(mean_sq_error, wrt=params, add_names=True)

# Gradient-descent step size.
alpha = 0.01

# One (shared variable, new value) pair per parameter.
updates = [(param, param - g_param * alpha)
           for param, g_param in zip(params, g_params)]

## Compile the Functions

In [None]:
# Both functions return [predictions, mean squared error] for a batch; only
# the training function is compiled with the gradient-descent updates.
train_function = theano.function(
    inputs=[x_mlp, y_mlp],
    outputs=[y_hat_hypothesis, mean_sq_error],
    updates=updates,
)

test_function = theano.function(
    inputs=[x_mlp, y_mlp],
    outputs=[y_hat_hypothesis, mean_sq_error],
)

## Train the MLP

In [None]:
# Run 2000 full-batch training steps, reporting the training error on every
# 200th iteration.
for i in range(2000):
    y_hat, error = train_function(train_x, train_y)
    if i % 200 == 0:
        # Single-argument print(...) works on Python 2 and Python 3.
        print(error)

## Test the MLP 

In [None]:
# Evaluate the MLP on the held-out rows. The MLP test_function is compiled
# without updates, so this call only returns the predictions and the mean
# squared error.
y_hat, error = test_function(test_x, test_y)