In [192]:
import mnist_loader
import numpy as np
import random
import pandas as pd
from sklearn.cross_validation import train_test_split
from sklearn import preprocessing


# Q1(b) Creating the Class

In [193]:
class Network(object):

    def __init__(self, train_data,sizes,actions):
        
       
        
        self.num_layers = len(sizes)
        
        self.sizes = sizes
        self.biases = [np.random.randn(y, 1) for y in sizes[1:]]
        self.weights = [np.random.randn(y, x)
                        for x, y in zip(sizes[:-1], sizes[1:])]
        self.actions = actions
        self.activations = 0
        self.z = 0
        self.delta_b = 0
        self.delta_w = 0
        self.train_data = train_data

    #Forward Pass
    def forwardpass(self, x):
        act = x
        activations = [x]
        zs = []
        cnt = 0
        for b, w in zip(self.biases, self.weights):
            z = np.dot(w, act)+b
            zs.append(z)
            if(self.actions[cnt]=='sigmoid'):
                act = self.sigmoid(z)
            elif(self.actions[cnt]=='relu'):
                act = self.relu(z)
            else: 
                act = z
            activations.append(act)
            cnt+=1
        self.activations = activations
        self.z = zs
        return activations[-1]
        
    
    def SGD_train(self,alpha):
        for x,y in self.train_data: 
            net.forwardpass(x)
            net.backprop(y)
            net.gradient_descent(alpha)

    #Back Propogation
    def backprop(self, y):
        
        delta_b = [np.zeros(b.shape) for b in self.biases]
        delta_w = [np.zeros(w.shape) for w in self.weights]
        
        activations = self.activations
        zs = self.z
        if(self.actions[-1]=='sigmoid'): 
            delta = self.initial_derivative(activations[-1], y)*self.sigmoid_prime(zs[-1])
        elif(self.actions[-1]=='relu'):
            delta = self.initial_derivative(activations[-1],y)*self.relu_prime(zs[-1])
        else: 
            delta = self.initial_derivative(activations[-1],y)*np.ones(np.shape(zs[-1]))
            

        delta_b[-1] = delta     
        delta_w[-1] = np.dot(delta, activations[-2].transpose())
        
        
        for l in range(2, self.num_layers):
            z = zs[-l]
            if(self.actions[-l] == 'sigmoid'):
                sp = self.sigmoid_prime(z)
                
            elif(self.actions[-l] == 'linear'):
                
                sp = np.ones(np.shape(z))
                                                                       
            else: 
                sp = self.relu_prime(z)
                                                            
            delta = np.dot(self.weights[-l+1].transpose(), delta) * sp
            delta_b[-l] = delta
            delta_w[-l] = np.dot(delta, activations[-l-1].transpose())
        
        self.delta_b = delta_b 
        self.delta_w = delta_w
   
    #Gradient Descent
    def gradient_descent(self,alpha):
        updated_weights = []
        updated_biases = []
        for w,b,db,dw in zip(self.weights,self.biases,self.delta_b,self.delta_w):
            
            w = w-alpha*dw
            b = b-alpha*db
            updated_weights.append(w)
            updated_biases.append(b)
        self.weights = updated_weights
        self.biases = updated_biases
    

            
    
    def initial_derivative(self, output_activations, y):
       
        return (output_activations-y)

    #Activation functions
    def sigmoid(self,z):
    
        return 1.0/(1.0+np.exp(-z))

    
    def sigmoid_prime(self,z):
   
        return self.sigmoid(z)*(1-self.sigmoid(z))

    def relu(self,z):
        z[z<0] = 0
        return z
    
    def relu_prime(self,z):
        z[z>0] = 1
        z[z<0] = 0
        return z
    
    def evaluate(self,test_data):
        
        
        test_results = [(np.argmax(net.forwardpass(x)),y) for (x, y) in test_data]
        return sum(int(x == y) for (x, y) in test_results)*1.0/len(test_data)


# Q1(c) Testing on MNIST dataset

In [194]:
training_data, validation_data, test_data = mnist_loader.load_data_wrapper()
net =  Network(training_data,[784,30,10],['sigmoid','sigmoid'])

In [195]:
net.SGD_train(0.9)

In [196]:
#Evaluation on MNIST dataset. 

test_results = [(np.argmax(net.forwardpass(x)),y)
                    for (x, y) in test_data]
validation_results = [(np.argmax(net.forwardpass(x)),y) for (x,y) in validation_data]

In [197]:
print "validation accuracy", sum(int(x==y) for (x,y) in validation_results)/10000.
print "test accuracy", sum(int(x == y) for (x, y) in test_results)/10000.

validation accuracy 0.918
test accuracy 0.9092


# Q1(d) Training Network on real estate dataset

In [198]:
df = pd.read_excel('real_estate.xlsx')
x = df.values #returns a numpy array
min_max_scaler = preprocessing.MinMaxScaler()
x_scaled = min_max_scaler.fit_transform(x)
df = pd.DataFrame(x_scaled)

In [199]:
X = df[df.columns[1:-1]]
y = df[df.columns[-1]]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=1)

X_train, X_val, y_train, y_val  = train_test_split(X_train, y_train, test_size=0.3, random_state=1)

In [200]:
y_t = y_train.values.reshape(-1,1,1)
X_t = X_train.values.reshape(-1,6,1)

y_te = y_test.values.reshape(-1,1,1)
X_te = X_test.values.reshape(-1,6,1)

y_v = y_val.values.reshape(-1,1,1)
X_v = X_val.values.reshape(-1,6,1)

In [201]:
train_data  = zip(X_t,y_t)
test_data = zip(X_te,y_te)
val_data = zip(X_v,y_v)

In [218]:
net =  Network(train_data,[6,1],['sigmoid'])

In [219]:
net.SGD_train(1.5)

In [220]:

test_results = [net.forwardpass(x)[0][0]
                    for (x, y) in test_data]
validation_results = [net.forwardpass(x)[0][0] for (x,y) in val_data]

In [221]:
TMSE = sum(y_test-test_results)**2/len(y_test)
TRMSE = np.sqrt(TMSE)

VMSE = sum(validation_results-y_val)**2/len(y_val)
RVMSE = np.sqrt(VMSE)

print("The Test RMSE and MSE are",TRMSE ,"and", TMSE)
print("The Val RMSE and MSE are",RVMSE,"and",VMSE)

('The Test RMSE and MSE are', 0.015391994387526446, 'and', 0.0002369134912256456)
('The Val RMSE and MSE are', 0.05472659861223529, 'and', 0.002995000595664714)
