# Implementing NN using Simulated Annealing

First we import everyting necessary and define some functions

In [3]:
import numpy as np
from sklearn.metrics import mean_squared_error
import random
import math
from sklearn import preprocessing

def sigmoid(x):
    return 1.0/(1+ np.exp(-x))

def sigmoid_derivative(x):
    return x * (1.0 - x)

def squared_loss(y, y_pred):
    return mean_squared_error(y, y_pred)**0.5

In [4]:
trainData = np.loadtxt(open("train.csv", "rb"), dtype=np.float64, delimiter=",", skiprows=1) #Load Data
trainData = preprocessing.normalize(trainData)
#y = trainData[:, 1]# y data, correct
y = [[x[1]] for x in trainData]
y = np.asarray(y)
X = trainData[:, 2:] # x_1 to x_5 data, correct
#X = preprocessing.normalize(trainData)

## Implementation of NN using backprop

In [11]:
class NeuralNetwork:
    def __init__(self, x, y, n2):
        self.input      = x
        self.weights1   = np.random.rand(self.input.shape[1],n2)    
        self.y          = y
        self.weights2   = np.random.rand(n2, 1)
        self.output     = np.zeros(self.y.shape)

    def feedforward(self):
        self.layer1 = sigmoid(np.dot(self.input, self.weights1))
        self.output = sigmoid(np.dot(self.layer1, self.weights2))

    def backprop(self):
        # application of the chain rule to find derivative of the loss function with respect to weights2 and weights1
        d = (2*(self.y - self.output) * sigmoid_derivative(self.output))
        d_weights2 = np.dot(self.layer1.T, d)
        d_weights1 = np.dot(self.input.T,  (np.dot(d, self.weights2.T) * sigmoid_derivative(self.layer1)))
        # update the weights with the derivative (slope) of the loss function
        
        self.weights1 += preprocessing.normalize(d_weights1)
        self.weights2 += preprocessing.normalize(d_weights2)
        #print(self.weights2[0])

In [13]:
print(X.shape)
print(y.shape)
print(y)

nn = NeuralNetwork(X,y, 10)

print("error:")
for i in range(1501):
    nn.feedforward()
    nn.backprop()
    if i % 100 == 0:
        print(squared_loss(y, nn.output))
    
    

print("final error: \n", squared_loss(y, nn.output))
#for item_a, item_b, item_z in zip(y, nn.output, y-nn.output):
#    print(item_a, item_b, item_z)

(10000, 10)
(10000, 1)
[[ 0.18840931]
 [ 0.15961654]
 [ 0.04236603]
 ...
 [ 0.00593165]
 [-0.01988865]
 [-0.02760952]]
error:
0.8731336301707824
0.05684912668804984
0.05560301718003331
0.05552620207746124
0.05549938759405814
0.055485582931718326
0.05547693071896578
0.0554709047077066
0.05546649125356281
0.05546319762752776
0.05546070480564479
0.05545879384658111
0.05545731471869787
0.05545615724583497
0.05545523616408335
0.0554544862593035
final error: 
 0.0554544862593035


## NN with variable depth

In [13]:
class NeuralNetwork:
    def __init__(self, x, y, n, l):
        self.input      = x
        self.y          = y
        self.width      = n
        self.depth      = l
        m = self.input.shape[1]
        
        self.layer      = []
        self.layer.append(x)
        for i in range(1,l):
            self.layer.append(np.zeros((m,n)))
        self.layer.append(np.zeros(self.y.shape))
        self.output = self.layer[l]
        
        self.weights    = []
        self.weights.append(np.random.rand(m, n))
        for i in range(1,l-1):
            self.weights.append(np.random.rand(n,n))
        self.weights.append(np.random.rand(n,1))

    def feedforward(self):
        l = self.depth
        for i in range(1,l):
            self.layer[i] = sigmoid(np.dot(self.layer[i-1], self.weights[i-1]))
            
        self.output = self.layer[l]

    def backprop(self):
        # application of the chain rule to find derivative of the loss function with respect to weights2 and weights1
        l = self.depth
        d_weights = []
        d_error = []
        print(len(self.weights), l)
        for w in self.weights:
            d_weights.append(w)
            d_error.append(y)
        d_error[l-1] = (self.y - self.layer[1])
        d_weights[l-1] = np.dot(d_error[l-1],self.layer[l-1].T)
        
        for i in range(l-2,0,-1):
            a = sigmoid_derivative(self.layer[i])
            print(d_weights[i+1].shape, d_error[i+1].shape, i+1)
            b = np.dot(d_weights[i+1].T,d_error[i+1])
            d_error[i] = a*b
            print(self.layer[i-1].shape)
            d_weights[i] = np.dot(d_error[i], self.layer[i-1].T)

        """d = (2*(self.y - self.output) * sigmoid_derivative(self.output))
        d_weights2 = np.dot(self.layer1.T, d)
        d_weights1 = np.dot(self.input.T,  (np.dot(d, self.weights2.T) * sigmoid_derivative(self.layer1)))
        
        
        
        
        
        
        d_weights2 = np.dot(self.layer1.T, (2*(self.y - self.output) * sigmoid_derivative(self.output)))
        d_weights1 = np.dot(self.input.T,  (np.dot(2*(self.y - self.output) * sigmoid_derivative(self.output), self.weights2.T) * sigmoid_derivative(self.layer1)))
        # update the weights with the derivative (slope) of the loss function
        
        self.weights1 += preprocessing.normalize(d_weights1)
        self.weights2 += preprocessing.normalize(d_weights2)
        #print(self.weights2[0])"""

In [14]:
nn = NeuralNetwork(X,y, 4, 4)

print("error:")
for i in range(1501):
    nn.feedforward()
    nn.backprop()
    if i % 100 == 0:
        print(squared_loss(y, nn.output))
    
    

print("final error: \n", squared_loss(y, nn.output))
#for item_a, item_b, item_z in zip(y, nn.output, y-nn.output):
#    print(item_a, item_b, item_z)

error:
4 4
(10000, 10000) (10000, 4) 3
(10000, 4)
(10000, 10000) (10000, 4) 2
(10000, 10)


ValueError: shapes (10000,4) and (10,10000) not aligned: 4 (dim 1) != 10 (dim 0)

# Simulated Annealing

In [7]:
class NeuralNetwork:
    def __init__(self, x, y):
        self.input      = x
        self.weights1   = np.random.rand(self.input.shape[1],4)
        self.weights2   = np.random.rand(4,1)                 
        self.y          = y
        self.output     = np.zeros(self.y.shape)
        self.best_sol   = 1000000
        self.best_out   = self.output
        self.best1      = self.weights1
        self.best2      = self.weights2

    def feedforward(self):
        self.layer1 = sigmoid(np.dot(self.input, self.weights1))
        self.output = sigmoid(np.dot(self.layer1, self.weights2))

    def optimize(self, temp):
        t = temp
        old_weights1 = self.weights1
        old_weights2 = self.weights2
        old_output = self.output
        old_loss = squared_loss(self.y, self.output)
        
        rand = np.random.rand(self.input.shape[1],4)
        self.weights1 = self.weights1 + rand
        rand2 = np.random.rand(4,1)
        self.weights2 = self.weights2 + rand2
        self.feedforward()
        new_loss = squared_loss(self.y, self.output)
        #print(new_loss, old_loss)
        #print(random.random(),math.exp(-(new_loss-old_loss)/t))
        if (new_loss > old_loss) and random.random() > math.exp(-(new_loss-old_loss)/t):
            self.weights1 = old_weights1
            self.weights2 = old_weights2
            self.output = old_output
        if squared_loss(self.y, self.output) < self.best_sol:
            self.best_sol = squared_loss(self.y, self.output)
            self.best_out = self.output
            self.best1 = self.weights1
            self.best2 = self.weights2
        temp = temp - 100

In [8]:
nn = NeuralNetwork(X,y)

print("error:")
for i in range(10000):
    nn.feedforward()
    nn.optimize(1000000)
    if i % 1090 == 0:
        print('error in round ', i ,squared_loss(y, nn.best_out))
    
    

print("final output: \n", nn.best_out)

error:
error in round  0 0.8875998764918559
error in round  1090 0.7887363817463872
error in round  2180 0.785998345042974
error in round  3270 0.7846064507398828
error in round  4360 0.7842330347012362
error in round  5450 0.7837358827766141
error in round  6540 0.7835368046062703
error in round  7630 0.7832214819389576
error in round  8720 0.7830388190908184
error in round  9810 0.7825833107260677
final output: 
 [[1. ]
 [1. ]
 [1. ]
 ...
 [1. ]
 [0.5]
 [0.5]]
