# Homework 6

In [None]:
import pandas as pd
import numpy as np
from sympy import Symbol, lambdify

In [None]:
# Both files are parsed as space-separated, header-less tables whose two
# columns are named 'x' and 'y'.
train_data = pd.read_csv('Input/training.dat', sep=' ', header=None, names=['x', 'y'])
test_data = pd.read_csv('Input/test.dat', sep=' ', header=None, names=['x', 'y'])

# Symbolic model coefficients.
w0, w1, w2 = (Symbol(name) for name in ("w0", "w1", "w2"))

x_train = np.array(train_data['x'])
y_train = np.array(train_data['y'])

# Model (a): y ~ w0 + w1*x. func_a is the symbolic sum of squared residuals.
residual_a = y_train - w0 - w1 * x_train
func_a = np.sum(np.square(residual_a))
f_a = lambdify([[w0, w1]], func_a, "numpy")
gf_a = lambdify([[w0, w1]], func_a.diff([[w0, w1]]), "numpy")


def grad_fa(x_arr):
    """Evaluate the gradient of func_a at x_arr as a float64 row vector."""
    return np.array(gf_a(x_arr), 'float64').reshape(1, len(x_arr))


# Model (b): y ~ w0 + w1*x + w2*x^2, built the same way as model (a).
residual_b = y_train - w0 - w1 * x_train - w2 * x_train**2
func_b = np.sum(np.square(residual_b))
f_b = lambdify([[w0, w1, w2]], func_b, "numpy")
gf_b = lambdify([[w0, w1, w2]], func_b.diff([[w0, w1, w2]]), "numpy")


def grad_fb(x_arr):
    """Evaluate the gradient of func_b at x_arr as a float64 row vector."""
    return np.array(gf_b(x_arr), 'float64').reshape(1, len(x_arr))

### Useful Functions

In [None]:
def np_str(x_k):
    """Format a vector as a one-line string.

    x_k is reshaped to a flat array of length len(x_k) and rendered with
    3 digits of precision and ',' as the element separator.
    """
    flat = x_k.reshape(len(x_k))
    return np.array2string(flat, precision=3, separator=',')


def f_str(x):
    """Format a number with exactly four digits after the decimal point."""
    return "{0:.4f}".format(x)

In [None]:
class OutputTable:
    """Collects rows under the columns k, x^k, f(x^k), d^k, a^k, x^k+1.

    Presumably one row per step of an iterative method (inferred from the
    column names); the accumulated table can be printed as LaTeX.
    """

    def __init__(self):
        """Start with an empty table that only has the column headers."""
        headers = ['k', 'x^k', 'f(x^k)', 'd^k', 'a^k', 'x^k+1']
        self.table = pd.DataFrame([], columns=headers)

    def add_row(self, k, xk, fxk, dk, ak, xkp):
        """Append one row.

        xk, dk and xkp are formatted with np_str; fxk must expose .item()
        and is formatted with f_str; k and ak are stored unchanged.
        """
        row = [k, np_str(xk), f_str(fxk.item()), np_str(dk), ak, np_str(xkp)]
        next_index = len(self.table)
        self.table.loc[next_index] = row

    def print_latex(self):
        """Print the table as LaTeX source without the index column."""
        latex_source = self.table.to_latex(index=False)
        print(latex_source)

## Part B: Neural Network

In [None]:
def sigmoidalFunc(output_array):
    """Logistic sigmoid 1 / (1 + exp(-z)), evaluated without overflow.

    Parameters
    ----------
    output_array : float or ndarray
        Net input(s) of the unit(s).

    Returns
    -------
    float or ndarray
        Sigmoid of every element, in [0, 1].
    """
    # log(1 + exp(-z)) is computed by logaddexp without ever exponentiating a
    # large positive number, so very negative inputs no longer trigger the
    # "overflow encountered in exp" RuntimeWarning of the naive formula.
    return np.exp(-np.logaddexp(0, -output_array))


def sigmoidalDeriv(hiddenlayer):
    """Derivative of the sigmoid written in terms of its output h: h * (1 - h).

    Note that the argument is the sigmoid's *output*, not its net input.
    """
    return hiddenlayer * (1 - hiddenlayer)

In [None]:
def backpropagation(patterns, hiddenLayerSize, alpha = 0.5, learningRate = 0.9, epsilon = 0.001, seed = 440):
    """Train a one-hidden-layer network with per-pattern (online) backpropagation.

    The network has sigmoid hidden units, one linear output unit, and bias
    units fixed at -1 on both the input layer (x0) and the hidden layer (h0).

    Parameters
    ----------
    patterns : array-like, shape (P, n_inputs + 1)
        One training pattern per row; the last column is the desired output.
        The caller's array is not modified.
    hiddenLayerSize : int
        Number of hidden units, not counting the bias unit h0.
    alpha : float
        Initial step size of the weight updates.
    learningRate : float
        Factor alpha is multiplied by after every epoch (i.e. a decay factor;
        it must be below 1 for the loop to terminate).
    epsilon : float
        Training stops once alpha is no longer greater than this value.
    seed : int
        Seed given to np.random.seed; fixes the initial weights and the
        per-epoch shuffles.

    Returns
    -------
    w_matrix : ndarray, shape (hiddenLayerSize, n_inputs + 1)
        Input-to-hidden weights (column 0 multiplies the bias x0).
    W_matrix : ndarray, shape (1, hiddenLayerSize + 1)
        Hidden-to-output weights (column 0 multiplies the bias h0).
    error : float or None
        Sum of squared errors over the training patterns after the last
        epoch, or None when alpha <= epsilon from the start (no epoch ran).
    """
    np.random.seed(seed)
    # Private float copy: the per-epoch shuffle must not reorder the caller's
    # array, and integer input must not truncate predictions or error signals.
    patterns = np.array(patterns, dtype=float)
    t = 0
    P = np.size(patterns, 0)
    # patterns still contains the y column, so its column count equals
    # n_inputs + 1, which is exactly the input-layer size once x0 is added.
    w_matrix = np.random.rand(hiddenLayerSize, np.size(patterns, 1))
    W_matrix = np.random.rand(1, hiddenLayerSize + 1)  # +1 for h0
    hiddenLayer = np.empty((hiddenLayerSize + 1, 1))
    hiddenLayer[0, 0] = -1  # h0 is fixed; rows 1.. are overwritten per pattern
    error = None
    while(alpha > epsilon):
        np.random.shuffle(patterns)
        desiredOutputs = patterns[:,-1].reshape(-1,1)
        # Prepend x0 = -1 and drop the output column; one pattern per column.
        inputLayers = np.transpose(np.insert(patterns, 0, -1, axis=1)[:,:-1])
        for p in range(P):
            inputColumn = inputLayers[:,p].reshape(-1,1)
            hiddenLayer[1:] = sigmoidalFunc(w_matrix @ inputColumn)
            # The output unit is linear, so its net input is its output.
            output = W_matrix @ hiddenLayer
            S_output = (desiredOutputs[p] - output).reshape(-1,1)
            # Hidden error signal uses the output weights *before* their update.
            S_hidden = (sigmoidalDeriv(hiddenLayer[1:]) * (np.transpose(W_matrix[:,1:]) @ S_output)).reshape(-1,1)
            W_matrix += alpha * S_output @ np.transpose(hiddenLayer)
            w_matrix += alpha * S_hidden @ np.transpose(inputColumn)
        alpha = learningRate * alpha
        t += 1
        # Batch forward pass with the epoch's final weights to report the SSE.
        actualHiddens = sigmoidalFunc(w_matrix @ inputLayers)
        actualOutputMatrix = W_matrix @ np.insert(actualHiddens, 0, -1, axis=0)
        error = np.sum(np.square(desiredOutputs - np.transpose(actualOutputMatrix)))
        print("Iteration {0} : error = {1}".format(t,error))
    return w_matrix, W_matrix, error

In [None]:
def backpropagationWithForLoops(trainingData, hiddenLayerSize, alpha = 0.5, learningRate = 0.9, epsilon = 0.001, seed = 440):
    """Loop-based reference implementation of online backpropagation.

    Trains a network with sigmoid hidden units, one linear output unit and
    bias units fixed at -1 (x0 on the input layer, h0 on the hidden layer),
    updating the weights after every pattern with explicit Python loops.

    Parameters
    ----------
    trainingData : array-like, shape (P, n_inputs + 1)
        One pattern per row; the last column is the desired output. The
        caller's array is not modified.
    hiddenLayerSize : int
        Number of hidden units, not counting h0.
    alpha : float
        Initial step size.
    learningRate : float
        Factor alpha is multiplied by after every epoch (a decay factor).
    epsilon : float
        Training runs while alpha >= epsilon.
    seed : int
        Seed given to np.random.seed so initial weights and shuffles are
        reproducible.

    Returns
    -------
    w_matrix : ndarray, shape (hiddenLayerSize + 1, n_inputs + 1)
        Input-to-hidden weights. Row 0 is a placeholder for h0: it is never
        updated and never used, so drop it before handing the matrix to code
        that expects one row per real hidden unit.
    W_matrix : ndarray, shape (1, hiddenLayerSize + 1)
        Hidden-to-output weights.
    error : float or None
        Sum of squared errors after the last epoch, or None if no epoch ran.
    """
    # The seed argument used to be ignored, making every run different.
    np.random.seed(seed)
    t = 0
    # Float copy with x0 = -1 prepended; float dtype keeps integer input from
    # truncating the network output inside the error signal.
    patterns = np.insert(np.array(trainingData, dtype=float), 0, -1, axis=1)
    P = np.size(patterns, 0) # number of patterns
    I = 1 # number of output units
    K = np.size(patterns, 1) - I # input layer size (including x0)
    J = hiddenLayerSize + 1 # hidden layer size (including h0)
    w_matrix = np.random.rand(J, K) # row 0 belongs to h0 and stays unused
    W_matrix = np.random.rand(I, J)
    error = None
    while(alpha >= epsilon):
        np.random.shuffle(patterns)
        x = np.transpose(patterns[:,:-1]).reshape(K, -1)
        y = patterns[:,-1]
        H = np.zeros(J)
        H[0] = -1 # h0 is fixed at -1
        for p in range(P):
            # Forward pass.
            for j in range(1,J):
                H[j] = sigmoidalFunc(np.sum(w_matrix[j] * x[:,p]))
            o = np.sum(W_matrix[0] * H) # linear output unit: g(x) = x
            # Error signals; S_H[0] stays 0 because h0 has no incoming weights.
            S_O = y[p] - o
            S_H = np.zeros_like(H)
            for j in range(1,J):
                S_H[j] = sigmoidalDeriv(H[j]) * (W_matrix[0,j] * S_O)
            # Weight updates (hidden signals above used the old W_matrix).
            for j in range(J):
                W_matrix[0,j] += alpha * S_O * H[j]
            for k in range(K):
                w_matrix[:,k] += alpha * S_H * x[k,p]
        alpha *= learningRate
        t += 1
        # Batch forward pass to report the epoch's SSE; row 0 is forced to h0.
        actualHiddens = sigmoidalFunc(w_matrix @ x)
        actualHiddens[0,:] = -1
        actualOutputMatrix = W_matrix @ actualHiddens
        error = np.sum(np.square(y - actualOutputMatrix))
        print("Iteration {0} : error = {1}".format(t,error))
    return w_matrix, W_matrix, error

In [None]:
# Train the loop-based implementation with 3 hidden units on the (x, y)
# training table; the returned (w_matrix, W_matrix, error) is the cell output.
patterns = np.array(train_data)
backpropagationWithForLoops(patterns, 3, seed=440)

Iteration 1 : error = 545201702.3026398
Iteration 2 : error = 34216771.06600089
Iteration 3 : error = 44680880.56835221
Iteration 4 : error = 7993620.881185431
Iteration 5 : error = 12828040.587517729
Iteration 6 : error = 14513311.579675596
Iteration 7 : error = 8882293.841745708
Iteration 8 : error = 8371360.57896401
Iteration 9 : error = 11547376.83829801
Iteration 10 : error = 7996422.046439336
Iteration 11 : error = 8045942.689556799
Iteration 12 : error = 10614577.894716825
Iteration 13 : error = 10069716.217366572
Iteration 14 : error = 10188591.169210207
Iteration 15 : error = 9333172.513779834
Iteration 16 : error = 8376198.572079177
Iteration 17 : error = 8685094.810365273
Iteration 18 : error = 9096962.782771751
Iteration 19 : error = 8138489.891812035
Iteration 20 : error = 8026904.107941785
Iteration 21 : error = 8304064.817181416
Iteration 22 : error = 8168498.98972702
Iteration 23 : error = 7960955.328688899
Iteration 24 : error = 9431368.281317905
Iteration 25 : error =

(array([[0.80939912, 0.88248547],
        [0.98901187, 0.52307794],
        [0.87928188, 0.43951135],
        [0.99135261, 0.30646859]]),
 array([[-96.51162155,  96.99315048,  97.3602759 ,  97.20723927]]),
 7953540.414327322)

In [None]:
# Rebuild the pattern array from train_data, then train the vectorized
# implementation with 3 hidden units and seed 50.
patterns = np.array(train_data)
backpropagation(patterns, 3, seed=50)

  """Entry point for launching an IPython kernel.
Iteration 1 : error = 8816929.259708159
Iteration 2 : error = 35416253.22628992
Iteration 3 : error = 8229024.724786728
Iteration 4 : error = 13164051.38481994
Iteration 5 : error = 15566757.510988269
Iteration 6 : error = 23564751.692394815
Iteration 7 : error = 53413296.88368054
Iteration 8 : error = 8874842.832893368
Iteration 9 : error = 16104438.438315174
Iteration 10 : error = 19138031.8672025
Iteration 11 : error = 8787954.196188327
Iteration 12 : error = 9487006.006960494
Iteration 13 : error = 8848522.9449617
Iteration 14 : error = 8041621.976536453
Iteration 15 : error = 8811230.183558227
Iteration 16 : error = 7983583.827350191
Iteration 17 : error = 8200299.000644428
Iteration 18 : error = 8601616.187121753
Iteration 19 : error = 9006039.439315354
Iteration 20 : error = 8088580.127783587
Iteration 21 : error = 8830037.52002777
Iteration 22 : error = 9067076.772440638
Iteration 23 : error = 8332115.190639583
Iteration 24 : er

(array([[  0.62495165, -21.52694768],
        [  0.25547392,   0.39632991],
        [  0.3773151 ,   0.99657423]]),
 array([[-128.24385088, 1238.84623456,  129.41258477,  128.96205743]]),
 7953714.347670076)

In [None]:
# Insert x^2 as a second input column (at index 1, before y), then train with
# 3 hidden units and the default hyper-parameters.
patterns2 = np.insert(np.array(train_data), 1, np.square(train_data['x']), axis=1)
backpropagation(patterns2, 3)

Iteration 1 : error = 944697069.1400571
Iteration 2 : error = 43727473.75750813
Iteration 3 : error = 159908314.5821941
Iteration 4 : error = 42677214.057957344
Iteration 5 : error = 11275987.429028483
Iteration 6 : error = 17049547.397171166
Iteration 7 : error = 25096842.549336754
Iteration 8 : error = 13718095.177282214
Iteration 9 : error = 12997109.833571704
Iteration 10 : error = 19916851.98665541
Iteration 11 : error = 10254862.747661088
Iteration 12 : error = 7958938.20386924
Iteration 13 : error = 13124474.65127949
Iteration 14 : error = 8325240.935142044
Iteration 15 : error = 11576347.127680788
Iteration 16 : error = 10075010.19525402
Iteration 17 : error = 11186638.130882198
Iteration 18 : error = 8754226.932530902
Iteration 19 : error = 8602815.323399449
Iteration 20 : error = 9473650.243996458
Iteration 21 : error = 8365002.429345769
Iteration 22 : error = 7978635.053580212
Iteration 23 : error = 8024512.188551179
Iteration 24 : error = 7975198.944123716
Iteration 25 : er

(array([[0.98901151, 0.54954473, 0.2814473 ],
        [0.07728957, 0.4444695 , 0.47280797],
        [0.048522  , 0.16332445, 0.11595071]]),
 array([[-95.78239566,  97.26596939,  97.05988976,  97.40050902]]),
 7953557.665973738)

In [None]:
def averageError(w_matrix, W_matrix, test_data):
    """Evaluate a trained network on test_data.

    Parameters
    ----------
    w_matrix : ndarray
        Input-to-hidden weights, one row per hidden unit (bias h0 excluded).
    W_matrix : ndarray
        Hidden-to-output weights, including the weight of h0 in column 0.
    test_data : ndarray, shape (n, n_inputs + 1)
        One pattern per row; the last column is the desired output.

    Returns
    -------
    mse : float
        Mean of the squared residuals.
    variance : float
        Sample variance (n - 1 denominator) of the squared residuals
        around that mean.
    """
    targets = test_data[:, -1].reshape(-1, 1)
    # Prepend the bias input x0 = -1, drop the target column, and put one
    # pattern per column.
    with_bias = np.insert(test_data, 0, -1, axis=1)
    inputs = with_bias[:, :-1].T
    hidden = sigmoidalFunc(w_matrix @ inputs)  # h1, ..., hj
    # Prepend the bias unit h0 = -1 before applying the output weights.
    outputs = W_matrix @ np.insert(hidden, 0, -1, axis=0)  # o1, ..., oi
    squared = np.square(targets - outputs.T)
    count = np.size(targets)
    mse = np.sum(squared) / count
    variance = np.sum(np.square(mse - squared)) / (count - 1)
    return mse, variance

In [None]:
def hiddenUnit(train_data, test_data, Jq = 3, epsilon = 0.001, seed = 440):
    """Grow the hidden layer until the test MSE stops improving.

    Starting from Jq hidden units, a network is trained with backpropagation
    and scored with averageError on test_data; the hidden layer is enlarged
    by one unit as long as the test MSE strictly decreases.

    Parameters
    ----------
    train_data, test_data : array-like, shape (n, n_inputs + 1)
        Patterns with the desired output in the last column.
    Jq : int
        Hidden-layer size to start from.
    epsilon : float
        Stopping threshold forwarded to backpropagation.
    seed : int
        Random seed forwarded to backpropagation.

    Returns
    -------
    (int, float)
        The last hidden-layer size that improved the test MSE, and that MSE.
        If even the first network yields no finite improvement, the result
        is (Jq - 1, inf).
    """
    train = np.array(train_data)
    test = np.array(test_data)
    Et = np.inf  # np.infty was removed in NumPy 2.0
    while(True):
        # Fresh copy per run, since training may shuffle its input in place.
        patterns = np.copy(train)
        w, W, _ = backpropagation(patterns, Jq, epsilon=epsilon, seed = seed)
        Etp, var = averageError(w, W, test)
        print("{0} hidden units : MSE = {1} , variance = {2}".format(Jq,Etp,var))
        # Stop unless the error strictly improved. Written as a negation so a
        # NaN error (diverged training) also stops instead of looping forever.
        if not (Etp < Et):
            break
        Jq += 1
        Et = Etp
    return Jq-1, Et

In [None]:
# Search for the hidden-layer size on the raw (x, y) data, starting from the
# default of 3 hidden units; the cell output is (selected size, its test MSE).
hiddenUnit(train_data, test_data, epsilon=0.001, seed = 440)

Iteration 1 : error = 100965425.56553206
Iteration 2 : error = 33729509.58483874
Iteration 3 : error = 13921816.317282429
Iteration 4 : error = 14590979.783242617
Iteration 5 : error = 19582568.944003526
Iteration 6 : error = 8531847.50010686
Iteration 7 : error = 39518501.36311612
Iteration 8 : error = 46419589.36836452
Iteration 9 : error = 8959653.81647231
Iteration 10 : error = 20369875.821804263
Iteration 11 : error = 10501351.83867907
Iteration 12 : error = 19669562.75762891
Iteration 13 : error = 8037396.332666211
Iteration 14 : error = 9273446.201298472
Iteration 15 : error = 13515007.785977617
Iteration 16 : error = 8176503.7518314235
Iteration 17 : error = 10460791.013277885
Iteration 18 : error = 10462259.313096974
Iteration 19 : error = 8958969.593242789
Iteration 20 : error = 7985119.502770321
Iteration 21 : error = 8956957.974108052
Iteration 22 : error = 8333210.39631152
Iteration 23 : error = 8118365.326898939
Iteration 24 : error = 8094198.776738492
Iteration 25 : erro

(3, 99495.20650970993)

In [None]:
# Repeat the hidden-layer search with x^2 inserted as a second input column
# (at index 1, before y) in both the training and the test set.
train_d = np.insert(np.array(train_data), 1, np.square(train_data['x']), axis=1)
test_d = np.insert(np.array(test_data), 1, np.square(test_data['x']), axis=1)
hiddenUnit(train_d, test_d)

Iteration 1 : error = 944697069.1400571
Iteration 2 : error = 43727473.75750813
Iteration 3 : error = 159908314.5821941
Iteration 4 : error = 42677214.057957344
Iteration 5 : error = 11275987.429028483
Iteration 6 : error = 17049547.397171166
Iteration 7 : error = 25096842.549336754
Iteration 8 : error = 13718095.177282214
Iteration 9 : error = 12997109.833571704
Iteration 10 : error = 19916851.98665541
Iteration 11 : error = 10254862.747661088
Iteration 12 : error = 7958938.20386924
Iteration 13 : error = 13124474.65127949
Iteration 14 : error = 8325240.935142044
Iteration 15 : error = 11576347.127680788
Iteration 16 : error = 10075010.19525402
Iteration 17 : error = 11186638.130882198
Iteration 18 : error = 8754226.932530902
Iteration 19 : error = 8602815.323399449
Iteration 20 : error = 9473650.243996458
Iteration 21 : error = 8365002.429345769
Iteration 22 : error = 7978635.053580212
Iteration 23 : error = 8024512.188551179
Iteration 24 : error = 7975198.944123716
Iteration 25 : er

(4, 99443.59106525224)