# Homework 6

In [None]:
import pandas as pd
import numpy as np
from sympy import Symbol, lambdify

In [None]:
# Load the space-separated training and test sets. The files have no header row,
# so the two columns are named 'x' and 'y' here.
train_data = pd.read_csv('Input/training.dat', sep=' ', header=None, names=['x', 'y']);
test_data = pd.read_csv('Input/test.dat', sep=' ', header=None, names=['x', 'y']);

# Training columns as NumPy arrays, used to build the symbolic objectives below.
x_train = np.array(train_data['x'])
y_train = np.array(train_data['y'])

# Symbolic regression coefficients.
w0 = Symbol("w0")
w1 = Symbol("w1")
w2 = Symbol("w2")

# Model (a): sum of squared residuals of y - w0 - w1*x over the training set,
# kept as a symbolic expression in w0 and w1.
func_a = np.sum(np.square(y_train - w0 - w1 * x_train))
# f_a evaluates the objective for a weight vector [w0, w1];
# gf_a evaluates its derivative with respect to [w0, w1].
f_a = lambdify([[w0, w1]], func_a, "numpy")
gf_a = lambdify([[w0, w1]], func_a.diff([[w0, w1]]), "numpy")
# grad_fa returns the derivative as a float64 array of shape (1, len(x_arr)).
grad_fa = lambda x_arr : np.array(gf_a(x_arr), 'float64').reshape(1,len(x_arr))

# Model (b): same objective with an additional quadratic term w2*x^2.
func_b = np.sum(np.square(y_train - w0 - w1 * x_train - w2 * x_train**2))
# f_b / gf_b take a weight vector [w0, w1, w2].
f_b = lambdify([[w0, w1, w2]], func_b, "numpy")
gf_b = lambdify([[w0, w1, w2]], func_b.diff([[w0, w1, w2]]), "numpy")
# grad_fb returns the derivative as a float64 array of shape (1, len(x_arr)).
grad_fb = lambda x_arr : np.array(gf_b(x_arr), 'float64').reshape(1,len(x_arr))

### Useful Functions

In [None]:
def np_str(x_k):
    """Format a vector as a compact one-line string with 3 digits of precision.

    The input is flattened first, so 1-D arrays, column vectors of shape (n, 1)
    and row vectors of shape (1, n) (the shape produced by ``grad_fa`` and
    ``grad_fb``) are all rendered the same way. The previous
    ``reshape(len(x_k))`` raised a ValueError for row vectors with n > 1.

    Parameters
    ----------
    x_k : array-like
        Values to format.

    Returns
    -------
    str
        Comma-separated representation such as ``'[1.5 ,2.25]'``.
    """
    flat = np.asarray(x_k).reshape(-1)
    return np.array2string(flat, precision=3, separator=',')


def f_str(x):
    """Format a number with exactly four digits after the decimal point."""
    return "{0:.4f}".format(x)

In [None]:
class OutputTable:
    """Collects one row per iteration of an iterative method for later export.

    Columns: iteration index k, current point x^k, objective value f(x^k),
    direction d^k, step size a^k and next point x^k+1.
    """

    def __init__(self):
        headers = ['k', 'x^k', 'f(x^k)', 'd^k', 'a^k', 'x^k+1']
        self.table = pd.DataFrame([], columns=headers)

    def add_row(self, k, xk, fxk, dk, ak, xkp):
        """Append one iteration; vectors are rendered with np_str, f(x^k) with f_str."""
        # fxk must expose .item() (NumPy scalar or one-element array).
        formatted_row = [
            k,
            np_str(xk),
            f_str(fxk.item()),
            np_str(dk),
            ak,
            np_str(xkp),
        ]
        next_position = len(self.table)
        self.table.loc[next_position] = formatted_row

    def print_latex(self):
        """Print the table as LaTeX source, without the index column."""
        latex_source = self.table.to_latex(index=False)
        print(latex_source)

In [None]:
# Scratch check of NumPy shapes: np.zeros(3) is one-dimensional, so a.shape is (3,).
# NOTE(review): `patterns` here is a toy 2x3 array; it is rebound to the real
# training data in a later cell, and `a` does not appear to be used elsewhere in
# this notebook - confirm whether this cell can be removed.
patterns = np.array([[1,2,3],[4,5,6]])
a = np.zeros(3)
a.shape

(3,)

## Part B : Neural Network

In [None]:
def sigmoidalFunc(output_array):
    """Logistic sigmoid 1 / (1 + exp(-z)), applied element-wise to the net input z."""
    denominator = 1 + np.exp(-output_array)
    return 1 / denominator


def sigmoidalDeriv(hiddenlayer):
    """Sigmoid derivative written in terms of the sigmoid output h: h * (1 - h)."""
    complement = 1 - hiddenlayer
    return hiddenlayer * complement

In [None]:
def backpropagation(patterns, hiddenLayerSize, alpha = 0.5, learningRate = 0.9, epsilon = 0.001, seed = None):
    """Train a one-hidden-layer network with online (per-pattern) backpropagation.

    The network has sigmoid hidden units and one linear output unit. The input
    layer and the hidden layer each get an extra bias unit fixed at -1. After
    every epoch ``alpha`` is multiplied by ``learningRate``; training stops as
    soon as ``alpha`` is no longer greater than ``epsilon``.

    Parameters
    ----------
    patterns : array-like, shape (P, n_inputs + 1)
        One pattern per row; the last column is the desired output. The data is
        copied (as float) before shuffling, so the caller's array is not
        reordered and integer-typed data is not truncated.
    hiddenLayerSize : int
        Number of hidden units, not counting the bias unit h0.
    alpha : float
        Initial step size.
    learningRate : float
        Factor applied to ``alpha`` after each epoch. Values >= 1 never let
        ``alpha`` drop below ``epsilon``, so the loop would not terminate.
    epsilon : float
        Training stops once ``alpha <= epsilon``.
    seed : int or None
        Seed for NumPy's global random generator. With ``None`` a seed in
        [0, 100) is drawn when the function is called (a default written as
        ``np.random.randint(0, 100)`` in the signature would be evaluated only
        once, when the function is defined).

    Returns
    -------
    w_matrix : ndarray, shape (hiddenLayerSize, n_inputs + 1)
        Input-to-hidden weights (column 0 belongs to the bias input x0).
    W_matrix : ndarray, shape (1, hiddenLayerSize + 1)
        Hidden-to-output weights (column 0 belongs to the bias unit h0).
    error : float
        Sum of squared errors over the training patterns after the last epoch,
        or for the initial weights when no epoch was run.

    Raises
    ------
    ValueError
        If ``patterns`` is not 2-D with at least one input and one output column.
    """
    if seed is None:
        seed = np.random.randint(0, 100)
    np.random.seed(seed)

    # Private float copy: np.random.shuffle works in place, and integer input
    # would otherwise force integer arithmetic on the network output.
    patterns = np.array(patterns, dtype=float)
    if patterns.ndim != 2 or patterns.shape[1] < 2:
        raise ValueError("patterns must be a 2-D array with at least one input column and one output column")

    def splitPatterns(data):
        # x0 = -1 is prepended to every pattern; the output column is excluded from the inputs.
        desired = data[:, -1].reshape(-1, 1)
        inputs = np.transpose(np.insert(data, 0, -1, axis=1)[:, :-1])
        return inputs, desired

    def sumSquaredError(w, W, inputs, desired):
        hiddens = sigmoidalFunc(w @ inputs) # h1, ..., hj
        outputs = W @ np.insert(hiddens, 0, -1, axis=0) # h0 = -1 is added; linear output
        return np.sum(np.square(desired - np.transpose(outputs)))

    t = 0
    P = np.size(patterns, 0)
    # patterns includes the y column, so its column count equals n_inputs + 1 (room for x0)
    w_matrix = np.random.rand(hiddenLayerSize, np.size(patterns, 1))
    W_matrix = np.random.rand(1, hiddenLayerSize + 1) # one extra weight for h0

    hiddenLayer = np.zeros((hiddenLayerSize + 1, 1)) # hiddenLayerSize doesn't include h0 so it's added
    hiddenLayer[0, :] = -1 # h0 is equal to -1

    # Defined even if the loop below never runs (alpha <= epsilon on entry).
    error = sumSquaredError(w_matrix, W_matrix, *splitPatterns(patterns))

    while(alpha > epsilon):
        np.random.shuffle(patterns)
        inputLayers, desiredOutputs = splitPatterns(patterns)
        for p in range(P):
            inputPattern = inputLayers[:, p].reshape(-1, 1)
            hiddenLayer[1:] = sigmoidalFunc(w_matrix @ inputPattern)
            # since the output function is linear, net output is equal to actual output
            actualOutput = W_matrix @ hiddenLayer
            S_output = (desiredOutputs[p] - actualOutput).reshape(-1, 1)
            # the hidden error signal uses the output weights from before this pattern's update
            S_hidden = (sigmoidalDeriv(hiddenLayer[1:]) * (np.transpose(W_matrix[:, 1:]) @ S_output)).reshape(-1, 1)
            W_matrix += alpha * S_output @ np.transpose(hiddenLayer)
            w_matrix += alpha * S_hidden @ np.transpose(inputPattern)
        alpha = learningRate * alpha
        t += 1
        error = sumSquaredError(w_matrix, W_matrix, inputLayers, desiredOutputs)
        print("Iteration {0} : error = {1}".format(t,error))
    return w_matrix, W_matrix, error

In [None]:
def backpropagationWithForLoops(trainingData, hiddenLayerSize, alpha = 0.5, learningRate = 0.9, epsilon = 0.001, seed = None):
    """Online backpropagation for a one-hidden-layer network, written with explicit loops.

    Sigmoid hidden units, one linear output unit, and bias units fixed at -1 in
    both the input layer (x0) and the hidden layer (h0). After every epoch
    ``alpha`` is multiplied by ``learningRate``; training continues while
    ``alpha >= epsilon``.

    Parameters
    ----------
    trainingData : array-like, shape (P, n_inputs + 1)
        One pattern per row; the last column is the desired output. The data is
        copied (as float), so the caller's array is left untouched and
        integer-typed data is not truncated.
    hiddenLayerSize : int
        Number of hidden units, not counting the bias unit h0.
    alpha : float
        Initial step size.
    learningRate : float
        Factor applied to ``alpha`` after each epoch. Values >= 1 never let
        ``alpha`` drop below ``epsilon``, so the loop would not terminate.
    epsilon : float
        Training stops once ``alpha < epsilon``.
    seed : int or None
        Seed for NumPy's global random generator. With ``None`` a seed in
        [0, 100) is drawn when the function is called (a default written as
        ``np.random.randint(0, 100)`` in the signature would be evaluated only
        once, when the function is defined).

    Returns
    -------
    w_matrix : ndarray, shape (hiddenLayerSize + 1, n_inputs + 1)
        Input-to-hidden weights. Row 0 corresponds to the bias unit h0: it is
        never updated and never used (h0 is forced to -1), so it still holds its
        random initial values. Because of this extra row the matrix cannot be
        passed as-is to ``averageError``, which adds the bias row itself.
    W_matrix : ndarray, shape (1, hiddenLayerSize + 1)
        Hidden-to-output weights (column 0 belongs to h0).
    error : float
        Sum of squared errors over the training patterns after the last epoch,
        or for the initial weights when no epoch was run.
    """
    if seed is None:
        seed = np.random.randint(0, 100)
    np.random.seed(seed)
    t = 0
    # Float copy of the data with the x0 = -1 unit added as the first column.
    patterns = np.insert(np.array(trainingData, dtype=float), 0, -1, axis=1)
    P = np.size(patterns, 0) # pattern size
    I = 1 # output unit size
    K = np.size(patterns, 1) - I # input layer size (x0 included)
    J = hiddenLayerSize + 1 # h0 = -1 is added
    w_matrix = np.random.rand(J, K) # weights between input and hidden layer (row 0 is unused, see docstring)
    W_matrix = np.random.rand(I, J) # weights between hidden and output layer

    def sumSquaredError(inputs, desired):
        hiddens = sigmoidalFunc(w_matrix @ inputs)
        hiddens[0, :] = -1 # row 0 is h0, not a real sigmoid unit
        outputs = W_matrix @ hiddens # linear output unit
        return np.sum(np.square(desired - outputs))

    x = np.transpose(patterns[:, :-1]).reshape(K, -1)
    y = patterns[:, -1]
    # Defined even if the loop below never runs (alpha < epsilon on entry).
    error = sumSquaredError(x, y)

    H = np.zeros(J)
    H[0] = -1 # h0 is equal to -1
    while(alpha >= epsilon):
        np.random.shuffle(patterns)
        x = np.transpose(patterns[:, :-1]).reshape(K, -1)
        y = patterns[:, -1]
        for p in range(P):
            # forward pass
            for j in range(1, J):
                H[j] = sigmoidalFunc(np.sum(w_matrix[j] * x[:, p]))
            o = np.sum(W_matrix[0] * H) # linear function g(x) = x, single output unit
            # error signals; S_H[0] stays 0 because h0 has no incoming weights to train
            S_O = y[p] - o
            S_H = np.zeros_like(H)
            for j in range(1, J):
                S_H[j] = sigmoidalDeriv(H[j]) * W_matrix[0, j] * S_O
            # weight updates (S_H was computed with the output weights from before this update)
            for j in range(J):
                W_matrix[0, j] += alpha * S_O * H[j]
            for k in range(K):
                w_matrix[:, k] += alpha * S_H * x[k, p]
        alpha *= learningRate
        t += 1
        error = sumSquaredError(x, y)
        print("Iteration {0} : error = {1}".format(t,error))
    return w_matrix, W_matrix, error

In [None]:
# Train the loop-based network on the raw (x, y) training pairs with 3 hidden units.
# The cell displays the returned (w_matrix, W_matrix, error) tuple.
patterns = np.array(train_data)
backpropagationWithForLoops(patterns, 3)

Iteration 1 : error = 1006807423.890619
Iteration 2 : error = 23869258.3102412
Iteration 3 : error = 11200764.651787676
Iteration 4 : error = 13094540.7481575
Iteration 5 : error = 28911787.667117942
Iteration 6 : error = 12996129.268918592
Iteration 7 : error = 9567145.65491038
Iteration 8 : error = 9398798.991825499
Iteration 9 : error = 24238285.426889125
Iteration 10 : error = 11579564.048979847
Iteration 11 : error = 21321846.2599155
Iteration 12 : error = 11702963.015266364
Iteration 13 : error = 8951321.602063853
Iteration 14 : error = 11644568.46351883
Iteration 15 : error = 8363362.130914341
Iteration 16 : error = 9716429.126406835
Iteration 17 : error = 8482873.206300361
Iteration 18 : error = 7969598.6007742705
Iteration 19 : error = 11253884.425861092
Iteration 20 : error = 8160376.578740317
Iteration 21 : error = 7978362.789735451
Iteration 22 : error = 8112614.054576066
Iteration 23 : error = 8355572.70983451
Iteration 24 : error = 7972604.648193134
Iteration 25 : error =

(array([[0.5881308 , 0.89771373],
        [0.89153073, 0.81583748],
        [0.03588959, 0.69175758],
        [0.37868094, 0.51851095]]),
 array([[-96.26897942,  97.12078111,  97.19924729,  97.64553682]]),
 7953547.223329035)

In [None]:
# Add x^2 as a second input feature: it is inserted at column 1, so each row
# becomes [x, x^2, y]. Then train the vectorized network with 3 hidden units.
patterns2 = np.insert(np.array(train_data), 1, np.square(train_data['x']), axis=1)
backpropagation(patterns2, 3)

Iteration 1 : error = 944697069.1400571
Iteration 2 : error = 43727473.75750813
Iteration 3 : error = 159908314.5821941
Iteration 4 : error = 42677214.057957344
Iteration 5 : error = 11275987.429028483
Iteration 6 : error = 17049547.397171166
Iteration 7 : error = 25096842.549336754
Iteration 8 : error = 13718095.177282214
Iteration 9 : error = 12997109.833571704
Iteration 10 : error = 19916851.98665541
Iteration 11 : error = 10254862.747661088
Iteration 12 : error = 7958938.20386924
Iteration 13 : error = 13124474.65127949
Iteration 14 : error = 8325240.935142044
Iteration 15 : error = 11576347.127680788
Iteration 16 : error = 10075010.19525402
Iteration 17 : error = 11186638.130882198
Iteration 18 : error = 8754226.932530902
Iteration 19 : error = 8602815.323399449
Iteration 20 : error = 9473650.243996458
Iteration 21 : error = 8365002.429345769
Iteration 22 : error = 7978635.053580212
Iteration 23 : error = 8024512.188551179
Iteration 24 : error = 7975198.944123716
Iteration 25 : er

(array([[0.98901151, 0.54954473, 0.2814473 ],
        [0.07728957, 0.4444695 , 0.47280797],
        [0.048522  , 0.16332445, 0.11595071]]),
 array([[-95.78239566,  97.26596939,  97.05988976,  97.40050902]]),
 7953557.665973738)

In [None]:
def averageError(w_matrix, W_matrix, test_data):
    """Evaluate a trained network on a test set.

    Parameters
    ----------
    w_matrix : ndarray
        Input-to-hidden weights; must be multipliable with the bias-augmented
        inputs (one column per input plus one for x0).
    W_matrix : ndarray
        Hidden-to-output weights, including the weight of the bias unit h0 in
        column 0.
    test_data : ndarray, 2-D
        One pattern per row; the last column is the desired output.

    Returns
    -------
    mse : float
        Mean of the squared residuals.
    variance : float
        Sample variance (divisor n - 1) of the squared residuals around ``mse``.
    """
    targets = test_data[:, -1].reshape(-1, 1)

    # Input layer: x0 = -1 is prepended, the output column is dropped.
    with_bias = np.insert(test_data, 0, -1, axis=1)
    inputs = with_bias[:, :-1].T

    hidden = sigmoidalFunc(w_matrix @ inputs)  # h1, ..., hj
    hidden_with_bias = np.insert(hidden, 0, -1, axis=0)  # h0 = -1 added on top
    predictions = (W_matrix @ hidden_with_bias).T  # linear output unit

    squared = np.square(targets - predictions)
    count = targets.size
    mse = np.sum(squared) / count
    variance = np.sum(np.square(mse - squared)) / (count - 1)
    return mse, variance

In [None]:
def hiddenUnit(train_data, test_data, Jq = 3, epsilon = 0.001, seed = None):
    """Grow the hidden layer one unit at a time until the test error stops improving.

    Starting with ``Jq`` hidden units, a network is trained with
    ``backpropagation`` and scored with ``averageError`` on the test set. The
    search stops at the first size whose test MSE is not lower than the
    previous one (or is not finite).

    Parameters
    ----------
    train_data, test_data : array-like
        Converted with ``np.array``; one pattern per row, desired output last.
    Jq : int
        Number of hidden units to start with.
    epsilon : float
        Stopping threshold forwarded to ``backpropagation``.
    seed : int or None
        Seed forwarded to every ``backpropagation`` run so that all network
        sizes are trained with the same seed. With ``None`` one seed in
        [0, 100) is drawn when the function is called.

    Returns
    -------
    (int, float)
        The last hidden-layer size that improved the test MSE, and that MSE.
        If even the first network gives a non-finite MSE, the result is
        ``(Jq - 1, inf)``.
    """
    if seed is None:
        seed = np.random.randint(0, 100)
    train = np.array(train_data)
    test = np.array(test_data)
    Et = np.inf  # np.infty was removed in NumPy 2.0
    while(True):
        # fresh copy per run, in case the training routine reorders its input
        patterns = np.copy(train)
        w, W, total_error = backpropagation(patterns, Jq, epsilon=epsilon, seed=seed)
        Etp, var = averageError(w, W, test)
        print("{0} hidden units : MSE = {1}".format(Jq,Etp))
        # A NaN error compares False with everything and would keep the loop running forever.
        if(not np.isfinite(Etp) or Etp >= Et):
            break
        Jq += 1
        Et = Etp
    return Jq-1, Et

In [None]:
# Search for the hidden-layer size, starting from the default Jq = 3.
# The cell displays the returned (number of hidden units, test MSE) pair.
hiddenUnit(train_data, test_data, epsilon=0.001)

Iteration 1 : error = 1628488255.723855
Iteration 2 : error = 262377252.20313102
Iteration 3 : error = 8049430.024488673
Iteration 4 : error = 30989747.747654196
Iteration 5 : error = 12915262.003412895
Iteration 6 : error = 15631310.329285646
Iteration 7 : error = 13299944.680020344
Iteration 8 : error = 15413444.534937598
Iteration 9 : error = 8202380.856884986
Iteration 10 : error = 14992747.653127668
Iteration 11 : error = 11033716.824413612
Iteration 12 : error = 9445770.65141973
Iteration 13 : error = 12820611.805164412
Iteration 14 : error = 11682656.700652601
Iteration 15 : error = 8405368.471101215
Iteration 16 : error = 8216675.404738394
Iteration 17 : error = 8616794.902400084
Iteration 18 : error = 9581692.888374507
Iteration 19 : error = 7964761.80533867
Iteration 20 : error = 10317994.35639805
Iteration 21 : error = 9264252.029135918
Iteration 22 : error = 7998443.840230706
Iteration 23 : error = 8515195.355972568
Iteration 24 : error = 8531932.063469274
Iteration 25 : er

(4, 99621.7338496295)