# Homework 6

In [None]:
import pandas as pd
import numpy as np
from sympy import Symbol, lambdify
import matplotlib.pyplot as plt

In [None]:
# Load the training and test sets: space-separated text files without a header
# row; the two columns are named 'x' and 'y'.
train_data = pd.read_csv('Input/training.dat', sep=' ', header=None, names=['x', 'y']);
test_data = pd.read_csv('Input/test.dat', sep=' ', header=None, names=['x', 'y']);

# Training columns as plain NumPy arrays, used to build the symbolic objectives below.
x_train = np.array(train_data['x'])
y_train = np.array(train_data['y'])

# Symbolic regression coefficients: intercept, linear term and quadratic term.
w0 = Symbol("w0")
w1 = Symbol("w1")
w2 = Symbol("w2")

# Model (a): sum of squared residuals of y = w0 + w1*x over the training set,
# as a SymPy expression in (w0, w1).
func_a = np.sum(np.square(y_train - w0 - w1 * x_train))
# Numeric objective; it takes ONE sequence argument [w0, w1].
f_a = lambdify([[w0, w1]], func_a, "numpy")
# Numeric derivative of the objective with respect to [w0, w1] (same calling convention).
gf_a = lambdify([[w0, w1]], func_a.diff([[w0, w1]]), "numpy")
# Gradient converted to a float64 array of shape (1, len(x_arr)).
grad_fa = lambda x_arr : np.array(gf_a(x_arr), 'float64').reshape(1,len(x_arr))

# Model (b): same construction for the quadratic model y = w0 + w1*x + w2*x^2.
func_b = np.sum(np.square(y_train - w0 - w1 * x_train - w2 * x_train**2))
# Numeric objective; it takes ONE sequence argument [w0, w1, w2].
f_b = lambdify([[w0, w1, w2]], func_b, "numpy")
# Numeric derivative of the objective with respect to [w0, w1, w2].
gf_b = lambdify([[w0, w1, w2]], func_b.diff([[w0, w1, w2]]), "numpy")
# Gradient converted to a float64 array of shape (1, len(x_arr)).
grad_fb = lambda x_arr : np.array(gf_b(x_arr), 'float64').reshape(1,len(x_arr))

### Useful Functions

In [None]:
def plotRegressionGraph(data, regFunc, labels=[], name="graph"):
    """Plot the data points together with a fitted curve and save the figure.

    Parameters
    ----------
    data : 2-D numpy array
        Column 0 holds the x values, column 1 the y values.
    regFunc : callable
        Maps an array of x values to the corresponding predicted y values.
    labels : list, optional
        Currently unused; kept so existing calls keep working.
    name : str, optional
        Output file name without extension; the figure is written to
        "<name>.png".
    """
    # min/max are methods and must be CALLED: using the bound methods in
    # arithmetic (`xmin-1`) raises TypeError.
    xmin = data[:,0].min()
    xmax = data[:,0].max()
    # Evaluate the curve on a grid that extends one unit past the data range.
    t1 = np.arange(xmin-1, xmax+1, 0.1)
    plt.figure()
    plt.plot(t1, regFunc(t1), 'b-', label='Regression line')
    plt.scatter(data[:,0], data[:,1], color="black", label="Data points")
    plt.legend()
    plt.savefig("{0}.png".format(name))

In [None]:
def np_str(x_k):
    """Render an array as a compact one-line string.

    The array is first flattened to shape (len(x_k),), then printed with
    three digits of precision and ',' as the element separator.
    """
    flattened = x_k.reshape(len(x_k))
    return np.array2string(flattened, precision=3, separator=',')

def f_str(x):
    """Format a number with exactly four digits after the decimal point."""
    formatted = "{0:.4f}".format(x)
    return formatted

In [None]:
class OutputTable:
    """Row-by-row log with columns k, x^k, f(x^k), d^k, a^k and x^k+1."""

    def __init__(self):
        headers = ['k', 'x^k', 'f(x^k)', 'd^k', 'a^k', 'x^k+1']
        # Start with an empty frame; rows are appended by add_row.
        self.table = pd.DataFrame([], columns=headers)

    def add_row(self, k, xk, fxk, dk, ak, xkp):
        """Append one row to the table.

        xk, dk and xkp are rendered with np_str; fxk must expose .item() and
        is rendered with f_str; k and ak are stored unchanged.
        """
        formatted_row = [
            k,
            np_str(xk),
            f_str(fxk.item()),
            np_str(dk),
            ak,
            np_str(xkp),
        ]
        next_label = len(self.table)
        self.table.loc[next_label] = formatted_row

    def print_latex(self):
        """Print the table as LaTeX source, without the index column."""
        latex_source = self.table.to_latex(index=False)
        print(latex_source)

## Part B : Neural Network

In [None]:
def sigmoidalFunc(output_array):
    """Logistic sigmoid 1 / (1 + e^(-x)), applied element-wise."""
    return 1 / (1 + np.exp(-output_array))
def sigmoidalDeriv(hiddenlayer):
    """Sigmoid derivative expressed through the sigmoid's OUTPUT: h * (1 - h)."""
    return hiddenlayer * (1 - hiddenlayer)

In [None]:
def _backpropSSE(w_matrix, W_matrix, x, y):
    """Sum of squared errors of the network over all patterns in x (one column per pattern)."""
    actualHiddens = sigmoidalFunc(w_matrix @ x)
    actualHiddens[0,:] = -1 # row 0 is the constant unit h0 = -1, not a sigmoid output
    actualOutputMatrix = W_matrix @ actualHiddens # o1, ..., oi
    return np.sum(np.square(y - actualOutputMatrix))


def backpropagation(trainingData, hiddenLayerSize, alpha = 0.5, momentum = 0.9, epsilon = 0.001, seed = 440):
    """Train a one-hidden-layer network (sigmoid hidden units, linear output) pattern by pattern.

    Parameters
    ----------
    trainingData : 2-D array-like
        One pattern per row; the last column is the desired output, the
        preceding columns are the inputs. The caller's data is not modified.
    hiddenLayerSize : int
        Number of sigmoid hidden units (the constant unit h0 = -1 is added on top).
    alpha : float
        Initial learning rate.
    momentum : float
        Factor by which alpha is multiplied after every epoch. Training only
        ends once alpha has dropped below epsilon.
    epsilon : float
        Training continues while alpha >= epsilon.
    seed : int
        Seed for numpy's global random generator (weight init and shuffling).

    Returns
    -------
    (w_matrix, W_matrix, error)
        w_matrix has shape (hiddenLayerSize + 1, inputs + 1). Row 0 belongs to
        the constant unit h0 and is never trained (it keeps its random initial
        values); rows 1.. are the input-to-hidden weights.
        W_matrix has shape (1, hiddenLayerSize + 1): hidden-to-output weights.
        error is the sum of squared errors after the last epoch, or of the
        untrained network when no epoch was run.
    """
    np.random.seed(seed)
    t = 0
    # Private float copy: shuffling never touches the caller's array, and integer
    # input no longer truncates the outputs stored via zeros_like below.
    patterns = np.array(trainingData, dtype=float)
    patterns = np.insert(patterns, 0, -1, axis=1) # x0 = -1 unit is added
    P = np.size(patterns, 0) # pattern size
    I = 1 # output unit size
    K = np.size(patterns, 1) - I # input layer size
    J = hiddenLayerSize + 1 # h0 = -1 is added
    w_matrix = np.random.rand(J, K) # weights between input and hidden layer (row 0 is a placeholder for h0)
    W_matrix = np.random.rand(I, J) # weight between hidden and output layer
    # Define `error` up front so the return statement is valid even when
    # alpha < epsilon on entry and the training loop never executes.
    error = _backpropSSE(w_matrix, W_matrix,
                         np.transpose(patterns[:,:-1]).reshape(K, -1), patterns[:,-1])
    while(alpha >= epsilon):
        np.random.shuffle(patterns)
        x = np.transpose(patterns[:,:-1]).reshape(K, -1)
        y = patterns[:,-1]
        H = np.zeros(J)
        H[0] = -1 # h0 is equal to -1
        O = np.zeros_like(y)
        for p in range(P):
            # Forward pass.
            for j in range(1,J):
                H[j] = sigmoidalFunc(np.sum(w_matrix[j] * x[:,p]))
            O[p] = np.sum(W_matrix[0] * H) # linear function g(x) = x, single output unit
            # Error signals; S_H[0] stays 0 because h0 is a constant.
            S_O = 1 * (y[p] - O[p])
            S_H = np.zeros_like(H)
            S_H[1:] = sigmoidalDeriv(H[1:]) * (W_matrix[0,1:] * S_O)
            # Weight updates (S_H was computed from the pre-update W_matrix).
            W_matrix[0] += alpha * S_O * H
            w_matrix += np.outer(alpha * S_H, x[:,p])
        alpha *= momentum
        t += 1
        error = _backpropSSE(w_matrix, W_matrix, x, y)
        print("Iteration {0} : error = {1}".format(t,error))
    return w_matrix, W_matrix, error

In [None]:
def _matrixSSE(w_matrix, W_matrix, inputLayers, desiredOutputs):
    """Sum of squared errors of the network over all patterns (one column of inputLayers per pattern)."""
    actualHiddens = sigmoidalFunc(w_matrix @ inputLayers) # h1, ..., hj
    actualOutputMatrix = W_matrix @ np.insert(actualHiddens, 0, -1, axis=0) # o1, ..., oi
    return np.sum(np.square(desiredOutputs - np.transpose(actualOutputMatrix)))


def backpropagationWithMatrix(patterns, hiddenLayerSize, alpha = 0.5, momentum = 0.9, epsilon = 0.001, seed = 440):
    """Matrix-form training of a one-hidden-layer network (sigmoid hidden units, linear output).

    Parameters
    ----------
    patterns : 2-D array-like
        One pattern per row; the last column is the desired output, the
        preceding columns are the inputs. The caller's data is not modified.
    hiddenLayerSize : int
        Number of sigmoid hidden units (the constant unit h0 = -1 is added on top).
    alpha : float
        Initial learning rate.
    momentum : float
        Factor by which alpha is multiplied after every epoch.
    epsilon : float
        Training continues while alpha > epsilon.
    seed : int
        Seed for numpy's global random generator (weight init and shuffling).

    Returns
    -------
    (w_matrix, W_matrix, error)
        w_matrix: input-to-hidden weights, shape (hiddenLayerSize, inputs + 1).
        W_matrix: hidden-to-output weights, shape (1, hiddenLayerSize + 1).
        error: sum of squared errors after the last epoch, or of the untrained
        network when no epoch was run.
    """
    np.random.seed(seed)
    t = 0
    # Private float copy: np.random.shuffle works in place and would otherwise
    # reorder the caller's array; float dtype also keeps integer input from
    # truncating the outputs stored via zeros_like below.
    patterns = np.array(patterns, dtype=float)
    P = np.size(patterns, 0)
    w_matrix = np.random.rand(hiddenLayerSize, np.size(patterns,1)) # column count of patterns (inputs + y) equals inputs + x0
    W_matrix = np.random.rand(1, hiddenLayerSize+1) # +1 for h0
    # Define `error` up front so the return statement is valid even when
    # alpha <= epsilon on entry and the training loop never executes.
    error = _matrixSSE(w_matrix, W_matrix,
                       np.transpose(np.insert(patterns, 0, -1, axis=1)[:,:-1]),
                       patterns[:,-1].reshape(-1,1))
    while(alpha > epsilon):
        np.random.shuffle(patterns)
        desiredOutputs = patterns[:,-1].reshape(-1,1)
        inputLayers = np.transpose(np.insert(patterns, 0, -1, axis=1)[:,:-1]) # x0 = -1 is prepended, output column is dropped
        hiddenLayer = np.zeros((hiddenLayerSize+1, 1)) # hiddenLayerSize doesn't include h0 so it's added
        hiddenLayer[0,:] = -1 # h0 is equal to -1
        actualOutput = np.zeros_like(desiredOutputs)
        for p in range(P):
            inputColumn = inputLayers[:,p].reshape(-1,1)
            hiddenLayer[1:] = sigmoidalFunc(w_matrix @ inputColumn)
            actualOutput[p] = (W_matrix @ hiddenLayer).ravel()
            # since the output function is linear, net output is equal to actual output
            S_output = (1 * (desiredOutputs[p] - actualOutput[p])).reshape(-1,1)
            S_hidden = (sigmoidalDeriv(hiddenLayer[1:]) * (np.transpose(W_matrix[:,1:]) @ S_output)).reshape(-1,1)
            # S_hidden above was computed from the pre-update W_matrix.
            delta_W = alpha * S_output @ np.transpose(hiddenLayer)
            W_matrix += delta_W
            delta_w = alpha * S_hidden @ np.transpose(inputColumn)
            w_matrix += delta_w
        alpha = momentum * alpha
        t += 1
        error = _matrixSSE(w_matrix, W_matrix, inputLayers, desiredOutputs)
        print("Iteration {0} : error = {1}".format(t,error))
    return w_matrix, W_matrix, error

In [None]:
# Train the loop-based implementation with 3 hidden units on the raw (x, y) training pairs.
patterns = np.array(train_data)
backpropagation(patterns, 3, seed=440)

  """Entry point for launching an IPython kernel.
Iteration 1 : error = 65990060.54243174
Iteration 2 : error = 10113825.583399063
Iteration 3 : error = 43808090.295681074
Iteration 4 : error = 12505564.676437719
Iteration 5 : error = 8089966.405235191
Iteration 6 : error = 11833632.05725339
Iteration 7 : error = 12836098.518659439
Iteration 8 : error = 11965313.508335438
Iteration 9 : error = 8680024.68645668
Iteration 10 : error = 8135515.189156784
Iteration 11 : error = 8618921.519123074
Iteration 12 : error = 8443978.62606255
Iteration 13 : error = 8326509.097579323
Iteration 14 : error = 8121047.848801743
Iteration 15 : error = 8163216.338089563
Iteration 16 : error = 8545592.633515783
Iteration 17 : error = 8656896.371558119
Iteration 18 : error = 8793583.228306344
Iteration 19 : error = 9995984.578043804
Iteration 20 : error = 7987030.096374455
Iteration 21 : error = 7956666.186538142
Iteration 22 : error = 10745125.030414928
Iteration 23 : error = 8825952.889630657
Iteration 24

(array([[ 1.71858636e-01,  9.65145829e-01],
        [ 8.47339514e-01,  3.97995080e-01],
        [ 6.16147498e-01,  6.29039082e-01],
        [ 3.13448399e+02, -3.69468170e+04]]),
 array([[-127.77235014,  128.42128315,  128.78880676,  249.38554076]]),
 7954415.602024415)

In [None]:
# Train the matrix-form implementation with 3 hidden units on the raw (x, y)
# training pairs, using a different seed (50) than the loop-based run.
patterns = np.array(train_data)
backpropagationWithMatrix(patterns, 3, seed=50)

  """Entry point for launching an IPython kernel.
Iteration 1 : error = 8816929.259708159
Iteration 2 : error = 35416253.22628992
Iteration 3 : error = 8229024.724786728
Iteration 4 : error = 13164051.38481994
Iteration 5 : error = 15566757.510988269
Iteration 6 : error = 23564751.692394815
Iteration 7 : error = 53413296.88368054
Iteration 8 : error = 8874842.832893368
Iteration 9 : error = 16104438.438315174
Iteration 10 : error = 19138031.8672025
Iteration 11 : error = 8787954.196188327
Iteration 12 : error = 9487006.006960494
Iteration 13 : error = 8848522.9449617
Iteration 14 : error = 8041621.976536453
Iteration 15 : error = 8811230.183558227
Iteration 16 : error = 7983583.827350191
Iteration 17 : error = 8200299.000644428
Iteration 18 : error = 8601616.187121753
Iteration 19 : error = 9006039.439315354
Iteration 20 : error = 8088580.127783587
Iteration 21 : error = 8830037.52002777
Iteration 22 : error = 9067076.772440638
Iteration 23 : error = 8332115.190639583
Iteration 24 : er

(array([[  0.62495165, -21.52694768],
        [  0.25547392,   0.39632991],
        [  0.3773151 ,   0.99657423]]),
 array([[-128.24385088, 1238.84623456,  129.41258477,  128.96205743]]),
 7953714.347670076)

In [None]:
# Add x^2 as a second input: it is inserted at column index 1, so the columns
# become (x, x^2, y). Then train with 3 hidden units and the default settings.
patterns2 = np.insert(np.array(train_data), 1, np.square(train_data['x']), axis=1)
backpropagation(patterns2, 3)

Iteration 1 : error = 88280657.92971183
Iteration 2 : error = 78291309.58459358
Iteration 3 : error = 21538937.469661705
Iteration 4 : error = 63672657.85242866
Iteration 5 : error = 7953698.8945387
Iteration 6 : error = 21926469.828171175
Iteration 7 : error = 12495620.773744613
Iteration 8 : error = 33722797.814106144
Iteration 9 : error = 8119178.015434197
Iteration 10 : error = 14318492.390151966
Iteration 11 : error = 20106416.49474501
Iteration 12 : error = 10238905.356222112
Iteration 13 : error = 34003572.146542765
Iteration 14 : error = 7989858.2323091
Iteration 15 : error = 9526449.375450313
Iteration 16 : error = 11577355.230264327
Iteration 17 : error = 7953653.602025525
Iteration 18 : error = 8341029.254973405
Iteration 19 : error = 8049490.186094718
Iteration 20 : error = 8051395.941854635
Iteration 21 : error = 10968614.132510625
Iteration 22 : error = 8409331.516944073
Iteration 23 : error = 8626602.000976456
Iteration 24 : error = 8528248.111838715
Iteration 25 : error

(array([[0.17185864, 0.96514583, 0.84733951],
        [0.39799508, 0.6161475 , 0.62903908],
        [0.15285732, 0.17501496, 0.38176556],
        [0.26716745, 0.63469106, 0.28434213]]),
 array([[-96.51737743,  98.39875203,  98.06407988,  97.76263623]]),
 7954322.33853856)

In [None]:
def averageError(w_matrix, W_matrix, test_data):
    """Evaluate a trained network on a data set.

    Parameters
    ----------
    w_matrix : 2-D array
        Input-to-hidden weights. Two layouts are accepted:
        one row per sigmoid hidden unit (as many rows as W_matrix has columns
        minus one), or the same with an extra leading row for the constant
        unit h0 (as many rows as W_matrix has columns). The extra row carries
        no information and is dropped.
    W_matrix : 2-D array
        Hidden-to-output weights; column 0 multiplies the constant unit h0 = -1.
    test_data : 2-D array-like
        One pattern per row; the last column is the desired output.

    Returns
    -------
    (mse, variance)
        mse is the mean of the squared residuals; variance is the sample
        variance (divisor n - 1) of the squared residuals around that mean,
        so it needs at least two rows to be meaningful.
    """
    test_data = np.asarray(test_data)
    w_matrix = np.asarray(w_matrix)
    W_matrix = np.asarray(W_matrix)
    # A weight matrix with one row per hidden unit INCLUDING h0 would make the
    # hidden layer one row too tall once h0 is inserted below (matmul shape
    # mismatch); drop that placeholder row.
    if w_matrix.shape[0] == W_matrix.shape[1]:
        w_matrix = w_matrix[1:]
    inputLayers = np.transpose(np.insert(test_data, 0, -1, axis=1)[:,:-1]) # x0 = -1 prepended, output column dropped
    desiredOutputs = test_data[:,-1].reshape(-1,1)
    actualHiddens = sigmoidalFunc(w_matrix @ inputLayers) # h1, ..., hj
    actualOutputMatrix = W_matrix @ np.insert(actualHiddens, 0, -1, axis=0) # o1, ..., oi
    squareResiduals = np.square(desiredOutputs - np.transpose(actualOutputMatrix))
    sse = np.sum(squareResiduals)
    mse = sse / np.size(desiredOutputs)
    variance = np.sum(np.square(mse-squareResiduals)) / (np.size(desiredOutputs) - 1)
    return mse, variance

In [None]:
def hiddenUnit(train_data, test_data, Jq = 3, epsilon = 0.001, seed = 440):
    """Grow the hidden layer one unit at a time until the test error stops improving.

    Starting with Jq hidden units, a network is trained with backpropagation
    and scored with averageError on the test set. The size is increased by one
    as long as the test MSE strictly decreases.

    Parameters
    ----------
    train_data, test_data : array-like (e.g. DataFrame or 2-D array)
        Converted with np.array; last column is the desired output.
    Jq : int
        Hidden layer size to start with.
    epsilon, seed :
        Forwarded to backpropagation.

    Returns
    -------
    (hidden_units, mse)
        The last size that improved the test MSE and that MSE. If even the
        first size yields no valid improvement (e.g. a NaN error), the result
        is (Jq - 1, inf).
    """
    train = np.array(train_data)
    test = np.array(test_data)
    Et = np.inf # np.infty no longer exists in NumPy 2.0
    while(True):
        patterns = np.copy(train)
        w, W, total_error = backpropagation(patterns, Jq, epsilon=epsilon, seed = seed)
        Etp, var = averageError(w, W, test)
        print("{0} hidden units : MSE = {1} , variance = {2}".format(Jq,Etp,var))
        # Stop unless the error strictly improved. Written as a negation so that a
        # NaN error (every comparison with NaN is False) also ends the search
        # instead of looping forever.
        if not (Etp < Et):
            break
        Jq += 1
        Et = Etp
    return Jq-1, Et

In [None]:
# Search for the hidden layer size on the raw (x, y) data, starting from the default of 3 units.
hiddenUnit(train_data, test_data, epsilon=0.001, seed = 440)

  """Entry point for launching an IPython kernel.
Iteration 1 : error = 65990060.54243174
Iteration 2 : error = 10113825.583399063
Iteration 3 : error = 43808090.295681074
Iteration 4 : error = 12505564.676437719
Iteration 5 : error = 8089966.405235191
Iteration 6 : error = 11833632.05725339
Iteration 7 : error = 12836098.518659439
Iteration 8 : error = 11965313.508335438
Iteration 9 : error = 8680024.68645668
Iteration 10 : error = 8135515.189156784
Iteration 11 : error = 8618921.519123074
Iteration 12 : error = 8443978.62606255
Iteration 13 : error = 8326509.097579323
Iteration 14 : error = 8121047.848801743
Iteration 15 : error = 8163216.338089563
Iteration 16 : error = 8545592.633515783
Iteration 17 : error = 8656896.371558119
Iteration 18 : error = 8793583.228306344
Iteration 19 : error = 9995984.578043804
Iteration 20 : error = 7987030.096374455
Iteration 21 : error = 7956666.186538142
Iteration 22 : error = 10745125.030414928
Iteration 23 : error = 8825952.889630657
Iteration 24

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 5 is different from 4)

In [None]:
# Repeat the hidden-unit search with x^2 added as a second input: the squared
# column is inserted at index 1 in both sets, giving columns (x, x^2, y).
train_d = np.insert(np.array(train_data), 1, np.square(train_data['x']), axis=1)
test_d = np.insert(np.array(test_data), 1, np.square(test_data['x']), axis=1)
hiddenUnit(train_d, test_d)

Iteration 1 : error = 88280657.92971183
Iteration 2 : error = 78291309.58459358
Iteration 3 : error = 21538937.469661705
Iteration 4 : error = 63672657.85242866
Iteration 5 : error = 7953698.8945387
Iteration 6 : error = 21926469.828171175
Iteration 7 : error = 12495620.773744613
Iteration 8 : error = 33722797.814106144
Iteration 9 : error = 8119178.015434197
Iteration 10 : error = 14318492.390151966
Iteration 11 : error = 20106416.49474501
Iteration 12 : error = 10238905.356222112
Iteration 13 : error = 34003572.146542765
Iteration 14 : error = 7989858.2323091
Iteration 15 : error = 9526449.375450313
Iteration 16 : error = 11577355.230264327
Iteration 17 : error = 7953653.602025525
Iteration 18 : error = 8341029.254973405
Iteration 19 : error = 8049490.186094718
Iteration 20 : error = 8051395.941854635
Iteration 21 : error = 10968614.132510625
Iteration 22 : error = 8409331.516944073
Iteration 23 : error = 8626602.000976456
Iteration 24 : error = 8528248.111838715
Iteration 25 : error

ValueError: matmul: Input operand 1 has a mismatch in its core dimension 0, with gufunc signature (n?,k),(k,m?)->(n?,m?) (size 5 is different from 4)