# Homework 6

In [None]:
import pandas as pd
import numpy as np
from sympy import Symbol, lambdify

In [None]:
# Both files are read as headerless, single-space-separated tables with columns x and y.
train_data = pd.read_csv(
    'Input/training.dat',
    sep=' ',
    header=None,
    names=['x', 'y'],
)
test_data = pd.read_csv(
    'Input/test.dat',
    sep=' ',
    header=None,
    names=['x', 'y'],
)

x_train = np.array(train_data['x'])
y_train = np.array(train_data['y'])

# Symbolic weights; the objective functions below are sympy expressions in these symbols.
w0, w1, w2 = Symbol("w0"), Symbol("w1"), Symbol("w2")

# Model (a): sum of squared residuals of the straight line w0 + w1*x.
func_a = np.sum(np.square(y_train - w0 - w1 * x_train))
f_a = lambdify([[w0, w1]], func_a, modules="numpy")
gf_a = lambdify([[w0, w1]], func_a.diff([[w0, w1]]), modules="numpy")
# Gradient of model (a) as a float64 row vector of shape (1, len(x_arr)).
grad_fa = lambda x_arr: np.array(gf_a(x_arr), dtype='float64').reshape(1, len(x_arr))

# Model (b): sum of squared residuals of the parabola w0 + w1*x + w2*x**2.
func_b = np.sum(np.square(y_train - w0 - w1 * x_train - w2 * x_train**2))
f_b = lambdify([[w0, w1, w2]], func_b, modules="numpy")
gf_b = lambdify([[w0, w1, w2]], func_b.diff([[w0, w1, w2]]), modules="numpy")
# Gradient of model (b) as a float64 row vector of shape (1, len(x_arr)).
grad_fb = lambda x_arr: np.array(gf_b(x_arr), dtype='float64').reshape(1, len(x_arr))

### Useful Functions

In [None]:
def np_str(x_k):
    """Render an array as a compact one-line string with 3-digit precision.

    The array is first flattened to 1-D with ``reshape(len(x_k))``, so its
    total size must equal its length (e.g. a 1-D array or an (n, 1) column).
    Elements are separated by ',' with no space.
    """
    flattened = x_k.reshape(len(x_k))
    return np.array2string(flattened, precision=3, separator=',')


def f_str(x):
    """Format a number as fixed-point text with exactly four decimals."""
    return "{0:.4f}".format(x)

In [None]:
class OutputTable:
    """Collects one formatted row per iteration and prints the result as LaTeX."""

    def __init__(self):
        # Column headers: iteration, iterate, objective value, direction, step size, next iterate.
        headers = ['k', 'x^k', 'f(x^k)', 'd^k', 'a^k', 'x^k+1']
        self.table = pd.DataFrame([], columns=headers)

    def add_row(self, k, xk, fxk, dk, ak, xkp):
        """Append a row; vectors are rendered with np_str and f(x^k) with f_str.

        ``fxk`` must expose ``.item()`` (e.g. a numpy scalar or size-1 array).
        """
        next_index = len(self.table)
        formatted_row = [
            k,
            np_str(xk),
            f_str(fxk.item()),
            np_str(dk),
            ak,
            np_str(xkp),
        ]
        self.table.loc[next_index] = formatted_row

    def print_latex(self):
        """Print the accumulated table as LaTeX source, without the index column."""
        latex_source = self.table.to_latex(index=False)
        print(latex_source)

## Part B : Neural Network

In [None]:
def sigmoidalFunc(output_array):
    """Logistic sigmoid 1 / (1 + exp(-x)), applied element-wise.

    Evaluated as exp(-log(1 + exp(-x))) via ``np.logaddexp`` so that inputs of
    large magnitude do not overflow inside ``np.exp`` (the naive form emits a
    RuntimeWarning for strongly negative inputs).
    """
    return np.exp(-np.logaddexp(0, -output_array))


def sigmoidalDeriv(hiddenlayer):
    """Derivative of the sigmoid expressed through its output value h: h * (1 - h).

    The argument is the activation (already passed through the sigmoid),
    not the net input.
    """
    return hiddenlayer * (1 - hiddenlayer)

In [None]:
def _networkOutputs(w_matrix, W_matrix, inputLayers):
    """Forward pass for all patterns at once; returns the P network outputs as a 1-D array."""
    hiddenActivations = sigmoidalFunc(w_matrix @ inputLayers)  # h1, ..., hj for every pattern
    hiddenWithBias = np.insert(hiddenActivations, 0, -1, axis=0)  # prepend h0 = -1
    return (W_matrix @ hiddenWithBias).ravel()  # o for every pattern


def backpropagation(patterns, hiddenLayerSize, alpha = 0.5, learningRate = 0.9, epsilon = 0.001, verbose = True):
    """Train a one-hidden-layer network by online (per-pattern) backpropagation.

    The network has sigmoid hidden units, one linear output unit, and bias
    units fixed at -1 on the input layer (x0) and the hidden layer (h0).

    Parameters
    ----------
    patterns : array-like, shape (P, n_inputs + 1)
        One pattern per row. The LAST column is the desired output, the
        preceding columns are the inputs. The caller's array is not modified;
        shuffling happens on a private float copy.
    hiddenLayerSize : int
        Number of hidden units, not counting the bias unit h0.
    alpha : float
        Initial step size of the weight updates.
    learningRate : float
        Factor by which alpha is multiplied after every pass over the data.
    epsilon : float
        Training stops as soon as alpha <= epsilon.
    verbose : bool
        When True (default), print the sum of squared errors after each pass.

    Returns
    -------
    w_matrix : ndarray, shape (hiddenLayerSize, n_inputs + 1)
        Input-to-hidden weights (column 0 multiplies the bias x0).
    W_matrix : ndarray, shape (1, hiddenLayerSize + 1)
        Hidden-to-output weights (column 0 multiplies the bias h0).
    error : float
        Sum of squared errors of the final network over all training patterns.
    """
    # Private float copy: np.random.shuffle below works in place and must not
    # reorder the caller's data.
    patterns = np.array(patterns, dtype=float)
    P = np.size(patterns, 0)
    # The column count of `patterns` equals n_inputs + 1, which is exactly the
    # input-layer width once the target column is dropped and x0 is added.
    w_matrix = np.random.rand(hiddenLayerSize, np.size(patterns, 1))
    W_matrix = np.random.rand(1, hiddenLayerSize + 1)  # +1 for h0

    hiddenLayer = np.zeros((hiddenLayerSize + 1, 1))  # hiddenLayerSize excludes h0
    hiddenLayer[0, :] = -1  # h0 is equal to -1

    t = 0
    while alpha > epsilon:
        np.random.shuffle(patterns)
        desiredOutputs = patterns[:, -1]  # targets live in the last column
        inputLayers = np.insert(patterns[:, :-1], 0, -1, axis=1).T  # x0 = -1 prepended, targets excluded
        for p in range(P):
            inputColumn = inputLayers[:, p].reshape(-1, 1)
            hiddenLayer[1:] = sigmoidalFunc(w_matrix @ inputColumn)
            # the output unit is linear, so its net input is the actual output
            actualOutput = W_matrix @ hiddenLayer
            S_output = (desiredOutputs[p] - actualOutput).reshape(-1, 1)
            # the hidden deltas must use W_matrix as it was BEFORE this pattern's update
            S_hidden = (sigmoidalDeriv(hiddenLayer[1:]) * (np.transpose(W_matrix[:, 1:]) @ S_output)).reshape(-1, 1)
            W_matrix += alpha * S_output @ np.transpose(hiddenLayer)
            w_matrix += alpha * S_hidden @ np.transpose(inputColumn)
        alpha = learningRate * alpha
        t += 1
        if verbose:
            error = np.sum(np.square(desiredOutputs - _networkOutputs(w_matrix, W_matrix, inputLayers)))
            print("Iteration {0} : error = {1}".format(t,error))

    # Final error; recomputed from `patterns` so it is also defined when the
    # loop body never ran (alpha <= epsilon on entry).
    inputLayers = np.insert(patterns[:, :-1], 0, -1, axis=1).T
    error = np.sum(np.square(patterns[:, -1] - _networkOutputs(w_matrix, W_matrix, inputLayers)))
    return w_matrix, W_matrix, error

In [None]:
# Model (a): train on the raw (x, y) pairs with three hidden units.
patterns = np.array(train_data)
backpropagation(patterns, hiddenLayerSize=3)

Iteration 1 : error = 1.3560985520073212e+16
Iteration 2 : error = 6341638102981.615
Iteration 3 : error = 158939293125500.06
Iteration 4 : error = 4190674158747.2886
Iteration 5 : error = 265637470951350.0
Iteration 6 : error = 3931663049387.9385
Iteration 7 : error = 94533724902792.62
Iteration 8 : error = 437660714567587.75
Iteration 9 : error = 413865606076296.6
Iteration 10 : error = 39359456449019.67
Iteration 11 : error = 18882484166805.047
Iteration 12 : error = 25990695426746.055
Iteration 13 : error = 25708786306437.594
Iteration 14 : error = 13675090364300.066
Iteration 15 : error = 159332923059270.2
Iteration 16 : error = 40951095469277.89
Iteration 17 : error = 23446353322013.61
Iteration 18 : error = 97536758521753.03
Iteration 19 : error = 118786373932563.03
Iteration 20 : error = 17280065486303.55
Iteration 21 : error = 12314520607918.656
Iteration 22 : error = 51398546472395.234
Iteration 23 : error = 123971246947390.88
Iteration 24 : error = 58902870683136.92
Iteratio

(array([[0.26019469, 0.38254563],
        [0.14178385, 0.84176895],
        [0.92196021, 0.51127934]]),
 array([[-92.50422294,  93.09305246,  92.97657676,  92.7308395 ]]),
 54836373765539.31)

In [None]:
# Model (b): add x**2 as a second input column (columns become x, x**2, y), three hidden units.
patterns2 = np.insert(
    np.array(train_data),
    obj=1,
    values=np.square(train_data['x']),
    axis=1,
)
backpropagation(patterns2, hiddenLayerSize=3)

(array([[ 2.38821993e+11, -5.90337080e+13, -2.35398118e+16],
        [ 2.38821648e+11, -5.90341186e+13, -2.35401062e+16],
        [ 2.38821796e+11, -5.90339426e+13, -2.35399800e+16]]),
 array([[-111324.93304916,   99123.48907882,   99138.75872513,
           99132.2131042 ]]),
 1.2332644345039047e+59)

In [None]:
def averageError(w_matrix, W_matrix, test_data):
    """Evaluate a trained network on a data set.

    Parameters
    ----------
    w_matrix : ndarray, shape (J, n_inputs + 1)
        Input-to-hidden weights (column 0 multiplies the bias x0 = -1).
    W_matrix : ndarray, shape (1, J + 1)
        Hidden-to-output weights (column 0 multiplies the bias h0 = -1).
    test_data : array-like, shape (N, n_inputs + 1)
        One pattern per row; the last column is the desired output.

    Returns
    -------
    mse : float
        Mean of the squared residuals.
    variance : float
        Sample variance (N - 1 denominator) of the squared residuals;
        NaN when fewer than two patterns are given.
    """
    test_data = np.asarray(test_data, dtype=float)
    inputLayers = np.insert(test_data[:, :-1], 0, -1, axis=1).T  # x0 = -1 prepended, targets excluded
    desiredOutputs = test_data[:, -1]  # targets live in the last column
    # Hidden units are sigmoidal, so the activation must be applied here too.
    actualHiddens = sigmoidalFunc(w_matrix @ inputLayers)  # h1, ..., hj
    # Flatten to 1-D so the subtraction below is element-wise rather than an
    # accidental (N, 1) - (1, N) broadcast into an N x N matrix.
    actualOutputs = (W_matrix @ np.insert(actualHiddens, 0, -1, axis=0)).ravel()  # o1, ..., oN
    squareResiduals = np.square(desiredOutputs - actualOutputs)
    count = squareResiduals.size
    mse = np.sum(squareResiduals) / count
    if count < 2:
        # A sample variance needs at least two observations.
        return mse, float('nan')
    variance = np.sum(np.square(squareResiduals - mse)) / (count - 1)
    return mse, variance

In [None]:
def hiddenUnit(train_data, test_data, Jq = 3, epsilon = 0.001):
    """Grow the hidden layer one unit at a time until the test MSE stops improving.

    Starting with ``Jq`` hidden units, a network is trained with
    ``backpropagation`` and scored with ``averageError``; the size is increased
    by one as long as the test MSE strictly decreases.

    Parameters
    ----------
    train_data, test_data : array-like
        Training and test patterns, converted with ``np.array``.
    Jq : int
        Number of hidden units to start from.
    epsilon : float
        Stopping threshold forwarded to ``backpropagation``.

    Returns
    -------
    (units, mse) : tuple
        The last hidden-layer size that improved the test MSE and that MSE.
        If not even the first size yields an improvement over infinity
        (e.g. the error is inf or NaN), ``(Jq - 1, inf)`` is returned.
    """
    train = np.array(train_data)
    test = np.array(test_data)
    best_units = Jq - 1
    Et = np.inf  # np.infty was removed in NumPy 2.0
    while True:
        # Train on a copy so that any in-place shuffling cannot reorder `train`.
        patterns = np.copy(train)
        w, W, total_error = backpropagation(patterns, Jq, epsilon=epsilon)
        Etp, var = averageError(w, W, test)
        print("{0} hidden units : MSE = {1}".format(Jq,Etp))
        # `not (a < b)` instead of `a >= b`: a NaN error then also stops the
        # search instead of looping forever.
        if not (Etp < Et):
            break
        best_units = Jq
        Et = Etp
        Jq += 1
    return best_units, Et

In [None]:
# Search for the hidden-layer size with the lowest test MSE, starting from the default size.
hiddenUnit(train_data=train_data, test_data=test_data, epsilon=0.001)

  """Entry point for launching an IPython kernel.
Iteration 1 : error = 2033226643273957.2
Iteration 2 : error = 3230668282252178.0
Iteration 3 : error = 1994390938542850.8
Iteration 4 : error = 2152785718311086.8
Iteration 5 : error = 3005865593498625.5
Iteration 6 : error = 2288708863593874.0
Iteration 7 : error = 2373841813838735.5
Iteration 8 : error = 2967739917438883.0
Iteration 9 : error = 3494459705305237.0
Iteration 10 : error = 2171283631237008.0
Iteration 11 : error = 2821511755266679.0
Iteration 12 : error = 2274317204692394.0
Iteration 13 : error = 2370863666923321.0
Iteration 14 : error = 2557801062312494.5
Iteration 15 : error = 2194870358275789.5
Iteration 16 : error = 2633312919586308.0
Iteration 17 : error = 2547455126184101.0
Iteration 18 : error = 2687990440456719.0
Iteration 19 : error = 2403316904342257.0
Iteration 20 : error = 2552636850228919.5
Iteration 21 : error = 2600017808166717.5
Iteration 22 : error = 2611254479692464.0
Iteration 23 : error = 272173439453

(3, 4228165628942.7915)