In [90]:
# imports

import os 
import math
import numpy as np 


# methods

def letter2num(letter):
    '''
    Map a letter to its zero-based offset from 'A'.

    For the 26 uppercase letters the result lies in the range 0-25.

    letter: a single-character string.
    returns: int, the code point of `letter` minus the code point of 'A'.
    '''
    offset_of_a = ord('A')  # 65
    return ord(letter) - offset_of_a

def add_bias_input(X):
    '''
    Append one extra column filled with ones (the bias input) to X.

    X: sequence of rows (list of lists or 2d array).
    returns: numpy array with the same rows as X and one more column, placed last.
    '''
    ones_column = np.ones(len(X))
    return np.c_[X, ones_column]

def shuffle_data(X, y, features):
    '''
    Shuffle the rows of X together with their labels y.

    X and y are joined column-wise into one table so that a single in-place
    row shuffle (global numpy RNG) keeps every label next to its own row.
    Because both end up in one array, they share a single dtype afterwards.

    X: 2d inputs, one row per sample.
    y: labels, one per sample.
    features: number of columns of X (the label is the column right after them).
    returns: (shuffled inputs, shuffled labels).
    '''
    joined = np.c_[X, y]
    np.random.shuffle(joined)
    inputs = joined[:, :features]
    labels = joined[:, features]
    return inputs, labels

def train_test_split(table,fraction):
    '''
    Split a table (inputs or outputs) into a training part and a testing part.

    The first floor(len(table) * fraction) entries form the training part and
    the remaining entries form the testing part. Calling this with the same
    fraction on X and on y keeps rows and labels aligned.

    table: sliceable sequence (list or numpy array).
    fraction: share of the entries that goes to the training part (0.0 - 1.0).
    returns: (train, test).
    '''
    # The split index IS the training size: slicing up to train_size + 1 would
    # hand the training part one extra entry.
    train_size = math.floor(len(table) * fraction)
    return table[:train_size], table[train_size:]

def create_array(dim1, dim2):
    '''
    Create a numpy array (either 1d or 2d) filled with NaNs.

    dim1: length of the first axis.
    dim2: length of the second axis; pass 0 to get a 1d array of length dim1.
    returns: float array of shape (dim1, dim2), or (dim1,) when dim2 == 0.
    '''
    # np.nan instead of the np.NaN alias, which no longer exists in NumPy 2.x.
    shape = (dim1, dim2) if dim2 != 0 else dim1
    return np.full(shape, np.nan)

In [108]:
X = list()
y = list()

# load the data
# os.path.abspath("__file__") is given a literal string, so the dataset path is
# resolved relative to the current working directory.
data_path = os.path.dirname(os.path.abspath("__file__")) + '/datasets/problem2_data.txt'
with open(data_path) as file:   # closed automatically, even if parsing fails
    for line in file:
        line = line.strip()
        if not line:            # skip blank lines (e.g. a trailing newline)
            continue
        vector = line.split(',')

        # first field is the letter (label), the rest are integer attributes
        y.append(letter2num(vector[0]))
        X.append([int(value) / 15.0 for value in vector[1:]])   # scaling

# fix the seed so that the shuffle below is reproducible
np.random.seed(1)

# add bias
X = add_bias_input(X)

# shuffle the data
features = X.shape[1]   # number of input columns, bias column included
X, y = shuffle_data(X, y,features)

# split to train and test sets
fraction = 0.8
X_train, X_test = train_test_split(X,fraction)
y_train, y_test = train_test_split(y,fraction)

In [122]:
# Neural network constants

numInput = features     # input columns, bias column included
numHid1 = 10
numHid2 = 10
numOutput = 26 # letters of english alphabet
numLayers = 4

learning_rate = 0.1
epochs = 1000

# weights: one row per source node, one column per target node,
# initialised uniformly in [0, 1)
in2hid_weights = np.random.rand(numInput,numHid1)
hid2hid_weights = np.random.rand(numHid1,numHid2)
# rows must follow the SECOND hidden layer, which feeds the output layer
hid2out_weights = np.random.rand(numHid2,numOutput)

# values
nodesHid1_values = create_array(dim1=numHid1,dim2=0)
nodesHid2_values = create_array(dim1=numHid2,dim2=0)
nodesOut_values = create_array(dim1=numOutput,dim2=0)

# derivatives
nodesHid1_deriv = create_array(dim1=numHid1,dim2=0)
nodesHid2_deriv = create_array(dim1=numHid2,dim2=0)
nodesOut_deriv = create_array(dim1=numOutput,dim2=0)

In [123]:
# One pass over the training set, one sample at a time.
# NOTE: forward_phase, calculate_success, calculate_error and backward_phase are
# defined in the cell below; that cell has to be run before this one.
# forward_phase() reads the module-level loop variable `row`.
for row in range(len(X_train)):   # iterate over sample indices, not over the array

    # forward phase
    forward_phase()

    # calculate success and error
    calculate_success()
    calculate_error()

    # backward phase
    backward_phase(row)

In [1]:
def sigmoid(z):
    '''
    Sigmoid activation function: 1 / (1 + e^(-z)).

    z: scalar or numpy array; arrays are processed element-wise.
    returns: value(s) of the same shape as z.
    '''
    decay = np.exp(-z)
    return 1 / (1 + decay)

def forward_phase(sample_row=None):
    '''
    Propagate one training sample through the network.

    The activations of both hidden layers and of the output layer are written
    in place into nodesHid1_values, nodesHid2_values and nodesOut_values.

    sample_row: index of the sample in X_train. When omitted, the module-level
                variable `row` (the training-loop index) is used, which keeps
                existing argument-less calls working.
    '''
    if sample_row is None:
        sample_row = row

    # Each layer is sigmoid(previous activations . weights); one matrix product
    # per layer computes every node of that layer at once.
    nodesHid1_values[:] = sigmoid(np.dot(X_train[sample_row, :], in2hid_weights))
    nodesHid2_values[:] = sigmoid(np.dot(nodesHid1_values, hid2hid_weights))
    nodesOut_values[:] = sigmoid(np.dot(nodesHid2_values, hid2out_weights))

def calculate_success():
    '''
    Placeholder for the success measurement; not implemented yet.

    Takes no arguments, does nothing and returns None.
    '''
    return None

def calculate_error():
    '''
    Placeholder for the error measurement; not implemented yet.

    Takes no arguments, does nothing and returns None.
    '''
    return None

def make_target_values(row):
    '''
    Build the one-hot target vector for training sample `row`.

    row: index of the sample in X_train / y_train.
    returns: array of numOutput zeros with a 1 at the position of the sample's label.
    '''
    arr = np.zeros(numOutput)
    # y_train already holds the numeric labels (letter2num was applied while
    # loading); they come back from the shuffle as floats, hence the int().
    arr[int(y_train[row])] = 1
    return arr

def backward_phase(row):
    '''
    Back-propagate the error of training sample `row` and update all weights.

    Uses the activations stored by the forward pass (nodesHid1_values,
    nodesHid2_values, nodesOut_values). The derivative of the error with
    respect to each node's activation is written in place into nodesOut_deriv,
    nodesHid2_deriv and nodesHid1_deriv, and the three weight matrices are
    updated in place by one gradient-descent step of size learning_rate.

    row: index of the sample in X_train whose activations are currently stored.
    '''
    target_values = make_target_values(row)

    # Error derivative w.r.t. the output activations (output minus target),
    # stored in the module-level buffer instead of a shadowing local.
    nodesOut_deriv[:] = nodesOut_values - target_values

    # delta = derivative w.r.t. a node's net input;
    # for a sigmoid node: activation * (1 - activation) * dE/d(activation)
    out_delta = nodesOut_values * (1 - nodesOut_values) * nodesOut_deriv

    # All deltas are computed with the weights used in the forward pass,
    # i.e. before any of them is updated.
    nodesHid2_deriv[:] = np.dot(hid2out_weights, out_delta)
    hid2_delta = nodesHid2_values * (1 - nodesHid2_values) * nodesHid2_deriv

    nodesHid1_deriv[:] = np.dot(hid2hid_weights, hid2_delta)
    hid1_delta = nodesHid1_values * (1 - nodesHid1_values) * nodesHid1_deriv

    # Gradient of weight (i, j) = activation of source node i * delta of target node j.
    # The [...] subscript updates the module-level arrays in place.
    hid2out_weights[...] -= learning_rate * np.outer(nodesHid2_values, out_delta)
    hid2hid_weights[...] -= learning_rate * np.outer(nodesHid1_values, hid2_delta)
    in2hid_weights[...] -= learning_rate * np.outer(X_train[row], hid1_delta)