# Artificial Neural Network
###### iris classification using numpy

Import dependencies

In [1]:
import numpy as np
import random
import urllib.request

#### Download iris dataset

In [2]:
urllib.request.urlretrieve(
    "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data", 
    "iris-data.txt")

('iris-data.txt', <http.client.HTTPMessage at 0x7fa0b0f89d68>)

### Pre-process data

seed random-generators

In [3]:
random.seed(0)
np.random.seed(0)

In [4]:
train_test_ratio = 0.8

tmp_list = []
tmp_set = set()
features = []
labels = []

text-file to numpy arrays

In [5]:
with open("iris-data.txt") as f:
    for line in f.readlines():
        if not line.isspace():
            tmp_list.append(line)

    random.shuffle(tmp_list)

for line in tmp_list:
    split_line = line.strip().split(',')
    length_line = len(split_line)

    for i in range(length_line - 1):
        split_line[i] = float(split_line[i])

    label = split_line[length_line - 1]
    tmp_set.add(label)

    features.append(split_line[:length_line - 1])
    labels.append(label)

Scale data

In [6]:
max_val = max([item for i in features for item in i])
min_val = min([item for i in features for item in i])

for i in range(len(features)):
    for j in range(len(features[0])):
        features[i][j] = (features[i][j] - min_val) / (max_val - min_val)

One-hot encoding

In [7]:
tmp_list = list(tmp_set)
for i in range(len(labels)):
    labels[i] = tmp_list.index(labels[i])

label_idx = np.array(labels)
labels = np.zeros((len(labels), len(tmp_list)))
labels[np.arange(len(labels)), label_idx] = 1

split into train-test set

In [8]:
features_train = np.array(features[:int(train_test_ratio * len(features))])
features_test = np.array(features[int(train_test_ratio * len(features)):])

labels_train = labels[:int(train_test_ratio * len(labels))]
labels_test = labels[int(train_test_ratio * len(labels)):]

## Neural Network

hyper-parameters

In [9]:
n_input_layers = len(features_test[0])
n_hidden_layers = 5
n_output_layers = len(tmp_list)

learning_rate = 0.01
momentum = 0.9

n_epoch = 100

Activation Functions and their derivative

In [10]:
activation_f = {
    'identity': lambda x: x,
    'sigmoid': lambda x: 1.0 / (1.0 + np.exp(-x)),
    'tanh': lambda x: np.tanh(x),
    'relu': lambda x: x * (x > 0),
}

activation_f_prime = {
    'identity': lambda x: 1,
    'sigmoid': lambda x: x * (1.0 - x),
    'tanh': lambda x: 1 - x**2,
    'relu': lambda x: 1.0 * (x > 0),
}

Activation Function Parameters

In [11]:
f1 = 'tanh'
f2 = 'sigmoid'

act_f1 = activation_f[f1]
act_f2 = activation_f[f2]

act_f1_prime = activation_f_prime[f1]
act_f2_prime = activation_f_prime[f2]

#### Training Function

In [12]:
def train(input_features, output_label, i_h_weights, h_o_weights):
    input_features = input_features.reshape(1, -1)

    # forward prop
    h_inter = np.dot(input_features, i_h_weights)
    h_result = act_f1(h_inter)
    o_inter = np.dot(h_result, h_o_weights)
    o_result = act_f2(o_inter)

    error = np.mean(0.5 * np.square(o_result - output_label))

    # back prop
    del_h_o = -np.multiply(output_label - o_result, act_f2_prime(o_result))
    change_h_o = np.dot(h_result.T, del_h_o)
    del_i_h = np.dot(del_h_o, h_o_weights.T) * act_f1_prime(h_result)
    change_i_h = np.dot(input_features.T, del_i_h)

    return error, change_i_h, change_h_o

#### Predict Function

In [13]:
# uses just forward prop
def predict(input_features, i_h_weights, h_o_weights):
    h_inter = np.dot(input_features, i_h_weights)
    h_result = act_f1(h_inter)
    o_inter = np.dot(h_result, h_o_weights)
    o_result = act_f2(o_inter)
    return (o_result >= max(o_result)).astype(int)

### Train Neural Network

In [14]:
print("*********** Train ***********")

# Initial Random Weights
V = np.random.normal(scale=0.1, size=(n_input_layers, n_hidden_layers))
W = np.random.normal(scale=0.1, size=(n_hidden_layers, n_output_layers))

# Training-set
X = features_train
T = labels_train

# Epoch-training
for epoch in range(n_epoch):
    tr_err = []

    for i in range(X.shape[0]):
        loss, grad_V, grad_W = train(X[i], T[i], V, W)

        # Adjust Weights
        V -= learning_rate * grad_V + momentum * grad_V
        W -= learning_rate * grad_W + momentum * grad_W

        tr_err.append(loss)
        
    if epoch % 10 == 0:
        val_err = []
        
        # use test set as validiation set
        for i in range(features_test.shape[0]):
            loss, _, _ = train(features_test[i], labels_test[i], V, W)
            val_err.append(loss)
            
        train_error = sum(tr_err) / len(tr_err)
        valid_error = sum(val_err) / len(val_err)
        
        print("Epoch:", epoch, " Train-error:", train_error, " Validation-error:", valid_error)

*********** Train ***********
Epoch: 0  Train-error: 0.110355817438  Validation-error: 0.0890507943417
Epoch: 10  Train-error: 0.0426840919513  Validation-error: 0.0397109277993
Epoch: 20  Train-error: 0.0221095706162  Validation-error: 0.020960218994
Epoch: 30  Train-error: 0.016501428106  Validation-error: 0.0114677158671
Epoch: 40  Train-error: 0.0143184820028  Validation-error: 0.0135157142136
Epoch: 50  Train-error: 0.0120528834448  Validation-error: 0.00838103175258
Epoch: 60  Train-error: 0.00950958946663  Validation-error: 0.0203980764745
Epoch: 70  Train-error: 0.00861524656063  Validation-error: 0.0084052920011
Epoch: 80  Train-error: 0.0084945926808  Validation-error: 0.00795761149001
Epoch: 90  Train-error: 0.00819203245711  Validation-error: 0.00802635459742


### Test Neural Network

In [15]:
print("*********** Test ***********")

success = 0
for i in range(len(features_test)):
    a = predict(features_test[i], V, W)
    b = labels_test[i]
    if np.array_equal(a, b):
        success += 1

print("Total = %d Success = %d Accuracy = %f" %
      (len(features_test), success, success * 100 / len(features_test)))

*********** Test ***********
Total = 30 Success = 29 Accuracy = 96.666667
