In [17]:
import numpy as np
from sklearn import datasets
import pandas as pd
from sklearn.cross_validation import train_test_split
from sklearn.externals import joblib

In [2]:
# Load the iris dataset bundled with scikit-learn.
iris = datasets.load_iris()

In [6]:
# Unpack the dataset: feature matrix, class labels and feature names.
data, target = iris.data, iris.target
header = iris.feature_names
header  # last expression of the cell -> displayed as its output

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [4]:
# Peek at the first two rows of the feature matrix.
print(data[:2])

[[ 5.1  3.5  1.4  0.2]
 [ 4.9  3.   1.4  0.2]]


In [11]:
# Hold out 20% of the samples for testing. A fixed random_state makes the
# split identical on every run (the global NumPy seed is only set further down,
# so without it each run would train and evaluate on different rows).
train_x, test_x, train_y, test_y = train_test_split(
    data, target, test_size=0.2, random_state=123
)

In [12]:
# Sanity-check the split: sample counts of each part, then the label shape.
print(*(len(part) for part in (train_x, train_y, test_x, test_y)))
print(train_y.shape)

120 120 30 30
(120,)


In [14]:
# Network dimensions.
# The input size is the number of features per sample, i.e. shape[1];
# shape[0] is the number of training samples and would give W1 the wrong shape.
idim = train_x.shape[1]
hdim = 100                       # hidden-layer width
odim = len(np.unique(train_y))   # one output unit per distinct class label

alpha = 0.01  # learning rate
reg_lambda = 0.01  # L2 regularization strength

In [16]:
# Fix the global NumPy seed so later random draws are repeatable.
np.random.seed(123)
# Placeholder parameter dictionary: every entry starts out as None.
model = dict.fromkeys(('W1', 'b1', 'W2', 'b2'))

$ Y = XW + b $

In [18]:
def forward_prop(model, x):
    """Run a forward pass through the two-layer network.

    Parameters
    ----------
    model : dict
        Network parameters stored under the keys 'W1', 'b1', 'W2' and 'b2'.
    x : numpy.ndarray
        Input batch with one sample per row.

    Returns
    -------
    numpy.ndarray
        Softmax class probabilities, one row per sample; each row sums to 1.
    """
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']

    # input -> hidden layer (tanh activation)
    z1 = x.dot(W1) + b1
    a1 = np.tanh(z1)

    # hidden -> output layer (softmax)
    z2 = a1.dot(W2) + b2
    # Subtracting the per-row maximum leaves the softmax value unchanged but
    # keeps np.exp from overflowing to inf, which would yield NaN probabilities.
    exp_scores = np.exp(z2 - np.max(z2, axis=1, keepdims=True))
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

    return probs
    

In [25]:
def get_loss(model, x, y=None):
    """Mean cross-entropy loss of `model` on `x`, including an L2 weight penalty.

    Parameters
    ----------
    model : dict
        Network parameters; 'W1' and 'W2' are the penalized weight matrices.
    x : numpy.ndarray
        Input batch with one sample per row.
    y : array of int, optional
        True class index for each row of `x`. Defaults to the notebook-level
        `train_y`, so existing two-argument calls keep working for the
        training set.

    Returns
    -------
    float
        (sum of per-sample negative log-likelihoods + L2 term) / number of samples.
    """
    if y is None:
        y = train_y
    n_samples = len(x)

    probs = forward_prop(model, x)

    # Probability assigned to the correct class of each sample. The row index
    # must be integer positions 0..n-1, not the data array itself.
    correct_logprobs = -np.log(probs[np.arange(n_samples), y])
    loss = np.sum(correct_logprobs)

    # L2 penalty on the weight matrices (biases are not regularized).
    loss += reg_lambda / 2 * (np.sum(np.square(model['W1'])) + np.sum(np.square(model['W2'])))

    return 1. / n_samples * loss
    

In [21]:
def predict(model, x):
    """Return the index of the most probable class for every row of `x`."""
    class_probs = forward_prop(model, x)
    return class_probs.argmax(axis=1)
    

In [24]:
def get_accuract(model, x, y=None):
    """Fraction of rows of `x` that `model` classifies correctly.

    Parameters
    ----------
    model : dict
        Network parameters as consumed by `predict`.
    x : numpy.ndarray
        Input batch with one sample per row.
    y : array of int, optional
        True class index for each row of `x`. Defaults to the notebook-level
        `train_y`; pass the matching labels explicitly for any other split
        (e.g. `get_accuract(model, test_x, test_y)`).

    Returns
    -------
    float
        Number of correct predictions divided by `len(x)`.

    Raises
    ------
    ValueError
        If `x` and `y` do not contain the same number of samples.
    """
    if y is None:
        y = train_y
    if len(x) != len(y):
        raise ValueError(
            "x and y must have the same number of samples "
            "(got %d and %d)" % (len(x), len(y))
        )

    prediction = predict(model, x)
    accuracy = np.sum(prediction == y) / len(x)

    return accuracy

In [27]:
def trainer(hdim, epochs, x=None, y=None, lr=None, reg=None):
    """Train the two-layer tanh/softmax network with full-batch gradient descent.

    Parameters
    ----------
    hdim : int
        Number of hidden units.
    epochs : int
        Number of gradient-descent passes over the whole batch.
    x : numpy.ndarray, optional
        Training inputs, one sample per row. Defaults to the notebook-level `train_x`.
    y : array of int, optional
        Class index for each row of `x`; labels are used directly as column
        indices, so they are expected to be 0..k-1. Defaults to `train_y`.
    lr : float, optional
        Learning rate. Defaults to the notebook-level `alpha`.
    reg : float, optional
        L2 regularization strength. Defaults to the notebook-level `reg_lambda`.

    Returns
    -------
    dict
        Trained parameters under the keys 'W1', 'b1', 'W2' and 'b2'.
    """
    x = train_x if x is None else x
    y = train_y if y is None else y
    lr = alpha if lr is None else lr
    reg = reg_lambda if reg is None else reg

    # Layer sizes come from the data itself: features per sample and number of
    # distinct classes (not the number of samples).
    n_samples, n_features = x.shape
    n_classes = len(np.unique(y))

    # Zero-mean random weights scaled by 1/sqrt(fan-in of the layer) so the
    # tanh units do not start out saturated; biases start at zero.
    W1 = np.random.randn(n_features, hdim) / np.sqrt(n_features)
    b1 = np.zeros((1, hdim))
    W2 = np.random.randn(hdim, n_classes) / np.sqrt(hdim)
    b2 = np.zeros((1, n_classes))  # one bias per output class, not per hidden unit

    model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}
    rows = np.arange(n_samples)

    for epoch in range(epochs):
        # input -> hidden layer
        z1 = x.dot(W1) + b1
        a1 = np.tanh(z1)
        # hidden layer -> output (softmax, shifted by the row max for stability)
        z2 = a1.dot(W2) + b2
        exp_scores = np.exp(z2 - np.max(z2, axis=1, keepdims=True))
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

        # Backpropagation
        delta3 = probs  # reuse the buffer; probs is not needed afterwards
        delta3[rows, y] -= 1   # gradient w.r.t. z2: probs - one_hot(y)
        dW2 = (a1.T).dot(delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)

        delta2 = delta3.dot(W2.T) * (1 - np.power(a1, 2))  # tanh'(z1) = 1 - a1**2
        dW1 = np.dot(x.T, delta2)
        db1 = np.sum(delta2, axis=0, keepdims=True)

        # add regularization (weights only)
        dW1 += reg * W1
        dW2 += reg * W2

        # gradient-descent step; in-place so the arrays held by `model` are updated
        W1 += -lr * dW1
        b1 += -lr * db1
        W2 += -lr * dW2
        b2 += -lr * db2

    return model
        
        
        
        
        