In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [2]:
df = pd.read_csv('Iris.csv')
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [3]:
X = df.iloc[:, 1:-1].values
X = (X - np.mean(X)) / np.std(X)
Y = LabelEncoder().fit_transform(df.iloc[:, -1])
onehot = np.zeros((Y.shape[0], np.unique(Y).shape[0]))
onehot[range(Y.shape[0]),Y] = 1.0

In [4]:
learning_rate = 0.005
prob = 0.5
EPOCH = 50

W1 = np.random.randn(X.shape[1], 50) / np.sqrt(df.shape[1])
b1 = np.zeros((50))
W2 = np.random.randn(50, 100) / np.sqrt(50)
b2 = np.zeros((100))
W3 = np.random.randn(100, np.unique(Y).shape[0]) / np.sqrt(100)
b3 = np.zeros((np.unique(Y).shape[0]))

In [5]:
def alpha_dropout(x, prob,alpha=-1.7580993408473766, mean=0.0, var=1.0):
    keep_prob = 1.0 - prob
    random_tensor = np.random.uniform(size=x.shape) + keep_prob
    binary_tensor = np.floor(random_tensor)
    ret = x * binary_tensor + alpha * (1-binary_tensor)
    a = np.sqrt(var / (keep_prob *((1-keep_prob) * np.square(alpha-mean) + var)))
    b = mean - a * (keep_prob * mean + (1 - keep_prob) * alpha)
    return a * ret + b, (a, binary_tensor)

def grad_alpha_dropout(x, de, cached):
    a, binary_tensor = cached
    dret = de * a
    return dret * binary_tensor

def selu(X, grad=False):
    alpha = 1.6732632423543772848170429916717
    scale = 1.0507009873554804934193349852946
    if grad:
        return scale * np.where(X >= 0, 1, alpha * np.exp(X))
    else:
        return scale * np.where(X >= 0, X, alpha * (np.exp(X) - 1))
    
def softmax(X, grad=False):
    if grad:
        p = softmax(X)
        return p * (1-p)
    else:
        e_x = np.exp(X - np.max(X, axis=-1, keepdims=True))
        return e_x / np.sum(e_x, axis=-1, keepdims=True)
    
def cross_entropy(X, Y, grad=False):
    if grad:
        X = np.clip(X, 1e-15, 1 - 1e-15)
        return -(Y / X) + (1 - Y) / (1 - X)
    else:
        X = np.clip(X, 1e-15, 1 - 1e-15)
        return -Y * np.log(X) - (1 - Y) * np.log(1 - X)

In [6]:
for i in range(EPOCH):
    z1 = X.dot(W1) + b1
    a1 = selu(z1,False)
    d1, cached1 = alpha_dropout(a1, prob)
    z2 = d1.dot(W2) + b2
    a2 = selu(z2,False)
    d2, cached2 = alpha_dropout(a2, prob)
    z3 = d2.dot(W3) + b3
    y_hat = softmax(z3,False)
    accuracy = np.mean(np.argmax(y_hat,axis = 1) == np.argmax(onehot,axis = 1))
    cost = np.mean(cross_entropy(y_hat,onehot))
    dy_hat = cross_entropy(y_hat,onehot, grad=True)
    dz3 = softmax(z3, True) * dy_hat
    dW3 = d2.T.dot(dz3)
    db3 = np.sum(dz3, axis=0)
    dd2 = dz3.dot(W3.T)
    da2 = grad_alpha_dropout(a2, dd2, cached2)
    dz2 = selu(z2,True) * da2
    dW2 = d1.T.dot(dz2)
    db2 = np.sum(dz2, axis=0)
    dd1 = dz2.dot(W2.T)
    da1 = grad_alpha_dropout(a1, dd1, cached1)
    dz1 = selu(z1,True) * da1
    dW1 = X.T.dot(dz1)
    db1 = np.sum(dz1, axis=0)
    W1 += -learning_rate * dW1
    b1 += -learning_rate * db1
    W2 += -learning_rate * dW2
    b2 += -learning_rate * db2
    W3 += -learning_rate * dW3
    b3 += -learning_rate * db3
    print('epoch %d, accuracy %f, cost %f'%(i, accuracy, cost))

epoch 0, accuracy 0.300000, cost 0.814855
epoch 1, accuracy 0.460000, cost 0.659884
epoch 2, accuracy 0.386667, cost 0.657797
epoch 3, accuracy 0.506667, cost 0.557343
epoch 4, accuracy 0.593333, cost 0.509546
epoch 5, accuracy 0.560000, cost 0.532756
epoch 6, accuracy 0.640000, cost 0.471053
epoch 7, accuracy 0.573333, cost 0.554835
epoch 8, accuracy 0.620000, cost 0.494645
epoch 9, accuracy 0.620000, cost 0.482041
epoch 10, accuracy 0.540000, cost 0.473024
epoch 11, accuracy 0.646667, cost 0.414588
epoch 12, accuracy 0.653333, cost 0.389835
epoch 13, accuracy 0.700000, cost 0.412783
epoch 14, accuracy 0.693333, cost 0.374972
epoch 15, accuracy 0.666667, cost 0.395956
epoch 16, accuracy 0.720000, cost 0.434741
epoch 17, accuracy 0.673333, cost 0.378635
epoch 18, accuracy 0.740000, cost 0.348896
epoch 19, accuracy 0.720000, cost 0.379456
epoch 20, accuracy 0.720000, cost 0.416553
epoch 21, accuracy 0.633333, cost 0.416789
epoch 22, accuracy 0.760000, cost 0.350133
epoch 23, accuracy 0.