In [1]:
import numpy as np
import time
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.metrics import accuracy_score, recall_score, precision_score
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB

In [2]:
# Import the Iris dataset
iris = datasets.load_iris()
x_data = iris.data[:, :4]
y_data = iris.target
C = 3  # Number of classes

In [3]:
# Split the training set and validation set
x_train, x_test, y_train, y_test = train_test_split(x_data, y_data,
                                                    test_size=0.2, random_state=42)

In [4]:
def softmax(V):
    e_V = np.exp(V - np.max(V, axis=0, keepdims=True))
    Z = e_V / e_V.sum(axis=0)
    return Z


# cost or loss function
def cost(Y, Yhat):
    return -np.sum(Y * np.log(Yhat)) / Y.shape[0]

def ann(d0, d1, d2):
    # ANN
    h = d1  # size of hidden layer
    C = d2
    # initialize parameters randomly
    W1 = 0.01 * np.random.randn(d0, d1)
    b1 = np.zeros((d1, 1))
    W2 = 0.01 * np.random.randn(d1, d2)
    b2 = np.zeros((d2, 1))
    
    N = x_data.shape[1]
    eta = 1  # learning rate
    
    # Gradient descent method
    for i in range(10000):
        ## Feedforward
        Z1 = np.dot(W1.T, x_train.T) + b1
        A1 = np.maximum(Z1, 0)
        Z2 = np.dot(W2.T, A1) + b2
        Yhat = softmax(Z2)
    
        # print loss after each 1000 iterations
        if i % 1000 == 0:
            # compute the loss: average cross-entropy loss
            loss = cost(y_train, Yhat)
            print("iter %d, loss: %f" % (i, loss))
    
        # backpropagation
        E2 = (Yhat - y_train) / N
        dW2 = np.dot(A1, E2.T)
        db2 = np.sum(E2, axis=1, keepdims=True)
        E1 = np.dot(W2, E2)
        E1[Z1 <= 0] = 0  # gradient of ReLU
        dW1 = np.dot(x_train.T, E1.T)
        db1 = np.sum(E1, axis=1, keepdims=True)
    
        # Gradient Descent update
        W1 += -eta * dW1
        b1 += -eta * db1
        W2 += -eta * dW2
        b2 += -eta * db2
    
    return W1, b1, W2, b2

In [5]:
W1, b1, W2, b2 = ann(4, 100, 3)
Z1 = np.dot(W1.T, x_train.T) + b1
A1 = np.maximum(Z1, 0)
Z2 = np.dot(W2.T, A1) + b2
predicted_class = np.argmax(Z2, axis=0)
acc = 100 * np.mean(predicted_class == y_train)
print('training accuracy: %.2f %%' % (acc))

iter 0, loss: 3.268375
iter 1000, loss: nan
iter 2000, loss: nan


  e_V = np.exp(V - np.max(V, axis=0, keepdims=True))


iter 3000, loss: nan
iter 4000, loss: nan
iter 5000, loss: nan
iter 6000, loss: nan
iter 7000, loss: nan
iter 8000, loss: nan
iter 9000, loss: nan
training accuracy: 33.33 %


In [6]:
# Multinomial Logistic Regression (Softmax) approach
# Train model
start_train_softmax = time.perf_counter() # start of training time
softmax_approach = LogisticRegression()
softmax_approach.fit(x_train, y_train)
end_train_softmax = time.perf_counter() # end of training time
# Predict results
start_test_softmax = time.perf_counter() # start of predicting time
y_pred_softmax = softmax_approach.predict(x_test)
end_test_softmax = time.perf_counter() # start of predicting time
# Print accuracy, recall, precision score
print(f'Accuracy score: {accuracy_score(y_test, y_pred_softmax)}')
print(f'Training time: {end_train_softmax - start_train_softmax}')
print(f'Predicting time: {end_test_softmax - start_test_softmax}')

Accuracy score: 1.0
Training time: 0.009376399997563567
Predicting time: 0.00022139999782666564


In [7]:
# Naive Bayes approach
# Train model
start_train_naive = time.perf_counter() # start of training time
naive_bayes = GaussianNB()
naive_bayes.fit(x_train, y_train)
end_train_naive = time.perf_counter() # end of training time
# Predict result
start_test_naive = time.perf_counter() # start of predicting time
y_pred_naive = naive_bayes.predict(x_test)
end_test_naive = time.perf_counter() # start of predicting time
# Print accuracy, recall, precision score
print(f'Accuracy score: {accuracy_score(y_test, y_pred_naive)}')
print(f'Training time: {end_train_naive - start_train_naive}')
print(f'Predicting time: {end_test_naive - start_test_naive}')

Accuracy score: 1.0
Training time: 0.0011805000031017698
Predicting time: 0.00021189999824855477


In [8]:
W1_75, b1_75, W2_75, b2_75 = ann(4, 75, 3)
Z1_75 = np.dot(W1_75.T, x_train.T) + b1_75
A1_75 = np.maximum(Z1_75, 0)
Z2_75 = np.dot(W2_75.T, A1_75) + b2_75
predicted_class_75 = np.argmax(Z2_75, axis=0)
acc_75 = 100 * np.mean(predicted_class_75 == y_train)
print('training accuracy: %.2f %%' % (acc_75))

iter 0, loss: 3.268379
iter 1000, loss: nan
iter 2000, loss: nan


  e_V = np.exp(V - np.max(V, axis=0, keepdims=True))


iter 3000, loss: nan
iter 4000, loss: nan
iter 5000, loss: nan
iter 6000, loss: nan
iter 7000, loss: nan
iter 8000, loss: nan
iter 9000, loss: nan
training accuracy: 33.33 %


In [9]:
W1_50, b1_50, W2_50, b2_50 = ann(4, 50, 3)
Z1_50 = np.dot(W1_50.T, x_train.T) + b1_50
A1_50 = np.maximum(Z1_50, 0)
Z2_50 = np.dot(W2_50.T, A1_50) + b2_50
predicted_class_50 = np.argmax(Z2_50, axis=0)
acc_50 = 100 * np.mean(predicted_class_50 == y_train)
print('training accuracy: %.2f %%' % (acc_50))

iter 0, loss: 3.268384
iter 1000, loss: nan
iter 2000, loss: nan


  e_V = np.exp(V - np.max(V, axis=0, keepdims=True))


iter 3000, loss: nan
iter 4000, loss: nan
iter 5000, loss: nan
iter 6000, loss: nan
iter 7000, loss: nan
iter 8000, loss: nan
iter 9000, loss: nan
training accuracy: 33.33 %
