In [6]:
from sklearn import datasets
import numpy as np

# Load Iris and keep only feature columns 2 and 3 (petal length and petal
# width in scikit-learn's documented feature order).
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]

# Prepend a column of ones so the bias term is learned as an ordinary row of
# the weight matrix instead of being handled separately.
X_with_bias = np.hstack((np.ones((X.shape[0], 1)), X))

# Class label of every sample.
y = iris.target

In [7]:
# Fractions of the dataset held out for testing and validation; whatever
# remains is used for training.
test_ratio = 0.2
validation_ratio = 0.2

total_size = len(X_with_bias)
test_size = int(total_size * test_ratio)
validation_size = int(total_size * validation_ratio)
train_size = total_size - test_size - validation_size
assert train_size > 0, "test_ratio + validation_ratio leave no training samples"

# Shuffle the row indices with a fixed seed so the split is reproducible after
# a kernel restart. A dedicated RandomState is used so NumPy's global random
# state is left untouched.
# NOTE: the name `random` is kept for compatibility with any later cell that
# may read it, even though it shadows the stdlib module name.
random = np.random.RandomState(42).permutation(total_size)

# Slice with explicit, non-negative bounds. Negative stops such as
# `-test_size` collapse to `-0 == 0` when test_size is 0, which would leave
# the validation set empty and put every sample in the test set.
validation_end = train_size + validation_size
train_indices = random[:train_size]
validation_indices = random[train_size:validation_end]
test_indices = random[validation_end:]

X_train = X_with_bias[train_indices]
y_train = y[train_indices]
X_validation = X_with_bias[validation_indices]
y_validation = y[validation_indices]
X_test = X_with_bias[test_indices]
y_test = y[test_indices]

# The three splits must partition the dataset exactly.
assert len(X_train) + len(X_validation) + len(X_test) == total_size

In [9]:
def one_hot(y, n_classes=None):
    """Convert a vector of integer class labels into a one-hot matrix.

    Parameters
    ----------
    y : array-like of int, shape (m,)
        Class indices; each is used as a column index into the result.
    n_classes : int, optional
        Number of columns of the result. When omitted it is inferred as
        ``y.max() + 1``. Pass it explicitly when ``y`` is only a subset of
        the labels (for example a validation split) that may not contain
        the highest class, so every encoded split has the same width.

    Returns
    -------
    numpy.ndarray of float, shape (m, n_classes)
        Row ``i`` is all zeros except for a 1 in column ``y[i]``.

    Raises
    ------
    ValueError
        If ``y`` is empty and ``n_classes`` is not given.
    """
    y = np.asarray(y)
    if n_classes is None:
        if y.size == 0:
            raise ValueError("cannot infer n_classes from an empty label array")
        n_classes = y.max() + 1
    m = len(y)
    Y_one_hot = np.zeros((m, n_classes))
    # Fancy indexing: for every row i, set column y[i] to 1.
    Y_one_hot[np.arange(m), y] = 1
    return Y_one_hot

# One-hot encode the labels of each split. Note that every call infers the
# number of classes from the largest label present in that particular split.
y_train_one_hot, y_validation_one_hot, y_test_one_hot = map(
    one_hot, (y_train, y_validation, y_test)
)

In [10]:
def softmax(logits):
    """Apply the softmax function to each row of a 2-D score matrix.

    Parameters
    ----------
    logits : array-like, shape (m, k)
        Unnormalised scores, one row per sample and one column per class.

    Returns
    -------
    numpy.ndarray, shape (m, k)
        Non-negative values in which every row sums to 1.
    """
    # Subtracting each row's maximum does not change the result
    # mathematically, but it keeps np.exp from overflowing to inf (which
    # would turn the division below into inf / inf = nan).
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    exps = np.exp(shifted)
    exp_sums = np.sum(exps, axis=1, keepdims=True)
    return exps / exp_sums

In [11]:
# Model dimensions: one output per distinct label found in the training set,
# one input per column of the training matrix (the bias column included).
n_out = np.unique(y_train).size
n_in = X_train.shape[1]

In [14]:
# Hyperparameters for full-batch gradient descent.
lr = 0.01
n_iterations = 5001
m = len(X_train)
epsilon = 1e-7  # added inside the log so that log(0) is never evaluated

# Start from random weights: one row per input column, one column per class.
weights = np.random.randn(n_in, n_out)

for iteration in range(n_iterations):
    # Forward pass: class scores, then class probabilities.
    logits = np.dot(X_train, weights)
    y_proba = softmax(logits)

    # Mean cross-entropy between the predicted probabilities and the targets.
    per_sample = (y_train_one_hot * np.log(y_proba + epsilon)).sum(axis=1)
    loss = -per_sample.mean()
    if iteration % 500 == 0:
        print(iteration, loss)

    # Backward pass: average gradient over the batch, then one descent step.
    error = y_proba - y_train_one_hot
    gradients = 1 / m * np.dot(X_train.T, error)
    weights -= lr * gradients

0 6.016748934029294
500 0.8899144254395553
1000 0.7393795511082445
1500 0.6447666177283835
2000 0.580355717625486
2500 0.5333106913421415
3000 0.4970597540028746
3500 0.4679976772883212
4000 0.44399717159106394
4500 0.42371951057059093
5000 0.40627579737341507


In [16]:
# Score the validation split with the trained weights: take the most probable
# class in each row and report the fraction of rows predicted correctly.
logits = np.dot(X_validation, weights)
y_proba = softmax(logits)
y_predict = y_proba.argmax(axis=1)
accuracy = (y_predict == y_validation).mean()
print("accuracy", accuracy)

accuracy 0.8666666666666667
