In [None]:
import numpy as np

In [None]:
from sklearn.datasets import load_iris
# as_frame=True asks scikit-learn for pandas objects, which lets the features
# be selected by column name further down.
iris = load_iris(as_frame=True)

Add bias term

In [None]:
# Use only the two petal measurements as features; the target holds the class ids.
petal_columns = ["petal length (cm)", "petal width (cm)"]
X = iris.data[petal_columns].values
y = iris["target"].values

# Insert a constant 1 as column 0 so the bias is learned as an ordinary weight.
X_b = np.insert(X, 0, 1, axis=1)
X_b

Split the dataset into training, validation, and test sets.

In [None]:
# 60% / 20% / 20% split. The test set takes whatever remains after the
# training and validation slices, so test_size is informational only.
train_size = int(0.6*len(X))
test_size = int(0.2*len(X))
val_size = int(0.2*len(X))

# Draw the permutation from a dedicated, seeded generator so that
# Restart & Run All always produces the same split (the global seed is only
# set later, in the training cell).
shuffle = np.random.default_rng(42).permutation(len(X))

# Apply the same permutation to features and labels to keep rows aligned.
X_b = X_b[shuffle]
y = y[shuffle]

# Copy each slice: basic slicing returns views, and the feature columns are
# scaled in place later, which would otherwise also modify X_b.
X_train = X_b[:train_size].copy()
y_train = y[:train_size].copy()
X_val = X_b[train_size:train_size+val_size].copy()
y_val = y[train_size:train_size+val_size].copy()
X_test = X_b[train_size+val_size:].copy()
y_test = y[train_size+val_size:].copy()

One-hot encode the class labels for classification.

In [None]:
# Row k of the 3x3 identity matrix is the one-hot vector for class k, so
# indexing it with an array of labels yields one one-hot row per sample.
one_hot_rows = np.eye(3)
y_train_class = one_hot_rows[y_train]
y_val_class = one_hot_rows[y_val]
y_test_class = one_hot_rows[y_test]

Standardization

In [None]:
# Statistics come from the training features only (column 0 is the bias
# column and is left untouched); the same shift and scale are then applied
# to every split.
train_features = X_train[:, 1:]
mean = train_features.mean(axis=0)
std = train_features.std(axis=0)

X_train[:, 1:] -= mean
X_train[:, 1:] /= std

X_val[:, 1:] -= mean
X_val[:, 1:] /= std

X_test[:, 1:] -= mean
X_test[:, 1:] /= std

Softmax

$\sigma\left(\mathbf{s}(\mathbf{x})\right)_k = \dfrac{\exp\left(s_k(\mathbf{x})\right)}{\sum\limits_{j=1}^{K}{\exp\left(s_j(\mathbf{x})\right)}}$

In [None]:
def softmax(x):
    """Row-wise softmax of a 2-D array of scores.

    Parameters
    ----------
    x : array-like of shape (n_samples, n_classes)
        Raw scores (logits), one row per sample.

    Returns
    -------
    numpy.ndarray
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    # Softmax is invariant to adding a constant to every score in a row.
    # Subtracting the row maximum keeps every exponent <= 0, so np.exp cannot
    # overflow to inf (which would produce NaN after the division).
    shifted = x - np.max(x, axis=1, keepdims=True)
    exps = np.exp(shifted)
    exp_sums = exps.sum(axis=1, keepdims=True)
    return exps / exp_sums

In [None]:
# Width of the design matrix (bias column included) and number of distinct
# class labels present in the training targets.
n_inputs = X_train.shape[1]
n_outputs = len(np.unique(y_train))

print(n_inputs)
print(n_outputs)

3
3


In [None]:
def calculate_loss(y,A,epilson):
  """Element-wise cross-entropy terms.

  y is the target array, A the predicted probabilities (same shape), and
  epilson a small constant added to A before the logarithm so that log(0)
  is never evaluated. Returns -(y * log(A + epilson)), with no reduction
  applied.
  """
  log_probs = np.log(A + epilson)
  weighted = y * log_probs
  return -weighted

In [None]:
def calculate_cost(loss):
  """Average cost per sample.

  loss is an array of per-element loss terms with one row per sample.
  Returns the sum of all terms divided by the number of rows in `loss`.

  The divisor is taken from `loss` itself rather than from the global
  training set, so the result is also correct for validation or test
  losses and the function does not depend on notebook state.
  """
  cost = np.sum(loss) / len(loss)
  return cost

In [None]:
def gradient(A,X,Y):
  """Gradient of the mean cross-entropy with respect to the weights.

  A holds the predicted probabilities, X the design matrix and Y the
  one-hot targets. Returns (1 / len(X)) * X.T @ (A - Y).
  """
  inv_n = 1 / len(X)
  residual = A - Y
  return (inv_n * X.T) @ residual

In [None]:
def update(gradient,theta,learning_rate):
  """Single gradient-descent step: returns theta - learning_rate * gradient.

  The input arrays are not modified; a new array is returned.
  """
  step = learning_rate * gradient
  return theta - step

Training

In [None]:
# Hyper-parameters.
num_iterations = 30001
learning_rate = 0.1
epilson = 1e-5  # added inside the log to avoid log(0)
costs = []  # validation cost recorded at every iteration
np.random.seed(42)
theta = np.random.rand(n_inputs,n_outputs)

# Early-stopping state. Patience is measured against the best validation cost
# seen so far (not merely the previous iteration), and the parameters that
# produced that best cost are remembered so they can be restored afterwards.
best_val_loss = np.inf  # lowest validation cost seen so far
best_theta = theta.copy()  # parameters that achieved best_val_loss
patience = 1000  # iterations without improvement tolerated before stopping
wait = 0  # iterations elapsed since the last improvement
prev_cost = np.inf  # validation cost of the previous iteration
for i in range(num_iterations):
    # Forward pass on the training set (used for the gradient below).
    B_training = X_train @ theta
    A = softmax(B_training)

    # Validation cost for the current parameters: sum the per-class terms,
    # then average over the validation samples.
    B_val = softmax(X_val @ theta)
    loss = calculate_loss(y_val_class,B_val,epilson)
    val_total_cost = loss.sum(axis=1).mean()
    costs.append(val_total_cost)
    if i % 500 == 0:
        print(i, val_total_cost)

    if val_total_cost < best_val_loss:
        # New best: remember it and reset the patience counter.
        best_val_loss = val_total_cost
        best_theta = theta.copy()
        wait = 0
    else:
        wait += 1
        if wait >= patience:
            print(i, val_total_cost, best_val_loss)
            print("Stopping early due to increasing cost")
            break
    prev_cost = val_total_cost

    # Gradient-descent step on the training set.
    gradients = gradient(A,X_train,y_train_class)
    theta = update(gradients, theta, learning_rate)

# Keep the parameters with the lowest validation cost rather than the ones
# reached after `patience` further, non-improving steps.
theta = best_theta

0 1.0920339628358051
500 0.2149770617605408
1000 0.18071820409577685
1500 0.17242847862385752
2000 0.17098106361702115
2500 0.1721146265841299
2959 0.1741858542728771 0.1741806895956106
Stopping early due to increasing cost


In [None]:
# Show the parameter matrix left by the training cell: one row per input
# (bias first), one column per class.
theta

array([[-0.09821036,  3.9408568 , -1.78539808],
       [-3.52002429,  0.0556803 ,  4.37501564],
       [-3.60455799,  0.07686006,  5.0530727 ]])

In [None]:
# Predict the most probable class for every validation sample and report the
# fraction of predictions that match the true labels.
logits = X_val @ theta
Y_proba = softmax(logits)
y_predict = np.argmax(Y_proba, axis=1)

is_correct = y_predict == y_val
accuracy_score = is_correct.mean()
accuracy_score

0.9