In [None]:
!pip install ucimlrepo

Collecting ucimlrepo
  Downloading ucimlrepo-0.0.7-py3-none-any.whl.metadata (5.5 kB)
Downloading ucimlrepo-0.0.7-py3-none-any.whl (8.0 kB)
Installing collected packages: ucimlrepo
Successfully installed ucimlrepo-0.0.7


In [None]:
import numpy as np

In [None]:
# Load the Iris dataset from the UCI Machine Learning Repository
from ucimlrepo import fetch_ucirepo

# Download the dataset registered under id 53
iris = fetch_ucirepo(id=53)

# Feature table and target table (both pandas DataFrames)
X, y = iris.data.features, iris.data.targets

In [None]:
# DataFrame.info() writes its summary to stdout itself and returns None,
# so it is called directly; wrapping it in print() adds a stray "None" line.
X.info()
y.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal length  150 non-null    float64
 1   sepal width   150 non-null    float64
 2   petal length  150 non-null    float64
 3   petal width   150 non-null    float64
dtypes: float64(4)
memory usage: 4.8 KB
None
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 1 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   class   150 non-null    object
dtypes: object(1)
memory usage: 1.3+ KB
None


In [None]:
# Preprocessing the data
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

# One-hot encoding the labels, because they are given as strings
encoder = OneHotEncoder(sparse_output=False)
y = encoder.fit_transform(y)

X = np.array(X)
y = np.array(y)

# Split first, so the held-out rows play no part in the scaling statistics
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardizing features, because we are using a gradient based algorithm.
# Mean and std are estimated on the training split only and then reused for
# the test split, which avoids leaking test-set information into training.
# ddof=1 keeps the sample-std convention that pandas' DataFrame.std() uses.
feature_mean = X_train.mean(axis=0)
feature_std = X_train.std(axis=0, ddof=1)

X_train = (X_train - feature_mean) / feature_std
X_test = (X_test - feature_mean) / feature_std
X = (X - feature_mean) / feature_std # Full matrix, on the same (training) scale

In [None]:
# Model parameters and training hyperparameters
np.random.seed(42) # Fixed seed keeps the random weight initialisation reproducible

n_instances, n_features = X_train.shape # Rows are samples, columns are features
n_classes = y_train.shape[1] # One column per one-hot encoded class

W = np.random.randn(n_features, n_classes) # Weight matrix, one column per class
b = np.zeros(shape=(1, n_classes)) # Bias row vector, starts at zero

# Hyperparameters
learning_rate = 0.1
max_epochs = 1000

In [None]:
def softmax(logits): # The model we are going to optimize
  """Turn each row of raw scores into a probability distribution.

  The maximum of every row is subtracted before exponentiating; this leaves
  the result unchanged mathematically but keeps np.exp from overflowing.

  logits: 2-D array-like, one row per sample and one column per class.
  Returns an array of the same shape whose rows sum to 1.
  """
  row_max = np.max(logits, axis=1, keepdims=True)
  shifted_exp = np.exp(logits - row_max)
  row_total = np.sum(shifted_exp, axis=1, keepdims=True)
  return shifted_exp / row_total

def loss(y, y_predicted, m, eps=1e-12): # Cross-entropy loss function
  """Mean cross-entropy between one-hot targets and predicted probabilities.

  y: one-hot encoded true labels.
  y_predicted: predicted class probabilities, same shape as y.
  m: number of instances the summed loss is divided by (m = n_instances).
  eps: lower bound applied to the probabilities before taking the log.

  Returns the summed cross-entropy divided by m.
  """
  # Without the lower bound, a probability of exactly 0 gives log(0) = -inf,
  # and 0 * -inf then turns the whole loss into NaN.
  safe_predicted = np.clip(y_predicted, eps, 1.0)
  return -np.sum(y * np.log(safe_predicted)) / m

def gradient(y_predicted, y, m, X=None): # m = n_instances
  """Gradients of the mean cross-entropy loss with respect to W and b.

  y_predicted: predicted class probabilities, one row per instance.
  y: one-hot encoded true labels, same shape as y_predicted.
  m: number of instances the gradients are averaged over.
  X: feature matrix the predictions were computed from. When omitted, the
     notebook-level X_train is used, which is what existing calls rely on.

  Returns (dW, db).
  """
  if X is None:
    X = X_train # Fall back to the training matrix defined in the notebook
  error = y_predicted - y
  dW = X.T @ error / m # Gradient of W weight matrix
  db = np.sum(error, axis=0) / m # Gradient of b bias
  return dW, db

In [None]:
n_epochs = 1000 # Maximum number of epochs (one epoch = one full pass over the training set)
patience = 15 # Number of consecutive epochs without improvement tolerated before stopping
count = patience # Counter that will be used for early stopping
best_loss = np.inf # Variable that stores the best loss

# Softmax regression model training (full-batch gradient descent)
for epoch in range(n_epochs):
  logits = X_train @ W + b # Logit = raw output score
  y_prediction = softmax(logits) # Predicting using softmax

  current_loss = loss(y_train, y_prediction, n_instances) # Computing current loss

  if current_loss < best_loss: # Early stopping mechanism
    best_loss = current_loss
    count = patience # Improvement seen: reset the counter
  else:
    count -= 1 # No improvement: one step closer to stopping

  if count == 0: # Patience exhausted
    print(f'Early stopping has occured at {epoch} epoch.')
    break # Actually stop; without this the loop kept training after the message

  dW, db = gradient(y_prediction, y_train, n_instances) # Getting gradients
  W = W - learning_rate * dW # Applying changes
  b = b - learning_rate * db # Applying changes

In [None]:
# Testing the model
test_logits = X_test @ W + b
y_test_pred = softmax(test_logits)

y_test_pred_class = np.argmax(y_test_pred, axis=1) # Getting class label of highest prediction
y_test_actual_class = np.argmax(y_test, axis=1)  # True labels from one-hot encoding

accuracy = np.mean(y_test_pred_class == y_test_actual_class)
# Scale to percent first and let the format spec do the rounding; rounding
# first and multiplying afterwards can print artifacts such as 56.99999999999999.
print(f'Accuracy score for Softmax regression model: {accuracy * 100:.1f}%')

Accuracy score for Softmax regression model: 100.0%
