**Description:** A multinomial logistic regression for the Iris dataset is implemented first with the scikit-learn library, and then with a custom algorithm that finds the classes using the batch gradient descent equations.

**Input:** Iris dataset. 
       A publicly available set of 150 flowers with details of their petal length and width

**Output:** Multiclass Prediction Classifier

**Version:** Python 3.6.9
        

In [80]:
# Python ≥3.6.9 is required
import sys
assert sys.version_info >= (3, 6, 9)

# Common imports
import numpy as np
import os

# to make this notebook's output stable across runs
np.random.seed(42)

# To plot pretty figures
%matplotlib inline
import matplotlib as mpl
import matplotlib.pyplot as plt
mpl.rc('axes', labelsize=14)
mpl.rc('xtick', labelsize=12)
mpl.rc('ytick', labelsize=12)

# Where to save the figures: <project root>/images/<subfolder>.
# The directory is created when this cell runs; exist_ok=True makes re-runs harmless.
PROJECT_ROOT_DIR = "."
SUBFOLDER_ID = "SGD"
IMAGES_PATH = os.path.join(PROJECT_ROOT_DIR, "images", SUBFOLDER_ID)
os.makedirs(IMAGES_PATH, exist_ok=True)

def save_fig(fig_id, tight_layout=True, fig_extension="png", resolution=300):
    """Save the current matplotlib figure under IMAGES_PATH.

    fig_id: base file name, without extension.
    tight_layout: when true, plt.tight_layout() is applied before saving.
    fig_extension: file extension; also passed to savefig as the format.
    resolution: dpi value passed to savefig.
    """
    file_name = ".".join((fig_id, fig_extension))
    target = os.path.join(IMAGES_PATH, file_name)
    print("Saving figure", fig_id)
    if tight_layout:
        plt.tight_layout()
    plt.savefig(target, format=fig_extension, dpi=resolution)

**Train and Test Sets**

In [81]:
# Load the Iris dataset
from sklearn import datasets

iris = datasets.load_iris()

# Sequential split: the first 125 rows train the model, the remaining 25 test it.
# Only columns 2 and 3 are kept: petal length and petal width.
# NOTE(review): the split is not shuffled; if the rows are ordered by class the
# test set will not cover every class - confirm, and consider a shuffled split.
X_train = iris["data"][:125, [2, 3]]
X_test = iris["data"][125:, [2, 3]]
y_train = iris["target"][:125]
y_test = iris["target"][125:]

# 1. Multinomial Logistic Regression - Using scikit-learn


In [82]:
# Train the multinomial (softmax) logistic regression on the two petal features
from sklearn.linear_model import LogisticRegression

softmax_reg = LogisticRegression(multi_class="multinomial", solver="lbfgs", C=10)
softmax_reg.fit(X_train,y_train)

LogisticRegression(C=10, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='multinomial', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [83]:
# Predict the class of test flower X_test[10] and compare it with its true label.
# True = correct prediction, False = incorrect prediction
softmax_reg.predict([X_test[10]]) == y_test[10]

array([ True])

In [84]:
# Per-class probabilities for test flower X_test[10]
softmax_reg.predict_proba([X_test[10]])

array([[3.12177997e-11, 7.20014719e-04, 9.99279985e-01]])

# 2. Multinomial Logistic Regression
**Implementing Batch Gradient Descent Algorithm and Using Softmax Functions**

In [85]:
import numpy as np

In [86]:
# Batch gradient descent parameters
eta = 0.1                 # Learning rate
n_iteractions = 1000      # Total number of iterations

In [87]:
# Problem dimensions
m, n = X_train.shape      # m = 125 training rows, n = 2 feature columns
k = 3                     # number of classes in y

In [88]:
# Random initial parameters, shape (n+1, k): one row per feature plus one extra row
# (matching the column of ones prepended to the inputs), one column per class
theta = np.random.randn(n+1,k)

In [89]:
# One-hot encode the training labels: column j is 1 where y_train == j, else 0
yk = (y_train.reshape(-1, 1) == np.arange(3)).astype(int)

In [90]:
# Training inputs with a leading column of ones (bias term), stored transposed: (n+1) x m
X_t = np.hstack((np.ones((m, 1)), X_train)).T

In [91]:
# Batch gradient descent for softmax (multinomial logistic) regression.
# Every iteration uses the whole training set:
#   scores    Sk_theta  = X . theta                    (m x k)
#   probs     pk        = row-wise softmax(Sk_theta)   (m x k)
#   gradient  gradients = (1/m) * X^T . (pk - yk)      ((n+1) x k)
gradients = np.zeros(theta.shape)

for iteration in range(n_iteractions):
  Sk_theta = np.matmul(X_t.T, theta)

  # Subtract each row's maximum before exponentiating: softmax is unchanged by a
  # per-row shift, and this keeps np.exp from overflowing on large scores.
  exp_scores = np.exp(Sk_theta - Sk_theta.max(axis=1, keepdims=True))
  pk = exp_scores / exp_scores.sum(axis=1, keepdims=True)

  ak = pk - yk   # prediction error per sample and per class

  # Gradient for all class columns in one product. This deliberately avoids a loop
  # variable named `k`, which would overwrite the class count `k` used to shape theta.
  gradients = np.matmul(X_t, ak) / m

  theta = theta - eta * gradients

theta

array([[ 5.08249442, -0.27828195, -3.79807408],
       [-0.76116402,  1.06607106,  0.74983249],
       [-0.88659123, -0.03475194,  2.79851632]])

**Predictions**

In [92]:
# Pick test flower number 10 (from X_test) with the bias term prepended
X_tst = np.hstack((np.ones((X_test.shape[0], 1)), X_test)).T
X_p = X_tst[:, 10]
X_p

array([1. , 6.1, 2.3])

In [93]:
# One-hot encode the test labels and show the true class of test flower number 10
yk_tst = (y_test.reshape(-1, 1) == np.arange(3)).astype(int)
yk_tst[10]

array([0, 0, 1])

In [94]:
# Softmax over the class scores of the selected test flower
Sk_theta_pr = X_p @ theta
exp_scores_pr = np.exp(Sk_theta_pr)
pr = exp_scores_pr / sum(exp_scores_pr)

# Probability of each class
pr

array([1.10789745e-04, 2.55818181e-01, 7.44071029e-01])

In [95]:
# Thresholded class prediction for test-set element 10 (X_p): True where the
# class probability exceeds 0.5.
# NOTE(review): with three classes it is possible that no probability exceeds 0.5;
# np.argmax(pr) would always select exactly one class - consider switching.
(pr > 0.5)

array([False, False,  True])

# Pretty Graphs