# Logistic Regression - Multiclass
- Iris dataset
- Multiclass classification

In [None]:
import os, sys, pathlib
# Make the working directory, its parent, and the shared utilities folder
# importable. NOTE(review): presumably the `utils` module imported further
# down lives in the 'S00 - Utils' folder next to this notebook's folder.
UTILS_FOLDER = 'S00 - Utils'
curPath = os.getcwd()
parPath = pathlib.Path(curPath).parent
utilPath = os.path.join(parPath, UTILS_FOLDER)
for p in (curPath, str(parPath), utilPath):
    # Skip entries that are already registered so that re-running this cell
    # does not keep growing sys.path with duplicates.
    if p not in sys.path:
        sys.path.append(p)

In [None]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from utils import plot_ds

In [None]:
# Load data: the Iris dataset shipped with scikit-learn.
# The returned object exposes `.data` (feature matrix) and `.target`
# (class labels), which the next cell reads.
iris = datasets.load_iris()

In [None]:
# Features: keep only columns 2 and 3 of the data matrix
# (presumably petal length / petal width -- see iris.feature_names).
# Labels: the class index of every sample.
X, y = iris.data[:, 2:4], iris.target

In [None]:
# Hold out 30% of the samples as test data. The split is stratified on y and
# uses a fixed seed so it is reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=1,
    stratify=y,
)

In [None]:
# Standardization: fit the scaler on the training split only, then apply the
# same fitted transformation to both the training and the test split.
sc = StandardScaler().fit(X_train)
X_train_std, X_test_std = (sc.transform(split) for split in (X_train, X_test))

In [None]:
# Hyper-parameters passed to LogisticRegression in the next cell
solver = 'lbfgs'   # optimization algorithm
C = 1e-4           # regularization parameter
max_iter = 100     # iteration cap for the solver

In [None]:
# Build the (not yet fitted) classifier from the solver / C / max_iter
# variables; verbose=1 makes the solver report its progress during fitting.
lr = LogisticRegression(
    C=C,
    solver=solver,
    max_iter=max_iter,
    random_state=1,
    verbose=1,
)

In [None]:
# Fit the classifier on the standardized training split
lr.fit(X=X_train_std, y=y_train)

In [None]:
# Predicted class label for every standardized test sample
y_pred = lr.predict(X=X_test_std)

In [None]:
# Show how the predicted label relates to the class probabilities:
# put the standardized features, the per-class probabilities and the
# predicted label side by side, one row per test sample.
comb = np.concatenate(
    [X_test_std, lr.predict_proba(X_test_std), y_pred[:, np.newaxis]],
    axis=1,
)
temp = pd.DataFrame(
    data=comb,
    columns=[
        'X1_std',
        'X2_std',
        'Prob of Class 0',
        'Prob of Class 1',
        'Prob of Class 2',
        'Prediction',
    ],
)
temp.head()

In [None]:
# Norm of weight coefficients (affected by C values).
# np.linalg.norm is called with its default arguments, so the whole coef_
# array is reduced to a single scalar. C is scikit-learn's inverse
# regularization strength, so a smaller C is expected to give a smaller norm.
print(f"Norm of W: {np.linalg.norm(lr.coef_)}")

In [None]:
# Evaluate the predictions against the true test labels and report both the
# accuracy and the raw number of wrongly classified samples.
accuracyScore = accuracy_score(y_true=y_test, y_pred=y_pred)
sumMiss = (y_pred != y_test).sum()
print(f"Misclassified examples: {sumMiss}")
print(f"Accuracy score: {accuracyScore}")

In [None]:
# Plot decision regions for the fitted model `lr`.
# plot_ds is imported from the local `utils` module and receives the
# standardized train/test features, their labels and the classifier.
# NOTE(review): its implementation is not visible here -- presumably it draws
# the samples on top of the classifier's decision regions.
plot_ds(X_train_std, X_test_std, y_train, y_test, lr)

## Searching for best parameters

In [None]:
# Experiments "ex1".."ex5": identical solver and iteration cap, with C swept
# from 0.0001 up to 10000 so the runs differ only in C.
paramSet = {
    f"ex{idx}": {"solver": "lbfgs", "C": c_value, "max_iter": 100}
    for idx, c_value in enumerate((0.0001, 0.01, 1, 100, 10000), start=1)
}

In [None]:
# Hyper-parameter sweep: fit one model per entry of paramSet and report its
# test-set results, so the effect of C can be compared across the runs.
# Only the parameter dicts are used, so iterate over the values directly
# instead of unpacking a key that is never read.
for paramValue in paramSet.values():
    # The dict keys (solver, C, max_iter) match LogisticRegression's keyword
    # names, so each parameter set is forwarded as-is.
    lr = LogisticRegression(random_state=1, verbose=0, **paramValue)

    # Training
    lr.fit(X_train_std, y_train)

    # Prediction
    y_pred = lr.predict(X_test_std)

    # Misclassification from the test samples
    sumMiss = (y_test != y_pred).sum()

    # Accuracy score from the test samples
    accuracyScore = accuracy_score(y_test, y_pred)

    print(f"Parameters: {paramValue}")
    print(f"Misclassified examples: {sumMiss}")
    print(f"Accuracy score: {accuracyScore}")
    print(f"Norm of W: {np.linalg.norm(lr.coef_)}")
    # Plain string: there is nothing to interpolate in the separator line.
    print("--------------------------------------------------")
    # Plot decision regions
    plot_ds(X_train_std, X_test_std, y_train, y_test, lr)