# Week 10 - Multinomial Logistic Regression

In [10]:
# multi-class confusion matrix
import numpy as np
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, multilabel_confusion_matrix, classification_report
from sklearn.model_selection import train_test_split
from sklearn.multiclass import OneVsRestClassifier


# create data: a synthetic 3-class problem with 100 samples and 4 informative features
features, multi_class = make_classification(n_samples=100, n_features=4,
                                             n_informative=4, n_redundant=0,
                                             n_classes=3, random_state=13)
# create a dataframe of the features and add the multi-class target (label, output)
df = pd.DataFrame(features, columns=['Feature_1', 'Feature_2', 'Feature_3', 'Feature_4'])
df['Class'] = multi_class

# train test split: hold out 25% of the rows (25 samples) for evaluation
X_train, X_test, y_train, y_test = train_test_split(
    df.drop(columns='Class'), df['Class'], test_size=0.25, random_state=42)

# create model
# liblinear is a binary solver; handing it a 3-class target relies on an implicit
# one-vs-rest fallback that newer scikit-learn releases deprecate. Wrapping the
# estimator in OneVsRestClassifier keeps the same scheme but makes it explicit,
# so the fit is expected to match the original one-vs-rest behaviour.
# NOTE: this is one-vs-rest, not a single softmax (multinomial) model; for the
# latter use LogisticRegression() with its default solver (results may differ).
model = OneVsRestClassifier(LogisticRegression(solver='liblinear', random_state=42))
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# create matrix number map: cell numbers 1-9 laid out like the 3x3 confusion
# matrix (rows = actual class, columns = predicted class) for reference below
print(np.array(range(1, 10)).reshape(3, 3))
print()
print(confusion_matrix(y_test, y_pred))
print()
# same counts as the confusion matrix, with labelled axes and row/column totals
print(pd.crosstab(y_test, y_pred, rownames=['Actual'], colnames=['Predicted'], margins=True))
print()
print(classification_report(y_test, y_pred))

[[1 2 3]
 [4 5 6]
 [7 8 9]]

[[6 2 1]
 [2 7 0]
 [0 0 7]]

Predicted  0  1  2  All
Actual                 
0          6  2  1    9
1          2  7  0    9
2          0  0  7    7
All        8  9  8   25

              precision    recall  f1-score   support

           0       0.75      0.67      0.71         9
           1       0.78      0.78      0.78         9
           2       0.88      1.00      0.93         7

    accuracy                           0.80        25
   macro avg       0.80      0.81      0.81        25
weighted avg       0.80      0.80      0.80        25



In [11]:
# https://www.analyticsvidhya.com/blog/2021/06/confusion-matrix-for-multi-class-classification/
# One 2x2 one-vs-rest matrix per class (0, 1, 2), each laid out as [[TN, FP], [FN, TP]].
per_class_cm = multilabel_confusion_matrix(y_test, y_pred)
per_class_cm

array([[[14,  2],
        [ 3,  6]],

       [[14,  2],
        [ 2,  7]],

       [[17,  1],
        [ 0,  7]]], dtype=int64)

### Class 0

In [12]:
# Print, separated by blank lines: the cell-number key, the full 3x3 confusion
# matrix, and the one-vs-rest 2x2 matrix for class 0 ([[TN, FP], [FN, TP]]).
cell_key = np.arange(1, 10).reshape(3, 3)
full_cm = confusion_matrix(y_test, y_pred)
class_cm = multilabel_confusion_matrix(y_test, y_pred)[0]
print(cell_key, full_cm, class_cm, sep='\n\n')
# The report follows directly (no blank line) so precision/recall can be
# checked against the 2x2 counts above.
print(classification_report(y_test, y_pred))

[[1 2 3]
 [4 5 6]
 [7 8 9]]

[[6 2 1]
 [2 7 0]
 [0 0 7]]

[[14  2]
 [ 3  6]]
              precision    recall  f1-score   support

           0       0.75      0.67      0.71         9
           1       0.78      0.78      0.78         9
           2       0.88      1.00      0.93         7

    accuracy                           0.80        25
   macro avg       0.80      0.81      0.81        25
weighted avg       0.80      0.80      0.80        25



Class 0
* TN = add cells 5, 6, 8, 9
* FP = add cells 4, 7
* FN = add cells 2, 3
* TP = cell 1

### Class 1

In [13]:
# Print, separated by blank lines: the cell-number key, the full 3x3 confusion
# matrix, and the one-vs-rest 2x2 matrix for class 1 ([[TN, FP], [FN, TP]]).
cell_key = np.arange(1, 10).reshape(3, 3)
full_cm = confusion_matrix(y_test, y_pred)
class_cm = multilabel_confusion_matrix(y_test, y_pred)[1]
print(cell_key, full_cm, class_cm, sep='\n\n')
# The report follows directly (no blank line) so precision/recall can be
# checked against the 2x2 counts above.
print(classification_report(y_test, y_pred))

[[1 2 3]
 [4 5 6]
 [7 8 9]]

[[6 2 1]
 [2 7 0]
 [0 0 7]]

[[14  2]
 [ 2  7]]
              precision    recall  f1-score   support

           0       0.75      0.67      0.71         9
           1       0.78      0.78      0.78         9
           2       0.88      1.00      0.93         7

    accuracy                           0.80        25
   macro avg       0.80      0.81      0.81        25
weighted avg       0.80      0.80      0.80        25



Class 1
* TN = add the corner cells 1, 3, 7, 9
* FP = add cells 2, 8
* FN = add cells 4, 6
* TP = cell 5

### Class 2

In [14]:
# Print, separated by blank lines: the cell-number key, the full 3x3 confusion
# matrix, and the one-vs-rest 2x2 matrix for class 2 ([[TN, FP], [FN, TP]]).
cell_key = np.arange(1, 10).reshape(3, 3)
full_cm = confusion_matrix(y_test, y_pred)
class_cm = multilabel_confusion_matrix(y_test, y_pred)[2]
print(cell_key, full_cm, class_cm, sep='\n\n')
# The report follows directly (no blank line) so precision/recall can be
# checked against the 2x2 counts above.
print(classification_report(y_test, y_pred))

[[1 2 3]
 [4 5 6]
 [7 8 9]]

[[6 2 1]
 [2 7 0]
 [0 0 7]]

[[17  1]
 [ 0  7]]
              precision    recall  f1-score   support

           0       0.75      0.67      0.71         9
           1       0.78      0.78      0.78         9
           2       0.88      1.00      0.93         7

    accuracy                           0.80        25
   macro avg       0.80      0.81      0.81        25
weighted avg       0.80      0.80      0.80        25



Class 2
* TN = add cells 1, 2, 4, 5 (the top-left 2x2 block)
* FP = add cells 3, 6
* FN = add cells 7, 8
* TP = cell 9

### Test Statistics

Binary case (class labels 0 and 1):

* tn = pred 0 actual 0
* fp = pred 1 actual 0
* fn = pred 0 actual 1
* tp = pred 1 actual 1

Multi-class case (one class versus the rest, with actual classes as rows and predicted classes as columns):

* tn = sum of all cells outside the row and the column of the class being evaluated
* fp = sum of the values in the class's column, except tp
* fn = sum of the values in the class's row, except tp
* tp = the diagonal cell of the class, where the predicted value matches the actual value

https://www.projectpro.io/recipes/explain-multiclass-confusion-matrix