## Logistic Regression For Multiclass Classification

In [1]:
# make a prediction with a multinomial logistic regression model
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression

In [2]:
# define dataset
X, y = make_classification(n_samples=1000, n_features=10, n_informative=5, n_redundant=5, n_classes=3, random_state=1)

In [3]:
X[0]

array([ 1.89149379, -0.39847585,  1.63856893,  0.01647165,  1.51892395,
       -3.52651223,  1.80998823,  0.58810926, -0.02542177, -0.52835426])

In [4]:
# define the multinomial logistic regression model For multiclass classification
model = LogisticRegression(multi_class='ovr', solver='lbfgs')

In [5]:
# Train test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=48)

In [6]:
# trianing model
model.fit(X_train, y_train)

In [7]:
y_pred = model.predict(X_test)

In [8]:
y_pred

array([1, 2, 0, 0, 0, 2, 0, 1, 1, 1, 0, 1, 0, 0, 2, 1, 0, 1, 2, 1, 1, 0,
       2, 2, 0, 1, 2, 0, 2, 0, 2, 2, 1, 1, 1, 2, 0, 1, 0, 2, 1, 2, 0, 1,
       1, 1, 2, 1, 2, 2, 1, 0, 2, 0, 2, 1, 1, 0, 2, 1, 2, 1, 0, 0, 0, 2,
       2, 0, 2, 0, 0, 1, 0, 1, 0, 0, 2, 2, 2, 0, 1, 0, 1, 1, 2, 1, 0, 0,
       1, 1, 0, 0, 1, 1, 2, 1, 2, 1, 0, 0, 0, 1, 1, 0, 2, 1, 0, 1, 0, 0,
       1, 0, 2, 2, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 1, 1, 0, 2, 1, 1,
       0, 1, 0, 2, 2, 2, 0, 1, 0, 0, 2, 2, 2, 1, 1, 2, 0, 1, 1, 1, 2, 0,
       0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 2, 1, 1, 0, 0, 2, 1, 0, 1, 1, 0,
       2, 0, 2, 0, 1, 2, 0, 1, 1, 0, 1, 2, 2, 1, 1, 2, 1, 2, 1, 1, 0, 1,
       2, 1])

In [9]:
model.predict_proba(X_test)

array([[0.22113241, 0.76966054, 0.00920704],
       [0.01794311, 0.40652621, 0.57553068],
       [0.75178736, 0.17760859, 0.07060405],
       [0.52564689, 0.12401068, 0.35034242],
       [0.53797853, 0.10851332, 0.35350815],
       [0.01448608, 0.2608887 , 0.72462522],
       [0.59530452, 0.37704865, 0.02764684],
       [0.40294365, 0.5789616 , 0.01809476],
       [0.00388864, 0.5264783 , 0.46963306],
       [0.38313295, 0.46283966, 0.15402739],
       [0.55713124, 0.07105919, 0.37180957],
       [0.1832067 , 0.58871281, 0.22808049],
       [0.38741036, 0.38310624, 0.2294834 ],
       [0.63109999, 0.35596211, 0.01293789],
       [0.15842284, 0.01464192, 0.82693525],
       [0.32362383, 0.60238132, 0.07399485],
       [0.46500224, 0.36607909, 0.16891867],
       [0.0587278 , 0.91754807, 0.02372413],
       [0.25389685, 0.08847273, 0.65763042],
       [0.15041961, 0.7812874 , 0.06829299],
       [0.06553444, 0.86047462, 0.07399094],
       [0.6150932 , 0.24914053, 0.13576627],
       [0.

In [10]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
print(confusion_matrix(y_pred, y_test))
print(accuracy_score(y_pred, y_test))
print(classification_report(y_pred, y_test))

[[45  7 21]
 [11 50 14]
 [ 2  8 42]]
0.685
              precision    recall  f1-score   support

           0       0.78      0.62      0.69        73
           1       0.77      0.67      0.71        75
           2       0.55      0.81      0.65        52

    accuracy                           0.69       200
   macro avg       0.70      0.70      0.68       200
weighted avg       0.71      0.69      0.69       200



## IRIS DATASET

In [11]:
from sklearn.datasets import load_iris

In [12]:
datasets = load_iris()

In [13]:
datasets

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [14]:
import pandas as pd
import numpy as np
df = pd.DataFrame(datasets.data, columns=datasets.feature_names)

In [15]:
df['target'] = datasets.target

In [16]:
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [17]:
datasets.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [18]:
model1 = LogisticRegression(multi_class='ovr', solver='lbfgs')

In [19]:
df.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [20]:
X = df.iloc[:, :-1]
y = df.iloc[:,-1]

In [21]:
y

0      0
1      0
2      0
3      0
4      0
      ..
145    2
146    2
147    2
148    2
149    2
Name: target, Length: 150, dtype: int64

In [25]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=20, random_state=48)

In [31]:
model1.fit(X_train, y_train)

In [32]:
y_pred = model1.predict(X_test)

In [33]:
y_pred

array([1, 1, 2, 0, 2, 2, 0, 2, 0, 1, 2, 0, 0, 2, 1, 2, 0, 1, 1, 2])

In [34]:
model1.predict_proba(X_test)

array([[3.03679494e-02, 8.23939189e-01, 1.45692861e-01],
       [5.55060837e-03, 7.49448860e-01, 2.45000531e-01],
       [2.42178756e-04, 4.70957493e-01, 5.28800328e-01],
       [6.08492281e-01, 3.91504348e-01, 3.37160177e-06],
       [4.05085747e-03, 2.95897347e-01, 7.00051796e-01],
       [3.13952221e-04, 3.05754908e-01, 6.93931140e-01],
       [9.40885101e-01, 5.91115449e-02, 3.35424463e-06],
       [2.87200321e-03, 3.58091269e-01, 6.39036727e-01],
       [9.73546998e-01, 2.64515408e-02, 1.46080901e-06],
       [2.68183263e-02, 8.90187615e-01, 8.29940587e-02],
       [3.72906049e-04, 2.79328120e-01, 7.20298974e-01],
       [9.03035786e-01, 9.69627459e-02, 1.46849356e-06],
       [8.54607944e-01, 1.45387323e-01, 4.73300994e-06],
       [1.97204039e-03, 4.18713984e-01, 5.79313976e-01],
       [5.09303842e-03, 8.45520942e-01, 1.49386019e-01],
       [8.29266767e-03, 4.06600286e-01, 5.85107047e-01],
       [8.53961026e-01, 1.46034564e-01, 4.40989191e-06],
       [1.76484390e-02, 9.49081

In [35]:
from sklearn.metrics import confusion_matrix, accuracy_score, classification_report
print(confusion_matrix(y_pred, y_test))
print(accuracy_score(y_pred, y_test))
print(classification_report(y_pred, y_test))

[[6 0 0]
 [0 6 0]
 [0 2 6]]
0.9
              precision    recall  f1-score   support

           0       1.00      1.00      1.00         6
           1       0.75      1.00      0.86         6
           2       1.00      0.75      0.86         8

    accuracy                           0.90        20
   macro avg       0.92      0.92      0.90        20
weighted avg       0.93      0.90      0.90        20

