In [1]:
import warnings
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, RandomizedSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
%matplotlib inline
warnings.filterwarnings('ignore')

In [3]:
X, y = make_classification(n_samples=1000, n_features=10, n_informative=3, n_classes=3, random_state=15)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

In [9]:
model1 = LogisticRegression(multi_class='ovr')
model1.fit(X_train, y_train)

In [10]:
y_pred = model1.predict(X_test)
y_pred

array([2, 1, 2, 1, 1, 0, 0, 0, 2, 0, 2, 1, 2, 2, 2, 2, 2, 0, 0, 2, 2, 1,
       1, 1, 1, 0, 0, 0, 2, 1, 0, 2, 2, 1, 2, 0, 0, 2, 2, 1, 2, 2, 2, 1,
       2, 0, 1, 2, 0, 1, 0, 0, 0, 1, 1, 2, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0,
       2, 1, 0, 1, 0, 1, 2, 1, 2, 2, 1, 0, 1, 0, 1, 0, 1, 2, 2, 0, 1, 2,
       2, 1, 1, 2, 2, 0, 0, 0, 2, 2, 0, 1, 2, 1, 2, 1, 0, 2, 0, 2, 0, 1,
       2, 1, 2, 2, 1, 1, 1, 1, 2, 0, 2, 0, 1, 2, 0, 0, 2, 2, 2, 1, 2, 0,
       2, 2, 0, 0, 0, 2, 0, 2, 0, 1, 2, 1, 1, 2, 0, 0, 1, 1, 2, 2, 2, 1,
       2, 0, 2, 2, 2, 1, 0, 2, 0, 0, 2, 0, 2, 0, 0, 1, 2, 0, 1, 1, 1, 1,
       0, 2, 1, 0, 0, 1, 2, 2, 2, 2, 2, 0, 1, 1, 2, 2, 1, 2, 2, 2, 2, 1,
       0, 0, 1, 2, 2, 0, 0, 2, 1, 2, 1, 0, 0, 2, 1, 1, 1, 2, 2, 1, 2, 1,
       0, 1, 0, 0, 1, 0, 2, 1, 0, 2, 2, 1, 1, 1, 2, 1, 1, 0, 1, 1, 0, 0,
       0, 0, 2, 0, 0, 2, 2, 2, 2, 0, 2, 0, 1, 2, 2, 2, 1, 0, 0, 1, 0, 2,
       1, 2, 0, 0, 0, 2, 2, 1, 2, 0, 1, 1, 0, 0, 0, 1, 0, 2, 2, 0, 2, 0,
       0, 0, 1, 1, 2, 0, 1, 2, 2, 0, 1, 2, 0, 2])

In [11]:
model1.predict_proba(X_test)

array([[8.54668277e-05, 1.78566218e-01, 8.21348315e-01],
       [2.20265008e-01, 6.90757491e-01, 8.89775015e-02],
       [3.71298636e-06, 2.56276461e-01, 7.43719826e-01],
       [1.00022078e-01, 8.36714179e-01, 6.32637428e-02],
       [9.44885593e-02, 8.91168568e-01, 1.43428729e-02],
       [8.49268672e-01, 3.48176979e-02, 1.15913630e-01],
       [7.88828023e-01, 6.27852789e-02, 1.48386698e-01],
       [9.15633848e-01, 1.74625415e-02, 6.69036106e-02],
       [3.90959229e-02, 4.33110068e-01, 5.27794009e-01],
       [7.87582452e-01, 2.04652117e-01, 7.76543050e-03],
       [6.08109017e-04, 3.68450984e-01, 6.30940907e-01],
       [3.56088708e-01, 5.81837438e-01, 6.20738545e-02],
       [1.39665988e-01, 3.95142394e-03, 8.56382588e-01],
       [2.38007502e-02, 4.78483943e-01, 4.97715306e-01],
       [7.71767342e-03, 3.79366378e-01, 6.12915948e-01],
       [2.45643187e-02, 3.55553676e-02, 9.39880314e-01],
       [9.30748616e-03, 1.64815301e-01, 8.25877213e-01],
       [6.55172190e-01, 3.38008

In [12]:
score = accuracy_score(y_test, y_pred)
print("Accuracy Score: ", score)

cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix: \n", cm)
print("Classification Report: \n", classification_report(y_test, y_pred))

Accuracy Score:  0.79
Confusion Matrix: 
 [[84 10  8]
 [ 3 74 25]
 [10  7 79]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.87      0.82      0.84       102
           1       0.81      0.73      0.77       102
           2       0.71      0.82      0.76        96

    accuracy                           0.79       300
   macro avg       0.79      0.79      0.79       300
weighted avg       0.80      0.79      0.79       300



## HPT and CV

### Grid Search

In [13]:
model2 = LogisticRegression(multi_class='ovr')
penalty = ['l1', 'l2', 'elasticnet']
c_values = [100, 10, 1.0, 0.1, 0.01]
solver = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']

In [14]:
params = {'penalty': penalty, 'C': c_values, 'solver': solver}
params

{'penalty': ['l1', 'l2', 'elasticnet'],
 'C': [100, 10, 1.0, 0.1, 0.01],
 'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']}

In [15]:
cv = StratifiedKFold()

In [28]:
grid = GridSearchCV(estimator=model2, param_grid=params, scoring='accuracy', cv=cv, n_jobs=-1)
grid

In [17]:
grid.fit(X_train, y_train)

In [18]:
grid.best_params_

{'C': 0.1, 'penalty': 'l1', 'solver': 'liblinear'}

In [19]:
grid.best_score_

0.8028571428571428

In [20]:
y_pred2 = grid.predict(X_test)
y_pred2

array([2, 1, 2, 1, 1, 0, 0, 0, 2, 0, 2, 1, 2, 1, 2, 2, 2, 0, 0, 2, 2, 1,
       1, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 1, 2, 0, 0, 2, 2, 1, 2, 2, 2, 1,
       2, 0, 1, 2, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0,
       2, 1, 0, 1, 0, 1, 2, 1, 2, 2, 1, 2, 1, 0, 1, 0, 1, 2, 2, 0, 1, 2,
       2, 1, 1, 2, 2, 0, 0, 0, 2, 2, 0, 1, 2, 1, 2, 1, 0, 2, 0, 2, 0, 1,
       2, 1, 2, 2, 1, 1, 1, 1, 2, 0, 2, 1, 1, 2, 0, 0, 2, 2, 2, 1, 2, 0,
       2, 2, 0, 0, 0, 2, 0, 2, 0, 1, 2, 1, 1, 2, 0, 0, 1, 1, 2, 2, 2, 1,
       2, 0, 2, 2, 2, 1, 0, 2, 0, 0, 2, 0, 2, 0, 0, 1, 2, 0, 1, 1, 1, 2,
       0, 2, 1, 0, 0, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 2, 2, 2, 1,
       0, 0, 1, 2, 2, 0, 0, 2, 1, 2, 1, 0, 0, 2, 1, 1, 1, 2, 2, 1, 2, 1,
       0, 1, 0, 0, 1, 0, 2, 1, 0, 2, 2, 1, 1, 1, 2, 0, 1, 0, 1, 1, 0, 0,
       0, 0, 2, 0, 0, 2, 2, 2, 2, 0, 2, 0, 1, 2, 2, 2, 1, 0, 0, 1, 0, 2,
       1, 2, 0, 0, 0, 2, 2, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 2, 2, 0, 2, 0,
       0, 0, 1, 1, 2, 0, 1, 2, 2, 0, 1, 2, 0, 2])

In [21]:
score2 = accuracy_score(y_test, y_pred2)
print("Accuracy Score: ", score2)

cm2 = confusion_matrix(y_test, y_pred2)
print("Confusion Matrix: \n", cm2)
print("Classification Report: \n", classification_report(y_test, y_pred2))

Accuracy Score:  0.7766666666666666
Confusion Matrix: 
 [[83 11  8]
 [ 4 72 26]
 [ 7 11 78]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.88      0.81      0.85       102
           1       0.77      0.71      0.73       102
           2       0.70      0.81      0.75        96

    accuracy                           0.78       300
   macro avg       0.78      0.78      0.78       300
weighted avg       0.78      0.78      0.78       300



### Randomized Search

In [22]:
model3 = LogisticRegression(multi_class='ovr')
randomcv = RandomizedSearchCV(estimator=model3, param_distributions=params, n_iter=100, cv=5, scoring='accuracy', n_jobs=-1)

In [23]:
randomcv.fit(X_train, y_train)

In [24]:
randomcv.best_params_

{'solver': 'liblinear', 'penalty': 'l1', 'C': 0.1}

In [25]:
randomcv.best_score_

0.8028571428571428

In [26]:
y_pred3 = randomcv.predict(X_test)
y_pred3

array([2, 1, 2, 1, 1, 0, 0, 0, 2, 0, 2, 1, 2, 1, 2, 2, 2, 0, 0, 2, 2, 1,
       1, 1, 1, 0, 0, 2, 2, 1, 0, 2, 2, 1, 2, 0, 0, 2, 2, 1, 2, 2, 2, 1,
       2, 0, 1, 2, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0,
       2, 1, 0, 1, 0, 1, 2, 1, 2, 2, 1, 2, 1, 0, 1, 0, 1, 2, 2, 0, 1, 2,
       2, 1, 1, 2, 2, 0, 0, 0, 2, 2, 0, 1, 2, 1, 2, 1, 0, 2, 0, 2, 0, 1,
       2, 1, 2, 2, 1, 1, 1, 1, 2, 0, 2, 1, 1, 2, 0, 0, 2, 2, 2, 1, 2, 0,
       2, 2, 0, 0, 0, 2, 0, 2, 0, 1, 2, 1, 1, 2, 0, 0, 1, 1, 2, 2, 2, 1,
       2, 0, 2, 2, 2, 1, 0, 2, 0, 0, 2, 0, 2, 0, 0, 1, 2, 0, 1, 1, 1, 2,
       0, 2, 1, 0, 0, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 1, 1, 2, 2, 2, 2, 1,
       0, 0, 1, 2, 2, 0, 0, 2, 1, 2, 1, 0, 0, 2, 1, 1, 1, 2, 2, 1, 2, 1,
       0, 1, 0, 0, 1, 0, 2, 1, 0, 2, 2, 1, 1, 1, 2, 0, 1, 0, 1, 1, 0, 0,
       0, 0, 2, 0, 0, 2, 2, 2, 2, 0, 2, 0, 1, 2, 2, 2, 1, 0, 0, 1, 0, 2,
       1, 2, 0, 0, 0, 2, 2, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 2, 2, 0, 2, 0,
       0, 0, 1, 1, 2, 0, 1, 2, 2, 0, 1, 2, 0, 2])

In [27]:
score3 = accuracy_score(y_test, y_pred3)
print("Accuracy Score: ", score3)

cm3 = confusion_matrix(y_test, y_pred3)
print("Confusion Matrix: \n", cm3)
print("Classification Report: \n", classification_report(y_test, y_pred3))

Accuracy Score:  0.7766666666666666
Confusion Matrix: 
 [[83 11  8]
 [ 4 72 26]
 [ 7 11 78]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.88      0.81      0.85       102
           1       0.77      0.71      0.73       102
           2       0.70      0.81      0.75        96

    accuracy                           0.78       300
   macro avg       0.78      0.78      0.78       300
weighted avg       0.78      0.78      0.78       300

