In [75]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

1. Creating dataset

In [76]:
from sklearn.datasets import make_classification
X, y = make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=15)

2. Train test split

In [77]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.30,random_state=42)

3. Logistic regression model training

In [78]:
from sklearn.linear_model import LogisticRegression
logistic=LogisticRegression()
logistic.fit(X_train,y_train)
y_pred=logistic.predict(X_test)
print(y_pred)

[0 0 0 1 1 0 0 1 1 1 1 0 1 0 1 0 0 1 0 1 1 1 1 0 0 0 0 0 0 1 1 0 1 1 1 0 1
 1 0 0 0 0 0 1 0 1 1 1 1 0 1 1 1 1 1 1 1 0 1 0 1 0 0 0 1 1 0 0 1 0 0 0 1 0
 1 0 1 0 0 0 0 0 0 0 1 0 1 0 0 1 1 1 0 1 1 1 0 1 1 1 1 0 0 0 0 0 1 0 0 0 1
 1 0 0 0 0 0 1 0 0 1 0 0 0 0 0 0 0 0 1 1 0 1 0 0 1 1 0 0 1 1 1 1 0 0 0 1 1
 0 1 0 0 1 0 0 0 1 1 0 1 0 0 0 1 0 1 1 0 0 1 0 1 1 0 1 1 0 0 1 1 0 1 0 1 0
 0 1 1 0 1 1 1 1 0 1 1 1 1 1 0 1 1 0 1 0 0 1 0 1 0 1 1 0 1 0 1 0 0 0 0 0 0
 0 1 0 0 0 0 0 1 0 1 0 0 1 0 1 1 0 0 1 1 1 0 1 1 0 0 1 1 0 1 1 0 0 1 0 0 1
 0 0 1 0 1 0 1 0 0 1 1 0 0 1 1 0 1 1 1 0 1 1 0 0 0 1 0 0 0 1 0 0 1 0 1 0 1
 0 1 0 0]


4. Evaluation - scoring/ metrics calculation

In [79]:
from sklearn.metrics import accuracy_score,classification_report,confusion_matrix
score=accuracy_score(y_pred,y_test)
print(score)
print(classification_report(y_pred,y_test))
print(confusion_matrix(y_pred,y_test))

0.9166666666666666
              precision    recall  f1-score   support

           0       0.93      0.91      0.92       160
           1       0.90      0.92      0.91       140

    accuracy                           0.92       300
   macro avg       0.92      0.92      0.92       300
weighted avg       0.92      0.92      0.92       300

[[146  14]
 [ 11 129]]


## Hyperparameter Tuning And Cross Validation

<br> model=LogisticRegression()
<br> penalty=['l1', 'l2', 'elasticnet']
<br> c_values=[100,10,1.0,0.1,0.01]
<br> solver=['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
<br> params=dict(penalty=penalty,C=c_values,solver=solver)

<br> Incorrect Combinations Causing Errors:
<br> newton-cg, lbfgs, and sag only support l2 penalty (no l1 or elasticnet).
<br> liblinear only supports l1 and l2 penalties (no elasticnet).
<br> saga is the only solver that supports all three penalties (l1, l2, elasticnet).

In [80]:
model = LogisticRegression()
params = [
    {"penalty": ["l1"], "C": [100, 10, 1.0, 0.1, 0.01], "solver": ["liblinear", "saga"]},
    {"penalty": ["l2"], "C": [100, 10, 1.0, 0.1, 0.01], "solver": ["newton-cg", "lbfgs", "liblinear", "sag", "saga"]},
    {"penalty": ["elasticnet"], "C": [100, 10, 1.0, 0.1, 0.01], "solver": ["saga"], "l1_ratio": [0.1, 0.5, 0.9]},
]

1. Grid SeachCV

In [81]:
from sklearn.model_selection import StratifiedKFold
cv=StratifiedKFold()

In [82]:
from sklearn.model_selection import GridSearchCV
grid=GridSearchCV(estimator=model,param_grid=params,scoring='accuracy',cv=cv,n_jobs=-1)
grid

In [83]:
grid.fit(X_train,y_train)

In [84]:
grid.best_params_

{'C': 0.01, 'l1_ratio': 0.5, 'penalty': 'elasticnet', 'solver': 'saga'}

In [85]:
grid.best_score_

0.927142857142857

In [87]:
y_pred=grid.predict(X_test)
score=accuracy_score(y_pred,y_test)
print(score)
print(classification_report(y_pred,y_test))
print(confusion_matrix(y_pred,y_test))

0.92
              precision    recall  f1-score   support

           0       0.95      0.90      0.93       165
           1       0.89      0.94      0.91       135

    accuracy                           0.92       300
   macro avg       0.92      0.92      0.92       300
weighted avg       0.92      0.92      0.92       300

[[149  16]
 [  8 127]]


2. Randomized SearchCV

In [88]:
from sklearn.model_selection import RandomizedSearchCV
model=LogisticRegression()
randomcv=RandomizedSearchCV(estimator=model,param_distributions=params,cv=5,scoring='accuracy')
randomcv.fit(X_train,y_train)

In [89]:
randomcv.best_score_

0.9242857142857142

In [90]:
randomcv.best_params_

{'solver': 'saga', 'penalty': 'elasticnet', 'l1_ratio': 0.9, 'C': 0.01}

In [91]:
y_pred=randomcv.predict(X_test)
score=accuracy_score(y_pred,y_test)
print(score)
print(classification_report(y_pred,y_test))
print(confusion_matrix(y_pred,y_test))

0.92
              precision    recall  f1-score   support

           0       0.95      0.90      0.93       165
           1       0.89      0.94      0.91       135

    accuracy                           0.92       300
   macro avg       0.92      0.92      0.92       300
weighted avg       0.92      0.92      0.92       300

[[149  16]
 [  8 127]]
