# Logistic Regression Implementation

In [68]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [69]:
from sklearn.datasets import make_classification

In [70]:
# create datasets
x,y = make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=15)

In [71]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.3, random_state=15)

In [72]:
# Model training
from sklearn.linear_model import LogisticRegression
logistic = LogisticRegression()

In [73]:
logistic.fit(x_train, y_train)

In [74]:
y_predict = logistic.predict(x_test)
print(y_predict)

[0 1 0 1 0 0 1 0 1 0 0 1 1 1 0 1 0 0 1 0 1 1 1 0 1 1 0 1 0 0 0 1 1 1 0 1 1
 0 1 1 1 1 0 0 1 0 0 0 1 1 1 1 1 0 0 1 1 0 0 0 0 1 1 0 1 1 0 0 1 0 0 1 0 1
 1 1 1 1 0 0 0 1 0 0 1 1 1 0 1 0 0 1 0 1 0 0 1 0 0 0 1 0 1 0 0 1 1 0 1 0 1
 0 1 1 1 1 0 0 0 1 1 0 1 0 1 1 0 0 1 1 1 0 0 1 1 0 0 1 1 1 1 1 1 1 0 0 0 0
 1 0 0 1 1 0 1 0 1 0 0 0 1 1 0 0 0 1 0 0 1 1 0 0 1 0 0 0 1 0 1 1 0 0 0 0 1
 1 1 0 0 0 0 1 1 0 0 0 0 1 0 1 1 0 0 1 1 0 0 1 1 0 1 0 1 1 1 1 1 0 0 0 0 1
 0 0 1 1 0 0 0 0 1 1 1 1 0 1 0 0 0 1 1 0 1 1 0 1 0 0 0 1 1 1 1 1 1 1 1 1 0
 0 0 0 1 0 0 0 0 1 0 0 0 1 1 1 1 0 0 1 1 1 1 0 1 1 0 1 1 0 0 1 0 1 1 1 0 1
 0 0 1 1]


In [75]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [76]:
score = accuracy_score(y_test, y_predict)
print(score)
cm = confusion_matrix(y_test, y_predict)
print(cm)
print(classification_report(y_test, y_predict))

0.92
[[134  11]
 [ 13 142]]
              precision    recall  f1-score   support

           0       0.91      0.92      0.92       145
           1       0.93      0.92      0.92       155

    accuracy                           0.92       300
   macro avg       0.92      0.92      0.92       300
weighted avg       0.92      0.92      0.92       300



# Hyperparameter Tuning and Cross Validation

In [77]:
model = LogisticRegression()
penalty = ['l1', 'l2', 'elasticnet']
c_values = [100,10,1.0,0.1,0.01]
solver = ['lbfgs', 'liblinear' , 'newton-cg', 'sag', 'saga'] 

In [78]:
params = dict(penalty=penalty, C= c_values, solver = solver)

In [79]:
from sklearn.model_selection import StratifiedKFold
cv = StratifiedKFold()

In [80]:
## Grid Search CV
from sklearn.model_selection import GridSearchCV
grid =  GridSearchCV(estimator=model, param_grid = params, scoring = 'accuracy', cv = cv , n_jobs=-1)

In [81]:
grid

In [82]:
grid.fit(x_train, y_train)

200 fits failed out of a total of 375.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
25 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\KIIT\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\KIIT\anaconda3\Lib\site-packages\sklearn\base.py", line 1151, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\KIIT\anaconda3\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1168, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

In [83]:
grid.best_params_

{'C': 0.1, 'penalty': 'l2', 'solver': 'liblinear'}

In [84]:
grid.best_score_

0.9228571428571429

In [85]:
y_predict = grid.predict(x_test)

In [86]:
score = accuracy_score(y_test, y_predict)
print(score)
cm = confusion_matrix(y_test, y_predict)
print(cm)
print(classification_report(y_test, y_predict))

0.9233333333333333
[[137   8]
 [ 15 140]]
              precision    recall  f1-score   support

           0       0.90      0.94      0.92       145
           1       0.95      0.90      0.92       155

    accuracy                           0.92       300
   macro avg       0.92      0.92      0.92       300
weighted avg       0.92      0.92      0.92       300



# Randomized SearchCV

In [87]:
from sklearn.model_selection import RandomizedSearchCV

In [88]:
model = LogisticRegression()
randomcv = RandomizedSearchCV(estimator= model, param_distributions=params, cv = 5, scoring='accuracy')

In [89]:
randomcv.fit(x_train, y_train)

25 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\KIIT\anaconda3\Lib\site-packages\sklearn\model_selection\_validation.py", line 732, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\KIIT\anaconda3\Lib\site-packages\sklearn\base.py", line 1151, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\KIIT\anaconda3\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1168, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

In [90]:
randomcv.best_score_

0.9214285714285715

In [91]:
randomcv.best_params_

{'solver': 'saga', 'penalty': 'l2', 'C': 0.1}

In [92]:
y_predict = randomcv.predict(x_test)

In [93]:
score = accuracy_score(y_test, y_predict)
print(score)
cm = confusion_matrix(y_test, y_predict)
print(cm)
print(classification_report(y_test, y_predict))

0.92
[[136   9]
 [ 15 140]]
              precision    recall  f1-score   support

           0       0.90      0.94      0.92       145
           1       0.94      0.90      0.92       155

    accuracy                           0.92       300
   macro avg       0.92      0.92      0.92       300
weighted avg       0.92      0.92      0.92       300



# Logistic for Multiclass Classification problem

In [95]:
# create datasets
x,y = make_classification(n_samples=1000, n_features=10, n_informative= 3 ,n_classes=3, random_state=15)

In [98]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y, test_size=0.3, random_state=15)

In [101]:
from sklearn.linear_model import LogisticRegression
logistic = LogisticRegression(multi_class='ovr')
logistic.fit(x_train, y_train)
y_predict = logistic.predict(x_test)

In [102]:
score = accuracy_score(y_test, y_predict)
print(score)
cm = confusion_matrix(y_test, y_predict)
print(cm)
print(classification_report(y_test, y_predict))

0.7833333333333333
[[83 16  6]
 [ 2 69 26]
 [ 6  9 83]]
              precision    recall  f1-score   support

           0       0.91      0.79      0.85       105
           1       0.73      0.71      0.72        97
           2       0.72      0.85      0.78        98

    accuracy                           0.78       300
   macro avg       0.79      0.78      0.78       300
weighted avg       0.79      0.78      0.78       300

