# Logistic Regression

In [56]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [57]:
from sklearn.datasets import make_classification

In [58]:
## Create the dataset
x,y = make_classification(n_samples = 1000, n_features = 10,n_classes = 2, random_state = 42)

In [59]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)


In [60]:
from sklearn.linear_model import LogisticRegression
logistic = LogisticRegression()

In [61]:
logistic.fit(x_train,y_train)

In [62]:
y_pred = logistic.predict(x_test)
print(y_pred)

[0 1 0 1 0 1 0 0 0 0 0 1 0 1 0 0 1 1 1 1 1 0 1 0 0 1 0 0 1 1 1 1 0 0 0 0 1
 1 1 1 0 1 1 0 0 0 1 1 1 1 0 1 1 0 1 0 1 0 1 0 1 0 0 1 1 1 0 0 1 1 1 1 1 0
 1 0 0 1 0 1 0 0 1 0 1 0 0 0 0 1 1 1 1 1 1 1 0 0 1 0 1 0 1 0 0 1 0 1 1 1 1
 1 1 1 1 0 0 1 0 0 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 1 0 0 0 1 1 0 0 0 0 0 1 0
 0 0 1 0 0 1 0 0 1 0 1 1 0 1 0 0 0 0 0 0 0 1 1 1 0 0 1 1 0 0 1 1 1 1 1 1 1
 0 0 0 0 1 0 0 0 0 1 0 0 1 1 1 0 1 0 0 0 1 1 1 1 1 0 0 0 0 0 1 0 1 0 1 1 0
 0 1 1 1 0 1 1 0 0 0 0 0 0 0 0 0 1 1 0 1 0 1 1 0 1 0 1 0]


In [63]:
#logistic.predict_proba(x_test)
#this shows probabilty that whats the probaboilty that the point is above the fitline or below

In [64]:
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report

In [65]:
score = accuracy_score(y_test,y_pred)
print(score)
cm = confusion_matrix(y_test,y_pred)
print(cm)
print(classification_report(y_test,y_pred))

0.84
[[ 98  15]
 [ 25 112]]
              precision    recall  f1-score   support

           0       0.80      0.87      0.83       113
           1       0.88      0.82      0.85       137

    accuracy                           0.84       250
   macro avg       0.84      0.84      0.84       250
weighted avg       0.84      0.84      0.84       250



#  HyperParameter tunning and CrossValidation

In [66]:
model = LogisticRegression()
penalty = ['l1', "l2", "elasticnet"]
c_values = [100,10,1.0,0.1,0.01]
solvers = ['lbfgs', 'liblinear', 'newton-cg', 'sag', 'saga']

In [67]:
params =dict(penalty = penalty,C=c_values,solver = solvers)

## GridSearchCv
GridSearchCV is a method in machine learning used for hyperparameter tuning. Its primary goal is to find the best combination of hyperparameters for a given model to improve its performance
It can not be used for all the algorthim only few of them ckech gpt for that

In [68]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
cv=StratifiedKFold()


In [69]:
grid = GridSearchCV(estimator = model ,param_grid = params , scoring = 'accuracy',cv =cv ,n_jobs =-1 )

In [70]:
grid

In [71]:
grid.fit(x_train,y_train)

200 fits failed out of a total of 375.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
25 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\LAPTOP WORLD\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\LAPTOP WORLD\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\LAPTOP WORLD\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1194, in fit
    sol

In [72]:
grid.best_params_

{'C': 0.01, 'penalty': 'l2', 'solver': 'lbfgs'}

In [73]:
grid.best_score_

np.float64(0.8773333333333333)

In [74]:
y_pred = grid.predict(x_test)

In [75]:
score = accuracy_score(y_test,y_pred)
print(score)
cm = confusion_matrix(y_test,y_pred)
print(cm)
print(classification_report(y_test,y_pred))

0.84
[[103  10]
 [ 30 107]]
              precision    recall  f1-score   support

           0       0.77      0.91      0.84       113
           1       0.91      0.78      0.84       137

    accuracy                           0.84       250
   macro avg       0.84      0.85      0.84       250
weighted avg       0.85      0.84      0.84       250



## Randomised Search Cv
(best)

In [76]:
from sklearn.model_selection import RandomizedSearchCV
model = LogisticRegression()
randomcv = RandomizedSearchCV(estimator = model ,param_distributions = params , cv= 5 , scoring = 'accuracy')

In [77]:
randomcv.fit(x_train,y_train)

25 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\LAPTOP WORLD\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\LAPTOP WORLD\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\LAPTOP WORLD\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1194, in fit
    solver

In [78]:
randomcv.best_score_

np.float64(0.8773333333333333)

In [79]:
randomcv.best_params_

{'solver': 'sag', 'penalty': 'l2', 'C': 0.01}

In [80]:
y_pred = randomcv.predict(x_test)

In [81]:
score = accuracy_score(y_test,y_pred)
print(score)
cm = confusion_matrix(y_test,y_pred)
print(cm)
print(classification_report(y_test,y_pred))

0.84
[[103  10]
 [ 30 107]]
              precision    recall  f1-score   support

           0       0.77      0.91      0.84       113
           1       0.91      0.78      0.84       137

    accuracy                           0.84       250
   macro avg       0.84      0.85      0.84       250
weighted avg       0.85      0.84      0.84       250



# Logistic Regression For Multiple class slcassfication using OvR

In [82]:
from sklearn.datasets import make_classification
## Create the dataset
x,y = make_classification(n_samples = 1000, n_features = 10,n_classes = 3,n_informative=3, random_state = 42)

In [84]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42)


In [87]:
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
base_logistic = LogisticRegression()
# Wrap it with OneVsRestClassifier
model = OneVsRestClassifier(base_logistic)
# Fit the model
model.fit(x_train, y_train)
# Predict using the model
y_pred = model.predict(x_test)

In [88]:
score = accuracy_score(y_test,y_pred)
print(score)
cm = confusion_matrix(y_test,y_pred)
print(cm)
print(classification_report(y_test,y_pred))

0.668
[[64 15  7]
 [27 34 21]
 [ 3 10 69]]
              precision    recall  f1-score   support

           0       0.68      0.74      0.71        86
           1       0.58      0.41      0.48        82
           2       0.71      0.84      0.77        82

    accuracy                           0.67       250
   macro avg       0.66      0.67      0.65       250
weighted avg       0.66      0.67      0.66       250



# Gridserach cv

In [97]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
# Define parameter grid for LogisticRegression
param_grid = {
    'estimator__penalty': ['l1', 'l2', 'elasticnet'],
    'estimator__C': [100, 10, 1.0, 0.1, 0.01],
    'estimator__solver': ['lbfgs', 'liblinear', 'newton-cg', 'sag', 'saga']  # Note: some solvers don't support some penalties
}

# Create the GridSearchCV object
cv = StratifiedKFold()
grid = GridSearchCV(estimator=model, param_grid=param_grid, scoring='accuracy', cv=cv, n_jobs=-1)

# Fit the GridSearchCV object
grid.fit(x_train, y_train)

# Get the best model and its parameters
best_model = grid.best_estimator_
best_params = grid.best_params_

print("Best parameters found:", best_params)
print("Best cross-validation accuracy:", grid.best_score_)


Best parameters found: {'estimator__C': 0.1, 'estimator__penalty': 'l1', 'estimator__solver': 'liblinear'}
Best cross-validation accuracy: 0.6506666666666667


200 fits failed out of a total of 375.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
25 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\LAPTOP WORLD\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\LAPTOP WORLD\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\LAPTOP WORLD\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\multiclass.py", line 370, in fit
    self.estimators_

In [98]:
y_pred = grid.predict(x_test)

In [99]:
score = accuracy_score(y_test,y_pred)
print(score)
cm = confusion_matrix(y_test,y_pred)
print(cm)
print(classification_report(y_test,y_pred))

0.676
[[67 12  7]
 [30 30 22]
 [ 3  7 72]]
              precision    recall  f1-score   support

           0       0.67      0.78      0.72        86
           1       0.61      0.37      0.46        82
           2       0.71      0.88      0.79        82

    accuracy                           0.68       250
   macro avg       0.67      0.67      0.66       250
weighted avg       0.67      0.68      0.66       250



# Randomised Search Cv

In [100]:
from sklearn.model_selection import RandomizedSearchCV
model = LogisticRegression()
randomcv = RandomizedSearchCV(estimator = model ,param_distributions = params , cv= 5 , scoring = 'accuracy')

In [101]:
randomcv.fit(x_train,y_train)

40 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
10 fits failed with the following error:
Traceback (most recent call last):
  File "C:\Users\LAPTOP WORLD\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\model_selection\_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "C:\Users\LAPTOP WORLD\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Users\LAPTOP WORLD\AppData\Local\Programs\Python\Python312\Lib\site-packages\sklearn\linear_model\_logistic.py", line 1204, in fit
    raise

In [103]:
randomcv.best_score_


np.float64(0.6293333333333333)

In [104]:
randomcv.best_params_

{'solver': 'liblinear', 'penalty': 'l1', 'C': 10}

In [105]:
y_pred = randomcv.predict(x_test)

In [106]:
score = accuracy_score(y_test,y_pred)
print(score)
cm = confusion_matrix(y_test,y_pred)
print(cm)
print(classification_report(y_test,y_pred))

0.668
[[64 15  7]
 [27 34 21]
 [ 3 10 69]]
              precision    recall  f1-score   support

           0       0.68      0.74      0.71        86
           1       0.58      0.41      0.48        82
           2       0.71      0.84      0.77        82

    accuracy                           0.67       250
   macro avg       0.66      0.67      0.65       250
weighted avg       0.66      0.67      0.66       250

