# **Logistic Regression - Practical Implementation**

In [1]:
# importing libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
# loading dataset
df = sns.load_dataset('iris')
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [3]:
df['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [4]:
df.isnull().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

In [7]:
# removing setosa category for binary classification
df = df[df['species'] != 'setosa']
df['species'].unique()

array(['versicolor', 'virginica'], dtype=object)

**Label Encoding**

In [9]:
df['species'] = df['species'].map({'versicolor': 0, 'virginica': 1})
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
50,7.0,3.2,4.7,1.4,0
51,6.4,3.2,4.5,1.5,0
52,6.9,3.1,4.9,1.5,0
53,5.5,2.3,4.0,1.3,0
54,6.5,2.8,4.6,1.5,0


**Train Test Split**

In [11]:
# importing library
from sklearn.model_selection import train_test_split

In [12]:
# splitting
X = df.drop('species', axis=1)
y = df[['species']]

In [14]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

## **Logistic Regression Implementation**

In [18]:
# importing logistic regression
from sklearn.linear_model import LogisticRegression

# importing gridsearchcv
from sklearn.model_selection import GridSearchCV

**Training**

In [19]:
logistic_reg = LogisticRegression()

In [40]:
parameters = {
    'penalty': ['l1', 'l2', 'elasticnet'],
    'C': [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 20, 30, 40, 50],
    'max_iter': [100, 200, 300]
}
logistic_reg_cv = GridSearchCV(logistic_reg, param_grid=parameters, scoring='accuracy', cv=5)
logistic_reg_cv.fit(X_train, y_train)

In [41]:
# best parameters
logistic_reg_cv.best_params_

{'C': 0.3, 'max_iter': 100, 'penalty': 'l2'}

In [42]:
# best score
logistic_reg_cv.best_score_

0.9733333333333334

**Testing**

In [43]:
logistic_reg_pred = logistic_reg_cv.predict(X_test)
logistic_reg_pred

array([1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 1])

In [33]:
# accuracy
from sklearn.metrics import accuracy_score, classification_report

In [44]:
accuracy_score(logistic_reg_pred, y_test)

0.88

In [46]:
print(classification_report(logistic_reg_pred, y_test))

              precision    recall  f1-score   support

           0       0.93      0.87      0.90        15
           1       0.82      0.90      0.86        10

    accuracy                           0.88        25
   macro avg       0.87      0.88      0.88        25
weighted avg       0.88      0.88      0.88        25

