In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import warnings 
warnings.filterwarnings('ignore')

In [2]:
df=sns.load_dataset('iris')
df.head(5)

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [3]:
df.species.unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [4]:
df.isnull().sum()

sepal_length    0
sepal_width     0
petal_length    0
petal_width     0
species         0
dtype: int64

In [5]:
df=df[df.species!="setosa"]
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
50,7.0,3.2,4.7,1.4,versicolor
51,6.4,3.2,4.5,1.5,versicolor
52,6.9,3.1,4.9,1.5,versicolor
53,5.5,2.3,4.0,1.3,versicolor
54,6.5,2.8,4.6,1.5,versicolor
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,virginica
146,6.3,2.5,5.0,1.9,virginica
147,6.5,3.0,5.2,2.0,virginica
148,6.2,3.4,5.4,2.3,virginica


In [6]:
# label encoding
df['species'].map({'versicolor':0,'virginica':1})

50     0
51     0
52     0
53     0
54     0
      ..
145    1
146    1
147    1
148    1
149    1
Name: species, Length: 100, dtype: int64

In [7]:
df['species']=df['species'].map({'versicolor':0,'virginica':1})
df

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
50,7.0,3.2,4.7,1.4,0
51,6.4,3.2,4.5,1.5,0
52,6.9,3.1,4.9,1.5,0
53,5.5,2.3,4.0,1.3,0
54,6.5,2.8,4.6,1.5,0
...,...,...,...,...,...
145,6.7,3.0,5.2,2.3,1
146,6.3,2.5,5.0,1.9,1
147,6.5,3.0,5.2,2.0,1
148,6.2,3.4,5.4,2.3,1


In [8]:
## split dataset into independent features

x=df.iloc[:,:-1]
y=df.iloc[:,-1]

In [9]:
x,y

(     sepal_length  sepal_width  petal_length  petal_width
 50            7.0          3.2           4.7          1.4
 51            6.4          3.2           4.5          1.5
 52            6.9          3.1           4.9          1.5
 53            5.5          2.3           4.0          1.3
 54            6.5          2.8           4.6          1.5
 ..            ...          ...           ...          ...
 145           6.7          3.0           5.2          2.3
 146           6.3          2.5           5.0          1.9
 147           6.5          3.0           5.2          2.0
 148           6.2          3.4           5.4          2.3
 149           5.9          3.0           5.1          1.8
 
 [100 rows x 4 columns],
 50     0
 51     0
 52     0
 53     0
 54     0
       ..
 145    1
 146    1
 147    1
 148    1
 149    1
 Name: species, Length: 100, dtype: int64)

In [10]:
from sklearn.model_selection import train_test_split
X_train,X_test,y_train,y_test= train_test_split(x,y,test_size=0.25,random_state=42)

In [11]:
from sklearn.linear_model import LogisticRegression
lg=LogisticRegression()

In [12]:
from sklearn.model_selection import GridSearchCV
parameter={
    'penalty':['l1','l2','elasticnet'],
    'C':[1,2,3,4,5,6,10,20],
    'max_iter':[2000,3000]
}

In [13]:
aftercv=GridSearchCV(lg,param_grid=parameter,scoring='accuracy',cv=5)

In [14]:
aftercv.fit(X_train,y_train)

GridSearchCV(cv=5, estimator=LogisticRegression(),
             param_grid={'C': [1, 2, 3, 4, 5, 6, 10, 20],
                         'max_iter': [2000, 3000],
                         'penalty': ['l1', 'l2', 'elasticnet']},
             scoring='accuracy')

In [15]:
print(aftercv.best_params_)

{'C': 1, 'max_iter': 2000, 'penalty': 'l2'}


In [16]:
aftercv.best_score_

0.9733333333333334

In [17]:
## prediction
y_pred=aftercv.predict(X_test)
y_pred

array([1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 0, 1, 0,
       0, 0, 1], dtype=int64)

In [18]:
## accuracy score
from sklearn.metrics import accuracy_score,classification_report

In [19]:
score=accuracy_score(y_pred,y_test)
score

0.92

In [20]:
classification_report(y_pred,y_test)

'              precision    recall  f1-score   support\n\n           0       0.93      0.93      0.93        14\n           1       0.91      0.91      0.91        11\n\n    accuracy                           0.92        25\n   macro avg       0.92      0.92      0.92        25\nweighted avg       0.92      0.92      0.92        25\n'