In [1]:
import numpy as np
import pandas as pd
import seaborn as sns

In [2]:
import warnings
warnings.filterwarnings('ignore')

In [37]:
df = sns.load_dataset('iris')
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [38]:
df['species'].unique()

array(['setosa', 'versicolor', 'virginica'], dtype=object)

In [39]:
df.isnull().sum()

Unnamed: 0,0
sepal_length,0
sepal_width,0
petal_length,0
petal_width,0
species,0


In [40]:
#Logistic Regression works best for binary classification
#But this dataset has 3(multi) classes
#So we've removed setosa rows

df=df[df['species'] != 'setosa']

In [42]:
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
50,7.0,3.2,4.7,1.4,versicolor
51,6.4,3.2,4.5,1.5,versicolor
52,6.9,3.1,4.9,1.5,versicolor
53,5.5,2.3,4.0,1.3,versicolor
54,6.5,2.8,4.6,1.5,versicolor


In [43]:
df['species'] = df['species'].map({'versicolor':0, 'virginica':1})

In [44]:
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
50,7.0,3.2,4.7,1.4,0
51,6.4,3.2,4.5,1.5,0
52,6.9,3.1,4.9,1.5,0
53,5.5,2.3,4.0,1.3,0
54,6.5,2.8,4.6,1.5,0


In [45]:
from google.colab import sheets
sheet = sheets.InteractiveSheet(df=df)

https://docs.google.com/spreadsheets/d/11dTaUaERsSbPFhV8J8zICXSaZJ5S_bhQJWMjbUDaBQg#gid=0


In [46]:
# Split dataset into independent and dependent features
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

In [47]:
X

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
50,7.0,3.2,4.7,1.4
51,6.4,3.2,4.5,1.5
52,6.9,3.1,4.9,1.5
53,5.5,2.3,4.0,1.3
54,6.5,2.8,4.6,1.5
...,...,...,...,...
145,6.7,3.0,5.2,2.3
146,6.3,2.5,5.0,1.9
147,6.5,3.0,5.2,2.0
148,6.2,3.4,5.4,2.3


In [48]:
y

Unnamed: 0,species
50,0
51,0
52,0
53,0
54,0
...,...
145,1
146,1
147,1
148,1


In [49]:
from sklearn.model_selection import train_test_split

In [50]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

In [51]:
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression()

In [52]:
from sklearn.model_selection import GridSearchCV
parameters = {'penalty':['l1', 'l2', 'elasticnet'], 'C':[1,2,3,4,5,6,10,15,20,25,30,40,50], 'max_iter':[100,150,200,300]}

In [53]:
classifier_regressor = GridSearchCV(classifier, param_grid=parameters, scoring='accuracy', cv = 5)

In [54]:
classifier_regressor.fit(X_train, y_train)

In [55]:
print(classifier_regressor.best_params_)

{'C': 1, 'max_iter': 100, 'penalty': 'l2'}


In [56]:
print(classifier_regressor.best_score_)

0.9733333333333334


In [58]:
#prediction
y_pred = classifier_regressor.predict(X_test)

In [59]:
from sklearn.metrics import accuracy_score, classification_report

In [60]:
score = accuracy_score(y_test, y_pred)
print(score)

0.92


In [61]:
classification_report(y_test, y_pred)

'              precision    recall  f1-score   support\n\n           0       0.93      0.93      0.93        14\n           1       0.91      0.91      0.91        11\n\n    accuracy                           0.92        25\n   macro avg       0.92      0.92      0.92        25\nweighted avg       0.92      0.92      0.92        25\n'

In [62]:
df.corr()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
sepal_length,1.0,0.553855,0.828479,0.593709,0.494305
sepal_width,0.553855,1.0,0.519802,0.566203,0.30808
petal_length,0.828479,0.519802,1.0,0.823348,0.786424
petal_width,0.593709,0.566203,0.823348,1.0,0.828129
species,0.494305,0.30808,0.786424,0.828129,1.0
