In [118]:
import pandas as pd
import numpy as np
import matplotlib.pylab as plt
%matplotlib inline
import seaborn as sns
import matplotlib
# default image size
matplotlib.rcParams['figure.figsize'] = (4,4)
import warnings
warnings.filterwarnings('ignore')

In [119]:
# Importing dataset
from sklearn.datasets import load_iris
data = load_iris()
df = pd.DataFrame(data.data)
df.columns  = data.feature_names
df['iris'] = data.target

In [120]:
df.shape

(150, 5)

In [121]:
df.head(3)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),iris
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0


In [122]:
df.corr()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),iris
sepal length (cm),1.0,-0.11757,0.871754,0.817941,0.782561
sepal width (cm),-0.11757,1.0,-0.42844,-0.366126,-0.426658
petal length (cm),0.871754,-0.42844,1.0,0.962865,0.949035
petal width (cm),0.817941,-0.366126,0.962865,1.0,0.956547
iris,0.782561,-0.426658,0.949035,0.956547,1.0


In [123]:
df.head(1)

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm),iris
0,5.1,3.5,1.4,0.2,0


In [124]:
X = df.iloc[:,:-1]
y = df.iloc[:,-1]

In [125]:
X.head()

Unnamed: 0,sepal length (cm),sepal width (cm),petal length (cm),petal width (cm)
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2
2,4.7,3.2,1.3,0.2
3,4.6,3.1,1.5,0.2
4,5.0,3.6,1.4,0.2


In [126]:
y.value_counts()
# multi-class Classification

iris
0    50
1    50
2    50
Name: count, dtype: int64

In [127]:
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression
lr = LogisticRegression()
cvs = cross_val_score(lr,X,y,scoring='accuracy',cv = 10)
print(cvs)

[1.         0.93333333 1.         1.         0.93333333 0.93333333
 0.93333333 1.         1.         1.        ]


In [128]:
print(np.mean(cvs)) # Getting a avg of 97%, Simple model to see the model performance

0.9733333333333334


Building a Log Regression 

In [129]:
from sklearn.model_selection import train_test_split as tts
# here ill use X_train and y_train
X_train,X_test,y_train,y_test = tts(X,y,test_size=0.22,random_state=143)
print(f"X_train : {X_train.shape},X_test : {X_test.shape}, y_train :{y_train.shape},y_test : {y_test.shape}")

X_train : (117, 4),X_test : (33, 4), y_train :(117,),y_test : (33,)


In [130]:
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

In [131]:
lr.fit(X_train,y_train)

In [132]:
lr.predict(X_test)

array([1, 2, 0, 2, 0, 2, 2, 2, 1, 0, 0, 2, 0, 1, 2, 1, 0, 2, 1, 0, 2, 2,
       0, 0, 0, 0, 1, 1, 2, 0, 1, 0, 1])

In [133]:
from sklearn.metrics import accuracy_score,confusion_matrix
ac = accuracy_score(y_test,lr.predict(X_test))
print(ac)

0.9393939393939394


In [134]:
cm = confusion_matrix(y_test,lr.predict(X_test))
print(cm)

[[13  0  0]
 [ 0  7  0]
 [ 0  2 11]]


Hyperparameter , even classification have l1, l2 know as penalty

In [135]:
from sklearn.model_selection import GridSearchCV
# Log Regression can be learnt from two ways geometric and probablity
# since we already have X,y splits

In [136]:
# earlier in Regression is was only one parameter so we took one dict now we have two 
param = [{'C' : [1,5,10]},{'max_iter' : [50,100,150]}]

In [137]:
model1 = LogisticRegression(C = 100, max_iter=100)
# This is the default value we are giving 
# since we dont know what is TP,TN is impt we are using f1
m1 = GridSearchCV(model1,param_grid=param,scoring='f1',cv=10)
print(m1)

GridSearchCV(cv=10, estimator=LogisticRegression(C=100),
             param_grid=[{'C': [1, 5, 10]}, {'max_iter': [50, 100, 150]}],
             scoring='f1')


In [138]:
m1.fit(X_train,y_train)

In [139]:
print(m1.best_score_)

nan


In [140]:
m1.predict(X_test)

array([1, 2, 0, 2, 0, 2, 2, 2, 1, 0, 0, 2, 0, 1, 2, 1, 0, 2, 1, 0, 2, 2,
       0, 0, 0, 0, 1, 1, 2, 0, 1, 0, 1])

In [143]:
from sklearn.metrics import accuracy_score,classification_report
ac = accuracy_score(y_test,m1.predict(X_test))
print(ac)

0.9393939393939394


In [144]:
cl = classification_report(y_test,m1.predict(X_test))
print(cl)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        13
           1       0.78      1.00      0.88         7
           2       1.00      0.85      0.92        13

    accuracy                           0.94        33
   macro avg       0.93      0.95      0.93        33
weighted avg       0.95      0.94      0.94        33

