# Hyperparameter Tuning

In [179]:
import pandas as pd
import warnings
warnings.filterwarnings(action='ignore')

In [180]:
#import data
df = pd.read_csv('iris.csv')

In [181]:
df.head()

Unnamed: 0,Id,SepalLengthCm,SepalWidthCm,PetalLengthCm,PetalWidthCm,Species
0,1,5.1,3.5,1.4,0.2,Iris-setosa
1,2,4.9,3.0,1.4,0.2,Iris-setosa
2,3,4.7,3.2,1.3,0.2,Iris-setosa
3,4,4.6,3.1,1.5,0.2,Iris-setosa
4,5,5.0,3.6,1.4,0.2,Iris-setosa


In [182]:
df.Species.value_counts() #Class distribution

Species
Iris-setosa        50
Iris-versicolor    50
Iris-virginica     50
Name: count, dtype: int64

In [183]:
#Split data into attributes and class
y = df.Species
X = df.drop(columns=["Species","Id"])

In [184]:
#perform training and test split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42,stratify=df.Species)

In [185]:
#Logistic regression
from sklearn.linear_model import LogisticRegression

In [186]:
clf = LogisticRegression()
clf.fit(X_train,y_train)

In [187]:
y_pred = clf.predict(X_test)
# Model Evaluation metrics 
from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score
print('Accuracy Score : ' + str(accuracy_score(y_test,y_pred)))
print('Precision Score : ' + str(precision_score(y_test,y_pred,average="weighted")))
print('Recall Score : ' + str(recall_score(y_test,y_pred,average="weighted")))
print('F1 Score : ' + str(f1_score(y_test,y_pred,average="weighted")))

Accuracy Score : 0.9473684210526315
Precision Score : 0.9473684210526315
Recall Score : 0.9473684210526315
F1 Score : 0.9473684210526315


##### Grid Search to maximize Recall

In [188]:
#Grid Search
from sklearn.neighbors import KNeighborsClassifier

In [189]:
clf = KNeighborsClassifier()

In [190]:
grid_values = {
    'n_neighbors': [3,5,7],
    'weights':['uniform', 'distance'],
              }

In [191]:
grid_clf_acc = GridSearchCV(clf, param_grid = grid_values,
                            scoring = 'accuracy',
                           verbose=3) # try for accuracy, precision, f1

In [192]:
grid_clf_acc.fit(X_train, y_train)

Fitting 5 folds for each of 6 candidates, totalling 30 fits
[CV 1/5] END ....n_neighbors=3, weights=uniform;, score=1.000 total time=   0.0s
[CV 2/5] END ....n_neighbors=3, weights=uniform;, score=0.957 total time=   0.0s
[CV 3/5] END ....n_neighbors=3, weights=uniform;, score=0.909 total time=   0.0s
[CV 4/5] END ....n_neighbors=3, weights=uniform;, score=1.000 total time=   0.0s
[CV 5/5] END ....n_neighbors=3, weights=uniform;, score=0.909 total time=   0.0s
[CV 1/5] END ...n_neighbors=3, weights=distance;, score=1.000 total time=   0.0s
[CV 2/5] END ...n_neighbors=3, weights=distance;, score=0.957 total time=   0.0s
[CV 3/5] END ...n_neighbors=3, weights=distance;, score=0.909 total time=   0.0s
[CV 4/5] END ...n_neighbors=3, weights=distance;, score=1.000 total time=   0.0s
[CV 5/5] END ...n_neighbors=3, weights=distance;, score=0.909 total time=   0.0s
[CV 1/5] END ....n_neighbors=5, weights=uniform;, score=0.957 total time=   0.0s
[CV 2/5] END ....n_neighbors=5, weights=uniform;,

In [193]:
#Predict values based on new parameters
y_pred_acc = grid_clf_acc.predict(X_test)

In [194]:
# New Model Evaluation metrics 
print('Accuracy Score : ' + str(accuracy_score(y_test,y_pred_acc)))
print('Precision Score : ' + str(precision_score(y_test,y_pred_acc,average="weighted")))
print('Recall Score : ' + str(recall_score(y_test,y_pred_acc,average="weighted")))
print('F1 Score : ' + str(f1_score(y_test,y_pred_acc,average="weighted")))

Accuracy Score : 0.9473684210526315
Precision Score : 0.9543859649122807
Recall Score : 0.9473684210526315
F1 Score : 0.9470551378446114


In [195]:
grid_clf_acc.best_params_

{'n_neighbors': 7, 'weights': 'uniform'}

In [196]:
grid_clf_acc.best_estimator_

In [197]:
model = KNeighborsClassifier(n_neighbors=3, weights='uniform')

In [198]:
model.fit(X_train, y_train)

In [199]:
model.score(X_train, y_train)

0.9553571428571429