In [1]:
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.preprocessing import MinMaxScaler
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.tree import DecisionTreeClassifier

In [2]:
# Load the iris dataset bundle; later cells read its "data", "feature_names"
# and "target" entries.
iris_dict = load_iris()

In [3]:
# Build the full feature table and label vector, then keep only the rows whose
# label is 0 or 1 so the task is binary.
# Filtering by label (instead of slicing the first 100 rows by position) does
# not depend on the dataset being stored sorted by class.
features_all = pd.DataFrame(iris_dict["data"], columns=iris_dict["feature_names"])
target_all = pd.Series(iris_dict["target"])
is_binary_class = target_all.isin([0, 1])

x = features_all.loc[is_binary_class]
y = target_all.loc[is_binary_class]

In [4]:
# Split the raw features first so the held-out rows never influence the scaler.
# The same random_state and test_size are used, so the row assignment to
# train/test is unchanged.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    x, y, random_state=42, test_size=0.30
)

# Learn the min/max statistics from the training rows only (target range
# [1, 2]), then apply that same fitted transform to every split. Test values
# may fall slightly outside [1, 2]; that is expected and avoids leakage.
min_max_scaler = MinMaxScaler(feature_range=(1, 2))
x_train = min_max_scaler.fit_transform(x_train_raw)
x_test = min_max_scaler.transform(x_test_raw)

# Kept for backward compatibility: all rows scaled with the train-fitted scaler.
scaled_x = min_max_scaler.transform(x)

In [6]:
# Baseline k-nearest-neighbours classifier with default settings.
knn_model = KNeighborsClassifier()
knn_model = knn_model.fit(x_train, y_train)

In [8]:
# Predict labels for the training split and the held-out split, in that order.
predicted_train_y, predicted_test_y = (
    knn_model.predict(features) for features in (x_train, x_test)
)

In [9]:
# Confusion matrix of the baseline KNN on the training split.
confusion_matrix(y_train, predicted_train_y)

array([[33,  0],
       [ 0, 37]], dtype=int64)

In [11]:
# Per-class precision / recall / F1 of the baseline KNN on the training split.
print(classification_report(y_train, predicted_train_y))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       1.00      1.00      1.00        37

    accuracy                           1.00        70
   macro avg       1.00      1.00      1.00        70
weighted avg       1.00      1.00      1.00        70



In [12]:
# Confusion matrix of the baseline KNN on the held-out split.
confusion_matrix(y_test, predicted_test_y)

array([[17,  0],
       [ 0, 13]], dtype=int64)

In [14]:
# Per-class precision / recall / F1 of the baseline KNN on the held-out split.
print(classification_report(y_test, predicted_test_y))


              precision    recall  f1-score   support

           0       1.00      1.00      1.00        17
           1       1.00      1.00      1.00        13

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [7]:
# GridSearchCV is imported in the notebook's top import cell rather than here,
# so this cell contains only the search configuration.
knn_classifier = KNeighborsClassifier()

# Candidate neighbourhood sizes tried exhaustively by the grid search.
parameters = {
    "n_neighbors": [1, 2, 3, 4, 50],
}

# 5-fold cross-validated search, scored by F1, run on all available cores.
clf = GridSearchCV(
    estimator=knn_classifier,
    param_grid=parameters,
    cv=5,
    n_jobs=-1,
    scoring="f1",
)

In [8]:
# Run the grid search on the training split; fit() returns the search object
# itself, so the fitted search is what gets bound to KNN_TUNED_MODEL.
clf.fit(x_train, y_train)
KNN_TUNED_MODEL = clf

In [9]:
# Show the n_neighbors setting selected by the grid search.
KNN_TUNED_MODEL.best_params_

{'n_neighbors': 1}

In [10]:
# Show the cross-validated score (scoring="f1") of the selected setting.
KNN_TUNED_MODEL.best_score_

1.0

In [11]:
# The fitted search (refit left at its default) forwards predict() to its best
# estimator, so this yields the tuned model's training-split predictions.
pred_y = KNN_TUNED_MODEL.predict(x_train)

In [12]:
# Training-split report for the tuned KNN.
print(classification_report(y_true=y_train, y_pred=pred_y))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       1.00      1.00      1.00        37

    accuracy                           1.00        70
   macro avg       1.00      1.00      1.00        70
weighted avg       1.00      1.00      1.00        70



In [13]:
# Held-out predictions from the tuned KNN; the fitted search delegates
# predict() to its best estimator.
pred_y_test = KNN_TUNED_MODEL.predict(x_test)

In [14]:
# Held-out report for the tuned KNN.
print(classification_report(y_true=y_test, y_pred=pred_y_test))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        17
           1       1.00      1.00      1.00        13

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30



In [15]:
# RandomizedSearchCV is imported in the notebook's top import cell.
knn_classifier = KNeighborsClassifier()

# Pool of neighbourhood sizes the randomized search samples from.
parameters = {
    "n_neighbors": [1, 2, 3, 4, 50],
}

# n_iter=2 evaluates only two of the five candidates. Pinning random_state
# makes that draw, and therefore best_params_, identical on every run;
# without it the selected value changes between executions.
clf = RandomizedSearchCV(
    estimator=knn_classifier,
    param_distributions=parameters,
    cv=5,
    n_jobs=-1,
    scoring="f1",
    n_iter=2,
    random_state=42,
)

In [17]:
# Run the randomized search on the training split; fit() returns the search
# object itself, which is then bound to KNN_RTUNED_MODEL.
clf.fit(x_train, y_train)
KNN_RTUNED_MODEL = clf

In [18]:
# Show the n_neighbors setting selected by the randomized search.
KNN_RTUNED_MODEL.best_params_

{'n_neighbors': 50}

In [21]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import GridSearchCV

In [23]:
# Decision tree classifier with its random_state pinned to 0.
dt_classifier = DecisionTreeClassifier(random_state=0)

In [26]:
# Search space for the tree: split criterion crossed with maximum depth.
parameters = {"criterion": ["gini", "entropy"], "max_depth": [2, 3, 4, 5]}

# Exhaustive 5-fold search over that space, scored by F1.
clf = GridSearchCV(
    dt_classifier,
    parameters,
    scoring="f1",
    n_jobs=-1,
    cv=5,
)

In [29]:
# Fit the decision-tree grid search on the training split; fit() returns the
# search object itself, which is then bound to dt_model.
clf.fit(x_train, y_train)
dt_model = clf

In [31]:
# Display the estimator the fitted search exposes as best_estimator_.
dt_model.best_estimator_

In [32]:
# Show the criterion / max_depth combination selected by the grid search.
dt_model.best_params_

{'criterion': 'gini', 'max_depth': 2}

In [33]:
# Training-split predictions from the tuned tree; the fitted search delegates
# predict() to its best estimator.
pred_y = dt_model.predict(x_train)

In [34]:
# Training-split report for the tuned decision tree.
print(classification_report(y_true=y_train, y_pred=pred_y))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        33
           1       1.00      1.00      1.00        37

    accuracy                           1.00        70
   macro avg       1.00      1.00      1.00        70
weighted avg       1.00      1.00      1.00        70



In [35]:
# Held-out predictions from the tuned tree; the fitted search delegates
# predict() to its best estimator.
pred_y_test = dt_model.predict(x_test)

In [36]:
# Held-out report for the tuned decision tree.
print(classification_report(y_true=y_test, y_pred=pred_y_test))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        17
           1       1.00      1.00      1.00        13

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

