In [0]:
import pandas as pd
import numpy as np

from sklearn import preprocessing

from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsClassifier
from sklearn.metrics import accuracy_score

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import GridSearchCV

import matplotlib.pyplot as plt

In [0]:
# Load the mushroom dataset into a DataFrame; the relative path means the CSV
# must sit in the notebook's working directory.
df = pd.read_csv('mushrooms.csv')


# One Hot Encoding - with 'odor' feature

In [52]:
# One-hot encode every predictor column. 'class' is the target and is excluded;
# 'veil-type' is dropped as well (presumably uninformative -- TODO confirm).
x_o = pd.get_dummies(df.drop(columns=['veil-type', 'class']))

# Build the target: one-hot encode 'class', discard the 'p' indicator and
# flatten what remains into a 1-D array for scikit-learn.
a = pd.get_dummies(df['class'])
y_o = a.drop(columns=['p']).to_numpy().ravel()

# Display the target created in this cell. The previous `print(y)` referred to
# a name that is only assigned in the later "without 'odor'" section, so a
# fresh Restart & Run All failed here with NameError.
print(y_o)

[0 1 1 ... 1 0 1]


# Split Train - Test - with 'odor' feature

In [0]:
# Hold out 33% of the rows for testing; the fixed seed keeps the split repeatable.
(x_train_o, x_test_o,
 y_train_o, y_test_o) = train_test_split(
    x_o,
    y_o,
    test_size=0.33,
    random_state=0,
)

# Looking for the best parameters - with 'odor' feature

In [55]:
# Hyper-parameter grid for k-nearest-neighbours.
#   n_neighbors : 2 .. 100
#   p           : Minkowski power (1 = Manhattan, 2 = Euclidean)
#   weights     : uniform vote vs. inverse-distance vote
# `p` used to be `range(1, 2)`, which contains only 1, so the Euclidean metric
# was never evaluated even though the radius-neighbours search in this notebook
# tries both. Listing the values explicitly fixes that off-by-one.
params = {"n_neighbors": range(2, 101),
          "p": [1, 2],
          "weights": ["uniform", "distance"]}

knn = KNeighborsClassifier()

# Exhaustive search over the grid, scored with 5-fold cross-validation on the
# training split only (the test split stays untouched).
knn_classifier_search = GridSearchCV(knn, params, cv=5)
knn_classifier_search.fit(x_train_o, y_train_o)
# Last expression of the cell: rendered as the cell output.
knn_classifier_search.best_params_

{'n_neighbors': 2, 'p': 1, 'weights': 'uniform'}

In [56]:
# Rebuild a k-NN model from the winning grid-search settings (the grid's keys
# are exactly n_neighbors, p and weights), train it on the full training split
# and report its accuracy on the held-out test split.
best_knn_classifier = KNeighborsClassifier(**knn_classifier_search.best_params_)
best_knn_classifier.fit(x_train_o, y_train_o)

# Accuracy via the estimator's own scorer ...
print(best_knn_classifier.score(x_test_o, y_test_o))

# ... and again from the explicit predictions, which are also displayed.
y_pred_o = best_knn_classifier.predict(x_test_o)
print(y_pred_o)
print(accuracy_score(y_test_o, y_pred_o))

1.0
[0 1 1 ... 0 1 0]
1.0


In [67]:
# Hand-rolled grid search for RadiusNeighborsClassifier, ranked by the mean of
# the 5-fold cross-validation scores on the training split.
best_score_r = 0.0
best_params_r = {}

for step in range(1, 51):
    radius = step / 20  # 0.05, 0.10, ..., 2.50
    for weighting in ("uniform", "distance"):
        for power in (1, 2):
            classifier = RadiusNeighborsClassifier(radius=radius,
                                                   p=power,
                                                   weights=weighting)
            try:
                score = cross_val_score(classifier, x_train_o, y_train_o, cv=5).mean()
            except ValueError:
                # A configuration that fails (presumably because some sample has
                # no neighbour inside the radius) is scored 0 so it cannot win.
                score = 0.0
            # Strict '>' keeps the first configuration that reaches a given score.
            if score > best_score_r:
                best_score_r = score
                best_params_r.update(radius=radius, weights=weighting, p=power)
print(best_score_r)
print(best_params_r)

1.0
{'radius': 2.0, 'weights': 'uniform', 'p': 2}


In [68]:
# Train a radius-neighbours model with the best settings found by the manual
# search, then evaluate it on the held-out test split.
classifier_r = RadiusNeighborsClassifier(
    radius=best_params_r["radius"],
    p=best_params_r["p"],
    weights=best_params_r["weights"],
)
classifier_r.fit(x_train_o, y_train_o)

y_predr_o = classifier_r.predict(x_test_o)
print(y_predr_o)
print(accuracy_score(y_test_o, y_predr_o))

[0 1 1 ... 0 1 0]
1.0


# One Hot Encoding - without 'odor' feature

In [58]:
# One-hot encode the predictors, this time also leaving out 'odor' (together
# with the target 'class' and the 'veil-type' column).
x = pd.get_dummies(
    df.drop(columns=['veil-type', 'class', 'odor'])
)

# Target vector: one-hot encode 'class', drop the 'p' indicator and flatten
# the remainder to 1-D.
a = pd.get_dummies(df['class'])
y = a.drop(columns=['p']).to_numpy().ravel()
print(y)

[0 1 1 ... 1 0 1]


# Split Train - Test - without 'odor' feature

In [0]:
# Hold out 33% of the rows for testing; the fixed seed keeps the split repeatable.
(x_train, x_test,
 y_train, y_test) = train_test_split(
    x,
    y,
    test_size=0.33,
    random_state=0,
)

# Looking for the best parameters - without 'odor' feature

In [60]:
# Hyper-parameter grid for k-nearest-neighbours on the data without 'odor'.
#   n_neighbors : 2 .. 100
#   p           : Minkowski power (1 = Manhattan, 2 = Euclidean)
#   weights     : uniform vote vs. inverse-distance vote
# `p` used to be `range(1, 2)`, which contains only 1, so the Euclidean metric
# was never evaluated even though the radius-neighbours search in this notebook
# tries both. Listing the values explicitly fixes that off-by-one.
params = {"n_neighbors": range(2, 101),
          "p": [1, 2],
          "weights": ["uniform", "distance"]}

knn = KNeighborsClassifier()

# Exhaustive search over the grid, scored with 5-fold cross-validation on the
# training split only (the test split stays untouched).
knn_classifier_search = GridSearchCV(knn, params, cv=5)
knn_classifier_search.fit(x_train, y_train)
# Last expression of the cell: rendered as the cell output.
knn_classifier_search.best_params_

{'n_neighbors': 2, 'p': 1, 'weights': 'uniform'}

In [62]:
# Rebuild a k-NN model from the winning grid-search settings (the grid's keys
# are exactly n_neighbors, p and weights), train it on the full training split
# and report its accuracy on the held-out test split.
best_knn_classifier = KNeighborsClassifier(**knn_classifier_search.best_params_)
best_knn_classifier.fit(x_train, y_train)

# Accuracy via the estimator's own scorer ...
print(best_knn_classifier.score(x_test, y_test))

# ... and again from the explicit predictions, which are also displayed.
y_pred = best_knn_classifier.predict(x_test)
print(y_pred)
print(accuracy_score(y_test, y_pred))

1.0
[0 1 1 ... 0 1 0]
1.0


In [69]:
# Hand-rolled grid search for RadiusNeighborsClassifier on the data without
# 'odor', ranked by the mean of the 5-fold cross-validation scores.
best_score_r = 0.0
best_params_r = {}

for step in range(1, 51):
    radius = step / 20  # 0.05, 0.10, ..., 2.50
    for weighting in ("uniform", "distance"):
        for power in (1, 2):
            classifier = RadiusNeighborsClassifier(radius=radius,
                                                   p=power,
                                                   weights=weighting)
            try:
                score = cross_val_score(classifier, x_train, y_train, cv=5).mean()
            except ValueError:
                # A configuration that fails (presumably because some sample has
                # no neighbour inside the radius) is scored 0 so it cannot win.
                score = 0.0
            # Strict '>' keeps the first configuration that reaches a given score.
            if score > best_score_r:
                best_score_r = score
                best_params_r.update(radius=radius, weights=weighting, p=power)
print(best_score_r)
print(best_params_r)

1.0
{'radius': 2.0, 'weights': 'uniform', 'p': 2}


In [71]:
# Train a radius-neighbours model with the best settings found by the manual
# search, then evaluate it on the held-out test split.
classifier_r = RadiusNeighborsClassifier(
    radius=best_params_r["radius"],
    p=best_params_r["p"],
    weights=best_params_r["weights"],
)
classifier_r.fit(x_train, y_train)

y_predr = classifier_r.predict(x_test)
print(y_predr)
print(accuracy_score(y_test, y_predr))

[0 1 1 ... 0 1 0]
1.0
