In [16]:
import pandas as pd
import numpy as np

from sklearn import preprocessing

from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier, RadiusNeighborsClassifier
from sklearn.metrics import accuracy_score

from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.model_selection import GridSearchCV

import matplotlib.pyplot as plt

In [2]:
# Load the mushroom data set; later cells rely on its 'class', 'veil-type' and 'odor' columns.
df = pd.read_csv('../mushrooms.csv')


# One Hot Encoding - with 'odor' feature

In [3]:
# One-hot encode every predictor column; 'veil-type' and the 'class' target are left out.
x = pd.get_dummies(df.drop(columns=['veil-type', 'class']))
# Keep the raw 'class' values as the label array.
y = df['class'].to_numpy()

In [12]:
# One-hot encode the predictors ('veil-type' and the 'class' target are excluded).
x = pd.get_dummies(df.drop(['veil-type', 'class'], axis=1))
# Encode the target as integer indicator columns. The dtype is pinned because
# pandas >= 2.0 returns boolean dummies by default, which would turn the labels
# into True/False instead of the 0/1 values shown in this cell's recorded output.
a = pd.get_dummies(df['class'], dtype=np.uint8)
# Dropping the 'p' indicator leaves the remaining class column; ravel() flattens it
# into a 1-D label vector (this assumes 'class' holds exactly two distinct values).
y = a.drop(['p'], axis=1).to_numpy().ravel()
print(y)

[0 1 1 ... 1 0 1]


# Split Train - Test - with 'odor' feature

In [13]:
# Hold out 33% of the rows for testing; the fixed random_state keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.33,
    random_state=0,
)

# Looking for the best parameters - with 'odor' feature

In [20]:
# Grid-search the k-NN hyperparameters with 5-fold cross-validation on the training split.
params = {"n_neighbors": range(2, 11),
          # range(1, 2) only ever yields p=1, so p=2 was silently never searched;
          # list both Minkowski powers explicitly, as the manual searches below do.
          "p": [1, 2],
          "weights": ["uniform", "distance"]}

knn = KNeighborsClassifier()

# NOTE: the variable keeps its existing "rf_" name, but it wraps the k-NN estimator above.
# n_jobs=-1 spreads the candidate fits over all CPU cores; the selected parameters
# are not affected by the parallelism.
rf_classifier_search = GridSearchCV(knn, params, cv=5, n_jobs=-1)
rf_classifier_search.fit(x_train, y_train)
rf_classifier_search.best_params_

KeyboardInterrupt: 

In [14]:
# Exhaustive search over the number of neighbours, the weighting scheme and the
# Minkowski power p, keeping the combination with the highest mean 5-fold
# cross-validation score on the training split.
best_score = 0.0
best_params = {}

for neighbors in range(1, 101):
    for w in ["uniform", "distance"]:
        for p in [1, 2]:
            classifier = KNeighborsClassifier(n_neighbors=neighbors, p=p, weights=w)
            # 400 candidates x 5 folds is slow when evaluated serially; n_jobs=-1 runs
            # the folds in parallel without changing the resulting scores.
            scores = cross_val_score(classifier, x_train, y_train, cv=5, n_jobs=-1)
            score = scores.mean()
            # Strict '>' keeps the first combination that reaches the best score.
            if score > best_score:
                best_score = score
                # Replace the record in one step so it can never hold a mix of
                # values from two different candidates.
                best_params = {"neighbors": neighbors, "weights": w, "p": p}
print(best_score)
print(best_params)

KeyboardInterrupt: 

In [None]:
# Rebuild the k-NN model from the winning search settings, train it on the
# training split and report its accuracy on the held-out test split.
classifier = KNeighborsClassifier(
    weights=best_params["weights"],
    p=best_params["p"],
    n_neighbors=best_params["neighbors"],
).fit(x_train, y_train)
preds = classifier.predict(x_test)
accuracy_score(y_test, preds)

In [None]:
# Exhaustive search for the radius-based classifier: radii 0.05 .. 10.0 in steps
# of 0.05, both weighting schemes and both Minkowski powers. The combination with
# the highest mean 5-fold cross-validation score on the training split is kept.
best_score_r = 0.0
best_params_r = {}

for i in range(1, 201):
    r = i / 20
    for w in ["uniform", "distance"]:
        for p in [1, 2]:
            classifier = RadiusNeighborsClassifier(radius=r, p=p, weights=w)
            try:
                # error_score="raise" makes a failing fold raise instead of being
                # reported as NaN with a warning (the default in recent scikit-learn
                # releases), so the handler below is what actually rejects the
                # candidate, and a bad candidate stops at its first failing fold.
                scores = cross_val_score(classifier, x_train, y_train, cv=5,
                                         error_score="raise")
                score = scores.mean()
            except ValueError:
                # Candidate cannot be evaluated (e.g. a sample has no training
                # neighbours inside the radius): score it as the worst possible.
                score = 0.0
            # Strict '>' keeps the first combination that reaches the best score.
            if score > best_score_r:
                best_score_r = score
                best_params_r = {"radius": r, "weights": w, "p": p}
print(best_score_r)
print(best_params_r)

In [None]:
# Rebuild the radius-based model from the winning search settings, train it on
# the training split and report its accuracy on the held-out test split.
classifier_r = RadiusNeighborsClassifier(
    weights=best_params_r["weights"],
    p=best_params_r["p"],
    radius=best_params_r["radius"],
).fit(x_train, y_train)
preds = classifier_r.predict(x_test)
accuracy_score(y_test, preds)

# One Hot Encoding - without 'odor' feature

In [None]:
# One-hot encode the predictors again, this time also leaving out 'odor'
# (in addition to 'veil-type' and the 'class' target).
x = pd.get_dummies(df.drop(columns=['veil-type', 'class', 'odor']))
# Labels are the raw 'class' values.
y = df['class'].to_numpy()

# Split Train - Test - without 'odor' feature

In [None]:
# Same 67/33 split and seed as before, now applied to the feature matrix without 'odor'.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.33,
    random_state=0,
)

# Looking for the best parameters - without 'odor' feature

In [None]:
# Exhaustive k-NN search on the current training split: number of neighbours,
# weighting scheme and Minkowski power p. The combination with the highest mean
# 5-fold cross-validation score is kept.
best_score = 0.0
best_params = {}

for neighbors in range(1, 101):
    for w in ["uniform", "distance"]:
        for p in [1, 2]:
            classifier = KNeighborsClassifier(n_neighbors=neighbors, p=p, weights=w)
            # 400 candidates x 5 folds is slow when evaluated serially; n_jobs=-1 runs
            # the folds in parallel without changing the resulting scores.
            scores = cross_val_score(classifier, x_train, y_train, cv=5, n_jobs=-1)
            score = scores.mean()
            # Strict '>' keeps the first combination that reaches the best score.
            if score > best_score:
                best_score = score
                # Replace the record in one step so it can never hold a mix of
                # values from two different candidates.
                best_params = {"neighbors": neighbors, "weights": w, "p": p}
print(best_score)
print(best_params)

In [None]:
# Train a k-NN model with the settings selected by the search above and
# measure its accuracy on the held-out test split.
classifier = KNeighborsClassifier(
    weights=best_params["weights"],
    p=best_params["p"],
    n_neighbors=best_params["neighbors"],
).fit(x_train, y_train)
preds = classifier.predict(x_test)
accuracy_score(y_test, preds)

In [None]:
# Exhaustive radius-classifier search on the current training split: radii
# 0.05 .. 10.0 in steps of 0.05, both weighting schemes and both Minkowski powers.
# The combination with the highest mean 5-fold cross-validation score is kept.
best_score_r = 0.0
best_params_r = {}

for i in range(1, 201):
    r = i / 20
    for w in ["uniform", "distance"]:
        for p in [1, 2]:
            classifier = RadiusNeighborsClassifier(radius=r, p=p, weights=w)
            try:
                # error_score="raise" makes a failing fold raise instead of being
                # reported as NaN with a warning (the default in recent scikit-learn
                # releases), so the handler below is what actually rejects the
                # candidate, and a bad candidate stops at its first failing fold.
                scores = cross_val_score(classifier, x_train, y_train, cv=5,
                                         error_score="raise")
                score = scores.mean()
            except ValueError:
                # Candidate cannot be evaluated (e.g. a sample has no training
                # neighbours inside the radius): score it as the worst possible.
                score = 0.0
            # Strict '>' keeps the first combination that reaches the best score.
            if score > best_score_r:
                best_score_r = score
                best_params_r = {"radius": r, "weights": w, "p": p}
print(best_score_r)
print(best_params_r)

In [None]:
# Train the radius-based model with the settings selected by the search above
# and predict the held-out test split.
classifier_r = RadiusNeighborsClassifier(
    weights=best_params_r["weights"],
    p=best_params_r["p"],
    radius=best_params_r["radius"],
).fit(x_train, y_train)
preds = classifier_r.predict(x_test)
accuracy_score(y_test, preds