In [1]:
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Sklearn Methoden
from sklearn.datasets import load_wine
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# CrossValidation mit KFold und GridSearch
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import GridSearchCV

In [2]:
# Dataset for Classification
dataset = load_wine()
x = dataset.data
y = dataset.target

print(x.shape, y.shape)

(178, 13) (178,)


In [3]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

## KNeighborsClassifier

In [4]:
parameters = {'n_neighbors': [3, 5, 7, 9, 11], 
              'weights': ['uniform', 'distance'], 
              'p': [1, 2]}
neigh = KNeighborsClassifier()

clf = GridSearchCV(neigh, parameters, cv=3)
clf.fit(x_train, y_train)

print("Best params:")
print(clf.best_params_)
print(clf.best_score_)

Best params:
{'n_neighbors': 5, 'p': 1, 'weights': 'distance'}
0.7500967866821525


In [5]:
neigh = KNeighborsClassifier(n_neighbors=5, p=1, weights="distance")
neigh.fit(x_train, y_train)
score = neigh.score(x_test, y_test)
print("Test score: ", score)

Test score:  0.7777777777777778


## DecisionTreeClassifier

In [6]:
parameters = {"criterion": ["gini", "entropy"], 
              "splitter": ["best", "random"], 
              "max_depth": [None, 3, 5, 7, 8, 9, 10, 11, 12],
             "max_features": [None, "auto", "sqrt", "log2"]}
dec_tree = DecisionTreeClassifier()

clf = GridSearchCV(dec_tree, parameters, cv=3)
clf.fit(x_train, y_train)

print("Best params:")
print(clf.best_params_)
print(clf.best_score_)

Best params:
{'criterion': 'entropy', 'max_depth': 12, 'max_features': 'auto', 'splitter': 'best'}
0.9355400696864112


In [7]:
dt = DecisionTreeClassifier(criterion="gini", max_depth=7, max_features=None, splitter="random")
dt.fit(x_train, y_train)
score = dt.score(x_test, y_test)
print("Test score: ", score)

Test score:  0.9259259259259259


## RandomForestClassifier

In [8]:
from sklearn.ensemble import RandomForestClassifier

parameters = {"n_estimators": [10, 20, 40, 80, 100, 120], 
              "criterion": ["gini", "entropy"], 
              "max_depth": [None, 3, 5, 7, 8, 9, 10, 11, 12],
             "max_features": [None, "auto", "sqrt", "log2"]}

random_forest = RandomForestClassifier()

clf = GridSearchCV(random_forest, parameters, cv=3, n_jobs=-1)
clf.fit(x_train, y_train)

print("Best params:")
print(clf.best_params_)
print(clf.best_score_)

Best params:
{'criterion': 'gini', 'max_depth': 8, 'max_features': 'sqrt', 'n_estimators': 20}
0.983933410762679


In [11]:
dt = RandomForestClassifier(
    criterion="gini",
    max_depth=8,
    max_features='sqrt',
    n_estimators=20
)
dt.fit(x_train, y_train)
score = dt.score(x_test, y_test)
print("Test score: ", score)

Test score:  1.0
