In [2]:
# Imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Sklearn Methoden
from sklearn.datasets import load_wine
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

# CrossValidation mit KFold und GridSearch
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import cross_val_predict
from sklearn.model_selection import GridSearchCV

In [3]:
# Dataset for Classification
dataset = load_wine()
x = dataset.data
y = dataset.target

print(x.shape, y.shape)

(178, 13) (178,)


In [4]:
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

## KNeighborsClassifier

In [10]:
parameters = {'n_neighbors': [3, 5, 7, 9, 11], 
              'weights': ['uniform', 'distance'], 
              'p': [1, 2]}
neigh = KNeighborsClassifier()

clf = GridSearchCV(neigh, parameters, cv=3)
clf.fit(x_train, y_train)

print("Best params:")
print(clf.best_params_)
print(clf.best_score_)

Best params:
{'n_neighbors': 5, 'p': 1, 'weights': 'distance'}
0.7419354838709677




In [9]:
neigh = KNeighborsClassifier(n_neighbors=5, p=1, weights="distance")
neigh.fit(x_train, y_train)
score = neigh.score(x_test, y_test)
print("Test score: ", score)

Test score:  0.7777777777777778


## DecisionTreeClassifier

In [30]:
parameters = {"criterion": ["gini", "entropy"],
              "max_depth": [None, 3, 5, 7, 8, 9, 10, 11, 12]}
dec_tree = DecisionTreeClassifier()

clf = GridSearchCV(dec_tree, parameters, cv=3)
clf.fit(x_train, y_train)

print("Best params:")
print(clf.best_params_)
print(clf.best_score_)

Best params:
{'criterion': 'gini', 'max_depth': 8}
0.9193548387096774




In [31]:
dt = DecisionTreeClassifier(criterion="gini", max_depth=11, max_features="auto")
dt.fit(x_train, y_train)
score = dt.score(x_test, y_test)
print("Test score: ", score)

Test score:  0.9074074074074074


## RandomForestClassifier

In [32]:
from sklearn.ensemble import RandomForestClassifier

parameters = {"n_estimators": range(2, 20),
            "criterion": ["gini", "entropy"]}
random_forest = RandomForestClassifier()

clf = GridSearchCV(random_forest, parameters, cv=3, n_jobs=-1)
clf.fit(x_train, y_train)

print("Best params:")
print(clf.best_params_)
print(clf.best_score_)

Best params:
{'criterion': 'gini', 'n_estimators': 12}
0.9758064516129032




In [33]:
random_forest = RandomForestClassifier(criterion="gini", n_estimators=12)
random_forest.fit(x_train, y_train)
score = random_forest.score(x_test, y_test)
print("Test score: ", score)

Test score:  0.9814814814814815
