In [None]:
import sklearn.datasets
from sklearn import neighbors, tree
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score, roc_auc_score, precision_score, recall_score, f1_score
from sklearn.metrics import confusion_matrix, classification_report
import matplotlib.pyplot as plt 
from matplotlib.colors import ListedColormap
import numpy as np
import itertools as it

In [None]:
def draw_confusion_matrix(y_test, y_pred):
    """Plot the confusion matrix of predictions against the true labels.

    Parameters
    ----------
    y_test : array-like
        True class labels.
    y_pred : array-like
        Predicted class labels, in the same order as ``y_test``.

    The matrix is rendered as a color-coded grid with a colorbar. Rows are
    the true classes and columns are the predicted classes, which is the
    layout ``confusion_matrix`` returns.
    """
    cm = confusion_matrix(y_test, y_pred)
    plt.matshow(cm)
    plt.colorbar()
    # Label the axes so the figure is readable on its own: matshow draws
    # matrix rows along the y-axis and columns along the x-axis.
    plt.xlabel("Predicted label")
    plt.ylabel("True label")
    plt.title("Confusion matrix")
    plt.show()

In [None]:
def draw_scatter(data, labels, x_feature, y_feature, feature_names=None):
    """Scatter-plot two feature columns of ``data``, colored by ``labels``.

    Parameters
    ----------
    data : 2-D array
        Sample matrix, indexed as ``data[:, column]``.
    labels : array-like
        Per-sample values used to color the points.
    x_feature, y_feature : int
        Column indices of the features shown on the x and y axes.
    feature_names : sequence of str, optional
        Axis-label text, indexed by column. When omitted, the notebook-level
        ``iris.feature_names`` is used, so existing calls keep working.
    """
    if feature_names is None:
        # Backward-compatible default: fall back to the global iris dataset.
        feature_names = iris.feature_names
    plt.scatter(data[:, x_feature], data[:, y_feature], c=labels, alpha=0.5, s=100)
    plt.xlabel(feature_names[x_feature])
    plt.ylabel(feature_names[y_feature])
    plt.show()

In [None]:
# Load the example dataset: the Iris dataset shipped with scikit-learn.
# The cells below read its .data, .target, .feature_names and .target_names.
iris = sklearn.datasets.load_iris()

In [None]:
# Draw one scatter plot for every unordered pair of features.
indexed_names = enumerate(iris.feature_names)
for (x_idx, x_name), (y_idx, y_name) in it.combinations(indexed_names, 2):
    # Show which (index, name) pair is being plotted.
    print((x_idx, x_name), (y_idx, y_name))
    draw_scatter(iris.data, iris.target, x_idx, y_idx)

In [None]:
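# Alternative model: uncomment the line below (and skip the K-NN cell that
# follows) to run the same evaluation with a Decision Tree classifier.
# clf = tree.DecisionTreeClassifier()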

In [None]:
# Instantiate the K-NN classifier that the cross-validation cell fits.
# n_neighbors=1: each prediction takes the label of the single nearest training sample.
clf = neighbors.KNeighborsClassifier(n_neighbors=1)

In [None]:
# Display the classifier's repr to confirm how it is configured.
clf

In [None]:
# Inspect the dataset. Only the last expression of a cell is displayed
# automatically, so the feature matrix is printed explicitly here; a short
# preview is enough to see its layout.
print(iris.feature_names)
print(iris.data[:5])
print(iris.target_names)
iris.target

In [None]:
# K-fold construction. A fixed random_state makes the shuffled splits, and
# therefore every score printed below, reproducible across kernel restarts.
kf = KFold(n_splits=5, shuffle=True, random_state=42)

# Passing the full label set keeps classification_report aligned with
# target_names even if a fold's test split happens to miss a class.
class_labels = np.arange(len(iris.target_names))
fold_accuracies = []

# K-fold cross validation and performance evaluation
for train_index, test_index in kf.split(iris.data):
    print("TRAIN:", len(train_index), "TEST:", len(test_index))
    X_train, X_test = iris.data[train_index], iris.data[test_index]
    y_train, y_test = iris.target[train_index], iris.target[test_index]
    # draw_scatter(X_test, y_test, x_feature=2, y_feature=3)

    # fit() retrains the classifier from scratch on this fold's training split.
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print('Prediction: ', y_pred)
    print('Correct:    ', y_test)

    # scikit-learn metrics take (y_true, y_pred) in that order.
    fold_accuracy = accuracy_score(y_test, y_pred)
    fold_accuracies.append(fold_accuracy)
    print('Accuracy:', fold_accuracy)
    print(classification_report(y_test, y_pred, labels=class_labels,
                                target_names=iris.target_names))
    # draw_confusion_matrix(y_test, y_pred)

# Summarize the cross-validation with the average score over all folds.
print('Mean accuracy over folds:', np.mean(fold_accuracies))


In [None]:
# Sources:
# http://www.astro.washington.edu/users/vanderplas/Astr599/notebooks/18_IntermediateSklearn
# http://matplotlib.org/examples/pylab_examples/scatter_star_poly.html
# http://scikit-learn.org/stable/auto_examples/cluster/plot_cluster_iris.html
# http://scikit-learn.org/stable/auto_examples/plot_confusion_matrix.html#example-plot-confusion-matrix-py
# https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.KFold.html
# http://scikit-learn.org/stable/tutorial/statistical_inference/supervised_learning.html