In [None]:
import time

import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split

from sklearn.neural_network import MLPClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.gaussian_process.kernels import RBF
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

In [None]:
# Display labels, index-aligned with `classifiers` below: entry i of this list
# names entry i of that list.
classifier_names = ["Nearest Neighbors", "Linear SVM", "RBF SVM", "Gaussian Process",
         "Decision Tree", "Random Forest", "Neural Net", "AdaBoost",
         "Naive Bayes", "QDA"]

# One estimator per label above. Estimators that draw random numbers while
# fitting get a fixed `random_state` so that re-running the notebook rebuilds
# the same models instead of producing slightly different scores each time.
classifiers = [
    KNeighborsClassifier(3),
    SVC(kernel="linear", C=0.025),
    SVC(gamma=2, C=1),
    GaussianProcessClassifier(1.0 * RBF(1.0)),
    DecisionTreeClassifier(max_depth=5, random_state=42),
    RandomForestClassifier(max_depth=5, n_estimators=10, max_features=1,
                           random_state=42),
    MLPClassifier(alpha=1, random_state=42),
    AdaBoostClassifier(random_state=42),
    GaussianNB(),
    QuadraticDiscriminantAnalysis()]

# The two lists are walked in parallel with zip(), which silently truncates to
# the shorter one; fail loudly here if they ever drift apart.
assert len(classifier_names) == len(classifiers), \
    "classifier_names and classifiers must have the same length"

In [None]:
def run_classification(clf, clf_name, X_train, X_test, y_train, y_test):
    """Fit ``clf`` on the training split and report its score on the test split.

    Parameters
    ----------
    clf
        Any object exposing ``fit(X, y)`` and ``score(X, y)``.
    clf_name
        Label inserted into the printed report line.
    X_train, y_train
        Forwarded unchanged to ``clf.fit``.
    X_test, y_test
        Forwarded unchanged to ``clf.score``.

    Returns
    -------
    The value returned by ``clf.score(X_test, y_test)``. It is returned in
    addition to being printed so callers can collect and compare results
    programmatically; callers that ignore the return value are unaffected.
    """
    clf.fit(X_train, y_train)
    accuracy = clf.score(X_test, y_test)
    print("{} classifier, accuracy : {}".format(clf_name, accuracy))
    return accuracy

In [None]:
# Load the dataset and prepare it for modelling:
#   * every '?' cell is replaced with -99999 (presumably '?' marks a missing
#     value in the raw file -- TODO confirm against the data description);
#   * the 'id' column is dropped so it cannot end up among the features.
# Built as a chain rather than with inplace=True so the cell reads top-down and
# always rebuilds `df` from the file.
df = (
    pd.read_csv("breast-cancer-wisconsin.data")
    .replace('?', -99999)
    .drop(columns=['id'])
)

# A column that contained '?' is parsed as strings, so after the replacement it
# would still be object-typed (digit strings mixed with the integer -99999).
# Convert the feature columns to real numeric dtypes; pd.to_numeric raises on
# any other non-numeric value, surfacing dirty data here rather than inside a
# classifier. The 'class' label column is left exactly as loaded.
feature_columns = df.columns.drop('class')
df[feature_columns] = df[feature_columns].apply(pd.to_numeric)

In [None]:
# Feature matrix: every remaining column except the 'class' label.
X = np.array(df.drop(columns='class'))
# Target vector: the 'class' column on its own.
y = np.array(df.loc[:, 'class'])

In [None]:
# Hold out 20% of the rows for evaluation. `random_state` pins the shuffle so
# the same rows land in the train and test sets on every run; without it the
# reported accuracies change each time the notebook is executed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [None]:
# Fit and score each estimator in turn on the shared train/test split; the
# label and the estimator at the same index belong together.
for clf_name, clf in zip(classifier_names, classifiers):
    run_classification(
        clf=clf,
        clf_name=clf_name,
        X_train=X_train,
        X_test=X_test,
        y_train=y_train,
        y_test=y_test,
    )