In [6]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import mglearn
import graphviz
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import LinearSVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz

%matplotlib inline

# Breast-cancer dataset shared by every experiment below.
cancer = load_breast_cancer()

# Stratify on the target so both splits keep the same class proportions;
# the fixed seed makes the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, stratify=cancer.target, random_state=42
)

def kNeighbors():
    """Plot k-NN training and test accuracy for 1 to 10 neighbors.

    One ``KNeighborsClassifier`` is fitted per neighbor count on the
    module-level ``X_train``/``y_train`` split and scored on both the
    training and the test split. Both accuracy curves are drawn with
    ``matplotlib.pyplot``; nothing is returned.
    """
    k_values = range(1, 11)

    # Fit one model per k first, then score each model on both splits.
    fitted = [
        KNeighborsClassifier(n_neighbors=k).fit(X_train, y_train)
        for k in k_values
    ]
    train_scores = [model.score(X_train, y_train) for model in fitted]
    test_scores = [model.score(X_test, y_test) for model in fitted]

    plt.plot(k_values, train_scores, label='training dataset accuracy')
    plt.plot(k_values, test_scores, label='test dataset accuracy')
    plt.xlabel('neighbors count')
    plt.ylabel('accuracy')
    plt.legend()

def logReg():
    """Compare L1-penalized logistic regression for C = 0.01, 1 and 100.

    Fits three ``LogisticRegression`` models on the module-level
    ``X_train``/``y_train`` split, prints their training accuracies on one
    line and their test accuracies on the next (ordered C=0.01, C=1, C=100),
    then plots each model's coefficients against the feature names with
    ``matplotlib.pyplot``. Nothing is returned.
    """
    def fit_l1(c):
        # Name the solver explicitly: the L1 penalty is not supported by
        # every solver, and the library default (lbfgs in current
        # scikit-learn) raises ValueError for penalty='l1'. liblinear
        # supports L1 and was the default in older releases.
        model = LogisticRegression(C=c, penalty='l1', solver='liblinear')
        return model.fit(X_train, y_train)

    logreg001 = fit_l1(0.01)
    logreg1 = fit_l1(1)
    logreg100 = fit_l1(100)
    models = (logreg001, logreg1, logreg100)

    # Training scores first, then test scores; the three-space separator
    # reproduces the original `print(a, ' ', b, ' ', c)` layout.
    for features, labels in ((X_train, y_train), (X_test, y_test)):
        print(*(m.score(features, labels) for m in models), sep='   ')

    # coef_ is transposed so that each feature becomes one x position.
    plt.plot(logreg001.coef_.T, 'o', label="C=0.01")
    plt.plot(logreg1.coef_.T, '^', label="C=1")
    plt.plot(logreg100.coef_.T, 'x', label="C=100")
    plt.xticks(range(cancer.data.shape[1]), cancer.feature_names, rotation=90)
    # Optional plot tweaks, left disabled as in the original:
    #     plt.hlines(0, 0, cancer.data.shape[1])
    #     plt.ylim(-5, 5)
    plt.legend()
    
def decisionTree():
    """Fit a depth-4 decision tree, print its accuracy and show the tree.

    The tree is trained on the module-level ``X_train``/``y_train`` split.
    Training accuracy is printed first, then test accuracy. The fitted tree
    is exported with ``export_graphviz`` and handed to ``graphviz.Source``,
    whose ``view()`` method is called to display it. Nothing is returned.
    """
    classifier = DecisionTreeClassifier(max_depth=4, random_state=0)
    classifier.fit(X_train, y_train)

    for features, labels in ((X_train, y_train), (X_test, y_test)):
        print(classifier.score(features, labels))

    # out_file=None makes export_graphviz hand back the DOT text instead of
    # writing it to a file, so it can be passed straight to graphviz.
    dot_source = export_graphviz(
        classifier,
        out_file=None,
        feature_names=cancer.feature_names,
        class_names=['bad', 'good'],
    )
    graphviz.Source(dot_source).view()
    
    
# Experiment selector: only the uncommented call below runs when this cell
# executes. Swap the comment markers to run a different experiment.
# kNeighbors()
# logReg()
decisionTree()

0.9882629107981221
0.951048951048951
