In [1]:
from sklearn.tree import DecisionTreeClassifier, export_graphviz
from sklearn import tree

from sklearn.datasets import load_wine, load_digits, load_iris

from sklearn.preprocessing import StandardScaler, normalize
from sklearn.decomposition import PCA

from sklearn.manifold import TSNE

from sklearn.model_selection import train_test_split

from IPython.display import SVG, display
from graphviz import Source
from IPython.display import display                               
from ipywidgets import interactive

import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap

import numpy as np

In [2]:
# class labels
# labels = data.feature_names
def plot_tree(dataset, scaling, crit, split, depth, min_split, min_leaf=0.2):
    """Fit a decision tree on a 2-D PCA projection of a toy dataset and draw it.

    The selected dataset is optionally rescaled, projected onto two whitened
    principal components, split 80/20 into train/test sets and used to fit a
    ``DecisionTreeClassifier``.  The test accuracy is printed, the decision
    surface is plotted together with all samples, and the fitted tree is
    rendered as an SVG via graphviz.

    Parameters
    ----------
    dataset : str
        ``'wine'`` (``load_wine``), ``'mnist'`` (``load_digits``) or
        ``'iris'`` (``load_iris``).
    scaling : str
        ``'StandardScaler'`` to standardise the features, ``'normalize'`` for
        row-wise L2 normalisation, or ``'false'`` to use the raw features.
    crit : str
        Forwarded as ``criterion`` to ``DecisionTreeClassifier``.
    split : str
        Forwarded as ``splitter`` to ``DecisionTreeClassifier``.
    depth : int
        Forwarded as ``max_depth`` to ``DecisionTreeClassifier``.
    min_split : int or float
        Forwarded as ``min_samples_split`` to ``DecisionTreeClassifier``.
    min_leaf : int or float, default 0.2
        Forwarded as ``min_samples_leaf`` to ``DecisionTreeClassifier``.

    Returns
    -------
    DecisionTreeClassifier
        The estimator fitted on the training split.

    Raises
    ------
    ValueError
        If ``dataset`` or ``scaling`` is not one of the supported options.
    """
    # Validate the string options before doing any work, so a typo fails with
    # a clear message instead of an UnboundLocalError further down.
    valid_datasets = ('wine', 'mnist', 'iris')
    valid_scalings = ('StandardScaler', 'normalize', 'false')
    if dataset not in valid_datasets:
        raise ValueError(
            "Unknown dataset {!r}; expected one of {}".format(dataset, valid_datasets))
    if scaling not in valid_scalings:
        raise ValueError(
            "Unknown scaling {!r}; expected one of {}".format(scaling, valid_scalings))

    # parameters:
    n_components = 2  # two PCA components so the surface can be drawn in a plane
    cmap = plt.cm.RdYlBu
    plot_step = 0.02  # fine step width for decision surface contours
    RANDOM_SEED = 137  # fix the seed on each iteration

    # load dataset
    if dataset == 'wine':
        data = load_wine()
    elif dataset == 'mnist':
        data = load_digits()
    else:  # 'iris'
        data = load_iris()

    # feature matrix and target vector
    X = data.data
    y = data.target

    # Compare with ==, not `is`: identity of equal strings is not guaranteed.
    if scaling == 'StandardScaler':
        # Standardize the feature matrix
        features = StandardScaler().fit_transform(X)
    elif scaling == 'normalize':
        features = normalize(X, norm='l2')
    else:  # 'false'
        features = X

    # Project onto the first two whitened principal components.
    # NOTE: scaler and PCA are fit on the full dataset before the train/test
    # split, so the printed score is not a strictly held-out estimate.
    pca = PCA(n_components=n_components, whiten=True)
    X = pca.fit_transform(features)

    # test train split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=RANDOM_SEED)

    estimator = DecisionTreeClassifier(
        random_state=0,
        criterion=crit,
        splitter=split,
        max_depth=depth,
        min_samples_split=min_split,
        min_samples_leaf=min_leaf,
    )
    estimator.fit(X_train, y_train)

    scores = estimator.score(X_test, y_test)
    print("Score:", scores)

    # plotting the grid
    fig, ax = plt.subplots()

    # Plot the decision boundary using a fine mesh as input to a filled
    # contour plot; pad the data range by 1 on every side.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1

    xx, yy = np.meshgrid(np.arange(x_min, x_max, plot_step),
                         np.arange(y_min, y_max, plot_step))

    Z = estimator.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    ax.contourf(xx, yy, Z, cmap=cmap)

    # Overlay all samples (train and test) with a black outline.
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap, edgecolor='k', s=20)

    plt.show()

    # Render the fitted tree; class_names=True labels classes symbolically.
    graph = Source(tree.export_graphviz(
        estimator,
        out_file=None,
        class_names=True,
        filled=True,
    ))

    display(SVG(graph.pipe(format='svg')))

    return estimator

In [3]:
# Build the interactive control panel for plot_tree. Each keyword supplies the
# candidate values for the plot_tree argument of the same name.
inter = interactive(
    plot_tree,
    dataset=["wine", "iris", "mnist"],
    scaling=["normalize", "StandardScaler", "false"],
    crit=["gini", "entropy"],
    split=["best", "random"],
    depth=[1, 2, 3, 4, 8],
    min_split=(0.1, 1),
    min_leaf=(0.1, 0.5),
)

display(inter)

interactive(children=(Dropdown(description='dataset', options=('wine', 'iris', 'mnist'), value='wine'), Dropdo…

In [4]:
# Inspect the largest class label present in the wine dataset.
data = load_wine()
np.max(data.target)

2