In [None]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris 

# Load the bundled iris dataset and pull out the feature matrix and the
# integer class labels.
iris = load_iris()
X = iris.data
y = iris.target

# Peek at the first five samples and their labels.
print('X:\n', X[:5])
print('y:\n', y[:5])

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for testing; the fixed random_state makes the
# split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y,
    test_size=0.3,
    random_state=123,
)

# One shape per line: train features, test features, train labels, test labels.
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape, sep='\n')

In [None]:
import matplotlib.pyplot as plt

np.random.seed(223)

# Two separable clouds of 100 points each. The order of the two uniform()
# draws is kept (class 0 first) so the seeded values stay the same.
X0 = np.random.uniform(size=(100, 2))             # class 0: unit square [0, 1) x [0, 1)
X1 = np.random.uniform(-1.0, 0.0, size=(100, 2))  # class 1: square [-1, 0) x [-1, 0)

# Constant integer label vectors, one entry per point.
y0 = np.zeros(100, dtype=int)
y1 = np.ones(100, dtype=int)

# Scatter both classes on one axes; transposing an (n, 2) array and unpacking
# it yields the x and y coordinate vectors.
fig, ax = plt.subplots()
ax.scatter(*X0.T, marker='o', label='class 0')
ax.scatter(*X1.T, marker='x', label='class 1')

ax.set(xlabel='x', ylabel='y')
ax.legend()

plt.show()

In [None]:
from sklearn.svm import SVC

def plot_boundary_margin_sv(X0, y0, X1, y1, kernel, C, xmin=-1, xmax=1, ymin=-1, ymax=1,
                            grid_size=100):
    """Fit an SVC on two labelled point sets and plot the fitted model.

    The figure shows both point sets, the contour lines of the decision
    function at -1, 0 and 1 (dashed margin, solid decision boundary, dashed
    margin) and a ring around every support vector. The figure is displayed
    with ``plt.show()``; nothing is returned.

    Parameters
    ----------
    X0, X1 : array of shape (n_points, 2)
        Coordinates of the points drawn as 'class 0' and 'class 1'. Two
        columns are required because the model is also evaluated on a 2-D grid.
    y0, y1 : 1-D array
        Labels belonging to ``X0`` and ``X1``. The contour plot assumes a
        two-class problem, i.e. one decision value per grid point.
    kernel : str
        Kernel name, passed straight to ``SVC``.
    C : float
        Regularisation parameter, passed straight to ``SVC``.
    xmin, xmax, ymin, ymax : float
        Extent of the rectangular grid on which the decision function is
        evaluated.
    grid_size : int, default 100
        Number of grid points along each axis. The default reproduces the
        resolution that was previously hard-coded.
    """
    # Instantiate the support vector machine
    svc = SVC(kernel=kernel, C=C)
    # Train on the stacked samples and labels of both classes
    svc.fit(np.vstack((X0, X1)), np.hstack((y0, y1)))

    fig, ax = plt.subplots()
    ax.scatter(X0[:, 0], X0[:, 1], marker='o', label='class 0')
    ax.scatter(X1[:, 0], X1[:, 1], marker='x', label='class 1')

    # Evaluate the decision function on a regular grid covering the plot area.
    xx, yy = np.meshgrid(np.linspace(xmin, xmax, grid_size),
                         np.linspace(ymin, ymax, grid_size))
    xy = np.vstack([xx.ravel(), yy.ravel()]).T
    # Reshape back to the mesh layout so contour() can consume it.
    p = svc.decision_function(xy).reshape(xx.shape)
    ax.contour(xx, yy, p, colors='k', levels=[-1, 0, 1],
               alpha=0.5, linestyles=['--', '-', '--'])
    # Large hollow circles mark the support vectors.
    ax.scatter(svc.support_vectors_[:, 0], svc.support_vectors_[:, 1],
               s=250, facecolors='none', edgecolors='black')

    ax.set_xlabel('x')
    ax.set_ylabel('y')
    ax.legend()

    plt.show()

# Linear kernel with a very large C (1e6): margin violations are penalised so
# heavily that the fit behaves almost like a hard-margin SVM.
plot_boundary_margin_sv(X0, y0, X1, y1, kernel='linear', C=1e6)

In [None]:
# Same data and kernel, but with a small C (0.1): margin violations are cheap,
# so the margin is typically wider and more points become support vectors.
plot_boundary_margin_sv(X0, y0, X1, y1, kernel='linear', C=0.1)

In [None]:
np.random.seed(123)

# NOTE: this rebinds X and y, which earlier held the iris features/labels.
# The train/test arrays were split off before this point and are unaffected,
# but re-running the split cell after this one would use these arrays instead.
X = np.random.random(size=(100, 2))

# Label 1 for points strictly above the parabola 2*(x - 0.5)^2 + 0.5, else 0;
# the two classes are therefore not linearly separable.
y = np.greater(X[:, 1], 2 * (X[:, 0] - 0.5) ** 2 + 0.5).astype(int)

# Scatter the two classes. Markers follow the convention used by every other
# figure in this notebook (class 0 -> 'o', class 1 -> 'x') so the same points
# do not change symbol when they are re-plotted with the SVM boundary.
fig, ax = plt.subplots()
ax.scatter(X[y==0, 0], X[y==0, 1], marker='o', label='class 0')
ax.scatter(X[y==1, 0], X[y==1, 1], marker='x', label='class 1')

# Axis labels, matching the other plots.
ax.set_xlabel('x')
ax.set_ylabel('y')
ax.legend()
plt.show()

In [None]:
# Split the samples and labels by class. This rebinds X0/X1/y0/y1, which
# earlier held the linearly separable toy data.
X0, y0 = X[y == 0], y[y == 0]
X1, y1 = X[y == 1], y[y == 1]

# RBF kernel for the curved class boundary. The points were drawn from
# [0, 1) in both coordinates, so the grid starts at 0 instead of the default -1.
plot_boundary_margin_sv(
    X0, y0, X1, y1,
    kernel='rbf', C=1e3,
    xmin=0, ymin=0,
)

In [None]:
from sklearn.tree import DecisionTreeClassifier

# Depth-limited decision tree on the iris training split.
# random_state is pinned (same seed as the split and the forest): without it,
# ties between equally good splits are broken using NumPy's global RNG, so the
# fitted tree would depend on which cells happened to run beforehand.
tree = DecisionTreeClassifier(max_depth=3, random_state=123)
tree.fit(X_train, y_train)

In [None]:
from pydotplus import graph_from_dot_data
from sklearn.tree import export_graphviz
# from IPython.display import Image

# Export the fitted tree as DOT source; out_file=None makes export_graphviz
# return the text instead of writing it to a file.
dot_data = export_graphviz(
    tree,
    out_file=None,
    feature_names=['Sepal Length', 'Sepal Width', 'Petal Length', 'Petal Width'],
    class_names=['Setosa', 'Versicolor', 'Virginica'],
    filled=True,
    rounded=True,
)

# Parse the DOT text and render it to a PNG file in the working directory.
# Alternatives that were tried here: display inline via
# IPython.display.Image(graph.create_png()), and override graph.progs['dot']
# with the path to Graphviz's dot executable when it is not found.
graph = graph_from_dot_data(dot_data)
graph.write_png('tree.png')


In [None]:
# Predict class labels for the held-out test split with the fitted tree;
# the bare expression on the last line makes the notebook display the array.
y_pred = tree.predict(X_test)
y_pred

In [None]:
from sklearn.ensemble import RandomForestClassifier

# Random forest of 100 trees with a fixed seed; fit() returns the estimator
# itself, so construction and training can be chained.
forest = RandomForestClassifier(
    n_estimators=100,
    random_state=123,
).fit(X_train, y_train)

# NOTE: this rebinds y_pred, replacing the decision tree's predictions.
y_pred = forest.predict(X_test)
y_pred