In [0]:
# Mount Google Drive inside the Colab runtime (prompts for authorization).
# NOTE(review): no other cell in the visible notebook reads from this mount
# point — confirm the mount is still needed.
from google.colab import drive

DRIVE_MOUNT_POINT = '/MyDrive'
drive.mount(DRIVE_MOUNT_POINT)

In [0]:
!pip install graphviz

In [0]:
from sklearn import tree
from sklearn.datasets import load_iris

# Load the iris dataset once and take features/labels from the same object
# instead of calling load_iris() a second time with return_X_y=True.
iris = load_iris()
X, y = iris.data, iris.target

# Fix random_state so that the fitted tree (and therefore the plots produced
# by the following cells) is the same on every run, matching the
# random_state=0 convention used by the later cells of this notebook.
clf = tree.DecisionTreeClassifier(random_state=0)
clf = clf.fit(X, y)

In [0]:
# Import plot_tree directly: later cells rebind the name `tree` to a fitted
# classifier, so `tree.plot_tree` stops working if this cell is re-run.
from sklearn.tree import plot_tree

# Label the nodes with the iris feature/class names so the figure can be read
# on its own. class_names is converted to a plain list to stay compatible with
# releases that are strict about the accepted container type (hedged: verify
# against the installed scikit-learn version).
# The trailing semicolon keeps the returned list of annotation artists from
# being echoed as cell output.
plot_tree(
    clf,
    feature_names=iris.feature_names,
    class_names=list(iris.target_names),
    filled=True,
);


In [0]:
import graphviz
# Import export_graphviz directly: later cells rebind the name `tree` to a
# fitted classifier, so `tree.export_graphviz` fails if this cell is re-run.
from sklearn.tree import export_graphviz

# out_file=None makes export_graphviz return the DOT source as a string
# instead of writing it to disk.
dot_data = export_graphviz(
    clf,
    out_file=None,
    feature_names=iris.feature_names,
    class_names=iris.target_names,
    filled=True,
    rounded=True,
    special_characters=True,
)

# Wrap the DOT text in a graphviz.Source; leaving it as the last expression
# lets the notebook display the rendered graph.
graph = graphviz.Source(dot_data)
graph


In [0]:
from sklearn import tree

# Minimal two-sample training set: one 2-feature row per class.
X = [[0, 0], [1, 1]]
Y = [0, 1]

# Passing the X, Y dataset to fit() completes the training; fit() returns the
# fitted estimator, so construction and training are chained.
# Note: this rebinds `X` and `clf` from the earlier iris cells.
clf = tree.DecisionTreeClassifier().fit(X, Y)

In [0]:
# Classify a single unseen point with the classifier fitted in the previous cell.
query_points = [[2.0, 2.0]]
clf.predict(query_points)

In [0]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Keep only feature columns 2 and 3 of the iris data (labelled
# 'petal length' / 'petal width' when the tree is exported further down).
iris = datasets.load_iris()
X = iris.data[:, [2, 3]]
y = iris.target

# Split into train and test datasets: 30% held out, fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Standardize the data: the scaler is fitted on the training split only and
# then applied to both splits.
# NOTE(review): the standardized arrays are not used by the visible cells that
# follow (the tree is trained on the unscaled X_train) — confirm they are needed.
sc = StandardScaler()
X_train_std = sc.fit_transform(X_train)
X_test_std = sc.transform(X_test)

In [0]:
# Import the estimator class directly instead of going through `tree.`:
# later cells rebind the name `tree` to a fitted classifier instance, after
# which `tree.DecisionTreeClassifier` raises AttributeError if this cell is
# re-run.
from sklearn.tree import DecisionTreeClassifier

# Depth-limited Gini tree with a fixed seed for reproducible results.
iris_tree = DecisionTreeClassifier(criterion='gini', max_depth=3, random_state=0)
iris_tree.fit(X_train, y_train)  # learn from the (unscaled) training split

In [0]:
from sklearn.metrics import accuracy_score

# Predict on the held-out split. Despite the `_tr` suffix, these are
# predictions for X_test, not for the training data.
y_pred_tr = iris_tree.predict(X_test)

# Fraction of test samples whose predicted label matches the true label.
test_accuracy = accuracy_score(y_test, y_pred_tr)
print('Accuracy: {:.2f}'.format(test_accuracy))

In [0]:
from IPython.display import Image
import pydotplus
from sklearn.tree import export_graphviz

# Export the depth-3 iris tree as DOT text (out_file=None returns a string).
dot_data = export_graphviz(
    iris_tree,
    out_file=None,
    feature_names=['petal length', 'petal width'],
    class_names=iris.target_names,
    filled=True,
    rounded=True,
    special_characters=True,
)

# Parse the DOT text with pydotplus, render it to PNG bytes, and display the
# image inline as the cell's output.
graph = pydotplus.graph_from_dot_data(dot_data)
png_bytes = graph.create_png()
Image(png_bytes)

In [0]:
# Load the wine dataset and capture the keys of the returned object.
# NOTE(review): `key` is neither displayed nor read by any visible cell, so
# this cell produces no output as written.
from sklearn.datasets import load_wine
wine = load_wine()
key = wine.keys()

In [0]:
from sklearn.datasets import load_wine
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

wine = load_wine()

# Stratified split with a fixed seed (default test size).
x_train, x_test, y_train, y_test = train_test_split(
    wine.data,
    wine.target,
    stratify=wine.target,
    random_state=0,
)

# Tree with no depth limit.
# NOTE(review): this rebinds `tree`, which earlier cells import as the
# sklearn.tree module; cells that use `tree.<function>` without re-importing
# it will fail if re-run after this point.
tree = DecisionTreeClassifier(random_state=0)
tree.fit(x_train, y_train)

# Mean accuracy on the training split, then on the test split.
score_tr = tree.score(x_train, y_train)
score_te = tree.score(x_test, y_test)

print(format(score_tr, '.3f'))
print(format(score_te, '.3f'))

In [0]:
# Same experiment as the previous cell, but with the tree limited to depth 2.
# `tree` is rebound to this new classifier; the export and feature-importance
# cells below read it.
tree = DecisionTreeClassifier(max_depth=2, random_state=0)
tree.fit(x_train, y_train)

# Mean accuracy on the training split, then on the test split.
score_tr = tree.score(x_train, y_train)
score_te = tree.score(x_test, y_test)

print(format(score_tr, '.3f'))
print(format(score_te, '.3f'))

In [0]:
import graphviz
from sklearn.tree import export_graphviz

# Write the current `tree` classifier (the depth-2 wine tree) as DOT source.
export_graphviz(tree, out_file='tree.dot', class_names=wine.target_names, feature_names=wine.feature_names, impurity=False, filled=True)

# Read the DOT source back so it can be handed to graphviz.
with open('tree.dot') as file_reader:
    dot_graph = file_reader.read()

# Render to an actual PNG. In graphviz's render(), `filename` names the DOT
# source file and the output gets the format's extension appended, so the
# previous render(filename='tree.png') wrote DOT text to 'tree.png' and
# produced the image in the default (PDF) format instead.
# With format='png' and filename='tree' the image is written to 'tree.png';
# cleanup=True removes the intermediate source file 'tree' afterwards.
dot = graphviz.Source(dot_graph, format='png')
dot.render(filename='tree', cleanup=True)

In [0]:
# Per-feature importance scores of the current `tree` classifier
# (the depth-2 wine tree fitted above); reused by the bar chart below.
feature_imp = tree.feature_importances_
print(feature_imp)

In [0]:
import matplotlib.pyplot as plt
import numpy as np

# One horizontal bar per wine feature.
n_feature = wine.data.shape[1]
idx = np.arange(n_feature)

# Explicit Figure/Axes interface instead of the implicit pyplot state machine.
fig, ax = plt.subplots()
ax.barh(idx, feature_imp, align='center')

# Label each bar with its feature name.
ax.set_yticks(idx)
ax.set_yticklabels(wine.feature_names)

ax.set_xlabel('feature importance', size=15)
ax.set_ylabel('feature', size=15)
plt.show()