# 결정 트리 기법

~~~
# DummyClassifier lives in the sklearn.dummy submodule; a bare `import sklearn`
# does not bind the name, so import the class explicitly.
from sklearn.dummy import DummyClassifier

# Generic estimator workflow: construct, fit on training data, predict on new data.
dummy_clf = DummyClassifier()
dummy_clf.fit(X_train, y_train)
y_new = dummy_clf.predict(X_new)
~~~

1. `sklearn` 모듈을 import
2. `DummyClassifier` 클래스로부터 `dummy_clf`라는 인스턴스(객체) 생성
3. `dummy_clf`의 메서드인 `fit()` 함수를 통해 X와 y에 대한 학습 진행
4. `dummy_clf`의 메서드인 `predict()` 함수에 새로운 X를 넣어 y를 예측

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.tree import export_graphviz
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import StratifiedShuffleSplit

In [2]:
# Load the modelling dataset; the relative path is resolved against the working directory.
model_data = pd.read_csv("data.csv")

In [3]:
model_data

Unnamed: 0,krw,vko,wti,cboe,label_ko
0,1906.62,14.98,105.30,12.72,neutral
1,1878.33,15.35,104.37,12.98,up
2,1883.97,14.96,103.40,12.73,up
3,1880.71,14.38,105.97,13.41,up
4,1884.83,14.72,106.11,12.81,up
...,...,...,...,...,...
1700,2167.90,24.83,56.74,34.12,up
1701,2150.25,23.63,55.59,31.44,up
1702,2186.06,24.45,54.19,27.98,up
1703,2183.61,23.63,53.14,27.57,up


In [4]:
# Feature matrix: the first four columns by position. Per the displayed frame the
# last column (label_ko) is the target and is left out here.
X = model_data.iloc[:, :4]

In [5]:
X

Unnamed: 0,krw,vko,wti,cboe
0,1906.62,14.98,105.30,12.72
1,1878.33,15.35,104.37,12.98
2,1883.97,14.96,103.40,12.73
3,1880.71,14.38,105.97,13.41
4,1884.83,14.72,106.11,12.81
...,...,...,...,...
1700,2167.90,24.83,56.74,34.12
1701,2150.25,23.63,55.59,31.44
1702,2186.06,24.45,54.19,27.98
1703,2183.61,23.63,53.14,27.57


In [6]:
# Keep the feature column names; they are passed as feature_names when the tree is exported.
X_names = X.columns

In [7]:
# Target vector: the label_ko column of the loaded frame.
y = model_data["label_ko"]

In [8]:
y

0       neutral
1            up
2            up
3            up
4            up
         ...   
1700         up
1701         up
1702         up
1703         up
1704         up
Name: label_ko, Length: 1705, dtype: object

In [9]:
# Drop rows whose label is missing. X must be filtered before y is overwritten,
# because the boolean mask is derived from the still-unfiltered y.
X = X[y.notna()]

In [10]:
# Apply the same non-missing mask to y so that X and y stay row-aligned.
y = y[y.notna()]

In [11]:
# One stratified shuffle split holding out 20% of the rows; the fixed seed
# makes the split repeatable.
sss = StratifiedShuffleSplit(
    n_splits=1,
    test_size=0.2,
    random_state=0,
)

In [12]:
sss

StratifiedShuffleSplit(n_splits=1, random_state=0, test_size=0.2,
            train_size=None)

In [13]:
# The splitter was built with n_splits=1, so this loop body runs exactly once.
# split() yields positional indices, hence .iloc on both X and y.
for train_index, test_index in sss.split(X, y):
    X_train, X_test = X.iloc[train_index], X.iloc[test_index]
    # Test labels must come from y (not X) so they match the rows of X_test.
    y_train, y_test = y.iloc[train_index], y.iloc[test_index]

In [14]:
# Decision tree limited to at most six leaf nodes.
tree_clf = DecisionTreeClassifier(max_leaf_nodes=6)

In [15]:
# Fit the tree on the training split only.
tree_clf.fit(X_train, y_train)

DecisionTreeClassifier(max_leaf_nodes=6)

In [16]:
print("accuracy_score of test data", tree_clf.score(X_train, y_train))

accuracy_score of test data 0.6151026392961877


In [17]:
# Class labels as stored on the fitted tree; used to name the classes in the export.
y_names = tree_clf.classes_
# Write the fitted tree as Graphviz source, annotated with feature and class names.
export_graphviz(
    tree_clf,
    out_file="CART_sample.gv",
    feature_names=X_names,
    class_names=y_names,
)

~~~
# Render the Graphviz file written by export_graphviz (out_file="CART_sample.gv") to PNG.
dot CART_sample.gv -Tpng -o image.png
~~~

![CART_sample](https://user-images.githubusercontent.com/42334717/96284748-fe801b00-1018-11eb-9bae-a04a05658169.png)