<div class="alert alert-block alert-info">
    <h1 align="center">SKLearn Decision Tree</h1>
    <h3 align="center"> Machine Learning Course</h3>
    <h5 align="center">Alireza Aghamohammadi (http://linkedin.com/in/aaghamohammadi)</h5>
</div>

#### Load Dependencies

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn import metrics

#### Set Configuration

In [None]:
# Matplotlib 3.6 renamed the bundled seaborn style sheets to "seaborn-v0_8*"
# and 3.8 removed the old names, so plt.style.use('seaborn') fails on current
# releases. Apply the first name this installation actually provides; if
# neither exists, the default style is left in place.
for _style_name in ("seaborn-v0_8", "seaborn"):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break

#### Set Data Path

In [None]:
# Relative directory holding the input CSV files. The trailing slash matters:
# the file name is appended to this string by plain concatenation when loading.
PATH_DATA = "../data/"

#### Load Data

In [None]:
# Build the CSV location from the configured data directory, load it into a
# DataFrame, and preview the first rows as the cell output.
csv_path = PATH_DATA + "breast_cancer.csv"
bcancer = pd.read_csv(csv_path)
bcancer.head()

In [None]:
# Remove the 'Unnamed: 32' column (pandas' auto-generated name for a column
# without a header -- presumably an artifact of the CSV; TODO confirm it is empty).
# Rebinding instead of inplace=True, plus errors="ignore", keeps this cell safe
# to re-run: the in-place version raised KeyError on a second execution because
# the column was already gone.
bcancer = bcancer.drop(columns="Unnamed: 32", errors="ignore")

In [None]:
# Move the `id` column into the row index so it is not carried along as a
# feature column. The guard and the non-inplace rebinding make the cell
# re-runnable: once `id` is the index it is no longer a column, and the
# original `bcancer.id` lookup raised AttributeError on a second execution.
if "id" in bcancer.columns:
    bcancer = bcancer.set_index("id")

In [None]:
# Features: every column except the target label.
x = bcancer.drop(columns="diagnosis")

# Target: encode the label "M" as 0 and "B" as 1 (presumably malignant/benign).
# NOTE(review): with this encoding, metrics that default to pos_label=1 treat
# "B" as the positive class -- confirm that is intended.
y = bcancer["diagnosis"].map({"M": 0, "B": 1})

# Series.map turns any label missing from the dict into NaN; fail here with a
# clear message instead of later inside the estimator.
assert y.notna().all(), "unexpected diagnosis label (expected only 'M' or 'B')"

print(x.shape)
print(y.shape)

#### Split Data into Test, Train, and Validation Sets

In [None]:
# Two-stage hold-out split using the same settings at each stage: first set
# aside 10% of all rows as the test set, then 10% of the remaining rows as the
# validation set. The fixed seed makes both splits repeatable.
split_options = dict(test_size=0.1, random_state=101, shuffle=True)
x_train, x_test, y_train, y_test = train_test_split(x, y, **split_options)
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, **split_options)

In [None]:
# Show the validation split's feature and label shapes (the original printed
# y_valid.shape twice and never showed the feature matrix).
print(x_valid.shape)
print(y_valid.shape)

#### Train the Model

In [None]:
# Baseline tree limited to depth 4 with the Gini impurity criterion.
# scikit-learn permutes candidate features at every split, so ties between
# equally good splits can resolve differently from run to run; fixing
# random_state (same seed as the data split) makes the fitted tree repeatable.
dt = DecisionTreeClassifier(max_depth=4, criterion='gini', random_state=101)

In [None]:
# Fit the tree on the training split only; the validation and test rows are
# not passed to the estimator here.
dt.fit(x_train, y_train)

In [None]:
# Predict on the validation split and report F1 as the cell output.
# f1_score scores the label 1 as the positive class by default, which the
# target encoding in this notebook assigns to "B".
y_valid_pred = dt.predict(x_valid)
metrics.f1_score(y_true=y_valid, y_pred=y_valid_pred)

#### Tune Hyperparameters

In [None]:
# Sweep max_depth over 1..10 and record F1 on both the training and validation
# splits, so the two curves can be compared per depth.
train_f1 = []
valid_f1 = []
for max_depth in range(1, 11):
    # Fixed random_state (same seed as the data split): without it, tie-breaking
    # between equally good splits varies per run and the curves are not repeatable.
    # Note: `dt` is rebound on every iteration and holds the depth-10 tree afterwards.
    dt = DecisionTreeClassifier(max_depth=max_depth, criterion='gini', random_state=101)
    dt.fit(x_train, y_train)
    y_valid_pred = dt.predict(x_valid)
    valid_f1.append(metrics.f1_score(y_valid, y_valid_pred))
    y_train_pred = dt.predict(x_train)
    train_f1.append(metrics.f1_score(y_train, y_train_pred))

In [None]:
# Plot training vs. validation F1 against tree depth.
fig, ax = plt.subplots(dpi=300)
# Derive the x-axis from the recorded scores instead of repeating the sweep
# bounds; this relies on the sweep starting at depth 1 with step 1.
depths = np.arange(1, len(train_f1) + 1)
ax.plot(depths, train_f1, label=r"Train $F_1$")
ax.plot(depths, valid_f1, label=r"Valid $F_1$")
ax.legend()
ax.set_title(r"$F_1$ vs. Maximum Depth")
ax.set_xlabel("Maximum Depth")
ax.set_ylabel(r"$F_1$")
# plt.show() rather than fig.show(): under the notebook's inline (non-GUI)
# backend, Figure.show() only emits a "cannot show the figure" warning.
plt.show()

In [None]:
# Final model: refit at depth 6 (presumably chosen from the validation curve;
# TODO confirm) and report F1 on the held-out test split as the cell output.
# random_state is fixed (same seed as the data split) so the reported test
# score is repeatable across runs.
dt = DecisionTreeClassifier(max_depth=6, random_state=101)
dt.fit(x_train, y_train)
metrics.f1_score(y_test, dt.predict(x_test))

#### *What we learned*

* We learned the decision tree algorithm
* We learned how to split our data into train, test, and validation sets
* We learned how to evaluate a classifier with the $F_1$ metric, which combines *Precision* and *Recall*