In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from matplotlib.ticker import FormatStrFormatter
from sklearn import tree
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Load the iris dataset and wrap it in a DataFrame: one column per feature,
# plus a 'target' column holding the class label of each row.
data = load_iris()
df = pd.DataFrame(data=data.data, columns=data.feature_names).assign(target=data.target)
df.head()

In [None]:
# Split features and labels into train/test partitions. No test_size is passed,
# so scikit-learn's default proportion applies; random_state=0 makes the split
# reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    df[data.feature_names],
    df['target'],
    random_state=0,
)

In [None]:
# Fit a shallow decision tree (max_depth=2) on the training split.
clf = DecisionTreeClassifier(max_depth=2, random_state=0)
clf.fit(X_train, Y_train)

# Predict the class of a single observation (the first test row).
# `iloc[[0]]` keeps the row as a one-row DataFrame, so the column names match
# the ones the model was fitted with; passing a bare NumPy row instead would
# drop them and make scikit-learn warn about missing feature names.
# predict() returns a NumPy array with one label per input row.
clf.predict(X_test.iloc[[0]])

In [None]:
# Evaluate the currently fitted classifier on the held-out test split and
# print the resulting score.
score = clf.score(X=X_test, y=Y_test)
print(score)

In [None]:
# Candidate values to try for max_depth:
max_depth_range = list(range(1, 6))

# Test-set accuracy for each candidate, in the same order as max_depth_range:
accuracy = []

for depth in max_depth_range:
    # Use a loop-local model name so the notebook-level `clf` and `score`
    # from the earlier cells are not silently overwritten by this sweep.
    depth_clf = DecisionTreeClassifier(max_depth=depth, random_state=0)
    depth_clf.fit(X_train, Y_train)
    accuracy.append(depth_clf.score(X_test, Y_test))

In [None]:
# Plot test accuracy against max_depth and highlight one depth with a red marker.
fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(10, 7))

# Depth to highlight (3 is the value used for the final model later in the
# notebook). The index is looked up by value so it stays correct if
# max_depth_range is changed.
highlight_depth = 3
highlight_idx = max_depth_range.index(highlight_depth)

ax.plot(max_depth_range, accuracy, lw=2, color='k', zorder=0)
s = ax.scatter(
    max_depth_range[highlight_idx],
    accuracy[highlight_idx],
    color='r',
    s=200,
    alpha=1,
    zorder=10,
    marker='o',
)
s.set_edgecolor('black')

# Axis ranges and x ticks follow the swept depths instead of hard-coded 1..5.
ax.set_xlim([max_depth_range[0], max_depth_range[-1]])
ax.set_ylim([.50, 1.00])
ax.grid(True, axis='both', zorder=1, linestyle=':', color='k')

# Show y tick labels with two decimals. A formatter rounds properly and stays
# attached to the tick locator, whereas setting fixed label strings without
# fixing the tick positions can misplace labels and makes matplotlib warn.
ax.yaxis.set_major_formatter(FormatStrFormatter('%.2f'))

ax.tick_params(labelsize=18)
ax.set_xticks(max_depth_range)
ax.set_xlabel('max_depth', fontsize=24)
ax.set_ylabel('Accuracy', fontsize=24)
fig.tight_layout()

In [None]:
# Refit the tree with max_depth=3 so its feature importances can be inspected.
# scikit-learn exposes feature importance: the total amount by which the
# splitting criterion (Gini index or entropy) decreases due to splits on a
# given feature.
clf = DecisionTreeClassifier(random_state=0, max_depth=3).fit(X_train, Y_train)
score = clf.score(X=X_test, y=Y_test)

In [None]:
# Pair each training feature with the fitted tree's importance for it (rounded
# to 3 decimals) and rank the features from most to least important.
importances = (
    pd.DataFrame({
        'feature': X_train.columns,
        'importance': np.round(clf.feature_importances_, 3),
    })
    .sort_values('importance', ascending=False)
)
# Bare last expression -> rendered as a rich table, like df.head() above.
importances