In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn import datasets  
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.tree import  DecisionTreeClassifier
from sklearn import tree
from sklearn.metrics import classification_report

In [2]:
# import some data to play with
iris = datasets.load_iris()

In [3]:
#Complete Iris dataset
data=pd.DataFrame(iris.data,columns=[iris.feature_names])
fdata=pd.concat((data,pd.DataFrame(iris.target)),axis=1)
fdata=fdata.rename(columns= ({0:"Species"}))

In [4]:
x=iris.data
y=iris.target

In [5]:
fdata['Species'].unique()

array([0, 1, 2])

In [6]:
fdata.Species.value_counts()

2    50
1    50
0    50
Name: Species, dtype: int64

In [7]:
colnames = list(data.columns)
colnames

[('sepal length (cm)',),
 ('sepal width (cm)',),
 ('petal length (cm)',),
 ('petal width (cm)',)]

In [8]:
# Splitting data into training and testing data set
x_train, x_test,y_train,y_test = train_test_split(x,y, test_size=0.2,random_state=40)

### Building Decision Tree Classifier using Entropy Criteria

In [9]:
model = DecisionTreeClassifier(criterion = 'entropy',max_depth=3)
model.fit(x_train,y_train)

DecisionTreeClassifier(criterion='entropy', max_depth=3)

In [10]:
model.get_n_leaves()

5

In [11]:
preds = model.predict(x_test) # predicting on test data set 
pd.Series(preds).value_counts() # getting the count of each category 

1    13
2     9
0     8
dtype: int64

In [12]:
preds

array([0, 1, 2, 2, 1, 2, 1, 1, 1, 0, 1, 0, 0, 1, 1, 2, 2, 2, 1, 1, 2, 2,
       1, 0, 1, 0, 0, 2, 0, 1])

In [13]:
pd.crosstab(y_test,preds) # getting the 2 way table to understand the correct and wrong predictions

col_0,0,1,2
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
0,8,0,0
1,0,12,0
2,0,1,9


In [14]:
# Accuracy 
np.mean(preds==y_test)

0.9666666666666667

In [15]:
print(classification_report(preds,y_test))

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         8
           1       1.00      0.92      0.96        13
           2       0.90      1.00      0.95         9

    accuracy                           0.97        30
   macro avg       0.97      0.97      0.97        30
weighted avg       0.97      0.97      0.97        30



### Building Decision Tree Classifier (CART) using Gini Criteria

In [16]:
from sklearn.tree import DecisionTreeClassifier
model_gini = DecisionTreeClassifier(criterion='gini', max_depth=3)

In [17]:
model_gini.fit(x_train, y_train)

DecisionTreeClassifier(max_depth=3)

In [18]:
#Prediction and computing the accuracy
pred=model.predict(x_test)
np.mean(preds==y_test)

0.9666666666666667

#### Decision Tree Regression Example

In [19]:
# Decision Tree Regression
from sklearn.tree import DecisionTreeRegressor

In [20]:
array = fdata.values
X = array[:,0:3]
y = array[:,3]

In [21]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=1)

In [22]:
model = DecisionTreeRegressor()
model.fit(X_train, y_train)

DecisionTreeRegressor()

In [23]:
#Find the accuracy
model.score(X_test,y_test)

0.8799375675351183