# ID3 and CART

In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score

In [2]:
data = pd.read_csv('iris.csv')
data.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety
0,5.1,3.5,1.4,0.2,Setosa
1,4.9,3.0,1.4,0.2,Setosa
2,4.7,3.2,1.3,0.2,Setosa
3,4.6,3.1,1.5,0.2,Setosa
4,5.0,3.6,1.4,0.2,Setosa


In [3]:
data['class'] = data['variety'].map({
    'Setosa': 0,
    'Versicolor': 1,
    'Virginica': 2
})
data.head()

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width,variety,class
0,5.1,3.5,1.4,0.2,Setosa,0
1,4.9,3.0,1.4,0.2,Setosa,0
2,4.7,3.2,1.3,0.2,Setosa,0
3,4.6,3.1,1.5,0.2,Setosa,0
4,5.0,3.6,1.4,0.2,Setosa,0


In [4]:
x = data[['sepal.length', 'sepal.width', 'petal.width', 'petal.length']]
y = data[['class']]

In [5]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=1)

## ID3

In [6]:
id3 = DecisionTreeClassifier(criterion='entropy')
id3_model = id3.fit(x_train, y_train)
y_id3 = id3_model.predict(x_test)

In [7]:
print("Accuracy: ", accuracy_score(y_test, y_id3))
print("Recall: ", recall_score(y_test, y_id3, average='micro'))
print("Precision: ", precision_score(y_test, y_id3, average='micro'))
print("f1: ", f1_score(y_test, y_id3, average='micro'))

Accuracy:  0.9555555555555556
Recall:  0.9555555555555556
Precision:  0.9555555555555556
f1:  0.9555555555555556


## CART

In [8]:
cart = DecisionTreeClassifier(criterion='gini')
cart_model = cart.fit(x_train, y_train)
y_cart = cart_model.predict(x_test)

In [9]:
print("Accuracy: ", accuracy_score(y_test, y_cart))
print("Recall: ", recall_score(y_test, y_cart, average='micro'))
print("Precision: ", precision_score(y_test, y_cart, average='micro'))
print("f1: ", f1_score(y_test, y_cart, average='micro'))

Accuracy:  0.9555555555555556
Recall:  0.9555555555555556
Precision:  0.9555555555555556
f1:  0.9555555555555556
