In [1]:
# Import all dependencies

from id3 import Id3Estimator
from id3 import export_text

from sklearn.preprocessing import LabelEncoder
from sklearn.datasets import load_iris
from sklearn import tree

import pandas as pd
import numpy as np



<h2> Pembelajaran dataset Iris </h2>

In [2]:
# Load the iris dataset through scikit-learn's load_iris(); the cells below
# read iris.data, iris.target and iris['feature_names'] from this object.
iris = load_iris()

**Decision Tree Model for iris:**

In [3]:
# Fit a decision tree on the complete iris data and print it as text rules.
# random_state is pinned because scikit-learn randomly permutes the candidate
# features at each split; without a fixed seed, equally good splits may be
# chosen differently between runs and the printed tree is not reproducible.
decision_tree = tree.DecisionTreeClassifier(random_state=0)
decision_tree = decision_tree.fit(iris.data, iris.target)
DecisionTreeModel = tree.export_text(decision_tree, feature_names=iris['feature_names'])
print(DecisionTreeModel)

|--- petal length (cm) <= 2.45
|   |--- class: 0
|--- petal length (cm) >  2.45
|   |--- petal width (cm) <= 1.75
|   |   |--- petal length (cm) <= 4.95
|   |   |   |--- petal width (cm) <= 1.65
|   |   |   |   |--- class: 1
|   |   |   |--- petal width (cm) >  1.65
|   |   |   |   |--- class: 2
|   |   |--- petal length (cm) >  4.95
|   |   |   |--- petal width (cm) <= 1.55
|   |   |   |   |--- class: 2
|   |   |   |--- petal width (cm) >  1.55
|   |   |   |   |--- petal length (cm) <= 5.45
|   |   |   |   |   |--- class: 1
|   |   |   |   |--- petal length (cm) >  5.45
|   |   |   |   |   |--- class: 2
|   |--- petal width (cm) >  1.75
|   |   |--- petal length (cm) <= 4.85
|   |   |   |--- sepal length (cm) <= 5.95
|   |   |   |   |--- class: 1
|   |   |   |--- sepal length (cm) >  5.95
|   |   |   |   |--- class: 2
|   |   |--- petal length (cm) >  4.85
|   |   |   |--- class: 2



**ID3 Model for iris:**

In [4]:
# Train an ID3 estimator on the iris data, then render the learned tree
# as text using the iris feature names.
estimator = Id3Estimator().fit(iris.data, iris.target)

ID3Model = export_text(
    estimator.tree_,
    feature_names=iris['feature_names'],
)
print(ID3Model)


petal length (cm) <=2.45: 0 (50) 
petal length (cm) >2.45
|   petal width (cm) <=1.75
|   |   sepal length (cm) <=7.10
|   |   |   sepal width (cm) <=2.85: 1 (27/4) 
|   |   |   sepal width (cm) >2.85: 1 (22) 
|   |   sepal length (cm) >7.10: 2 (1) 
|   petal width (cm) >1.75
|   |   sepal length (cm) <=5.95
|   |   |   sepal width (cm) <=3.10: 2 (6) 
|   |   |   sepal width (cm) >3.10: 1 (1) 
|   |   sepal length (cm) >5.95: 2 (39) 



<h2> Pembelajaran dataset play-tennis </h2>

In [5]:
# Load the play-tennis CSV and discard its 'day' column in one chained step
df = pd.read_csv('datasets/play_tennis.csv').drop(columns='day')

In [6]:
# The 'play' column holds the class labels; keep it as the training target
target = df['play']

print("Target values: ", target, sep="\n")

# Remove the label column so that only the feature columns remain in df
df = df.drop(columns='play')

Target values: 
0      No
1      No
2     Yes
3     Yes
4     Yes
5      No
6     Yes
7      No
8     Yes
9     Yes
10    Yes
11    Yes
12    Yes
13     No
Name: play, dtype: object


In [7]:
# Column names passed to the tree exporters as feature names
feature_names_var = [
    "outlook",
    "temp",
    "humidity",
    "wind",
]

# Encoder used to turn the categorical values into integer codes
LE = LabelEncoder()

# Name under which the encoded frame is accessed; this binds the very same
# DataFrame object as df (no copy is made)
df_encoded = df

In [8]:
# For every feature, encode the column and record the list of
# (encoded value, real value) pairs, ordered by encoded value.
dictOfValues = {}

for key in feature_names_var:
    # fit_transform learns the distinct labels of this column and replaces
    # them with integer codes. df_encoded is bound to the same frame as df
    # in the setup cell, so the original strings are overwritten here.
    df_encoded[key] = LE.fit_transform(df[key])

    # After fitting, LE.classes_[i] is the label that was encoded as i, so the
    # code -> label table is read straight from the encoder. This avoids
    # calling inverse_transform on the whole column once per row and does not
    # depend on df having been overwritten with the encoded values.
    dictOfValues[key] = [(int(code), label) for code, label in enumerate(LE.classes_)]

In [9]:
# Transpose so that every sample (row of df_encoded) becomes a column
transposed_df_encoded = df_encoded.transpose()

# Collect one Series of feature values per sample. Iterating over the actual
# column labels instead of a fixed range(0, 14) keeps this working when the
# dataset has a different number of rows.
data = [transposed_df_encoded[label] for label in transposed_df_encoded.columns]

<h3> Hasil pembelajaran: </h3>

**Decision Tree Model for play-tennis:**

In [10]:
# Fit a decision tree on the encoded play-tennis samples and print it as text.
# random_state is pinned because scikit-learn randomly permutes the candidate
# features at each split; on a dataset this small many splits tie, so without
# a fixed seed the printed tree can change from run to run.
decision_tree = tree.DecisionTreeClassifier(random_state=0)
decision_tree = decision_tree.fit(data, target)
DecisionTreeModel = tree.export_text(decision_tree, feature_names=feature_names_var)

print("Tree:")
print(DecisionTreeModel)

Tree:
|--- outlook <= 0.50
|   |--- class: Yes
|--- outlook >  0.50
|   |--- humidity <= 0.50
|   |   |--- outlook <= 1.50
|   |   |   |--- wind <= 0.50
|   |   |   |   |--- class: No
|   |   |   |--- wind >  0.50
|   |   |   |   |--- class: Yes
|   |   |--- outlook >  1.50
|   |   |   |--- class: No
|   |--- humidity >  0.50
|   |   |--- wind <= 0.50
|   |   |   |--- temp <= 1.00
|   |   |   |   |--- class: No
|   |   |   |--- temp >  1.00
|   |   |   |   |--- class: Yes
|   |   |--- wind >  0.50
|   |   |   |--- class: Yes



**Outlook:**
- outlook <= 0.50, artinya outlook = Overcast
- outlook <= 1.50, artinya outlook = Rain
- selain itu, artinya outlook = Sunny

<br> **Temp:**
- temp <= 0.50, artinya temp = Cool
- temp <= 1.50 atau <= 1.00, artinya temp = Hot
- selain itu, artinya temp = Mild

<br> **Wind**
- wind <= 0.50, artinya wind = Strong
- selain itu, artinya wind = Weak

<br> **Humidity**
- humidity <= 0.50, artinya humidity = High
- selain itu, artinya humidity = Normal

**ID3 Model for play-tennis:**

In [11]:
# Fit ID3 on the encoded play-tennis samples, keeping both the estimator and
# the object returned by fit() under their own names.
estimator = Id3Estimator()
fitEstimator = estimator.fit(data, target)

# Render the learned tree as text with the play-tennis feature names
ID3Model = export_text(
    fitEstimator.tree_,
    feature_names=feature_names_var,
)

print("Tree:", ID3Model, sep="\n")

Tree:

outlook <=0.50: Yes (4) 
outlook >0.50
|   humidity <=0.50
|   |   temp <=1.50: No (2) 
|   |   temp >1.50
|   |   |   wind <=0.50: No (1) 
|   |   |   wind >0.50: No (1/1) 
|   humidity >0.50
|   |   wind <=0.50
|   |   |   temp <=1.00: No (1) 
|   |   |   temp >1.00: Yes (1) 
|   |   wind >0.50: Yes (3) 



**Outlook:**
- outlook <= 0.50, artinya outlook = Overcast
- outlook <= 1.50, artinya outlook = Rain
- selain itu, artinya outlook = Sunny

<br> **Temp:**
- temp <= 0.50, artinya temp = Cool
- temp <= 1.50 atau <= 1.00, artinya temp = Hot
- selain itu, artinya temp = Mild

<br> **Wind**
- wind <= 0.50, artinya wind = Strong
- selain itu, artinya wind = Weak

<br> **Humidity**
- humidity <= 0.50, artinya humidity = High
- selain itu, artinya humidity = Normal

**Berikut adalah arti setiap kode angka pada setiap atribut:**

In [12]:
# Show the (feature name, code table) pairs ordered alphabetically by feature name
print([(name, dictOfValues[name]) for name in sorted(dictOfValues)])

[('humidity', [(0, 'High'), (1, 'Normal')]), ('outlook', [(0, 'Overcast'), (1, 'Rain'), (2, 'Sunny')]), ('temp', [(0, 'Cool'), (1, 'Hot'), (2, 'Mild')]), ('wind', [(0, 'Strong'), (1, 'Weak')])]
