In [1]:
import numpy as np
import pandas as pd
import os
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt

In [2]:
# Location of the toy loan dataset, relative to the notebook's working directory.
src_folder = "data"
data_file = "loan-toy.csv"
data_path = os.path.join(src_folder, data_file)

In [3]:
# Load the raw loan records from the CSV file into a DataFrame.
df = pd.read_csv(filepath_or_buffer=data_path)

In [4]:
# Show the raw table to inspect the string categories before they are encoded.
df

Unnamed: 0,Hemat,Kerja,Penghasilan,Pertemanan,Lunas
0,?,Tidak,Rendah,Jauh,Tidak
1,Tidak,Tidak,Tinggi,Jauh,Tidak
2,?,Ya,Tinggi,Jauh,Ya
3,Ya,Ya,Sedang,Sahabat,Ya
4,?,Ya,Sedang,Kolega,Ya
5,Tidak,Ya,Rendah,Sahabat,Tidak
6,Tidak,Ya,Sedang,Sahabat,Tidak
7,?,Tidak,Tinggi,Kolega,Tidak


In [5]:
# Convert the categorical string columns into integer codes for the classifier.
# One mapping per column; in 'Hemat' the '?' marker (presumably an unknown
# answer -- TODO confirm) is kept as its own category with code 2.
encodings = {
    'Hemat': {'?': 2, 'Ya': 1, 'Tidak': 0},
    'Kerja': {'Ya': 1, 'Tidak': 0},
    'Penghasilan': {'Rendah': 0, 'Sedang': 1, 'Tinggi': 2},
    'Pertemanan': {'Jauh': 0, 'Kolega': 1, 'Sahabat': 2},
    'Lunas': {'Tidak': 0, 'Ya': 1},
}

for column, d in encodings.items():
    # Skip columns that already hold only the target codes, so re-running this
    # cell does not map the encoded integers to NaN.
    if df[column].isin(list(d.values())).all():
        continue

    # Series.map turns values missing from the dict into NaN without warning;
    # fail loudly instead so a typo or new category in the CSV is noticed here.
    unknown = set(df[column].unique()) - set(d)
    if unknown:
        raise ValueError(
            "Column %r contains values with no encoding: %s"
            % (column, sorted(unknown, key=str))
        )

    df[column] = df[column].map(d)

df

Unnamed: 0,Hemat,Kerja,Penghasilan,Pertemanan,Lunas
0,2,0,0,0,0
1,0,0,2,0,0
2,2,1,2,0,1
3,1,1,1,2,1
4,2,1,1,1,1
5,0,1,0,2,0
6,0,1,1,2,0
7,2,0,2,1,0


In [6]:
# Split the frame by position: the last column is the target, everything
# before it is a feature.
columns = df.columns.tolist()
features, label = columns[:-1], columns[-1:]

# X is the 2-D feature matrix; y is the target flattened to a 1-D vector
# (label is a one-element list, so df[label] is a single-column frame).
X = df[features].to_numpy()
y = df[label].to_numpy().flatten()

In [7]:
# Train Decision Tree
# Splits are scored with the entropy criterion. random_state is pinned because
# scikit-learn randomly permutes the candidate features at each split, so ties
# between equally good splits could otherwise yield a different tree on each
# run of the notebook.
dtree = DecisionTreeClassifier(criterion="entropy", random_state=0)
dtree = dtree.fit(X, y)

In [9]:
import graphviz

# Export the fitted tree as Graphviz DOT text (out_file=None makes
# export_graphviz return the text instead of writing a file), labelling the
# split nodes with the feature column names.
dot_data = tree.export_graphviz(
    dtree,
    out_file=None,
    feature_names=features,
)
graph = graphviz.Source(source=dot_data)

# Render the graph to disk; the returned output path is what this cell displays.
graph.render(filename="loan")

'loan.pdf'

In [10]:
# Predict on the full training matrix (the same rows the tree was fitted on).
y_pred = dtree.predict(X)

In [11]:
# Training accuracy: percentage of training rows whose prediction equals the label.
matches = (y == y_pred)
num_correct_pred = matches.sum()
train_acc = 100 * num_correct_pred / len(y)
print("Traininc acc: %.2f, %i out of %i are correctly predicted " % (train_acc, num_correct_pred, len(y)))

Traininc acc: 100.00, 8 out of 8 are correctly predicted 
