In [None]:
%matplotlib inline


# Graph export from Estimator


An example graph export of :class:`id3.id3.Id3Estimator` with
:file:`id3.export.export_graphviz`


$ dot -T png out.dot -o out.png

.. figure::  /_static/out.png
   :align:   center




In [2]:
from id3 import Id3Estimator, export_graphviz, export_text
import numpy as np
import pandas as pd
from sklearn import preprocessing

In [3]:
feature_names = ["age",
                 "gender",
                 "sector",
                 "degree"]

X = np.array([[45, "male", "private", "m"],
              [50, "female", "private", "m"],
              [61, "other", "public", "b"],
              [40, "male", "private", "none"],
              [34, "female", "private", "none"],
              [33, "male", "public", "none"],
              [43, "other", "private", "m"],
              [35, "male", "private", "m"],
              [34, "female", "private", "m"],
              [35, "male", "public", "m"],
              [34, "other", "public", "m"],
              [34, "other", "public", "b"],
              [34, "female", "public", "b"],
              [34, "male", "public", "b"],
              [34, "female", "private", "b"],
              [34, "male", "private", "b"],
              [34, "other", "private", "b"]])

y = np.array(["(30k,38k)",
              "(30k,38k)",
              "(30k,38k)",
              "(13k,15k)",
              "(13k,15k)",
              "(13k,15k)",
              "(23k,30k)",
              "(23k,30k)",
              "(23k,30k)",
              "(15k,23k)",
              "(15k,23k)",
              "(15k,23k)",
              "(15k,23k)",
              "(15k,23k)",
              "(23k,30k)",
              "(23k,30k)",
              "(23k,30k)"])

clf = Id3Estimator()
clf.fit(X, y, check_input=True)

export_graphviz(clf.tree_, "out.dot", feature_names)



<_io.TextIOWrapper name='out.dot' mode='w' encoding='utf8'>

In [6]:
print(X.dtype)
print(y.dtype)

<U21
<U9


In [2]:
# define features and target values
data = {
    'carrera': ['Comp', 'Neg', 'Mat', 'Meca'],
    'semestre': [5,6,7,8,9,10],
    'genero': ['H', 'M'],
    'color_de_pelo': ['Negro', 'Café', 'Rubio'],
    'estilo_de_pelo': ['Corto', 'Largo', 'Chino', 'Fleco'],
    'lentes': ['Sí', 'No'],
    'numero_de_nombres': [1, 2],
    'proyecto_1': ['Puzzle', 'GPS'],
    'nombre': []
}

# create an empty dataframe
data_df = pd.DataFrame(columns=data.keys())

In [3]:
le = preprocessing.LabelEncoder()
le.fit(['Comp', 'Neg', 'Mat', 'Meca','H', 'M','Negro', 'Café', 'Rubio','Corto', 'Largo', 'Chino', 'Fleco','Sí', 'No','Puzzle', 'GPS', '5','6','7','8','9','10', '1', '2'])

LabelEncoder()

In [9]:

np.random.seed(42)
# randomnly create 200 instances
for i in range(200):
    data_df.loc[i, 'carrera'] = le.transform([np.random.choice(data['carrera'], 1)[0]])[0],
    data_df.loc[i, 'semestre'] = le.transform([np.random.choice(data['semestre'], 1)[0]])[0],
    data_df.loc[i, 'genero'] = le.transform([np.random.choice(data['genero'], 1)[0]])[0],
    data_df.loc[i, 'color_de_pelo'] = le.transform([np.random.choice(data['color_de_pelo'], 1)[0]])[0],
    data_df.loc[i, 'estilo_de_pelo'] = le.transform([np.random.choice(data['estilo_de_pelo'], 1)[0]])[0],
    data_df.loc[i, 'lentes'] = le.transform([np.random.choice(data['lentes'], 1)[0]])[0],
    data_df.loc[i, 'numero_de_nombres'] = le.transform([np.random.choice(data['numero_de_nombres'], 1)[0]])[0],
    data_df.loc[i, 'proyecto_1'] = le.transform([np.random.choice(data['proyecto_1'], 1)[0]])[0],
    data_df.loc[i, 'nombre'] = 'Estudiante' + str(i)
data_df.head()

Unnamed: 0,carrera,semestre,genero,color_de_pelo,estilo_de_pelo,lentes,numero_de_nombres,proyecto_1,nombre
0,"(17,)","(6,)","(14,)","(23,)","(9,)","(21,)","(0,)","(22,)",Estudiante0
1,"(17,)","(4,)","(14,)","(23,)","(9,)","(24,)","(2,)","(22,)",Estudiante1
2,"(18,)","(5,)","(16,)","(20,)","(15,)","(21,)","(2,)","(13,)",Estudiante2
3,"(19,)","(4,)","(16,)","(20,)","(11,)","(21,)","(2,)","(13,)",Estudiante3
4,"(10,)","(6,)","(14,)","(20,)","(9,)","(24,)","(0,)","(13,)",Estudiante4


In [10]:
data_df['carrera'] = data_df['carrera'].astype(str).str.replace(r"\(|\)|,|'", '')
data_df['semestre'] = data_df['semestre'].astype(str).str.replace(r"\(|\)|,|'", '')
data_df['genero'] = data_df['genero'].astype(str).str.replace(r"\(|\)|,|'", '')
data_df['color_de_pelo'] = data_df['color_de_pelo'].astype(str).str.replace(r"\(|\)|,|'", '')
data_df['estilo_de_pelo'] = data_df['estilo_de_pelo'].astype(str).str.replace(r"\(|\)|,|'", '')
data_df['lentes'] = data_df['lentes'].astype(str).str.replace(r"\(|\)|,|'", '')
data_df['numero_de_nombres'] = data_df['numero_de_nombres'].astype(str).str.replace(r"\(|\)|,|'", '')
data_df['proyecto_1'] = data_df['proyecto_1'].astype(str).str.replace(r"\(|\)|,|'", '')

In [11]:
data_df.head()

Unnamed: 0,carrera,semestre,genero,color_de_pelo,estilo_de_pelo,lentes,numero_de_nombres,proyecto_1,nombre
0,17,6,14,23,9,21,0,22,Estudiante0
1,17,4,14,23,9,24,2,22,Estudiante1
2,18,5,16,20,15,21,2,13,Estudiante2
3,19,4,16,20,11,21,2,13,Estudiante3
4,10,6,14,20,9,24,0,13,Estudiante4


In [12]:
estimador = Id3Estimator()
estimador.fit(data_df.drop('nombre', axis=1), data_df['nombre'], check_input=True)

print(export_text(estimador.tree_, list(data.keys())))
export_graphviz(estimador.tree_, "arbol.dot", list(data.keys()))


carrera <=17.50
|   semestre <=4.50
|   |   proyecto_1 <=17.50
|   |   |   color_de_pelo <=21.50
|   |   |   |   estilo_de_pelo <=11.50
|   |   |   |   |   genero <=15.00
|   |   |   |   |   |   lentes <=22.50: Estudiante66 (1) 
|   |   |   |   |   |   lentes >22.50
|   |   |   |   |   |   |   numero_de_nombres <=1.00: Estudiante28 (1) 
|   |   |   |   |   |   |   numero_de_nombres >1.00: Estudiante105 (1) 
|   |   |   |   |   genero >15.00
|   |   |   |   |   |   lentes <=22.50: Estudiante156 (1) 
|   |   |   |   |   |   lentes >22.50: Estudiante32 (1) 
|   |   |   |   estilo_de_pelo >11.50
|   |   |   |   |   lentes <=22.50: Estudiante138 (1/2) 
|   |   |   |   |   lentes >22.50
|   |   |   |   |   |   numero_de_nombres <=1.00: Estudiante168 (1/1) 
|   |   |   |   |   |   numero_de_nombres >1.00
|   |   |   |   |   |   |   genero <=15.00: Estudiante7 (1) 
|   |   |   |   |   |   |   genero >15.00: Estudiante142 (1) 
|   |   |   color_de_pelo >21.50
|   |   |   |   numero_de_nombres 

<_io.TextIOWrapper name='arbol.dot' mode='w' encoding='utf8'>