**Atualizando a biblioteca para plotagem de gráficos**


In [None]:
!pip -q install plotly --upgrade
!pip -q install yellowbrick

**Importando bibliotecas**

In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
from sklearn.tree import DecisionTreeClassifier

**Abrindo um arquivo CSV do drive**

In [None]:
# Columns to load, in the order the positional slicing and encoding cells of
# this notebook expect (target 'Conclusao' last, 'Tipo' at index 8).
colunas = ['Alternativo', 'Bar', 'Sex/Sab', 'Fome', 'Cliente', 'Preco',
           'Chuva', 'Res', 'Tipo', 'Tempo', 'Conclusao']
# read_csv ignores the order of `usecols` and keeps the file's own column
# order, so the frame is re-indexed with the same list to pin the order.
base = pd.read_csv('restaurante2.csv', usecols=colunas)[colunas]

In [None]:
# Render the whole DataFrame as loaded from the CSV.
base

In [None]:
# First three rows.
base.head(3)

In [None]:
# Last two rows.
base.tail(2)

**Contando quantidade de instâncias**


In [None]:
# Distinct values of the target column and the number of rows holding each one.
np.unique(base['Conclusao'], return_counts=True)

In [None]:
# Bar chart with one bar per target value, showing how many rows fall in each.
sns.countplot(data=base, x='Conclusao')

**Separando os atributos de entrada e de classe**

In [None]:
# Feature matrix: the first ten columns of the frame, taken by position and
# converted to a NumPy array.
X_prev = base.iloc[:, :10].values

In [None]:
# Inspect the feature matrix before any encoding has been applied.
X_prev

In [None]:
# Column at index 5 — presumably 'Preco' if the frame keeps the order listed in usecols; TODO confirm.
X_prev[:,5]

In [None]:
# Target vector: the column at position 10 of the frame, as a NumPy array.
y_classe = base.iloc[:, 10].values

In [None]:
# Inspect the target values.
y_classe

**Tratamento de dados categóricos**

> *LabelEncoder - Vamos tratar os dados categóricos substituindo cada categoria por um código inteiro (0, 1, 2, etc.)*



In [None]:
from sklearn.preprocessing import LabelEncoder

In [None]:
# Column at index 5 as it looks before label encoding.
X_prev[:,5]

In [None]:
# Whole feature matrix before label encoding.
X_prev

In [None]:
# Single encoder instance that the next cell refits on each column in turn.
lb = LabelEncoder()

In [None]:
# Label-encode every column except index 8, which is one-hot encoded in a later
# step. The single encoder `lb` is refitted on each column, so after the loop it
# only holds the mapping learned from the last column (index 9).
for indice in (0, 1, 2, 3, 4, 5, 6, 7, 9):
    X_prev[:, indice] = lb.fit_transform(X_prev[:, indice])

In [None]:
# Feature matrix after label encoding (column 8 is still un-encoded).
X_prev



> OneHotEncoder - Agora vamos binarizar atributos não ordinais

**Contando quantas opções de resposta tem cada atributo**

In [None]:
# Number of distinct values found in the 'Cliente' column.
len(np.unique(base['Cliente']))

In [None]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

In [None]:
# Feature matrix right before the one-hot step.
X_prev

In [None]:
# Columns 0 through 8 only (the slice end, 9, is exclusive).
X_prev[:,0:9]

In [None]:
# One-hot encode the column at index 8 and pass every other column through
# unchanged; the encoded columns come first in the transformed output.
onehotencoder_restaurante = ColumnTransformer(
    transformers=[('OneHot', OneHotEncoder(), [8])],
    remainder='passthrough',
)

In [None]:
# Replace X_prev with its one-hot-expanded version.
# NOTE: this cell is not idempotent. Running it a second time would one-hot
# encode index 8 of the already transformed matrix; re-run from the cell that
# builds X_prev instead.
X_prev= onehotencoder_restaurante.fit_transform(X_prev)

In [None]:
# Feature matrix after the one-hot step.
X_prev

In [None]:
# (rows, columns) of the transformed matrix.
X_prev.shape

**Método de amostragem Holdout**

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Features that will be handed to the train/test split.
X_prev

In [None]:
# Targets that will be handed to the train/test split.
y_classe

In [None]:
# Number of target values.
y_classe.shape

In [None]:
# Hold-out split: 20% of the rows go to the test set, and the fixed seed keeps
# the partition reproducible. The split is not stratified by class.
X_treino, X_teste, y_treino, y_teste = train_test_split(
    X_prev,
    y_classe,
    test_size=0.20,
    random_state=23,
)

In [None]:
# (rows, columns) of the training features.
X_treino.shape

In [None]:
# (rows, columns) of the test features.
X_teste.shape

**Vamos experimentar agora o algoritmo Decision Tree?**

In [None]:
# Entropy-based decision tree. A fixed random_state makes the fitted tree
# reproducible: scikit-learn permutes the candidate features at every split, so
# ties between equally good splits can otherwise be broken differently per run.
# The seed matches the one already used for the train/test split.
modelo = DecisionTreeClassifier(criterion='entropy', random_state=23)
# fit() returns the estimator itself, so `Y` is just another name for `modelo`
# (kept so that existing references to `Y` keep working).
Y = modelo.fit(X_treino, y_treino)

**Vamos testar o modelo?**

In [None]:
# Predicted class for each row of the held-out test set.
previsoes = modelo.predict(X_teste)

**Será que o modelo acertou?**

In [None]:
# True classes of the test rows, for comparison with the predictions.
y_teste

In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
# Fraction of test rows whose predicted class matches the true class.
accuracy_score(y_true=y_teste, y_pred=previsoes)

In [None]:
from yellowbrick.classifier import ConfusionMatrix
# Raw confusion matrix as an array (rows: true classes, columns: predicted).
# `confusion_matrix` is the scikit-learn function imported in the accuracy cell.
confusion_matrix(y_true=y_teste, y_pred=previsoes)

In [None]:
# Yellowbrick visualizer wrapping the decision tree.
cm = ConfusionMatrix(modelo)
# NOTE(review): presumably an already-fitted estimator is not retrained here — confirm against Yellowbrick's `is_fitted` behaviour.
cm.fit(X_treino, y_treino)
# Score against the held-out test rows.
cm.score(X_teste, y_teste)

In [None]:
# Per-class precision, recall and F1 on the test set, printed as plain text.
print(classification_report(y_true=y_teste, y_pred=previsoes))

**Vamos ver a árvore?**

In [None]:
from sklearn import tree
# Draw the fitted tree on the current axes, then show the figure.
# plt.show() does not accept the drawn artists as an argument: a positional
# value is forwarded to the backend's show(), which either rejects it or
# misreads it as an option, depending on the backend in use.
tree.plot_tree(modelo)
plt.show()

**O que você achou? Podemos fazê-la melhor! :-)**

In [None]:
from sklearn import tree
# Column labels in the order produced by the one-hot step: first the four
# one-hot columns (presumably the categories of 'Tipo' — verify against the
# encoder's categories), then the passed-through features.
previsores = [
    'Frances', 'Hamburguer', 'Italiano', 'Tailandes',
    'Alternativo', 'Bar', 'SextaSabado', 'Fome', 'Cliente',
    'Preco', 'Chuva', 'Res', 'Tempo',
]
# Large square canvas so the node text stays readable.
figura, eixos = plt.subplots(figsize=(13, 13))
tree.plot_tree(modelo, feature_names=previsores, filled=True, ax=eixos);