**Importando as bibliotecas básicas**

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.offline as py
import plotly.graph_objs as go
from plotly import tools
py.init_notebook_mode(connected=True)
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.metrics import precision_score
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score, classification_report, confusion_matrix
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

**Devido o Plotly estar sendo utilizado no Google Collab, precisaremos definir a função abaixo e chamá-la sempre que quisermos exibir um gráfico**

In [None]:
def configure_plotly_browser_state():
  import IPython
  display(IPython.core.display.HTML('''
        <script src="/static/components/requirejs/require.js"></script>
        <script>
          requirejs.config({
            paths: {
              base: '/static/base',
              plotly: 'https://cdn.plot.ly/plotly-1.5.1.min.js?noext',
            },
          });
        </script>
        '''))

**Lendo o conjunto de dados**

In [None]:
cars = pd.read_csv('.../car_evaluation.csv')
cars.shape

(1727, 7)

**Atribuindo os títulos das colunas**

In [None]:
cars.columns = ['Buying', 'Maint', 'Doors','Persons','LugBoot','Safety','Evaluation']

**Visão geral dos dados**

In [None]:
cars.sample(10)

Unnamed: 0,Buying,Maint,Doors,Persons,LugBoot,Safety,Evaluation
1354,low,vhigh,4,2,med,high,unacc
454,high,vhigh,2,more,med,high,unacc
710,high,med,4,4,small,low,unacc
879,med,vhigh,2,4,big,med,acc
1541,low,med,3,2,med,low,unacc
1416,low,high,2,4,med,med,acc
1699,low,low,4,more,big,high,vgood
195,vhigh,high,5more,2,big,med,unacc
1184,med,med,5more,more,big,low,unacc
613,high,high,4,more,small,high,acc


**Visão geral dos dados aceitáveis**

In [None]:
a_df=[]
for i in cars.values:
    if i[6] == 'acc':
        a_df.append(i)

df=pd.DataFrame(a_df)
df.sample(10)

Unnamed: 0,0,1,2,3,4,5,6
244,med,high,5more,4,med,high,acc
69,vhigh,low,5more,more,med,high,acc
23,vhigh,med,4,more,med,high,acc
373,low,med,5more,more,small,med,acc
303,low,vhigh,3,4,med,high,acc
97,high,high,4,more,big,high,acc
101,high,high,5more,4,big,med,acc
106,high,high,5more,more,big,med,acc
337,low,high,2,more,med,high,acc
103,high,high,5more,more,small,high,acc


**Verificando se há algum valor ausente no dataset**

In [None]:
cars.isnull().sum()

Buying        0
Maint         0
Doors         0
Persons       0
LugBoot       0
Safety        0
Evaluation    0
dtype: int64

**Como foi visto, não há velores ausentes. Agora, faremos uma análise analítica no dataset.**

In [None]:
cars.describe()

Unnamed: 0,Buying,Maint,Doors,Persons,LugBoot,Safety,Evaluation
count,1727,1727,1727,1727,1727,1727,1727
unique,4,4,4,3,3,3,4
top,low,low,3,4,big,high,unacc
freq,432,432,432,576,576,576,1209


**Como pode ser visto, nossos dados são categóricos**

In [None]:
cars.columns

Index(['Buying', 'Maint', 'Doors', 'Persons', 'LugBoot', 'Safety',
       'Evaluation'],
      dtype='object')

**Agora, vamos listar o número de carros em cada classe de avaliação**

In [None]:
cars['Evaluation'].value_counts().sort_index()

acc       384
good       69
unacc    1209
vgood      65
Name: Evaluation, dtype: int64

**Gráfico "Distribuição de carros avaliados"**

In [None]:
configure_plotly_browser_state()
fig = {
  "data": [
    {
      "values": [1210,384,69,65],
      "labels": [
        "Unacceptable",
        "Acceptable",
        "Good",
        "Very Good"
      ],
      "domain": {"column": 0},
      "name": "Car Evaluation",
      "hoverinfo":"label+percent+name",
      "hole": .6,
      "type": "pie"
    }],
  "layout": {
        #"title":"Distribuição de carros avaliados",
        "grid": {"rows": 1, "columns": 1},
        "annotations": [
            {
                "font": {
                    "size": 36
                },
                "showarrow": False,
                "text": "",
                "x": 0.5,
                "y": 0.5
            }
        ]
    }
}
print("\n\n\t\t\t\tDistribuição de carros avaliados")
py.iplot(fig, filename='cars_donut')



				Distribuição de carros avaliados


**Substituindo dados não numéricos de colunas numéricas**

In [None]:
cars.Doors.replace(('5more'),('5'),inplace=True)
cars.Persons.replace(('more'),('5'),inplace=True)

**Preparando dados para gráficos de correlação das features com a classe**

In [None]:
features = cars.iloc[:,:-1]
features[:5]
a=[]
for i in features:
    a.append(features[i].value_counts())

In [None]:
buy = pd.crosstab(cars['Buying'], cars['Evaluation'])
mc = pd.crosstab(cars['Maint'], cars['Evaluation'])
drs = pd.crosstab(cars['Doors'], cars['Evaluation'])
prsn = pd.crosstab(cars['Persons'], cars['Evaluation'])
lb = pd.crosstab(cars['LugBoot'], cars['Evaluation'])
sfty = pd.crosstab(cars['Safety'], cars['Evaluation'])
buy

Evaluation,acc,good,unacc,vgood
Buying,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
high,108,0,324,0
low,89,46,258,39
med,115,23,268,26
vhigh,72,0,359,0


**Gráfico "Preço de venda vs avaliação"**

In [None]:
configure_plotly_browser_state()
data = [
    go.Bar(
        x=a[0].index, # assign x as the dataframe column 'x'
        y=buy['unacc'],
        name='Unacceptable'
    ),
    go.Bar(
        x=a[0].index,
        y=buy['acc'],
        name='Acceptable'
    ),
    go.Bar(
        x=a[0].index,
        y=buy['good'],
        name='Good'
    ),
    go.Bar(
        x=a[0].index,
        y=buy['vgood'],
        name='Very Good'
    )

]

layout = go.Layout(
    barmode='stack',
    #title='Preço de venda vs avaliação'
)

fig = go.Figure(data=data, layout=layout)
print("\n\n\t\t\t\tPreço de venda vs avaliação")
py.iplot(fig, filename='distri')



				Preço de venda vs avaliação


**Gráfico "Custo de manutenção vs avaliação"**

In [None]:
configure_plotly_browser_state()
data = [
    go.Bar(
        x=a[1].index, # assign x as the dataframe column 'x'
        y=mc['unacc'],
        name='Unacceptable'
    ),
    go.Bar(
        x=a[1].index,
        y=mc['acc'],
        name='Acceptable'
    ),
    go.Bar(
        x=a[1].index,
        y=mc['good'],
        name='Good'
    ),
    go.Bar(
        x=a[1].index,
        y=mc['vgood'],
        name='Very Good'
    )

]

layout = go.Layout(
    barmode='stack',
    #title='Custo de manutenção vs avaliação'
)
fig = go.Figure(data=data, layout=layout)
print("\n\n\t\t\t\tCusto de manutenção vs avaliação")
py.iplot(fig, filename='cars_donut')



				Custo de manutenção vs avaliação


**Gráfico "Portas vs Avaliação"**

In [None]:
configure_plotly_browser_state()
data = [
    go.Bar(
        x=a[2].index, # assign x as the dataframe column 'x'
        y=drs['unacc'],
        name='Unacceptable'
    ),
    go.Bar(
        x=a[2].index,
        y=drs['acc'],
        name='Acceptable'
    ),
    go.Bar(
        x=a[2].index,
        y=drs['good'],
        name='Good'
    ),
    go.Bar(
        x=a[2].index,
        y=drs['vgood'],
        name='Very Good'
    )

]

layout = go.Layout(
    barmode='stack',
    #title='Portas vs Avaliação'
)

fig = go.Figure(data=data, layout=layout)
print("\n\n\t\t\t\tPortas vs Avaliação")
py.iplot(fig, filename='cars_donut')



				Portas vs Avaliação


**Gráfico "Número de passageiros vs avaliação"**

In [None]:
configure_plotly_browser_state()
data = [
    go.Bar(
        x=a[3].index, # assign x as the dataframe column 'x'
        y=prsn['unacc'],
        name='Unacceptable'
    ),
    go.Bar(
        x=a[3].index,
        y=prsn['acc'],
        name='Acceptable'
    ),
    go.Bar(
        x=a[3].index,
        y=prsn['good'],
        name='Good'
    ),
    go.Bar(
        x=a[3].index,
        y=prsn['vgood'],
        name='Very Good'
    )

]

layout = go.Layout(
    barmode='stack',
    #title='Número de passageiros vs avaliação'
)

fig = go.Figure(data=data, layout=layout)
print("\n\n\t\t\t\tNúmero de passageiros vs avaliação")
py.iplot(fig, filename='cars_donut')



				Número de passageiros vs avaliação


**Gráfico "Porta-malas vs avaliação"**

In [None]:
configure_plotly_browser_state()
data = [
    go.Bar(
        x=a[4].index, # assign x as the dataframe column 'x'
        y=lb['unacc'],
        name='Unacceptable'
    ),
    go.Bar(
        x=a[4].index,
        y=lb['acc'],
        name='Acceptable'
    ),
    go.Bar(
        x=a[4].index,
        y=lb['good'],
        name='Good'
    ),
    go.Bar(
        x=a[4].index,
        y=lb['vgood'],
        name='Very Good'
    )

]

layout = go.Layout(
    barmode='stack',
    #title='Porta-malas vs avaliação'
)

fig = go.Figure(data=data, layout=layout)
print("\n\n\t\t\t\tPorta-malas vs avaliação")
py.iplot(fig, filename='cars_donut')



				Porta-malas vs avaliação


**Gráfico "Segurança vs Avaliação"**

In [None]:
configure_plotly_browser_state()
data = [
    go.Bar(
        x=a[5].index, # assign x as the dataframe column 'x'
        y=sfty['unacc'],
        name='Unacceptable'
    ),
    go.Bar(
        x=a[5].index,
        y=sfty['acc'],
        name='Acceptable'
    ),
    go.Bar(
        x=a[5].index,
        y=sfty['good'],
        name='Good'
    ),
    go.Bar(
        x=a[5].index,
        y=sfty['vgood'],
        name='Very Good'
    )

]

layout = go.Layout(
    barmode='stack',
    #title='Segurança vs Avaliação'
)

fig = go.Figure(data=data, layout=layout)
print("\n\n\t\t\t\tSegurança vs Avaliação")
py.iplot(fig, filename='cars_donut')



				Segurança vs Avaliação


## **Preparando dados utilizando a função Dummies**

**Dividindo dados em X e y**

In [None]:
x = cars.iloc[:, :-1]
y = cars.iloc[:, 6]

**Nomeando colunas X e y**

In [None]:
x.columns = ['Buying', 'Maint', 'Doors','Persons','LugBoot','Safety']
y.columns=['Evaluation']
x.head()

Unnamed: 0,Buying,Maint,Doors,Persons,LugBoot,Safety
0,vhigh,vhigh,2,2,small,med
1,vhigh,vhigh,2,2,small,high
2,vhigh,vhigh,2,2,med,low
3,vhigh,vhigh,2,2,med,med
4,vhigh,vhigh,2,2,med,high


**Codificando dados categóricos**

In [None]:
x = pd.get_dummies(x, prefix_sep='_', drop_first=True)
x.sample(5)

Unnamed: 0,Buying_low,Buying_med,Buying_vhigh,Maint_low,Maint_med,Maint_vhigh,Doors_3,Doors_4,Doors_5,Persons_4,Persons_5,LugBoot_med,LugBoot_small,Safety_low,Safety_med
1373,1,0,0,0,0,1,0,1,0,0,1,0,0,1,0
1115,0,1,0,0,1,0,1,0,0,1,0,0,1,1,0
1083,0,1,0,0,1,0,0,0,0,0,0,1,0,0,1
27,0,0,1,0,0,1,1,0,0,0,0,0,1,0,1
954,0,1,0,0,0,1,0,0,1,1,0,0,1,0,1


**Descrição do y**

In [None]:
y.describe()

count      1727
unique        4
top       unacc
freq       1209
Name: Evaluation, dtype: object

**Dividindo dados em treino e teste**

In [None]:
x=x.values
y=y.values
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.25, random_state = 42, stratify=y)

**Todos os dados são categóricos, assim, não é viável normalizá-los nem padronizá-los**

In [None]:
"""
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

sc1 = StandardScaler()
sc2 = MinMaxScaler()

x_train_std = sc1.fit_transform(x_train)
x_test_std = sc1.transform(x_test)

x_train_minmax = sc2.fit_transform(x_train)
x_test_minmax = sc2.transform(x_test)

"""

'\nfrom sklearn.preprocessing import StandardScaler\nfrom sklearn.preprocessing import MinMaxScaler\n\nsc1 = StandardScaler()\nsc2 = MinMaxScaler()\n\nx_train_std = sc1.fit_transform(x_train)\nx_test_std = sc1.transform(x_test)\n\nx_train_minmax = sc2.fit_transform(x_train)\nx_test_minmax = sc2.transform(x_test)\n\n'

**Demonstração de como ficou os dados de treino com o X codificado**

In [None]:
x_train[:5]

array([[0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0],
       [1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0],
       [1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 1],
       [0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0]], dtype=uint8)

In [None]:
y_train[:5]

array(['acc', 'unacc', 'acc', 'unacc', 'acc'], dtype=object)

### **Aplicação dos modelos de classificação**

**Classificando com KNN**

In [None]:
clf = KNeighborsClassifier(n_neighbors = 5)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_KNN = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

Training Accuracy:  0.9096525096525097
Testing Accuracy:  0.8425925925925926
[[ 65   2  29   0]
 [ 14   0   3   0]
 [  7   0 296   0]
 [  9   3   1   3]]
              precision    recall  f1-score   support

         acc       0.68      0.68      0.68        96
        good       0.00      0.00      0.00        17
       unacc       0.90      0.98      0.94       303
       vgood       1.00      0.19      0.32        16

    accuracy                           0.84       432
   macro avg       0.65      0.46      0.48       432
weighted avg       0.82      0.84      0.82       432



**Classificando com Logistic Regression**

In [None]:
clf = LogisticRegression(random_state = 0)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_LR=f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred)) 

Training Accuracy:  0.9073359073359073
Testing Accuracy:  0.8912037037037037
[[ 76   4  16   0]
 [  9   7   0   1]
 [ 13   0 290   0]
 [  4   0   0  12]]
              precision    recall  f1-score   support

         acc       0.75      0.79      0.77        96
        good       0.64      0.41      0.50        17
       unacc       0.95      0.96      0.95       303
       vgood       0.92      0.75      0.83        16

    accuracy                           0.89       432
   macro avg       0.81      0.73      0.76       432
weighted avg       0.89      0.89      0.89       432



**Classificando com SVC linear**

In [None]:
clf = SVC(kernel = 'linear', random_state = 0)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_SVC_Linear=f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

Training Accuracy:  0.9305019305019305
Testing Accuracy:  0.9259259259259259
[[ 85   3   7   1]
 [  3  13   0   1]
 [ 12   0 291   0]
 [  5   0   0  11]]
              precision    recall  f1-score   support

         acc       0.81      0.89      0.85        96
        good       0.81      0.76      0.79        17
       unacc       0.98      0.96      0.97       303
       vgood       0.85      0.69      0.76        16

    accuracy                           0.93       432
   macro avg       0.86      0.82      0.84       432
weighted avg       0.93      0.93      0.93       432



**Classificando com SVC rbf**

In [None]:
clf = SVC(kernel = 'rbf', random_state = 0)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_SVC_rbf=f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

Training Accuracy:  0.9722007722007722
Testing Accuracy:  0.9652777777777778
[[ 93   0   3   0]
 [  4  12   0   1]
 [  5   0 298   0]
 [  2   0   0  14]]
              precision    recall  f1-score   support

         acc       0.89      0.97      0.93        96
        good       1.00      0.71      0.83        17
       unacc       0.99      0.98      0.99       303
       vgood       0.93      0.88      0.90        16

    accuracy                           0.97       432
   macro avg       0.95      0.88      0.91       432
weighted avg       0.97      0.97      0.96       432



**Classificando com Naive Bayes**

In [None]:
clf = GaussianNB()
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_NB=f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

Training Accuracy:  0.4833976833976834
Testing Accuracy:  0.5046296296296297
[[ 41  40   0  15]
 [  0  16   0   1]
 [ 94  42 145  22]
 [  0   0   0  16]]
              precision    recall  f1-score   support

         acc       0.30      0.43      0.35        96
        good       0.16      0.94      0.28        17
       unacc       1.00      0.48      0.65       303
       vgood       0.30      1.00      0.46        16

    accuracy                           0.50       432
   macro avg       0.44      0.71      0.43       432
weighted avg       0.79      0.50      0.56       432



**Classificando com Decision Tree**

In [None]:
clf = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_DT=f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

Training Accuracy:  1.0
Testing Accuracy:  0.9398148148148148
[[ 87   0   9   0]
 [  7   7   3   0]
 [  4   0 299   0]
 [  1   2   0  13]]
              precision    recall  f1-score   support

         acc       0.88      0.91      0.89        96
        good       0.78      0.41      0.54        17
       unacc       0.96      0.99      0.97       303
       vgood       1.00      0.81      0.90        16

    accuracy                           0.94       432
   macro avg       0.90      0.78      0.83       432
weighted avg       0.94      0.94      0.94       432



**Classificando com Random Forest**

In [None]:
clf = RandomForestClassifier(n_estimators = 25, criterion = 'entropy', random_state = 0)
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_RF=f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

Training Accuracy:  1.0
Testing Accuracy:  0.8958333333333334
[[ 81   2  13   0]
 [ 10   3   3   1]
 [  8   0 295   0]
 [  7   1   0   8]]
              precision    recall  f1-score   support

         acc       0.76      0.84      0.80        96
        good       0.50      0.18      0.26        17
       unacc       0.95      0.97      0.96       303
       vgood       0.89      0.50      0.64        16

    accuracy                           0.90       432
   macro avg       0.78      0.62      0.67       432
weighted avg       0.89      0.90      0.89       432



#### **Comparando Algoritmos com F1 Score Macro**

In [None]:
models=['KNN', 'Logistic Regression', 'SVC linear', 'SVC rbf', 'Naive Bayes', 'Decision Tree', 'Random Forest']
fig = go.Figure(data=[
    go.Bar(name='f1_score', x=models, y=[f1_KNN, f1_LR, f1_SVC_Linear, f1_SVC_rbf, f1_NB, f1_DT, f1_RF ])])
configure_plotly_browser_state()
print("\n\n\t\t\t\tF1 Score")
fig.show()



				F1 Score


### **Preparando dados utilizando a função Factorize**

**Codificando dados categóricos y**

In [None]:
cars_f = cars.copy()
cars_f.columns = ['Buying', 'Maint', 'Doors','Persons','LugBoot','Safety','Evaluation']
cars_f['Evaluation'],class_names = pd.factorize(cars_f['Evaluation'])
print(class_names)
print(cars_f['Evaluation'].unique())

Index(['unacc', 'acc', 'vgood', 'good'], dtype='object')
[0 1 2 3]


**Codificando dados categóricos X**

In [None]:
cars_f['Buying'],_ = pd.factorize(cars_f['Buying'])
cars_f['Maint'],_ = pd.factorize(cars_f['Maint'])
cars_f['Doors'],_ = pd.factorize(cars_f['Doors'])
cars_f['Persons'],_ = pd.factorize(cars_f['Persons'])
cars_f['LugBoot'],_ = pd.factorize(cars_f['LugBoot'])
cars_f['Safety'],_ = pd.factorize(cars_f['Safety'])
cars_f.head()

Unnamed: 0,Buying,Maint,Doors,Persons,LugBoot,Safety,Evaluation
0,0,0,0,0,0,0,0
1,0,0,0,0,0,1,0
2,0,0,0,0,1,2,0
3,0,0,0,0,1,0,0
4,0,0,0,0,1,1,0


**Dividindo dados em X e y**

In [None]:
x = cars_f.iloc[:,:-1]
y = cars_f.iloc[:,-1]

**Dividindo dados em treino e teste**

In [None]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42, stratify=y)

**Padronizando os dados de X**

In [None]:
sc = StandardScaler()

x_train_std = sc.fit_transform(x_train)
x_test_std = sc.transform(x_test)

**Normalizando os dados de X**

In [None]:
sc = MinMaxScaler()

x_train_minmax = sc.fit_transform(x_train)
x_test_minmax = sc.transform(x_test)

### **Aplicação dos modelos de classificação**

**Classificando com KNN**

In [None]:
clf = KNeighborsClassifier(n_neighbors = 5)

print("\n\n=====================================Original=============================\n\n")
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_KNN_f = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Padronizado=============================\n\n")
clf.fit(x_train_std, y_train)
y_pred = clf.predict(x_test_std)
f1_KNN_f_std = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_std, y_train))
print("Testing Accuracy: ", clf.score(x_test_std, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Normalizados=============================\n\n")
clf.fit(x_train_minmax, y_train)
y_pred = clf.predict(x_test_minmax)
f1_KNN_f_minmax = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_minmax, y_train))
print("Testing Accuracy: ", clf.score(x_test_minmax, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))





Training Accuracy:  0.9436293436293436
Testing Accuracy:  0.9120370370370371
[[297   6   0   0]
 [ 15  80   0   1]
 [  4   3   9   0]
 [  3   6   0   8]]
              precision    recall  f1-score   support

           0       0.93      0.98      0.95       303
           1       0.84      0.83      0.84        96
           2       1.00      0.56      0.72        16
           3       0.89      0.47      0.62        17

    accuracy                           0.91       432
   macro avg       0.92      0.71      0.78       432
weighted avg       0.91      0.91      0.91       432





Training Accuracy:  0.9915057915057915
Testing Accuracy:  0.9467592592592593
[[297   6   0   0]
 [  8  88   0   0]
 [  0   4  12   0]
 [  1   4   0  12]]
              precision    recall  f1-score   support

           0       0.97      0.98      0.98       303
           1       0.86      0.92      0.89        96
           2       1.00      0.75      0.86        16
           3       1.00      0.7

**Classificando com Logistic Regression**

In [None]:
clf = LogisticRegression(random_state = 0) 

print("\n\n=====================================Original=============================\n\n")
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_LR_f = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Padronizado=============================\n\n")
clf.fit(x_train_std, y_train)
y_pred = clf.predict(x_test_std)
f1_LR_f_std = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_std, y_train))
print("Testing Accuracy: ", clf.score(x_test_std, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Normalizados=============================\n\n")
clf.fit(x_train_minmax, y_train)
y_pred = clf.predict(x_test_minmax)
f1_LR_f_minmax = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_minmax, y_train))
print("Testing Accuracy: ", clf.score(x_test_minmax, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))





Training Accuracy:  0.7683397683397684
Testing Accuracy:  0.7824074074074074
[[283  14   4   2]
 [ 49  45   0   2]
 [  6   5   4   1]
 [  6   4   1   6]]
              precision    recall  f1-score   support

           0       0.82      0.93      0.87       303
           1       0.66      0.47      0.55        96
           2       0.44      0.25      0.32        16
           3       0.55      0.35      0.43        17

    accuracy                           0.78       432
   macro avg       0.62      0.50      0.54       432
weighted avg       0.76      0.78      0.76       432





Training Accuracy:  0.7691119691119691
Testing Accuracy:  0.7824074074074074
[[283  14   4   2]
 [ 49  45   0   2]
 [  6   5   4   1]
 [  6   4   1   6]]
              precision    recall  f1-score   support

           0       0.82      0.93      0.87       303
           1       0.66      0.47      0.55        96
           2       0.44      0.25      0.32        16
           3       0.55      0.3

**Classificando com SVC Linear**

In [None]:
clf = SVC(kernel = 'linear', random_state = 0)

print("\n\n=====================================Original=============================\n\n")
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_SVC_Linear_f = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Padronizado=============================\n\n")
clf.fit(x_train_std, y_train)
y_pred = clf.predict(x_test_std)
f1_SVC_Linear_f_std = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_std, y_train))
print("Testing Accuracy: ", clf.score(x_test_std, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Normalizados=============================\n\n")
clf.fit(x_train_minmax, y_train)
y_pred = clf.predict(x_test_minmax)
f1_SVC_Linear_f_minmax = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_minmax, y_train))
print("Testing Accuracy: ", clf.score(x_test_minmax, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))





Training Accuracy:  0.7806949806949807
Testing Accuracy:  0.7916666666666666
[[289  13   1   0]
 [ 54  40   0   2]
 [  9   1   6   0]
 [  8   2   0   7]]
              precision    recall  f1-score   support

           0       0.80      0.95      0.87       303
           1       0.71      0.42      0.53        96
           2       0.86      0.38      0.52        16
           3       0.78      0.41      0.54        17

    accuracy                           0.79       432
   macro avg       0.79      0.54      0.61       432
weighted avg       0.78      0.79      0.77       432





Training Accuracy:  0.7806949806949807
Testing Accuracy:  0.7939814814814815
[[289  13   1   0]
 [ 53  41   0   2]
 [  9   1   6   0]
 [  8   2   0   7]]
              precision    recall  f1-score   support

           0       0.81      0.95      0.87       303
           1       0.72      0.43      0.54        96
           2       0.86      0.38      0.52        16
           3       0.78      0.4

**Classificando com SVC rbf**

In [None]:
clf = SVC(kernel = 'rbf', random_state = 0)

print("\n\n=====================================Original=============================\n\n")
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_SVC_rbf_f = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Padronizado=============================\n\n")
clf.fit(x_train_std, y_train)
y_pred = clf.predict(x_test_std)
f1_SVC_rbf_f_std = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_std, y_train))
print("Testing Accuracy: ", clf.score(x_test_std, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Normalizados=============================\n\n")
clf.fit(x_train_minmax, y_train)
y_pred = clf.predict(x_test_minmax)
f1_SVC_rbf_f_minmax = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_minmax, y_train))
print("Testing Accuracy: ", clf.score(x_test_minmax, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))





Training Accuracy:  0.9691119691119691
Testing Accuracy:  0.9513888888888888
[[294   9   0   0]
 [  3  93   0   0]
 [  0   3  13   0]
 [  2   3   1  11]]
              precision    recall  f1-score   support

           0       0.98      0.97      0.98       303
           1       0.86      0.97      0.91        96
           2       0.93      0.81      0.87        16
           3       1.00      0.65      0.79        17

    accuracy                           0.95       432
   macro avg       0.94      0.85      0.89       432
weighted avg       0.95      0.95      0.95       432





Training Accuracy:  0.9768339768339769
Testing Accuracy:  0.9537037037037037
[[294   9   0   0]
 [  1  95   0   0]
 [  0   3  13   0]
 [  1   5   1  10]]
              precision    recall  f1-score   support

           0       0.99      0.97      0.98       303
           1       0.85      0.99      0.91        96
           2       0.93      0.81      0.87        16
           3       1.00      0.5

**Classificando com Naive Bayes**

In [None]:
clf = GaussianNB()

print("\n\n=====================================Original=============================\n\n")
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_NB_f = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Padronizado=============================\n\n")
clf.fit(x_train_std, y_train)
y_pred = clf.predict(x_test_std)
f1_NB_f_std = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_std, y_train))
print("Testing Accuracy: ", clf.score(x_test_std, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Normalizados=============================\n\n")
clf.fit(x_train_minmax, y_train)
y_pred = clf.predict(x_test_minmax)
f1_NB_f_minmax = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_minmax, y_train))
print("Testing Accuracy: ", clf.score(x_test_minmax, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))





Training Accuracy:  0.7243243243243244
Testing Accuracy:  0.7361111111111112
[[266  14  23   0]
 [ 31  34  29   2]
 [  0   0  16   0]
 [  0   6   9   2]]
              precision    recall  f1-score   support

           0       0.90      0.88      0.89       303
           1       0.63      0.35      0.45        96
           2       0.21      1.00      0.34        16
           3       0.50      0.12      0.19        17

    accuracy                           0.74       432
   macro avg       0.56      0.59      0.47       432
weighted avg       0.80      0.74      0.74       432





Training Accuracy:  0.7181467181467182
Testing Accuracy:  0.7337962962962963
[[265  14  24   0]
 [ 29  34  31   2]
 [  0   0  16   0]
 [  0   6   9   2]]
              precision    recall  f1-score   support

           0       0.90      0.87      0.89       303
           1       0.63      0.35      0.45        96
           2       0.20      1.00      0.33        16
           3       0.50      0.1

**Classificando com Decision Tree**

In [None]:
clf = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)

print("\n\n=====================================Original=============================\n\n")
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_DT_f = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Padronizado=============================\n\n")
clf.fit(x_train_std, y_train)
y_pred = clf.predict(x_test_std)
f1_DT_f_std = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_std, y_train))
print("Testing Accuracy: ", clf.score(x_test_std, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Normalizados=============================\n\n")
clf.fit(x_train_minmax, y_train)
y_pred = clf.predict(x_test_minmax)
f1_DT_f_minmax = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_minmax, y_train))
print("Testing Accuracy: ", clf.score(x_test_minmax, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))





Training Accuracy:  1.0
Testing Accuracy:  0.9768518518518519
[[301   2   0   0]
 [  5  90   0   1]
 [  0   1  15   0]
 [  0   1   0  16]]
              precision    recall  f1-score   support

           0       0.98      0.99      0.99       303
           1       0.96      0.94      0.95        96
           2       1.00      0.94      0.97        16
           3       0.94      0.94      0.94        17

    accuracy                           0.98       432
   macro avg       0.97      0.95      0.96       432
weighted avg       0.98      0.98      0.98       432





Training Accuracy:  1.0
Testing Accuracy:  0.9768518518518519
[[301   2   0   0]
 [  5  90   0   1]
 [  0   1  15   0]
 [  0   1   0  16]]
              precision    recall  f1-score   support

           0       0.98      0.99      0.99       303
           1       0.96      0.94      0.95        96
           2       1.00      0.94      0.97        16
           3       0.94      0.94      0.94        17

    acc

**Classificando com Random Forest**

In [None]:
clf = RandomForestClassifier(n_estimators = 25, criterion = 'entropy', random_state = 0)

print("\n\n=====================================Original=============================\n\n")
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_RF_f = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Padronizado=============================\n\n")
clf.fit(x_train_std, y_train)
y_pred = clf.predict(x_test_std)
f1_RF_f_std = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_std, y_train))
print("Testing Accuracy: ", clf.score(x_test_std, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Normalizados=============================\n\n")
clf.fit(x_train_minmax, y_train)
y_pred = clf.predict(x_test_minmax)
f1_RF_f_minmax = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_minmax, y_train))
print("Testing Accuracy: ", clf.score(x_test_minmax, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))





Training Accuracy:  1.0
Testing Accuracy:  0.9537037037037037
[[299   4   0   0]
 [  4  90   1   1]
 [  0   3  13   0]
 [  4   2   1  10]]
              precision    recall  f1-score   support

           0       0.97      0.99      0.98       303
           1       0.91      0.94      0.92        96
           2       0.87      0.81      0.84        16
           3       0.91      0.59      0.71        17

    accuracy                           0.95       432
   macro avg       0.91      0.83      0.86       432
weighted avg       0.95      0.95      0.95       432





Training Accuracy:  1.0
Testing Accuracy:  0.9583333333333334
[[299   4   0   0]
 [  4  90   1   1]
 [  0   3  13   0]
 [  1   3   1  12]]
              precision    recall  f1-score   support

           0       0.98      0.99      0.99       303
           1       0.90      0.94      0.92        96
           2       0.87      0.81      0.84        16
           3       0.92      0.71      0.80        17

    acc

#### **Comparando Algoritmos com F1 score Macro**

In [None]:
configure_plotly_browser_state()

trace1 = go.Bar(x = ['KNN', 'Logistic Regression', 'SVC linear', 'SVC rbf', 'Naive Bayes',  'Decision Tree',  'Random Forest'],
                y = [f1_KNN_f, f1_LR_f, f1_SVC_Linear_f, f1_SVC_rbf_f, f1_NB_f, f1_DT_f, f1_RF_f ],
                name = 'Original',)
trace2 = go.Bar(x = ['KNN', 'Logistic Regression', 'SVC linear', 'SVC rbf', 'Naive Bayes',  'Decision Tree',  'Random Forest'],
                y = [f1_KNN_f_std,  f1_LR_f_std,  f1_SVC_Linear_f_std,   f1_SVC_rbf_f_std,  f1_NB_f_std,  f1_DT_f_std,  f1_RF_f_std ],
                name = 'Padronizado',)
trace3 = go.Bar(x = ['KNN', 'Logistic Regression', 'SVC linear', 'SVC rbf', 'Naive Bayes',  'Decision Tree',  'Random Forest'],
                y = [f1_KNN_f_minmax,  f1_LR_f_minmax,  f1_SVC_Linear_f_minmax,   f1_SVC_rbf_f_minmax,  f1_NB_f_minmax,  f1_DT_f_minmax,  f1_RF_f_minmax ],
                name = 'Normalizado',)
data = [trace1, trace2, trace3]
py.iplot(data)

## **Comparando Algoritmos com Dummies e Factorize utilizando F1 score Macro**

In [None]:
configure_plotly_browser_state()

trace1 = go.Bar(x = ['KNN', 'Logistic Regression', 'SVC linear', 'SVC rbf', 'Naive Bayes',  'Decision Tree',  'Random Forest'],
                y = [f1_KNN, f1_LR,  f1_SVC_Linear,  f1_SVC_rbf, f1_NB,  f1_DT, f1_RF ],
                name = 'Dummies',)
trace2 = go.Bar(x = ['KNN', 'Logistic Regression', 'SVC linear', 'SVC rbf', 'Naive Bayes',  'Decision Tree',  'Random Forest'],
                y = [f1_KNN_f,  f1_LR_f,  f1_SVC_Linear_f,   f1_SVC_rbf_f,  f1_NB_f,  f1_DT_f,  f1_RF_f ],
                name = 'Factorize',)
trace3 = go.Bar(x = ['KNN', 'Logistic Regression', 'SVC linear', 'SVC rbf', 'Naive Bayes',  'Decision Tree',  'Random Forest'],
                y = [f1_KNN_f_std,  f1_LR_f_std,  f1_SVC_Linear_f_std,   f1_SVC_rbf_f_std,  f1_NB_f_std,  f1_DT_f_std,  f1_RF_f_std ],
                name = 'Fac_Padronizado',)
trace4 = go.Bar(x = ['KNN', 'Logistic Regression', 'SVC linear', 'SVC rbf', 'Naive Bayes',  'Decision Tree',  'Random Forest'],
                y = [f1_KNN_f_minmax,  f1_LR_f_minmax,  f1_SVC_Linear_f_minmax,   f1_SVC_rbf_f_minmax,  f1_NB_f_minmax,  f1_DT_f_minmax,  f1_RF_f_minmax ],
                name = 'Fac_Normalizado',)
data = [trace1, trace2, trace3, trace4]
py.iplot(data)

### **Preparando dados utilizando a função Replace**

**Nomeando as colunas**

In [None]:
cars.columns = ['Buying', 'Maint', 'Doors','Persons','LugBoot','Safety','Evaluation']
cars.head()

Unnamed: 0,Buying,Maint,Doors,Persons,LugBoot,Safety,Evaluation
0,vhigh,vhigh,2,2,small,med,unacc
1,vhigh,vhigh,2,2,small,high,unacc
2,vhigh,vhigh,2,2,med,low,unacc
3,vhigh,vhigh,2,2,med,med,unacc
4,vhigh,vhigh,2,2,med,high,unacc


**Codificando os dados considerando a ordem**

In [None]:
cars['Buying'].replace(('low', 'med', 'high', 'vhigh'), (0, 1, 2, 3), inplace = True)
cars['Maint'].replace(('low', 'med', 'high', 'vhigh'), (0, 1, 2, 3), inplace = True)
cars['LugBoot'].replace(('small', 'med', 'big'), (0, 1, 2), inplace = True)
cars['Safety'].replace(('low', 'med', 'high'), (0, 1, 2), inplace = True)
cars['Evaluation'].replace(('unacc', 'acc', 'good', 'vgood'), (0, 1, 2, 3), inplace = True)
cars

Unnamed: 0,Buying,Maint,Doors,Persons,LugBoot,Safety,Evaluation
0,3,3,2,2,0,1,0
1,3,3,2,2,0,2,0
2,3,3,2,2,1,0,0
3,3,3,2,2,1,1,0
4,3,3,2,2,1,2,0
...,...,...,...,...,...,...,...
1722,0,0,5,5,1,1,2
1723,0,0,5,5,1,2,3
1724,0,0,5,5,2,0,0
1725,0,0,5,5,2,1,2


**Dividindo em x e y**

In [None]:
x = cars.iloc[:,:-1]
y = cars.iloc[:,-1]

**Dividindo em treino e teste**

In [None]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25, random_state=42, stratify=y)

**Padronizando os dados**

In [None]:
sc = StandardScaler()

x_train_std = sc.fit_transform(x_train)
x_test_std = sc.transform(x_test)

**Normalizando os dados**

In [None]:
sc = MinMaxScaler()

x_train_minmax = sc.fit_transform(x_train)
x_test_minmax = sc.transform(x_test)

### **Aplicação dos modelos de classificação**

**Classificando com KNN**

In [None]:
clf = KNeighborsClassifier(n_neighbors = 5)

print("\n\n=====================================Original=============================\n\n")
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_KNN_r = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Padronizado=============================\n\n")
clf.fit(x_train_std, y_train)
y_pred = clf.predict(x_test_std)
f1_KNN_r_std = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_std, y_train))
print("Testing Accuracy: ", clf.score(x_test_std, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Normalizados=============================\n\n")
clf.fit(x_train_minmax, y_train)
y_pred = clf.predict(x_test_minmax)
f1_KNN_r_minmax = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_minmax, y_train))
print("Testing Accuracy: ", clf.score(x_test_minmax, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))





Training Accuracy:  0.9799227799227799
Testing Accuracy:  0.9490740740740741
[[298   5   0   0]
 [  9  87   0   0]
 [  0   5  12   0]
 [  0   3   0  13]]
              precision    recall  f1-score   support

           0       0.97      0.98      0.98       303
           1       0.87      0.91      0.89        96
           2       1.00      0.71      0.83        17
           3       1.00      0.81      0.90        16

    accuracy                           0.95       432
   macro avg       0.96      0.85      0.90       432
weighted avg       0.95      0.95      0.95       432





Training Accuracy:  0.9907335907335907
Testing Accuracy:  0.9629629629629629
[[295   8   0   0]
 [  1  95   0   0]
 [  0   4  11   2]
 [  0   1   0  15]]
              precision    recall  f1-score   support

           0       1.00      0.97      0.98       303
           1       0.88      0.99      0.93        96
           2       1.00      0.65      0.79        17
           3       0.88      0.9

**Classificando com Logistic Regression**

In [None]:
clf = LogisticRegression(random_state = 0) 

print("\n\n=====================================Original=============================\n\n")
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_LR_r = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Padronizado=============================\n\n")
clf.fit(x_train_std, y_train)
y_pred = clf.predict(x_test_std)
f1_LR_r_std = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_std, y_train))
print("Testing Accuracy: ", clf.score(x_test_std, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Normalizados=============================\n\n")
clf.fit(x_train_minmax, y_train)
y_pred = clf.predict(x_test_minmax)
f1_LR_r_minmax = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_minmax, y_train))
print("Testing Accuracy: ", clf.score(x_test_minmax, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))





Training Accuracy:  0.8617760617760618
Testing Accuracy:  0.8217592592592593
[[279  21   3   0]
 [ 32  62   1   1]
 [  0  13   4   0]
 [  0   6   0  10]]
              precision    recall  f1-score   support

           0       0.90      0.92      0.91       303
           1       0.61      0.65      0.63        96
           2       0.50      0.24      0.32        17
           3       0.91      0.62      0.74        16

    accuracy                           0.82       432
   macro avg       0.73      0.61      0.65       432
weighted avg       0.82      0.82      0.82       432





Training Accuracy:  0.8625482625482626
Testing Accuracy:  0.8194444444444444
[[279  22   2   0]
 [ 32  61   1   2]
 [  0  13   4   0]
 [  0   6   0  10]]
              precision    recall  f1-score   support

           0       0.90      0.92      0.91       303
           1       0.60      0.64      0.62        96
           2       0.57      0.24      0.33        17
           3       0.83      0.6

**Classificando com SVC Linear**

In [None]:
clf = SVC(kernel = 'linear', random_state = 0)

print("\n\n=====================================Original=============================\n\n")
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_SVC_Linear_r = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Padronizado=============================\n\n")
clf.fit(x_train_std, y_train)
y_pred = clf.predict(x_test_std)
f1_SVC_Linear_r_std = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_std, y_train))
print("Testing Accuracy: ", clf.score(x_test_std, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Normalizados=============================\n\n")
clf.fit(x_train_minmax, y_train)
y_pred = clf.predict(x_test_minmax)
f1_SVC_Linear_r_minmax = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_minmax, y_train))
print("Testing Accuracy: ", clf.score(x_test_minmax, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))





Training Accuracy:  0.8833976833976834
Testing Accuracy:  0.8495370370370371
[[282  21   0   0]
 [ 31  64   1   0]
 [  3   4   9   1]
 [  0   4   0  12]]
              precision    recall  f1-score   support

           0       0.89      0.93      0.91       303
           1       0.69      0.67      0.68        96
           2       0.90      0.53      0.67        17
           3       0.92      0.75      0.83        16

    accuracy                           0.85       432
   macro avg       0.85      0.72      0.77       432
weighted avg       0.85      0.85      0.85       432





Training Accuracy:  0.8810810810810811
Testing Accuracy:  0.8472222222222222
[[281  22   0   0]
 [ 30  65   1   0]
 [  3   5   9   0]
 [  0   4   1  11]]
              precision    recall  f1-score   support

           0       0.89      0.93      0.91       303
           1       0.68      0.68      0.68        96
           2       0.82      0.53      0.64        17
           3       1.00      0.6

**Classificando com SVC rbt**

In [None]:
clf = SVC(kernel = 'rbf', random_state = 0)

print("\n\n=====================================Original=============================\n\n")
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_SVC_rbf_r = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Padronizado=============================\n\n")
clf.fit(x_train_std, y_train)
y_pred = clf.predict(x_test_std)
f1_SVC_rbf_r_std = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_std, y_train))
print("Testing Accuracy: ", clf.score(x_test_std, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Normalizados=============================\n\n")
clf.fit(x_train_minmax, y_train)
y_pred = clf.predict(x_test_minmax)
f1_SVC_rbf_r_minmax = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_minmax, y_train))
print("Testing Accuracy: ", clf.score(x_test_minmax, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))





Training Accuracy:  0.9382239382239382
Testing Accuracy:  0.9212962962962963
[[289  14   0   0]
 [  5  89   2   0]
 [  0   8   8   1]
 [  0   4   0  12]]
              precision    recall  f1-score   support

           0       0.98      0.95      0.97       303
           1       0.77      0.93      0.84        96
           2       0.80      0.47      0.59        17
           3       0.92      0.75      0.83        16

    accuracy                           0.92       432
   macro avg       0.87      0.78      0.81       432
weighted avg       0.93      0.92      0.92       432





Training Accuracy:  0.976061776061776
Testing Accuracy:  0.9560185185185185
[[293  10   0   0]
 [  1  95   0   0]
 [  0   5  10   2]
 [  0   1   0  15]]
              precision    recall  f1-score   support

           0       1.00      0.97      0.98       303
           1       0.86      0.99      0.92        96
           2       1.00      0.59      0.74        17
           3       0.88      0.94

**Classificando com Naive Bayes**

In [None]:
clf = GaussianNB()

print("\n\n=====================================Original=============================\n\n")
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_NB_r = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Padronizado=============================\n\n")
clf.fit(x_train_std, y_train)
y_pred = clf.predict(x_test_std)
f1_NB_r_std = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_std, y_train))
print("Testing Accuracy: ", clf.score(x_test_std, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Normalizados=============================\n\n")
clf.fit(x_train_minmax, y_train)
y_pred = clf.predict(x_test_minmax)
f1_NB_r_minmax = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_minmax, y_train))
print("Testing Accuracy: ", clf.score(x_test_minmax, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))





Training Accuracy:  0.7760617760617761
Testing Accuracy:  0.7569444444444444
[[276  19   0   8]
 [ 26  35   3  32]
 [  1   9   0   7]
 [  0   0   0  16]]
              precision    recall  f1-score   support

           0       0.91      0.91      0.91       303
           1       0.56      0.36      0.44        96
           2       0.00      0.00      0.00        17
           3       0.25      1.00      0.41        16

    accuracy                           0.76       432
   macro avg       0.43      0.57      0.44       432
weighted avg       0.77      0.76      0.75       432





Training Accuracy:  0.7714285714285715
Testing Accuracy:  0.7546296296296297
[[276  18   0   9]
 [ 26  34   3  33]
 [  1   9   0   7]
 [  0   0   0  16]]
              precision    recall  f1-score   support

           0       0.91      0.91      0.91       303
           1       0.56      0.35      0.43        96
           2       0.00      0.00      0.00        17
           3       0.25      1.0

**Classificando com Decision Tree**

In [None]:
clf = DecisionTreeClassifier(criterion = 'entropy', random_state = 0)

print("\n\n=====================================Original=============================\n\n")
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_DT_r = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Padronizado=============================\n\n")
clf.fit(x_train_std, y_train)
y_pred = clf.predict(x_test_std)
f1_DT_r_std = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_std, y_train))
print("Testing Accuracy: ", clf.score(x_test_std, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Normalizados=============================\n\n")
clf.fit(x_train_minmax, y_train)
y_pred = clf.predict(x_test_minmax)
f1_DT_r_minmax = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_minmax, y_train))
print("Testing Accuracy: ", clf.score(x_test_minmax, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))





Training Accuracy:  1.0
Testing Accuracy:  0.9699074074074074
[[301   2   0   0]
 [  7  88   1   0]
 [  0   2  15   0]
 [  0   1   0  15]]
              precision    recall  f1-score   support

           0       0.98      0.99      0.99       303
           1       0.95      0.92      0.93        96
           2       0.94      0.88      0.91        17
           3       1.00      0.94      0.97        16

    accuracy                           0.97       432
   macro avg       0.97      0.93      0.95       432
weighted avg       0.97      0.97      0.97       432





Training Accuracy:  1.0
Testing Accuracy:  0.9699074074074074
[[301   2   0   0]
 [  7  88   1   0]
 [  0   2  15   0]
 [  0   1   0  15]]
              precision    recall  f1-score   support

           0       0.98      0.99      0.99       303
           1       0.95      0.92      0.93        96
           2       0.94      0.88      0.91        17
           3       1.00      0.94      0.97        16

    acc

**Classificando com Random Forest**

In [None]:
clf = RandomForestClassifier(n_estimators = 25, criterion = 'entropy', random_state = 0)

print("\n\n=====================================Original=============================\n\n")
clf.fit(x_train, y_train)
y_pred = clf.predict(x_test)
f1_RF_r = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train, y_train))
print("Testing Accuracy: ", clf.score(x_test, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Padronizado=============================\n\n")
clf.fit(x_train_std, y_train)
y_pred = clf.predict(x_test_std)
f1_RF_r_std = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_std, y_train))
print("Testing Accuracy: ", clf.score(x_test_std, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))

print("\n\n=====================================Normalizados=============================\n\n")
clf.fit(x_train_minmax, y_train)
y_pred = clf.predict(x_test_minmax)
f1_RF_r_minmax = f1_score(y_test,y_pred, average='macro')
print("Training Accuracy: ",clf.score(x_train_minmax, y_train))
print("Testing Accuracy: ", clf.score(x_test_minmax, y_test))
cm = confusion_matrix(y_test, y_pred)
print(cm)
print(classification_report(y_test,y_pred))





Training Accuracy:  1.0
Testing Accuracy:  0.9675925925925926
[[298   5   0   0]
 [  2  92   1   1]
 [  0   2  13   2]
 [  0   1   0  15]]
              precision    recall  f1-score   support

           0       0.99      0.98      0.99       303
           1       0.92      0.96      0.94        96
           2       0.93      0.76      0.84        17
           3       0.83      0.94      0.88        16

    accuracy                           0.97       432
   macro avg       0.92      0.91      0.91       432
weighted avg       0.97      0.97      0.97       432





Training Accuracy:  1.0
Testing Accuracy:  0.9699074074074074
[[298   5   0   0]
 [  1  93   1   1]
 [  0   2  13   2]
 [  0   1   0  15]]
              precision    recall  f1-score   support

           0       1.00      0.98      0.99       303
           1       0.92      0.97      0.94        96
           2       0.93      0.76      0.84        17
           3       0.83      0.94      0.88        16

    acc

#### **Comparando Algoritmos com F1 score Macro**

In [None]:
configure_plotly_browser_state()

trace1 = go.Bar(x = ['KNN', 'Logistic Regression', 'SVC linear', 'SVC rbf', 'Naive Bayes',  'Decision Tree',  'Random Forest'],
                y = [f1_KNN_r, f1_LR_r, f1_SVC_Linear_r, f1_SVC_rbf_r, f1_NB_r, f1_DT_r, f1_RF_r ],
                name = 'Original',)
trace2 = go.Bar(x = ['KNN', 'Logistic Regression', 'SVC linear', 'SVC rbf', 'Naive Bayes',  'Decision Tree',  'Random Forest'],
                y = [f1_KNN_r_std,  f1_LR_r_std,  f1_SVC_Linear_r_std,   f1_SVC_rbf_r_std,  f1_NB_r_std,  f1_DT_r_std,  f1_RF_r_std ],
                name = 'Padronizado',)
trace3 = go.Bar(x = ['KNN', 'Logistic Regression', 'SVC linear', 'SVC rbf', 'Naive Bayes',  'Decision Tree',  'Random Forest'],
                y = [f1_KNN_r_minmax,  f1_LR_r_minmax,  f1_SVC_Linear_r_minmax,   f1_SVC_rbf_r_minmax,  f1_NB_r_minmax,  f1_DT_r_minmax,  f1_RF_r_minmax ],
                name = 'Normalizado',)
data = [trace1, trace2, trace3]
py.iplot(data)

## **Comparando Algoritmos com Dummies, Factorize e Replace utilizando F1 score Macro**

In [None]:
configure_plotly_browser_state()

trace1 = go.Bar(x = ['KNN', 'Logistic Regression', 'SVC linear', 'SVC rbf', 'Naive Bayes',  'Decision Tree',  'Random Forest'],
                y = [f1_KNN, f1_LR,  f1_SVC_Linear,  f1_SVC_rbf, f1_NB,  f1_DT, f1_RF ],
                name = 'Dummies',)
trace2 = go.Bar(x = ['KNN', 'Logistic Regression', 'SVC linear', 'SVC rbf', 'Naive Bayes',  'Decision Tree',  'Random Forest'],
                y = [f1_KNN_f,  f1_LR_f,  f1_SVC_Linear_f,   f1_SVC_rbf_f,  f1_NB_f,  f1_DT_f,  f1_RF_f ],
                name = 'Factorize',)
trace3 = go.Bar(x = ['KNN', 'Logistic Regression', 'SVC linear', 'SVC rbf', 'Naive Bayes',  'Decision Tree',  'Random Forest'],
                y = [f1_KNN_f_std,  f1_LR_f_std,  f1_SVC_Linear_f_std,   f1_SVC_rbf_f_std,  f1_NB_f_std,  f1_DT_f_std,  f1_RF_f_std ],
                name = 'Fac_Padronizado',)
trace4 = go.Bar(x = ['KNN', 'Logistic Regression', 'SVC linear', 'SVC rbf', 'Naive Bayes',  'Decision Tree',  'Random Forest'],
                y = [f1_KNN_f_minmax,  f1_LR_f_minmax,  f1_SVC_Linear_f_minmax,   f1_SVC_rbf_f_minmax,  f1_NB_f_minmax,  f1_DT_f_minmax,  f1_RF_f_minmax ],
                name = 'Fac_Normalizado',)
trace5 = go.Bar(x = ['KNN', 'Logistic Regression', 'SVC linear', 'SVC rbf', 'Naive Bayes',  'Decision Tree',  'Random Forest'],
                y = [f1_KNN_r, f1_LR_r, f1_SVC_Linear_r, f1_SVC_rbf_r, f1_NB_r, f1_DT_r, f1_RF_r ],
                name = 'Replace',)
trace6 = go.Bar(x = ['KNN', 'Logistic Regression', 'SVC linear', 'SVC rbf', 'Naive Bayes',  'Decision Tree',  'Random Forest'],
                y = [f1_KNN_r_std,  f1_LR_r_std,  f1_SVC_Linear_r_std,   f1_SVC_rbf_r_std,  f1_NB_r_std,  f1_DT_r_std,  f1_RF_r_std ],
                name = 'Rep_Padronizado',)
trace7 = go.Bar(x = ['KNN', 'Logistic Regression', 'SVC linear', 'SVC rbf', 'Naive Bayes',  'Decision Tree',  'Random Forest'],
                y = [f1_KNN_r_minmax,  f1_LR_r_minmax,  f1_SVC_Linear_r_minmax,   f1_SVC_rbf_r_minmax,  f1_NB_r_minmax,  f1_DT_r_minmax,  f1_RF_r_minmax ],
                name = 'Rep_Normalizado',)
data = [trace1, trace2, trace3, trace4, trace5, trace6, trace7]
py.iplot(data)