Se necessario utilize estes comandos para instalar as bibliotecas

In [1]:
# !pip install pandas
# !pip install chefboost

In [2]:
import pandas as pd
from chefboost import Chefboost as chef

In [3]:
# Dados fornecidos
data = [
    ["Ruim", "Alta", "Nenhuma", "<15000", "Alto"],
    ["Desconhecida", "Alta", "Nenhuma", ">=15000 a <=35000", "Alto"],
    ["Desconhecida", "Baixa", "Nenhuma", ">=15000 a <=35000", "Moderado"],
    ["Desconhecida", "Baixa", "Nenhuma", ">35000", "Alto"],
    ["Desconhecida", "Baixa", "Nenhuma", ">35000", "Baixo"],
    ["Desconhecida", "Baixa", "Adequada", ">35000", "Baixo"],
    ["Ruim", "Baixa", "Nenhuma", "<15000", "Alto"],
    ["Ruim", "Baixa", "Adequada", ">35000", "Moderado"],
    ["Boa", "Baixa", "Nenhuma", ">35000", "Baixo"],
    ["Boa", "Alta", "Adequada", ">35000", "Baixo"],
    ["Boa", "Alta", "Nenhuma", "<15000", "Alto"],
    ["Boa", "Alta", "Nenhuma", ">=15000 a <=35000", "Moderado"],
    ["Boa", "Alta", "Nenhuma", ">35000", "Baixo"],
    ["Ruim", "Alta", "Nenhuma", ">=15000 a <=35000", "Alto"]
]

# Convertendo em um DataFrame
df = pd.DataFrame(data, columns=["Historia do credito", "Divida", "Garantias", "Renda Anual", "Risco"])
df[:3]

Unnamed: 0,Historia do credito,Divida,Garantias,Renda Anual,Risco
0,Ruim,Alta,Nenhuma,<15000,Alto
1,Desconhecida,Alta,Nenhuma,>=15000 a <=35000,Alto
2,Desconhecida,Baixa,Nenhuma,>=15000 a <=35000,Moderado


Separando dados de treinamento e validacao

In [4]:
# Renomeando a coluna alvo para "Decision" por regra da biblioteca chefboost
df = df.rename(columns={"Risco": "Decision"})

# Separando os dados de treinamento e validação
train_df = df.iloc[:-3]  # Todas as linhas exceto as últimas 3
validation_df = df.iloc[-3:]  # Apenas as últimas 3 linhas


train_df.head()
validation_df.head()

Unnamed: 0,Historia do credito,Divida,Garantias,Renda Anual,Decision
11,Boa,Alta,Nenhuma,>=15000 a <=35000,Moderado
12,Boa,Alta,Nenhuma,>35000,Baixo
13,Ruim,Alta,Nenhuma,>=15000 a <=35000,Alto


Treinando dos algoritmos

In [5]:
_ = chef.fit(train_df, {'algorithm': 'ID3'}, 'Decision', validation_df)

[INFO]:  1 CPU cores will be allocated in parallel running
ID3  tree is going to be built...
-------------------------
finished in  0.8611965179443359  seconds
-------------------------
Evaluate  train set
-------------------------
Accuracy:  90.9090909090909 % on  11  instances
Labels:  ['Alto' 'Moderado' 'Baixo']
Confusion matrix:  [[5, 0, 1], [0, 2, 0], [0, 0, 3]]
Decision  Alto  => Accuray:  90.9091 %, Precision:  83.3333 %, Recall:  100.0 %, F1:  90.9091 %
Decision  Moderado  => Accuray:  100.0 %, Precision:  100.0 %, Recall:  100.0 %, F1:  100.0 %
Decision  Baixo  => Accuray:  90.9091 %, Precision:  100.0 %, Recall:  75.0 %, F1:  85.7143 %
-------------------------
Evaluate  validation set
-------------------------
Accuracy:  66.66666666666667 % on  3  instances
Labels:  ['Moderado' 'Baixo' 'Alto']
Confusion matrix:  [[0, 0, 0], [0, 1, 0], [1, 0, 1]]
Decision  Moderado  => Accuray:  66.6667 %, Precision:  0.0 %, Recall:  0.0 %, F1:  0.0 %
Decision  Baixo  => Accuray:  100.0 %, Pr

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Prediction'] = predictions


In [6]:
_ = chef.fit(train_df, {'algorithm': 'C4.5'}, 'Decision', validation_df)

[INFO]:  1 CPU cores will be allocated in parallel running
C4.5  tree is going to be built...
-------------------------
finished in  0.9826149940490723  seconds
-------------------------
Evaluate  train set
-------------------------
Accuracy:  90.9090909090909 % on  11  instances
Labels:  ['Alto' 'Moderado' 'Baixo']
Confusion matrix:  [[5, 0, 1], [0, 2, 0], [0, 0, 3]]
Decision  Alto  => Accuray:  90.9091 %, Precision:  83.3333 %, Recall:  100.0 %, F1:  90.9091 %
Decision  Moderado  => Accuray:  100.0 %, Precision:  100.0 %, Recall:  100.0 %, F1:  100.0 %
Decision  Baixo  => Accuray:  90.9091 %, Precision:  100.0 %, Recall:  75.0 %, F1:  85.7143 %
['Historia do credito', 'Divida', 'Garantias', 'Renda Anual', 'Prediction', 'Decision']
-------------------------
Evaluate  validation set
-------------------------
Accuracy:  66.66666666666667 % on  3  instances
Labels:  ['Moderado' 'Baixo' 'Alto']
Confusion matrix:  [[0, 0, 0], [0, 1, 0], [1, 0, 1]]
Decision  Moderado  => Accuray:  66.6667 %

In [7]:
_ = chef.fit(train_df, {'algorithm': 'CART'}, 'Decision', validation_df)

[INFO]:  1 CPU cores will be allocated in parallel running
CART  tree is going to be built...
-------------------------
finished in  0.8767619132995605  seconds
-------------------------
Evaluate  train set
-------------------------
Accuracy:  90.9090909090909 % on  11  instances
Labels:  ['Alto' 'Moderado' 'Baixo']
Confusion matrix:  [[5, 0, 1], [0, 2, 0], [0, 0, 3]]
Decision  Alto  => Accuray:  90.9091 %, Precision:  83.3333 %, Recall:  100.0 %, F1:  90.9091 %
Decision  Moderado  => Accuray:  100.0 %, Precision:  100.0 %, Recall:  100.0 %, F1:  100.0 %
Decision  Baixo  => Accuray:  90.9091 %, Precision:  100.0 %, Recall:  75.0 %, F1:  85.7143 %
['Historia do credito', 'Divida', 'Garantias', 'Renda Anual', 'Prediction', 'Decision']
-------------------------
Evaluate  validation set
-------------------------
Accuracy:  66.66666666666667 % on  3  instances
Labels:  ['Moderado' 'Baixo' 'Alto']
Confusion matrix:  [[0, 0, 0], [0, 1, 0], [1, 0, 1]]
Decision  Moderado  => Accuray:  66.6667 %