In [1]:
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score
from random import randint

In [None]:
# 1. Load the dataset from the CSV file in the working directory.
data = pd.read_csv(filepath_or_buffer='drug200.csv')

In [None]:
# 2. Turn the nominal attributes into 0/1 indicator columns.
# 'Sex' and 'BP' drop their first level (drop_first=True), while
# 'Cholesterol' and 'Drug' keep one indicator column per level.
data_encoded = pd.get_dummies(
    pd.get_dummies(data, columns=['Sex', 'BP'], drop_first=True, dtype=int),
    columns=['Cholesterol', 'Drug'],
    dtype=int,
)


In [None]:
# 3. Separate the attributes (x) from the one-hot encoded classes (y).
class_names = [
    'Drug_drugA',
    'Drug_drugB',
    'Drug_drugC',
    'Drug_drugX',
    'Drug_DrugY',
]
x = data_encoded.drop(columns=class_names)
y = data_encoded.loc[:, class_names]

# Keep the column labels around for plotting and for decoding predictions;
# from here on class_names is the pandas Index of y's columns.
feature_names, class_names = x.columns, y.columns

# Show the feature matrix as the cell output.
x

In [5]:
# 4. Split the dataset into training (70%) and test (30%) sets.
# A fixed seed replaces the former randint(0, 100): with a random seed the
# split -- and therefore the tree and every metric computed below -- changed
# on each run, so results could not be reproduced with Restart & Run All.
SPLIT_SEED = 42
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.3, random_state=SPLIT_SEED
)

In [None]:
# 5. Fit a decision tree using entropy (information gain) as the split criterion.
# random_state is pinned because scikit-learn randomly permutes the features
# at every split; without it, ties between equally good splits may be broken
# differently from run to run, producing a different tree.
# NOTE(review): y_train holds one 0/1 column per drug, so the tree is fitted
# against a 2-D target -- confirm this multi-output setup is intended.
clf = DecisionTreeClassifier(criterion="entropy", random_state=42)
clf.fit(x_train, y_train)

In [None]:
# 6. Visualise the fitted decision tree and save the figure to disk.
fig, ax = plt.subplots(figsize=(20, 10))
plot_tree(
    clf,
    filled=True,
    # feature_names / class_names are pandas Index objects; some scikit-learn
    # releases validate these parameters as lists and reject an Index, so
    # plain lists are passed to work across versions.
    feature_names=list(feature_names),
    class_names=list(class_names),
    ax=ax,
)
# Save before plt.show(), which may release the figure once it is displayed.
fig.savefig("decision_tree.png", dpi=300, bbox_inches='tight')

plt.show()

In [8]:
# 7. Classify the test data and wrap the predictions in a DataFrame
# (default integer row and column labels).
y_pred = pd.DataFrame(clf.predict(x_test))

In [None]:
# 8. Undo get_dummies: collapse each 0/1 indicator row back to one label.
# The matrix product multiplies every indicator by its column name
# (0 * name == '', 1 * name == name) and concatenates the pieces, so a row
# with a single 1 becomes that column's name.
# NOTE(review): assumes every row has exactly one 1 -- an all-zero row would
# decode to '' and a row with several 1s to concatenated names.
y_pred = y_pred @ class_names
y_test = y_test @ class_names

In [None]:
# 9. Evaluate the model on the test set.

# Confusion matrix. Rows/columns follow the order of class_names, the same
# labels y_test and y_pred were decoded to. The previous `classes_resposta`
# is not defined anywhere in the notebook and raised NameError on a fresh
# kernel.
conf_matrix = confusion_matrix(y_test, y_pred, labels=list(class_names))
print("Matriz de Confusão:\n", conf_matrix)

# Accuracy: fraction of test samples whose predicted label matches exactly.
accuracy = accuracy_score(y_test, y_pred)
print("Acurácia:", accuracy)

# Precision, averaged over classes weighted by each class's support.
precision = precision_score(y_test, y_pred, average='weighted')
print("Precisão:", precision)

# Recall, weighted the same way.
recall = recall_score(y_test, y_pred, average='weighted')
print("Revocação:", recall)

# F1 score (F-measure), weighted the same way.
f1 = f1_score(y_test, y_pred, average='weighted')
print("Medida-F:", f1)