In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    classification_report,
)
from sklearn.tree import plot_tree
import matplotlib.pyplot as plt

In [None]:
# Load the raw dataset from 'insurance.csv' (path is relative to the notebook's working directory).
base = pd.read_csv(filepath_or_buffer='insurance.csv')

In [None]:
# Remove the 'Unnamed: 0' column when it exists; errors='ignore' makes the
# drop a no-op when the column is absent, so no explicit membership test is needed.
base = base.drop(columns=['Unnamed: 0'], errors='ignore')

In [None]:
# Impute missing values column by column with the most frequent value.
# DataFrame.mode() can return several rows when a column has ties; row 0
# holds the first mode of each column, which is the value used for filling.
moda_por_coluna = base.mode().iloc[0]
base = base.fillna(value=moda_por_coluna)

In [None]:
# The column at position 7 is the target; every remaining column is a feature.
coluna_alvo = base.columns[7]
x = base.drop(columns=coluna_alvo).values
y = base.iloc[:, 7].values

In [None]:
# Label-encode, in place, only the feature columns that actually contain text.
#
# `x` comes from DataFrame.values, so with mixed column types it is a single
# object-dtype array: checking `x[:, i].dtype` says nothing about an individual
# column. Inspecting the values instead keeps numeric columns numeric (no
# lexicographic "10" < "2" ordering) and makes this cell safe to re-run, since
# already-encoded integer columns are skipped.
#
# A fresh encoder is fitted per column and stored in `label_encoders`
# (column position -> fitted encoder) so each mapping can be inverted later.
# `label_encoder` stays defined and ends up bound to the last encoder fitted.
label_encoder = LabelEncoder()
label_encoders = {}
for i in range(x.shape[1]):
    coluna = x[:, i]
    if any(isinstance(valor, str) for valor in coluna):
        label_encoder = LabelEncoder()
        # astype(str) normalises any non-string leftovers in a text column
        # so the encoder sees a homogeneous set of labels.
        x[:, i] = label_encoder.fit_transform(coluna.astype(str))
        label_encoders[i] = label_encoder

In [None]:
# x holds the independent variables (features); y is the dependent variable (target).
# 30% of the rows are held out for testing; the fixed random_state makes the split repeatable.
X_treinamento, X_teste, y_treinamento, y_teste = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=0,
)


In [None]:
# Earlier configuration, annotated "94" by the author
# (presumably the accuracy in % it reached -- TODO confirm):
# modelo = RandomForestClassifier(random_state=1, n_estimators=500)
#
# Current configuration, annotated "92" (same presumed meaning).
# Note: a tree limited to 12 leaves cannot be deeper than 11 levels,
# so max_depth=20 never becomes the binding constraint here.
modelo = RandomForestClassifier(
    n_estimators=500,
    max_depth=20,
    max_leaf_nodes=12,
    random_state=1,
)
modelo.fit(X_treinamento, y_treinamento)

In [None]:
# Plot a single decision tree taken from the fitted forest.
tree_index = 0
tree_to_visualize = modelo.estimators_[tree_index]

# The feature matrix was built by dropping the column at position 7, so the
# labels must be derived the same way. `base.columns[:-1]` would only be
# correct if the target were the last column; otherwise the split nodes are
# shown with the wrong feature names. A plain list is passed because some
# scikit-learn releases reject a pandas Index for `feature_names`.
nomes_das_features = base.drop(columns=base.columns[7]).columns.tolist()

plt.figure(figsize=(20, 10))
plot_tree(
    tree_to_visualize,
    filled=True,
    feature_names=nomes_das_features,
    class_names=True,  # True shows a symbolic representation of the class name
    rounded=True,
)
plt.show()

In [None]:
# Score the fitted model on the held-out test split.
y_pred = modelo.predict(X_teste)

print("Accuracy:", accuracy_score(y_teste, y_pred))

# Precision, recall and F1 are averaged across classes weighted by support.
metricas_ponderadas = (
    ("Precision:", precision_score),
    ("Recall:", recall_score),
    ("F1 Score:", f1_score),
)
for rotulo, metrica in metricas_ponderadas:
    print(rotulo, metrica(y_teste, y_pred, average='weighted'))

print("Classification Report:\n", classification_report(y_teste, y_pred))