# Treinamento do Modelo

In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier
from sklearn.metrics import classification_report, accuracy_score
import joblib

In [8]:
# 1. Carregar os dados
df = pd.read_csv('dados_modelo.csv')

In [9]:
# 2. Remover variáveis com alta correlação com o target
df = df.drop(columns=["imc", "categoria_imc"])

In [10]:
# 3. Separar variáveis preditoras e alvo
X = df.drop(columns=["nivel_obesidade"])
y = df["nivel_obesidade"]

In [11]:
# 4. Codificar variáveis categóricas (X)
X = pd.get_dummies(X, drop_first=True)

In [12]:
# 5. Codificar variável alvo
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

In [13]:
# 6. Divisão treino/teste
X_train, X_test, y_train, y_test = train_test_split(
    X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded
)

In [14]:
# 7. Instanciar modelos
modelos = {
    "Random Forest": RandomForestClassifier(random_state=42),
    "KNN": KNeighborsClassifier(),
    "Logistic Regression": LogisticRegression(max_iter=1000),
    "XGBoost": XGBClassifier(use_label_encoder=False, eval_metric='mlogloss', random_state=42)
}

In [17]:
# 8. Treinar, avaliar e salvar resultados
resultados = {}

for nome, modelo in modelos.items():
    print(f"\nTreinando {nome}...")
    modelo.fit(X_train, y_train)
    y_pred = modelo.predict(X_test)
    acc = accuracy_score(y_test, y_pred)
    print(f"Acurácia: {acc:.4f}")
    print(classification_report(y_test, y_pred, target_names=label_encoder.classes_))
    resultados[nome] = {
        "modelo": modelo,
        "acuracia": acc
    }


Treinando Random Forest...
Acurácia: 0.9433
                     precision    recall  f1-score   support

Insufficient_Weight       1.00      0.94      0.97        54
      Normal_Weight       0.81      0.93      0.86        58
     Obesity_Type_I       0.96      0.96      0.96        70
    Obesity_Type_II       0.98      0.98      0.98        60
   Obesity_Type_III       1.00      0.98      0.99        65
 Overweight_Level_I       0.89      0.86      0.88        58
Overweight_Level_II       0.98      0.93      0.96        58

           accuracy                           0.94       423
          macro avg       0.95      0.94      0.94       423
       weighted avg       0.95      0.94      0.94       423


Treinando KNN...
Acurácia: 0.8794
                     precision    recall  f1-score   support

Insufficient_Weight       0.88      0.96      0.92        54
      Normal_Weight       0.88      0.60      0.71        58
     Obesity_Type_I       0.93      0.90      0.91        70
 

[WinError 2] O sistema não pode encontrar o arquivo especificado
  File "C:\Users\dphat\AppData\Local\Packages\PythonSoftwareFoundation.Python.3.11_qbz5n2kfra8p0\LocalCache\local-packages\Python311\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
               ^^^^^^^^^^^^^^^
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\Lib\subprocess.py", line 548, in run
    with Popen(*popenargs, **kwargs) as process:
         ^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\Lib\subprocess.py", line 1026, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "C:\Program Files\WindowsApps\PythonSoftwareFoundation.Python.3.11_3.11.2544.0_x64__qbz5n2kfra8p0\Lib\subprocess.py", line 1538, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executab

Acurácia: 0.8085
                     precision    recall  f1-score   support

Insufficient_Weight       0.85      0.87      0.86        54
      Normal_Weight       0.72      0.66      0.68        58
     Obesity_Type_I       0.79      0.79      0.79        70
    Obesity_Type_II       0.88      0.98      0.93        60
   Obesity_Type_III       0.98      0.98      0.98        65
 Overweight_Level_I       0.67      0.71      0.69        58
Overweight_Level_II       0.73      0.66      0.69        58

           accuracy                           0.81       423
          macro avg       0.80      0.81      0.80       423
       weighted avg       0.81      0.81      0.81       423


Treinando XGBoost...
Acurácia: 0.9527
                     precision    recall  f1-score   support

Insufficient_Weight       0.96      0.91      0.93        54
      Normal_Weight       0.83      0.95      0.89        58
     Obesity_Type_I       0.97      0.97      0.97        70
    Obesity_Type_II      

In [18]:

# 9. Selecionar o melhor modelo
melhor_modelo_nome = max(resultados, key=lambda x: resultados[x]["acuracia"])
melhor_modelo = resultados[melhor_modelo_nome]["modelo"]

print(f"\n🔍 Melhor modelo: {melhor_modelo_nome} com acurácia {resultados[melhor_modelo_nome]['acuracia']:.4f}")

# 10. Salvar melhor modelo, encoder e colunas
joblib.dump(melhor_modelo, "modelo_obesidade.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")
joblib.dump(X.columns.tolist(), "colunas_modelo.pkl")


🔍 Melhor modelo: XGBoost com acurácia 0.9527


['colunas_modelo.pkl']