In [1]:
import json
import os

import pandas as pd
from pycaret.classification import setup, compare_models, save_model, pull

# Load the run configuration from config.json.
# JSON is UTF-8 by specification, so the encoding is set explicitly instead of
# relying on the platform default.
try:
    with open('config.json', 'r', encoding='utf-8') as f:
        config = json.load(f)
except FileNotFoundError:
    print("Arquivo config.json não encontrado.")
    raise

# Load the dataset named in the configuration from ./datasets/.
try:
    df = pd.read_csv(f'./datasets/{config["file"]}')
except pd.errors.EmptyDataError:
    print("Erro: Dataset está vazio ou mal formatado.")
    raise

# The dataframe must have at least one column and one row.
if df.empty or len(df.columns) == 0:
    raise ValueError("Erro: O dataset está vazio ou sem colunas.")

# The target column must exist in the dataset.
if config["target"] not in df.columns:
    raise ValueError(f"A coluna alvo '{config['target']}' não está presente no dataset.")

# The target column must not contain missing values.
if df[config["target"]].isnull().sum() > 0:
    raise ValueError(f"A coluna alvo '{config['target']}' contém valores nulos. Por favor, limpe ou preencha esses valores.")

# Resolve optional settings. A dependent option is only read when the feature
# that uses it is switched on; otherwise it is left as None.
normalize = config.get("normalize", False)
normalize_method = config.get("normalization_method", None) if normalize else None

remove_multicollinearity = config.get("remove_multicollinearity", False)
multicollinearity_threshold = config.get("multicollinearity_threshold", None) if remove_multicollinearity else None

fold_strategy = config.get("fold_strategy", None)  # None when not configured
fold_number = config.get("fold_number", 10) if fold_strategy else None  # ignored without a fold strategy

# Arguments that are always passed to setup().
setup_kwargs = {
    "data": df,
    "target": config["target"],
    "session_id": config["session_id"],
    "normalize": normalize,
    "train_size": 1 - config["test_size"],
    "remove_multicollinearity": remove_multicollinearity,
}

# Optional arguments are forwarded only when they carry a value. Passing an
# explicit None would override setup()'s own defaults (and None is not an
# accepted fold strategy), so unset options are simply omitted.
optional_kwargs = {
    "normalize_method": normalize_method,
    "fold_strategy": fold_strategy,
    "fold": fold_number,
    "multicollinearity_threshold": multicollinearity_threshold,
}
setup_kwargs.update(
    {name: value for name, value in optional_kwargs.items() if value is not None}
)

# Initialise the PyCaret experiment.
clf = setup(**setup_kwargs)

# Train and rank the candidate models.
best_model = compare_models()

# Make sure compare_models() actually returned something.
if best_model is None or (isinstance(best_model, list) and len(best_model) == 0):
    raise ValueError("Nenhum modelo foi comparado ou retornado. Verifique as configurações.")

# When a list of models is returned, keep the first one.
if isinstance(best_model, list):
    best_model = best_model[0]

# Capture the comparison grid right away: pull() returns the most recent score
# grid, so it is read before any further PyCaret call is made.
results = pull()

# Make sure the output directory exists before writing any artefact.
os.makedirs('./models', exist_ok=True)

# Persist the best model (transformation pipeline + estimator).
save_model(best_model, './models/best_model')

# Persist the comparison results as CSV.
results.to_csv('./models/results.csv', index=False)

# Persist the best model's hyper-parameters. default=str keeps the dump from
# failing on values that are not JSON-native (nested estimators, numpy
# scalars, callables); such values are written as their string form.
with open('./models/best_model_params.json', 'w') as f:
    json.dump(best_model.get_params(), f, default=str)


Unnamed: 0,Description,Value
0,Session id,1245
1,Target,sex
2,Target type,Binary
3,Target mapping,"Female: 0, Male: 1"
4,Original data shape,"(333, 7)"
5,Transformed data shape,"(333, 11)"
6,Transformed train set shape,"(266, 11)"
7,Transformed test set shape,"(67, 11)"
8,Numeric features,4
9,Categorical features,2


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
dt,Decision Tree Classifier,0.5229,0.5206,0.5229,0.5288,0.5209,0.0397,0.0404,0.017
rf,Random Forest Classifier,0.5228,0.5287,0.5228,0.5387,0.5192,0.0579,0.0595,0.045
qda,Quadratic Discriminant Analysis,0.5192,0.5211,0.5192,0.5375,0.4922,0.0539,0.057,0.014
lightgbm,Light Gradient Boosting Machine,0.5191,0.4884,0.5191,0.5327,0.5179,0.043,0.0459,0.105
et,Extra Trees Classifier,0.5152,0.5039,0.5152,0.5285,0.5125,0.0385,0.0397,0.031
svm,SVM - Linear Kernel,0.5151,0.4903,0.5151,0.529,0.503,0.0361,0.0386,0.014
gbc,Gradient Boosting Classifier,0.508,0.5079,0.508,0.5218,0.5069,0.0289,0.0279,0.023
lr,Logistic Regression,0.5004,0.5026,0.5004,0.5204,0.4859,0.0178,0.0198,0.365
nb,Naive Bayes,0.5001,0.479,0.5001,0.4986,0.4937,-0.0177,-0.0194,0.015
ada,Ada Boost Classifier,0.4932,0.5129,0.4932,0.5013,0.4886,-0.0161,-0.0152,0.021


Transformation Pipeline and Model Successfully Saved


In [26]:
pip install pycaret


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.2.1[0m[39;49m -> [0m[32;49m24.2[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpython3.10 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.
