In [1]:
import json
import os

import pandas as pd
from pycaret.classification import setup, compare_models, save_model, pull

# Load the run configuration from config.json.
# The file is opened explicitly as UTF-8 (the JSON interchange encoding) so the
# result does not depend on the platform's default locale encoding.
try:
    with open('config.json', 'r', encoding='utf-8') as f:
        config = json.load(f)
except FileNotFoundError:
    print("Arquivo config.json não encontrado.")
    raise
except json.JSONDecodeError as exc:
    # A present-but-malformed file gets its own message before re-raising,
    # mirroring how the missing-file case is reported.
    print(f"Erro: config.json não contém um JSON válido ({exc}).")
    raise

# Load the dataset named by config["file"] from the ./datasets directory.
dataset_path = f'./datasets/{config["file"]}'
try:
    df = pd.read_csv(dataset_path)
except FileNotFoundError:
    # Report a missing dataset the same way a missing config.json is reported.
    print(f"Arquivo de dataset '{dataset_path}' não encontrado.")
    raise
except (pd.errors.EmptyDataError, pd.errors.ParserError):
    # EmptyDataError covers an empty file; ParserError covers a malformed one,
    # which is what the message below promises to report.
    print("Erro: Dataset está vazio ou mal formatado.")
    raise

# Sanity-check the loaded frame before handing it to PyCaret.

# 1. The frame must have at least one row and one column.
row_count, column_count = df.shape
if row_count == 0 or column_count == 0:
    raise ValueError("Erro: O dataset está vazio ou sem colunas.")

# 2. The configured target column must exist in the frame.
if not (config["target"] in df.columns):
    raise ValueError(f"A coluna alvo '{config['target']}' não está presente no dataset.")

# 3. The target column must not contain missing values.
if df[config["target"]].isna().any():
    raise ValueError(f"A coluna alvo '{config['target']}' contém valores nulos. Por favor, limpe ou preencha esses valores.")

# Resolve the optional preprocessing / cross-validation settings from config.
# Whenever an option is disabled or absent, fall back to the default value
# documented for pycaret.classification.setup() instead of None, so that the
# setup() call below never receives None for these arguments.
normalize = config.get("normalize", False)
normalize_method = (config.get("normalization_method") if normalize else None) or "zscore"

remove_multicollinearity = config.get("remove_multicollinearity", False)
multicollinearity_threshold = (
    config.get("multicollinearity_threshold") if remove_multicollinearity else None
)
if multicollinearity_threshold is None:
    # Explicit None check (not `or`) so a configured threshold of 0 is kept.
    multicollinearity_threshold = 0.9

# The configured fold count is only honoured when a fold strategy was chosen;
# otherwise both the strategy and the count use the setup() defaults.
fold_strategy = config.get("fold_strategy")
fold_number = config.get("fold_number") if fold_strategy else None
if fold_number is None:
    fold_number = 10
fold_strategy = fold_strategy or "stratifiedkfold"

# Initialise the PyCaret classification experiment.
# All arguments are gathered into one mapping first, then passed to setup();
# train_size is the complement of the configured test fraction.
setup_kwargs = dict(
    data=df,
    target=config["target"],
    session_id=config["session_id"],
    normalize=normalize,
    normalize_method=normalize_method,
    train_size=1 - config["test_size"],
    fold_strategy=fold_strategy,
    fold=fold_number,
    remove_multicollinearity=remove_multicollinearity,
    multicollinearity_threshold=multicollinearity_threshold,
)
clf = setup(**setup_kwargs)

# Train and cross-validate the candidate models and keep the best one.
best_model = compare_models()

# Make sure compare_models() actually returned a model.
if best_model is None or (isinstance(best_model, list) and len(best_model) == 0):
    raise ValueError("Nenhum modelo foi comparado ou retornado. Verifique as configurações.")

# If a list of models came back, keep the first one.
if isinstance(best_model, list):
    best_model = best_model[0]

# Grab the comparison grid now: pull() returns the last score grid PyCaret
# displayed, so it is captured before any further PyCaret call is made.
results = pull()

# Create the output directory if needed; none of the writes below create it.
os.makedirs('./models', exist_ok=True)

# Persist the best model (PyCaret appends the file extension itself).
save_model(best_model, './models/best_model')

# Persist the comparison grid as CSV.
results.to_csv('./models/results.csv', index=False)

# Persist the best model's hyper-parameters.
# default=str stringifies any value json cannot encode (get_params() may hold
# non-JSON objects such as nested estimators) instead of raising midway and
# leaving a truncated file behind.
with open('./models/best_model_params.json', 'w', encoding='utf-8') as f:
    json.dump(best_model.get_params(), f, default=str)


Unnamed: 0,Description,Value
0,Session id,1245
1,Target,sex
2,Target type,Binary
3,Target mapping,"Female: 0, Male: 1"
4,Original data shape,"(333, 7)"
5,Transformed data shape,"(333, 11)"
6,Transformed train set shape,"(266, 11)"
7,Transformed test set shape,"(67, 11)"
8,Numeric features,4
9,Categorical features,2


Unnamed: 0,Model,Accuracy,AUC,Recall,Prec.,F1,Kappa,MCC,TT (Sec)
lr,Logistic Regression,0.9209,0.9797,0.9209,0.9275,0.9206,0.842,0.8484,0.254
ridge,Ridge Classifier,0.9171,0.9787,0.9171,0.9228,0.9168,0.8343,0.8399,0.011
lda,Linear Discriminant Analysis,0.9171,0.9781,0.9171,0.9228,0.9168,0.8343,0.8399,0.01
knn,K Neighbors Classifier,0.902,0.9586,0.902,0.9064,0.9018,0.804,0.8082,0.018
et,Extra Trees Classifier,0.902,0.9685,0.902,0.9094,0.9014,0.8038,0.8111,0.033
lightgbm,Light Gradient Boosting Machine,0.8835,0.9623,0.8835,0.8914,0.8829,0.767,0.7747,0.102
svm,SVM - Linear Kernel,0.883,0.9638,0.883,0.8954,0.8821,0.7666,0.7784,0.01
ada,Ada Boost Classifier,0.8795,0.9557,0.8795,0.8894,0.8789,0.7592,0.7688,0.02
rf,Random Forest Classifier,0.8722,0.9604,0.8722,0.8783,0.8719,0.7447,0.7505,0.033
gbc,Gradient Boosting Classifier,0.8644,0.9593,0.8644,0.8733,0.8637,0.7289,0.7375,0.025


Transformation Pipeline and Model Successfully Saved


In [2]:
pip install pycaret











Note: you may need to restart the kernel to use updated packages.
