In [None]:
from itertools import product

import numpy as np
import pandas as pd

In [None]:
# Read final_features.csv (path relative to the notebook's working directory)
# into the DataFrame that the later cells consume.
df = pd.read_csv(filepath_or_buffer="final_features.csv")
# Last expression of the cell, so the notebook renders the first five rows.
df.head(n=5)

In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
import numpy as np


def avg_and_std(scores):
    """Summarise a collection of scores.

    Parameters
    ----------
    scores : array-like
        Numeric values accepted by ``np.mean`` / ``np.std``.

    Returns
    -------
    tuple
        ``(mean, std)`` where ``std`` is computed by ``np.std`` with its
        default settings.
    """
    mean_score = np.mean(scores)
    std_score = np.std(scores)
    return mean_score, std_score

# Hold-out evaluation helper (single train/test split; no K-Fold, no PCA)
def evaluate_model(model, df, metric_funcs, test_size=0.2, random_state=42):
    """Fit ``model`` on a train split of ``df`` and score it on the held-out rows.

    The frame is read positionally: column 1 is the target and columns 2
    onward are the features. Column 0 is not used.
    # NOTE(review): column 0 is presumably an identifier -- confirm.

    Parameters
    ----------
    model : estimator
        Object exposing ``fit(X, y)`` and ``predict_proba(X)``. It is fitted
        in place on the training split.
    df : pandas.DataFrame
        Table laid out as described above.
    metric_funcs : iterable of callable
        Each is called as ``metric(y_test, y_score)``, where ``y_score`` is
        column 1 of ``predict_proba``.
        # NOTE(review): column 1 is the positive class only for a binary
        # target -- confirm the target is binary.
    test_size : float, default 0.2
        Forwarded to ``train_test_split``.
    random_state : int or None, default 42
        Seed forwarded to ``train_test_split``; the default reproduces the
        previously hard-coded split.

    Returns
    -------
    dict
        Maps each metric's name to its score. The name is the callable's
        ``__name__``; for wrappers without one (e.g. ``functools.partial``)
        it falls back to the wrapped function's name, then to ``repr``.
    """
    # Split the frame into features and target
    X = df.iloc[:, 2:].values  # Features
    y = df.iloc[:, 1].values   # Target

    # Split the data into train and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )

    # Train the model
    model.fit(X_train, y_train)

    # Predicted score taken from the second predict_proba column
    y_pred = model.predict_proba(X_test)[:, 1]

    # Compute and store every metric
    scores = {}
    for metric_func in metric_funcs:
        score = metric_func(y_test, y_pred)
        # functools.partial objects have no __name__; use the wrapped
        # function's name instead of raising AttributeError.
        name = getattr(metric_func, "__name__", None)
        if name is None:
            wrapped = getattr(metric_func, "func", None)
            name = getattr(wrapped, "__name__", None) or repr(metric_func)
        scores[name] = score

    return scores

In [None]:
# Exhaustive search over RandomForest hyper-parameters, scored by ROC AUC on
# the hold-out split produced by evaluate_model.
#
# NOTE(review): the combination is selected on the same hold-out split whose
# score is reported, so best_auc is optimistically biased. Consider a separate
# validation split or cross-validation for the selection step.

# Fixed forest seed: without it, differences between combinations are mixed
# with seed-to-seed noise and the search is not reproducible.
RANDOM_STATE = 42

# "log_loss" is omitted on purpose: scikit-learn treats it as the same
# Shannon-information-gain criterion as "entropy", so with a fixed seed it
# would only repeat the "entropy" fits (and it is rejected by scikit-learn < 1.1).
CRITERIA = ["gini", "entropy"]
N_ESTIMATORS = range(25, 201, 25)
MAX_DEPTHS = range(5, 106, 10)
MIN_SAMPLES_LEAF = range(5, 26, 5)

best_auc = 0
best_params = {}

# product() varies the last argument fastest, i.e. the same visiting order as
# nested loops written criterion -> n_estimators -> max_depth -> min_samples_leaf.
for crit, n_est, m_depth, m_samples_leaf in product(
    CRITERIA, N_ESTIMATORS, MAX_DEPTHS, MIN_SAMPLES_LEAF
):
    params = {
        'n_estimators': n_est,
        'max_depth': m_depth,
        'min_samples_leaf': m_samples_leaf,
        'criterion': crit
    }

    # Seed and parallelism are passed separately so that `params` (and hence
    # `best_params` and the printed report) holds only the tuned values.
    # n_jobs=-1 trains the trees on all cores.
    rf_model = RandomForestClassifier(**params, random_state=RANDOM_STATE, n_jobs=-1)

    score = evaluate_model(model=rf_model, df=df, metric_funcs=[roc_auc_score])
    auc = score["roc_auc_score"]

    improved = auc > best_auc
    if improved:
        best_auc = auc
        best_params = params

    # Same report layout for both outcomes; only the header and label differ.
    print("New best parameter combination found!" if improved else "Parameter combination tested:")
    for parameter, value in params.items():
        print(f"\t{parameter}: {value}")
    label = "Best AUC" if improved else "AUC"
    print(f"{label}: {auc * 100:.6f}%\n")


In [None]:
# Final summary of the search: the winning hyper-parameters and their score.
print("Best parameter combination:")
for parameter, value in best_params.items():
    print(f"\t{parameter}: {value}")
# The score comes from one hold-out split (evaluate_model does no averaging),
# so it is labelled as a plain AUC rather than an average.
print(f"Best AUC: {best_auc * 100:.2f}%")