In [48]:
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.model_selection import train_test_split, KFold
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Relative path of the local Excel workbook that holds the dataset
file_path = './banco_dados.xlsx'
# Load the 'TDados' worksheet into a DataFrame
df = pd.read_excel(io=file_path, sheet_name='TDados')

In [None]:
import numpy as np
import pandas as pd
from scipy.stats import pearsonr, f_oneway, kruskal

def check_feature_representativeness(df, target_col):
    """
    Print a basic statistical screen of each feature against a numeric target.

    Every column of ``df`` other than ``target_col`` is classified as:

    * numeric     - numeric dtype AND at least ``int(0.2 * len(df))`` distinct
                    values; scored with the Pearson correlation against the
                    target;
    * categorical - everything else; scored with a one-way ANOVA of the target
                    grouped by the feature's values.

    Rows where either the feature or the target is missing are ignored for that
    feature. A numeric feature with fewer than two usable rows, or a
    categorical feature with fewer than two groups, is skipped.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the features and the target column.
    target_col : str
        Name of the target column in ``df``.

    Returns
    -------
    None
        The two result tables are printed, strongest statistic first.
    """
    # Split into features (X) and target (y)
    X = df.drop(columns=[target_col])
    y = df[target_col]

    # Simple, arbitrary rule: a numeric column with at least 20% of the row
    # count as distinct values is treated as "numeric". The threshold does not
    # depend on the feature, so it is computed once.
    limiar_categ = int(0.2 * len(df))
    alvo_valido = y.notna()

    resultados_num = []
    resultados_cat = []

    for feature in X.columns:
        serie = X[feature]

        # Keep only rows where both the feature and the target are present:
        # pearsonr cannot handle NaN, and a NaN "category" would produce an
        # empty group because NaN never compares equal to itself.
        validos = serie.notna() & alvo_valido
        serie_ok = serie[validos]
        y_ok = y[validos]

        if serie.nunique() >= limiar_categ and pd.api.types.is_numeric_dtype(serie):
            # Numeric feature: Pearson r and its p-value
            if len(serie_ok) < 2:
                # pearsonr needs at least two paired observations
                continue
            r, p_value = pearsonr(serie_ok, y_ok)
            resultados_num.append((feature, r, p_value))
        else:
            # Categorical feature: one group of target values per category
            categorias = serie_ok.unique()
            grupos = [y_ok[serie_ok == cat] for cat in categorias]

            # One-way ANOVA (assumes rough normality; Kruskal-Wallis would be
            # the non-parametric alternative). A single group means the
            # feature is effectively constant, so there is nothing to test.
            if len(grupos) > 1:
                f_stat, p_value = f_oneway(*grupos)
                resultados_cat.append((feature, f_stat, p_value, categorias))

    # Sort strongest first; NaN statistics are pushed to the end because NaN
    # comparisons would otherwise leave the ordering arbitrary.
    ordenados_num = sorted(resultados_num, key=lambda x: (np.isnan(x[1]), -abs(x[1])))
    ordenados_cat = sorted(resultados_cat, key=lambda x: (np.isnan(x[1]), -x[1]))

    print("=== Análise de Features Numéricas ===")
    print("Feature         | Pearson_r  | p-value")
    for (feat, r, p) in ordenados_num:
        print(f"{feat:<15} | {r:>9.4f} | {p:>9.4g}")

    print("\n=== Análise de Features Categóricas ===")
    print("Feature         | Estatística (F) | p-value | Categorias")
    for (feat, f_stat, p, cats) in ordenados_cat:
        print(f"{feat:<15} | {f_stat:>16.4f} | {p:>8.4g} | {len(cats)} cat(s)")


# Run the check at module level so the analysis executes when the cell runs
# (placed inside the function body, this call would recurse without end).
check_feature_representativeness(df, target_col='Resp_13')


In [47]:
##############################################################################
# 1. LOADING, CLEANING (FILTER Y != 0) AND OPTIONAL OUTLIER TREATMENT
##############################################################################
# 1.1. Keep only the rows whose target (Resp_13) is different from 0.
#      (If the data came from another source, e.g. pd.read_csv, load it before
#      this step.)
df = df.loc[df['Resp_13'].ne(0)]

# 1.2. Optional outlier trimming on the target: keep rows strictly below the
#      99th percentile of the already-filtered Resp_13. Adjust if this does
#      not make sense for the data at hand.
upper_quantile = df['Resp_13'].quantile(0.99)
df = df.loc[df['Resp_13'].lt(upper_quantile)]

print(f"Tamanho do DF após filtrar y != 0 e outliers: {len(df)}")

# Target vector and feature matrix
y = df['Resp_13']
X = df.drop('Resp_13', axis=1)

print("Dimensão de X:", X.shape)
print("Dimensão de y:", y.shape)

##############################################################################
# 2. TRAIN/TEST SPLIT AND SCALING
##############################################################################
X_train_full, X_test, y_train_full, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42
)

# The scaler is fitted on the training portion only and then applied to the
# test portion, so no test-set statistics leak into training.
scaler = StandardScaler()
X_train_full = scaler.fit_transform(X_train_full)
X_test = scaler.transform(X_test)

# Baseline: always predict the training-set mean.
# np.full with an explicit float dtype is used instead of
# np.full_like(y_test, ...): full_like inherits y_test's dtype, so an integer
# target would silently truncate the mean and distort the baseline metrics.
baseline_pred = np.full(len(y_test), np.mean(y_train_full), dtype=float)
baseline_mse = mean_squared_error(y_test, baseline_pred)
baseline_mae = mean_absolute_error(y_test, baseline_pred)
baseline_r2  = r2_score(y_test, baseline_pred)

print("\n[BASELINE]")
print(f"MSE = {baseline_mse:.4f}")
print(f"MAE = {baseline_mae:.4f}")
print(f"R²  = {baseline_r2:.4f}\n")

##############################################################################
# 3. CONSTRUÇÃO DO MODELO (POSSÍVEIS 4 CAMADAS DENSAS) + ATIVAÇÕES
##############################################################################
def build_model(
    # Layer widths
    units1=64, units2=32, units3=16, units4=8,
    activation='relu',  # 'relu', 'leaky', 'selu'
    # Optimisation hyperparameters
    learning_rate=1e-3,
    # Regularisation
    dropout_rate=0.0,
    l2_reg=0.0,
    use_batchnorm=False,
    # Number of input features; None -> read from the global X_train_full
    input_dim=None
):
    """
    Build and compile a feed-forward regression network with four hidden layers.

    Each hidden layer is: Dense (he_normal init, optional L2 kernel penalty)
    -> activation -> optional BatchNormalization -> optional Dropout.
    The output is a single linear unit. The model is compiled with Adam,
    MSE loss and MAE as an extra metric.

    Parameters
    ----------
    units1, units2, units3, units4 : int
        Widths of the four hidden Dense layers, in order.
    activation : str
        'leaky' -> LeakyReLU(alpha=0.01); 'selu' -> SELU; any other value
        falls back to ReLU.
    learning_rate : float
        Learning rate passed to the Adam optimizer.
    dropout_rate : float
        Dropout rate applied after every hidden layer; no Dropout layer is
        added when it is not greater than 0.
    l2_reg : float
        L2 factor for the Dense kernels; no regularizer when not greater than 0.
    use_batchnorm : bool
        Whether to add BatchNormalization after each activation.
    input_dim : int or None
        Number of input features. When None, ``X_train_full.shape[1]`` is used,
        which requires the global ``X_train_full`` to exist.

    Returns
    -------
    tf.keras.Model
        The compiled model.
    """
    if input_dim is None:
        input_dim = X_train_full.shape[1]

    regularizer = tf.keras.regularizers.l2(l2_reg) if l2_reg > 0 else None

    # Helper that applies the requested activation as its own layer
    def activation_layer(x, act):
        if act == 'leaky':
            # NOTE(review): newer Keras releases appear to prefer the name
            # `negative_slope` for this argument - confirm against the
            # installed version before renaming.
            return tf.keras.layers.LeakyReLU(alpha=0.01)(x)
        elif act == 'selu':
            return tf.keras.layers.Activation('selu')(x)
        else:
            return tf.keras.layers.ReLU()(x)

    inputs = tf.keras.Input(shape=(input_dim,))

    # The four hidden stages are identical apart from their width, so they
    # are built in a loop instead of four copy-pasted stanzas.
    x = inputs
    for units in (units1, units2, units3, units4):
        x = tf.keras.layers.Dense(
            units,
            kernel_regularizer=regularizer,
            kernel_initializer='he_normal'
        )(x)
        x = activation_layer(x, activation)
        if use_batchnorm:
            x = tf.keras.layers.BatchNormalization()(x)
        if dropout_rate > 0:
            x = tf.keras.layers.Dropout(dropout_rate)(x)

    # Output: one linear unit (regression)
    outputs = tf.keras.layers.Dense(1)(x)

    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    optimizer = tf.keras.optimizers.Adam(learning_rate=learning_rate)

    model.compile(
        optimizer=optimizer,
        loss='mse',
        metrics=['mae']
    )

    return model

##############################################################################
# 4. FUNÇÃO DE K-FOLD CROSS-VALIDATION (COM CALLBACKS)
##############################################################################
def evaluate_model_kfold(
    model_fn,
    X_data,
    y_data,
    epochs=100,
    batch_size=32,
    k=10,
    patience=10
):
    """
    Run shuffled K-fold cross-validation for the model produced by ``model_fn``.

    For every fold a fresh model is built, trained with early stopping and
    learning-rate reduction (both monitoring ``val_loss``), and the smallest
    ``val_loss`` seen during training is recorded.

    Parameters
    ----------
    model_fn : callable
        Zero-argument callable returning a new compiled Keras model.
    X_data : array-like
        Feature matrix, indexable by integer position arrays. pandas objects
        are converted to NumPy first.
    y_data : array-like
        Targets aligned with ``X_data``.
    epochs : int
        Maximum number of epochs per fold.
    batch_size : int
        Mini-batch size.
    k : int
        Number of folds.
    patience : int
        Early-stopping patience, in epochs.

    Returns
    -------
    float
        Mean over the folds of each fold's minimum ``val_loss`` (MSE when the
        model is compiled with an MSE loss).
    """
    kf = KFold(n_splits=k, shuffle=True, random_state=42)
    val_losses = []

    # Positional indexing below (X_data[train_index]) does not work on a
    # DataFrame, so pandas inputs are converted just like y_data.
    if hasattr(X_data, "to_numpy"):
        X_data = X_data.to_numpy()
    y_data = np.array(y_data)

    for train_index, val_index in kf.split(X_data):
        X_tr, X_val = X_data[train_index], X_data[val_index]
        y_tr, y_val = y_data[train_index], y_data[val_index]

        # Many models are created in this loop; reset Keras' global state so
        # it does not keep growing from fold to fold.
        tf.keras.backend.clear_session()
        model = model_fn()

        # Callbacks
        early_stop = tf.keras.callbacks.EarlyStopping(
            monitor='val_loss',
            patience=patience,
            restore_best_weights=True
        )
        reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(
            monitor='val_loss',
            factor=0.5,
            patience=5,
            min_lr=1e-7,
            verbose=0
        )

        history = model.fit(
            X_tr, y_tr,
            validation_data=(X_val, y_val),
            epochs=epochs,
            batch_size=batch_size,
            callbacks=[early_stop, reduce_lr],
            verbose=0
        )

        # Best (lowest) validation loss reached in this fold
        min_val_loss = min(history.history['val_loss'])
        val_losses.append(min_val_loss)

    return np.mean(val_losses)

##############################################################################
# 5. GRID DE HIPERPARÂMETROS COMBINANDO VARIAÇÕES (maior)
##############################################################################
def _grid_entry(units, activation, learning_rate, dropout_rate, l2_reg, use_batchnorm):
    """Assemble one build_model keyword dict; ``units`` holds the four layer widths."""
    entry = {f'units{pos}': width for pos, width in enumerate(units, start=1)}
    entry.update(
        activation=activation,
        learning_rate=learning_rate,
        dropout_rate=dropout_rate,
        l2_reg=l2_reg,
        use_batchnorm=use_batchnorm,
    )
    return entry


# Columns: (units1..units4), activation, learning_rate, dropout_rate, l2_reg, use_batchnorm
param_grid = [
    # 1) "Small" network
    _grid_entry((32, 16, 8, 4), 'relu', 1e-3, 0.0, 0.0, False),
    # 2) "Medium" network
    _grid_entry((64, 32, 16, 8), 'leaky', 1e-3, 0.2, 1e-4, True),
    # 3) "Large" network, SELU
    _grid_entry((128, 64, 32, 16), 'selu', 1e-3, 0.3, 1e-5, True),
    # 4) "Large" network, ReLU, smaller learning rate
    _grid_entry((128, 64, 32, 16), 'relu', 1e-4, 0.0, 1e-4, True),
    # 5) Medium network, different dropout and L2
    _grid_entry((64, 32, 16, 8), 'relu', 1e-3, 0.2, 1e-3, False),
    # 6) Medium network + SELU, LR=1e-4
    _grid_entry((64, 32, 16, 8), 'selu', 1e-4, 0.2, 1e-3, True),
    # 7) Medium network + Leaky, no dropout, LR=1e-4
    _grid_entry((64, 32, 16, 8), 'leaky', 1e-4, 0.0, 0.0, True),
    # 8) Large network + batchnorm, dropout 0.1, LR=1e-3, L2=1e-4
    _grid_entry((128, 64, 32, 16), 'relu', 1e-3, 0.1, 1e-4, True),
    # More combinations can be added here (batch_size and epochs could also be
    # varied in evaluate_model_kfold), at the cost of a much longer search.
]

# Running best: lowest mean validation loss so far and the configuration that produced it
best_val_loss, best_params = float('inf'), None

print("[INÍCIO DA BUSCA DE HIPERPARÂMETROS COM K-FOLD (10 folds)]")

##############################################################################
# 6. RUN THE CROSS-VALIDATION FOR EVERY COMBINATION
##############################################################################
for params in param_grid:
    print(f"\nTestando hiperparâmetros: {params}")

    # Factory handed to the K-fold routine; it builds a fresh model from the
    # configuration of the current iteration.
    model_fn = lambda: build_model(**params)

    avg_val_loss = evaluate_model_kfold(
        model_fn=model_fn,
        X_data=X_train_full,
        y_data=y_train_full,
        epochs=200,        # up to 200 epochs per fold
        batch_size=16,     # smaller batches (sometimes helps on small datasets)
        k=10,              # 10 folds
        patience=10        # early-stopping patience
    )

    print(f"Média do val_loss (MSE) nos folds = {avg_val_loss:.4f}")
    if avg_val_loss < best_val_loss:
        best_val_loss, best_params = avg_val_loss, params
        print(f" -> Novo melhor modelo! val_loss: {best_val_loss:.4f}")

print(f"\nMelhor configuração encontrada: {best_params}")
print(f"Melhor val_loss médio (MSE): {best_val_loss:.4f}")

##############################################################################
# 7. TREINO FINAL COM OS MELHORES PARÂMETROS ENCONTRADOS
##############################################################################
# best_params is still None when no configuration produced a mean val_loss
# that compared as smaller than the running best (e.g. every run returned NaN,
# or the grid was empty). Fail with a clear message instead of the cryptic
# TypeError raised by build_model(**None).
if best_params is None:
    raise RuntimeError(
        "Nenhuma configuração válida foi encontrada na busca de hiperparâmetros "
        "(best_params é None)."
    )

final_model = build_model(**best_params)

# Callbacks for the final fit. There is no validation split here, so both
# monitor the training loss.
early_stop_final = tf.keras.callbacks.EarlyStopping(
    monitor='loss',
    patience=15,
    restore_best_weights=True
)
reduce_lr_final = tf.keras.callbacks.ReduceLROnPlateau(
    monitor='loss',
    factor=0.5,
    patience=5,
    min_lr=1e-7,
    verbose=1
)

history = final_model.fit(
    X_train_full,
    y_train_full,
    epochs=300,           # more epochs than during the search
    batch_size=16,
    callbacks=[early_stop_final, reduce_lr_final],
    verbose=1
)

##############################################################################
# 8. EVALUATION ON THE TEST SET
##############################################################################
y_pred_test = final_model.predict(X_test)

mse_test = mean_squared_error(y_test, y_pred_test)
mae_test = mean_absolute_error(y_test, y_pred_test)
r2_test  = r2_score(y_test, y_pred_test)

print("\n[RESULTADOS NO CONJUNTO DE TESTE]")
print(f"MSE  = {mse_test:.4f}")
print(f"MAE  = {mae_test:.4f}")
print(f"R²   = {r2_test:.4f}")

Tamanho do DF após filtrar y != 0 e outliers: 22
Dimensão de X: (22, 310)
Dimensão de y: (22,)

[BASELINE]
MSE = 3.5772
MAE = 1.3444
R²  = -0.2713

[INÍCIO DA BUSCA DE HIPERPARÂMETROS COM K-FOLD (10 folds)]

Testando hiperparâmetros: {'units1': 32, 'units2': 16, 'units3': 8, 'units4': 4, 'activation': 'relu', 'learning_rate': 0.001, 'dropout_rate': 0.0, 'l2_reg': 0.0, 'use_batchnorm': False}
Média do val_loss (MSE) nos folds = 13.4486
 -> Novo melhor modelo! val_loss: 13.4486

Testando hiperparâmetros: {'units1': 64, 'units2': 32, 'units3': 16, 'units4': 8, 'activation': 'leaky', 'learning_rate': 0.001, 'dropout_rate': 0.2, 'l2_reg': 0.0001, 'use_batchnorm': True}




Média do val_loss (MSE) nos folds = 18.2925

Testando hiperparâmetros: {'units1': 128, 'units2': 64, 'units3': 32, 'units4': 16, 'activation': 'selu', 'learning_rate': 0.001, 'dropout_rate': 0.3, 'l2_reg': 1e-05, 'use_batchnorm': True}
Média do val_loss (MSE) nos folds = 16.4376

Testando hiperparâmetros: {'units1': 128, 'units2': 64, 'units3': 32, 'units4': 16, 'activation': 'relu', 'learning_rate': 0.0001, 'dropout_rate': 0.0, 'l2_reg': 0.0001, 'use_batchnorm': True}
Média do val_loss (MSE) nos folds = 13.7436

Testando hiperparâmetros: {'units1': 64, 'units2': 32, 'units3': 16, 'units4': 8, 'activation': 'relu', 'learning_rate': 0.001, 'dropout_rate': 0.2, 'l2_reg': 0.001, 'use_batchnorm': False}
Média do val_loss (MSE) nos folds = 9.6484
 -> Novo melhor modelo! val_loss: 9.6484

Testando hiperparâmetros: {'units1': 64, 'units2': 32, 'units3': 16, 'units4': 8, 'activation': 'selu', 'learning_rate': 0.0001, 'dropout_rate': 0.2, 'l2_reg': 0.001, 'use_batchnorm': True}
Média do val_los

KeyboardInterrupt: 