# Libraries

In [3]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from scipy import stats

# FUNÇÕES DE AJUDA

# Identificar Var Numéricas e Categóricas

In [4]:
def get_feature_types(df, target_col='Tipo de Ataque'):
    '''
    Split the columns of ``df`` into numeric and non-numeric feature names.

    Parameters
    ----------
    df : pandas.DataFrame
        Data whose columns are classified by dtype.
    target_col : str, default 'Tipo de Ataque'
        Name of the target column; it is left out of both returned lists.

    Returns
    -------
    tuple of (list, list)
        ``(numeric_features, categorical_features)``: names of the columns
        with a NumPy numeric dtype, and names of all remaining columns.
    '''
    number_kinds = [np.number]
    numeric_features, categorical_features = (
        frame.columns.tolist()
        for frame in (
            df.select_dtypes(include=number_kinds),
            df.select_dtypes(exclude=number_kinds),
        )
    )

    # The target is not a feature: drop it from whichever list holds it.
    for names in (numeric_features, categorical_features):
        try:
            names.remove(target_col)
        except ValueError:
            # The target is simply not in this list; nothing to remove.
            pass

    return numeric_features, categorical_features

# Analisar Correlação Entre Features Numéricas

In [5]:
def correlation_analysis(df, numeric_features, threshold=0.85):
    '''
    Plot a correlation heatmap of the numeric features and list strong pairs.

    The correlation matrix is computed with ``DataFrame.corr()`` using its
    default settings, drawn as a heatmap, and then scanned for pairs whose
    absolute correlation is strictly greater than ``threshold``.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing every column named in ``numeric_features``.
    numeric_features : list
        Names of the columns to correlate.
    threshold : float, default 0.85
        Cut-off applied to the absolute correlation value.

    Returns
    -------
    list of tuple
        One ``(feature_a, feature_b, correlation)`` entry per feature pair
        above the threshold. Each pair is reported once (upper triangle of
        the matrix, diagonal excluded), in row-major order. Empty when
        ``numeric_features`` is empty; nothing is plotted in that case.
    '''
    # ``len`` rather than truthiness: the caller may pass an Index/array.
    if len(numeric_features) == 0:
        # No features means an empty matrix: nothing to plot or report.
        return []

    corr_matrix = df[numeric_features].corr()

    plt.figure(figsize=(20, 20))
    # ``linewidths`` is the documented seaborn parameter for cell borders.
    sns.heatmap(corr_matrix, annot=False, cmap='coolwarm', center=0, linewidths=0.5)

    plt.title('Correlação entre Features - Heatmap')
    plt.xticks(rotation=90)
    plt.yticks(rotation=0)
    plt.show()

    corr_values = corr_matrix.to_numpy()
    # The matrix is symmetric with a unit diagonal, so keep only the strict
    # upper triangle: every pair appears once and self-correlations are
    # skipped. NaN entries compare False and are never reported.
    strong_mask = np.triu(np.abs(corr_values) > threshold, k=1)
    row_idx, col_idx = np.nonzero(strong_mask)

    return [
        (corr_matrix.index[r], corr_matrix.columns[c], corr_values[r, c])
        for r, c in zip(row_idx, col_idx)
    ]


# Análise de Homogeneidade da Variância (Teste de Levene)

In [10]:
def analyze_variance_homogen(df, numeric_features, target_col='Tipo de Ataque'):
    '''
    Run Levene's test for equal variances on each numeric feature.

    For every feature, the rows of ``df`` are split by the value of
    ``target_col`` and the per-class samples are passed to
    ``scipy.stats.levene``. Missing values are dropped, and classes whose
    remaining sample is empty or has zero variance are left out of the test.

    Parameters
    ----------
    df : pandas.DataFrame
        Data containing ``target_col`` and every column in
        ``numeric_features``.
    numeric_features : list
        Names of the columns to test.
    target_col : str, default 'Tipo de Ataque'
        Column whose values define the groups being compared.

    Returns
    -------
    dict
        Maps each tested feature to ``{'Statistic': ..., 'p-value': ...}``.
        Features with fewer than two usable groups are skipped (a message is
        printed) and do not appear in the result.
    '''
    results_levene = {}

    # The class split does not depend on the feature, so do it only once
    # instead of re-grouping the frame on every loop iteration.
    class_frames = [frame for _, frame in df.groupby(target_col)]

    for feature in numeric_features:
        samples = []
        for frame in class_frames:
            values = frame[feature].dropna().values
            # A positive variance already implies a non-empty sample that is
            # not all zeros, so this single check covers every exclusion.
            if len(values) > 0 and np.var(values) > 0:
                samples.append(values)

        # Levene's test compares groups, so at least two are required.
        if len(samples) < 2:
            print(f"Não existem grupos válidos para fazer o teste de Levene para a feature: {feature}")
            continue

        stat_levene, p_value_levene = stats.levene(*samples)
        results_levene[feature] = {'Statistic': stat_levene, 'p-value': p_value_levene}

    return results_levene

