# Time Data Analyser

- Notebook que faz a análise dos resultados das features extraídas do microcontrolador e gera os gráficos para serem utilizados no relatório do TG

+ Ha **15** colunas no arquivo csv dos dados das features:
    1. **`RMS`:** 
    2. **`Variance`:** 
    3. **`Skewness`:** 
    4. **`Kurtosis`:** 
    5. **`CrestFactor`:** 
    6. **`ShapeFactor`:** 
    7. **`ImpulseFactor`:** 
    8. **`MarginFactor`:** 
    9. **`Peak1`:** 
    10. **`Peak2`:** 
    11. **`Peak3`:** 
    12. **`PeakLocs1`:** 
    13. **`PeakLocs2`:** 
    14. **`PeakLocs3`:** 
    15. **`FAULT_ID`:** 

+ Os dados foram extraídos utilizando os modelos: **_Decision Tree_**, **_Extra Tree_**, **_Gaussian Naive Bayes_** e **_Random Forest_**
    - Além disso, para cada modelo foi feita a captura dos dados de tempo para cada um dos arquivos de áudio: _Off Condition with noise_, _Healthy condition_, _Bearing fault (F1)_, _Fan fault (F2)_ e _Gear fault (F3)_.

In [None]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix
from collections import deque

## Carregando os dados

In [None]:
# Feature CSVs captured from the microcontroller, as {model: {condition: path}}.
# Every model keeps its files in its own folder and tags them with a two-letter
# prefix; the condition only changes the file-name suffix.
models = {
    model: {
        condition: (
            f'../data/extracted_features/uControlador/{folder}'
            f'/features/{prefix}-F-{suffix}.CSV'
        )
        for condition, suffix in (
            ('FAULT_ID_OFF', 'OFF'),
            ('FAULT_ID_HEALTH', 'HTH'),
            ('FAULT_ID_BEARING', 'F1'),
            ('FAULT_ID_FAN', 'F2'),
            ('FAULT_ID_GEAR', 'F3'),
        )
    }
    for model, folder, prefix in (
        ('DecisionTree', 'decision_tree', 'DT'),
        ('ExtraTree', 'extra_trees', 'ET'),
        ('GaussianNaiveBayes', 'gaussian_naive_bayes', 'NB'),
        ('RandomForest', 'random_forest', 'RF'),
    )
}

# Ground-truth class ID assigned to the rows of each recording condition.
CONDITION_MAPPING = dict(
    FAULT_ID_OFF=1,
    FAULT_ID_HEALTH=2,
    FAULT_ID_BEARING=3,
    FAULT_ID_FAN=4,
    FAULT_ID_GEAR=5,
)


# SOUND_FILES = [
#     '../data/audio_files/off.wav',      # Off Condition with noise (Fault_ID1)
#     '../data/audio_files/health.wav',   # Healthy condition (Fault_ID2)
#     '../data/audio_files/f1.wav',       # Bearing fault (Fault_ID3)
#     '../data/audio_files/f2.wav',       # Fan fault (Fault_ID4)
#     '../data/audio_files/f3.wav'        # Gear fault (Fault_ID5)
# ]

# Column names of the feature CSV files, in file order.
CSV_COLUMNS_NAMES = [
    'RMS',
    'Variance',
    'Skewness',
    'Kurtosis',
    'CrestFactor',
    'ShapeFactor',
    'ImpulseFactor',
    'MarginFactor',
    'Peak1',
    'Peak2',
    'Peak3',
    'PeakLocs1',
    'PeakLocs2',
    'PeakLocs3',
    'FAULT_ID',
]

# 48 kHz, sample rate of the .wav audio files
AUDIO_SAMPLE_RATE = 48_000
# Size of the input buffer that was used
INPUT_BUFFER_SIZE = 2_048

# Class display names ordered by class ID: position i names class ID i + 1.
# The order must follow CONDITION_MAPPING (3 = bearing, 4 = fan, 5 = gear),
# otherwise the confusion-matrix tick labels are attached to the wrong classes.
FAULT_LABELS = ['Off Condition with noise', 'Healthy', 'Bearing fault', 'Fan fault', 'Gear fault']
window_size = 20  # Window size, same as the one used in the paper

## Processando os dados

In [None]:
def preprocess_csv(file_path, condition):
    """
    Load one feature CSV and tag every row with its ground-truth class ID.

    The ``FAULT_ID`` column, when present, is renamed to ``Prediction``. A new
    ``Fault_ID`` column is filled with the class ID that ``CONDITION_MAPPING``
    assigns to ``condition``.

    :param file_path: Path of the CSV file
    :param condition: Condition of the recording (a key of ``CONDITION_MAPPING``)
    :return: Pre-processed DataFrame (possibly empty if the CSV has no rows)
    :raises ValueError: If ``condition`` is not a key of ``CONDITION_MAPPING``
    """
    # Resolve the condition before any I/O: an unknown condition fails fast,
    # and the check does not depend on the CSV having at least one row.
    try:
        fault_id = CONDITION_MAPPING[condition]
    except KeyError:
        raise ValueError(f"Condição {condition} não encontrada no mapeamento") from None

    df = pd.read_csv(file_path)

    # The device writes its predicted class in the FAULT_ID column
    if 'FAULT_ID' in df.columns:
        df = df.rename(columns={'FAULT_ID': 'Prediction'})

    df['Fault_ID'] = fault_id
    return df

In [None]:
def apply_ecdf(predictions, window_size=20):
    """
    Smooth a prediction sequence with a sliding-window majority vote.

    For each position the output is the most frequent value among the last
    ``window_size`` predictions seen so far (fewer at the start of the
    sequence). When several values tie, the smallest one is chosen.

    :param predictions: Iterable of raw predictions
    :param window_size: Number of most recent predictions taking part in the vote
    :return: NumPy array with one smoothed prediction per input element
    """
    recent = deque(maxlen=window_size)

    def vote(sample):
        # The bounded deque silently drops the oldest sample once it is full
        recent.append(sample)
        # np.unique returns the labels sorted, so argmax picks the smallest
        # label among those sharing the highest count
        labels, freqs = np.unique(list(recent), return_counts=True)
        return labels[freqs.argmax()]

    return np.array([vote(sample) for sample in predictions])

In [None]:
def process_model_results(model_name, csv_files, fault_labels, window_size=20):
    """
    Evaluate one model: smooth its predictions, score them and plot the result.

    Every CSV is loaded with ``preprocess_csv`` and its raw predictions are
    smoothed with ``apply_ecdf``. Smoothing runs per file, so the sliding
    window never mixes predictions that belong to two different recordings.
    The row-normalised confusion matrix is saved to
    ``../data/{model_name}_confusion_matrix.png``.

    Files that cannot be loaded are reported on stdout and skipped.

    :param model_name: Model name, used in the plot title and output file name
    :param csv_files: Dictionary with {condition: file_path}
    :param fault_labels: Class names; position ``i`` names class ID ``i + 1``
    :param window_size: Window size handed to ``apply_ecdf``
    :return: Accuracy of the smoothed predictions over all loaded files
    :raises ValueError: If no rows could be loaded from any file
    """
    all_true = []
    all_pred = []

    for condition, file_path in csv_files.items():
        try:
            df = preprocess_csv(file_path, condition)
            # Read both columns before touching the accumulators, so a file
            # that fails here cannot leave them with different lengths.
            true_ids = df['Fault_ID'].values
            raw_pred = df['Prediction'].values
        except Exception as e:
            # Best effort: report the broken file and keep evaluating the rest
            print(f"Erro em {file_path}: {str(e)}")
            continue

        all_true.extend(true_ids)
        all_pred.extend(apply_ecdf(raw_pred, window_size))

    if not all_true:
        raise ValueError(f"Nenhum dado válido carregado para o modelo {model_name}")

    # Pin the matrix rows/columns to the known class IDs so they always line
    # up with fault_labels, even if a class is absent from the loaded data.
    # Predictions outside these IDs still count against the accuracy but are
    # left out of the matrix.
    class_ids = list(range(1, len(fault_labels) + 1))

    accuracy = accuracy_score(all_true, all_pred)
    cm = confusion_matrix(all_true, all_pred, labels=class_ids, normalize='true')

    fig, ax = plt.subplots(figsize=(12, 8))
    try:
        sns.heatmap(cm, annot=True, fmt=".2f", cmap='Blues',
                    xticklabels=fault_labels,
                    yticklabels=fault_labels,
                    ax=ax)
        ax.set_title(f'{model_name} - Acurácia: {accuracy:.2%}\nJanela ECDF={window_size}')
        ax.set_ylabel('Verdadeiro')
        ax.set_xlabel('Predito')
        fig.savefig(f'../data/{model_name}_confusion_matrix.png')
    finally:
        # Release the figure even when plotting or saving fails
        plt.close(fig)

    return accuracy

In [None]:
# Evaluate every model and keep its accuracy ---------------------------------
# NOTE(review): the window is hard-coded to 100 here although the module-level
# `window_size` constant is 20 - confirm which value is intended.
results = {}
for model_name, csv_files in models.items():
    try:
        acc = process_model_results(model_name, csv_files, FAULT_LABELS, window_size=100)
    except Exception as e:
        # A failing model is reported and left out of the results
        print(f"Falha no modelo {model_name}: {str(e)}")
    else:
        results[model_name] = acc

## Graficos

In [None]:
# Print the accuracy obtained by each model
print("\nResultados Finais:")
for model, acc in results.items():
    print(f"{model}: {acc:.2%}")

# Save the same figures as a CSV report: one row per model, one accuracy column
pd.DataFrame.from_dict(
    results,
    orient='index',
    columns=['Acurácia'],
).to_csv('../data/relatorio_final.csv')