# In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os


def fft_analysis(signal):
    """Return the dominant positive frequency of a linearly detrended signal.

    A least-squares straight line is fitted to ``signal`` and subtracted, the
    FFT of the residual is taken, and the strictly positive frequency bin
    with the largest magnitude is returned.

    Args:
        signal: One-dimensional sequence of real samples (list, NumPy array
            or pandas Series).  Samples are treated as evenly spaced.

    Returns:
        The dominant frequency in cycles per sample (``np.fft.fftfreq`` with
        its default unit spacing).  Its reciprocal is the dominant period
        expressed in samples.

    Raises:
        ValueError: If ``signal`` has fewer than three samples, because the
            spectrum then has no strictly positive frequency bin.
    """
    samples = np.asarray(signal, dtype=float)
    n_samples = samples.size
    if n_samples < 3:
        raise ValueError(
            "fft_analysis needs at least 3 samples to have a positive "
            f"frequency bin, got {n_samples}"
        )

    # Remove the linear trend so slow drift does not dominate the spectrum.
    positions = np.arange(n_samples)
    slope, intercept = np.polyfit(positions, samples, 1)
    detrended = samples - (positions * slope + intercept)

    spectrum = np.fft.fft(detrended)
    frequencies = np.fft.fftfreq(n_samples)

    # Keep only strictly positive frequencies: this drops the DC bin and the
    # mirrored negative half (including the Nyquist bin, which fftfreq
    # reports as -0.5 for even lengths).
    positive = frequencies > 0
    magnitudes = np.abs(spectrum[positive])
    return frequencies[positive][np.argmax(magnitudes)]


def _label_from_path(file_path, root_dir=None):
    """Return the label encoded in the directory part of ``file_path``."""
    if root_dir is None:
        # Legacy behaviour: second component of the normalised path.  This
        # is only the class folder when the path has exactly one leading
        # component before it.
        return os.path.normpath(file_path).split(os.sep)[1]

    relative_parts = os.path.relpath(file_path, root_dir).split(os.sep)
    if len(relative_parts) > 1:
        # First folder below the dataset root.
        return relative_parts[0]
    # The file sits directly in root_dir: there is no sub-folder to name it
    # after, so fall back to the root folder's own name.
    return os.path.basename(os.path.abspath(root_dir))


def process_file(file_path, root_dir=None):
    """Compute summary statistics for every channel of one CSV recording.

    The file is loaded with ``pd.read_csv`` and its eight columns are renamed
    to fixed sensor names.  For each column the mean, median, standard
    deviation, variance, range, skewness, kurtosis and dominant FFT frequency
    are stored under ``'<column>_<statistic>'`` keys.

    NOTE(review): ``read_csv`` is called with its default header handling, so
    the first line of the file is parsed as a header and then replaced by the
    fixed names.  If the recordings have no header line, the first sample of
    every channel is dropped -- confirm against the data.

    Args:
        file_path: Path of the CSV file to process.
        root_dir: Optional dataset root.  When given, the label is the first
            folder below ``root_dir`` on the way to the file.  When omitted,
            the label is the second component of the normalised
            ``file_path`` (the original behaviour).

    Returns:
        A dict of statistics for all columns plus a ``'label'`` entry.

    Raises:
        ValueError: If the file does not have exactly eight columns (raised
            by pandas when assigning the column names).
        IndexError: If ``root_dir`` is omitted and ``file_path`` has fewer
            than two path components.
    """
    data = pd.read_csv(file_path)

    data.columns = [
        "tachometer",
        "acc_under_axial",
        "acc_under_radiale",
        "acc_under_tangencial",
        "acc_over_axial",
        "acc_over_radiale",
        "acc_over_tangencial",
        "microphone",
    ]

    statistics_dict = {}

    for col in data.columns:
        series = data[col]
        statistics_dict[f'{col}_mean'] = series.mean()
        statistics_dict[f'{col}_median'] = series.median()
        statistics_dict[f'{col}_std'] = series.std()
        statistics_dict[f'{col}_variance'] = series.var()
        statistics_dict[f'{col}_range'] = series.max() - series.min()
        statistics_dict[f'{col}_skewness'] = series.skew()
        statistics_dict[f'{col}_kurtosis'] = series.kurt()
        statistics_dict[f'{col}_dominant_frequency'] = fft_analysis(series)

    statistics_dict['label'] = _label_from_path(file_path, root_dir)
    return statistics_dict


def process_directory(root_dir):
    """Process every ``.csv`` file below ``root_dir`` into one DataFrame.

    The tree is walked recursively in sorted order so the row order of the
    result is reproducible.  Each file is labelled with the name of the first
    folder below ``root_dir`` that contains it.  A file that fails to process
    is reported on stdout and skipped, so one bad recording does not abort
    the whole run.

    Args:
        root_dir: Directory to scan.

    Returns:
        A DataFrame with one row per successfully processed file; empty when
        no file could be processed.
    """
    all_statistics = []

    for subdir, dirs, files in os.walk(root_dir):
        # Sorting in place makes os.walk descend in a deterministic order.
        dirs.sort()
        for file in sorted(files):
            if not file.endswith(".csv"):
                continue
            file_path = os.path.join(subdir, file)
            try:
                stats = process_file(file_path, root_dir)
            except Exception as e:
                # Best-effort batch job: report the failure and carry on.
                print(f"Error procesando {file_path}: {e}")
            else:
                all_statistics.append(stats)

    return pd.DataFrame(all_statistics)


# Directory tree scanned for CSV files, and the file the combined table is
# written to (relative to the current working directory).
root_dir = '../bearing_fault_detection'
output_csv_path = 'combined_statistics_full_sobregrupos.csv'

# Collect one row of statistics per successfully processed CSV file.
combined_df = process_directory(root_dir)

# Persist the table without the DataFrame index column.
combined_df.to_csv(path_or_buf=output_csv_path, index=False)

print(f"Archivo grande combinado guardado en: {output_csv_path}")