# 2. Concatenar CSVs

En este notebook se procede a la concatenación de csvs procesados en el notebook '1_generar_estadisticos'.

Se prueban tres tipos de concatenación
* **2.1 Sin arreglo**:  Calcula las estadísticas para el DataFrame y devuelve uno nuevo con los resultados.
Tras realizar la concatenación observamos que al realizar el walk, en el caso de 'normal', 'overhang' y 'underhang', al tener una estructura distinta a los demás fallos la asignacion de los valores en las columnas era incorracta. Por lo tanto en el 2.2 se procede al arreglo.

* **2.2 Con arreglo**: Recorre la estructura de directorios, procesa los CSVs y asigna columnas personalizadas,
    manejando casos específicos como carpetas 'normal', 'overhang' y 'underhang'. En un primer lugar decidimos asignar como tipo a las subcarpetas (ej: underhang_ball_fault). Pero finalmente tomamos la decision reducir el numero de clases a seis ya que las subclases eran práctimente iguales y dificiles de diferenciar(Realizado en el 2.3).

* **2.3 quitando las subacarpetas**:Recorre la estructura de directorios, procesa los CSVs y asigna columnas personalizadas,
    manejando casos específicos como carpetas 'normal', 'overhang' y 'underhang'. En el caso se 'normal' en la medida la no tener tomamos la decision de en esa columna poner 0, y en eel caso de 'overhang' y 'underhang' hemos prescindido de los subtipos.

## 2.1 Sin arreglo en normal y en over/underhang

In [4]:
import os
import pandas as pd
import numpy as np

def load_and_preprocess_csv(csv_path, segment_length=500):
    """
    Carga un archivo CSV, inserta una fila vacía y utiliza automáticamente la primera fila del archivo.
    """
    df = pd.read_csv(csv_path)
    
    # Guardar la primera fila del archivo CSV antes de insertar la fila vacía
    first_row_values = df.iloc[0].tolist()
    
    # Insertar una fila vacía al principio
    df.loc[-1] = [np.nan] * len(df.columns)  # Crear una fila vacía
    df.index = df.index + 1  # Desplazar los índices hacia abajo
    df = df.sort_index()  # Ordenar el índice para que la fila vacía quede en la parte superior

    # Asignar los valores originales de la primera fila a la nueva primera fila
    df.iloc[0] = first_row_values

    # Definir los nombres de las columnas si es necesario
    column_names = ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8']
    df.columns = column_names[:len(df.columns)]  # Asignar nombres de columnas dinámicamente
    
    # Dividir el DataFrame en segmentos
    num_segments = len(df) // segment_length  # Número total de segmentos completos
    segments = [df.iloc[i * segment_length: (i + 1) * segment_length] for i in range(num_segments)]
    
    return segments

def calculate_statistics(df):
   
    stats = {}
    
    # Iterar por cada columna y calcular las estadísticas
    for column in df.columns:
        stats[f'{column}_mean'] = df[column].mean()
        stats[f'{column}_median'] = df[column].median()
        stats[f'{column}_std'] = df[column].std()
        stats[f'{column}_var'] = df[column].var()
        stats[f'{column}_cv'] = df[column].std() / df[column].mean()
        stats[f'{column}_min'] = df[column].min()
        stats[f'{column}_max'] = df[column].max()
        stats[f'{column}_IQR'] = df[column].quantile(0.75) - df[column].quantile(0.25)

    # Convertir el diccionario en un DataFrame de una sola fila
    return pd.DataFrame(stats, index=[0])

def add_custom_columns(df, col_names, values):
    """
    Añade columnas personalizadas al DataFrame.
    """
    for col_name, value in zip(col_names, values):
        df[col_name] = value
    return df

def process_csvs_in_directory(base_dir, segment_length=500):
    """
    Recorre la estructura de directorios, procesa los CSVs y asigna columnas personalizadas.
    """
    processed_data = []

    for root, dirs, files in os.walk(base_dir):
        # Identificar archivos CSV
        csv_files = [f for f in files if f.endswith('.csv')]
        
        for csv_file in csv_files:
            csv_path = os.path.join(root, csv_file)
            
            # Extraer los valores de las columnas personalizadas del nombre de archivo y ruta
            file_name = os.path.splitext(csv_file)[0]
            folder_name = os.path.basename(root)
            parent_folder = os.path.basename(os.path.dirname(root))
            
            custom_column_names = ['Hz', 'medida', 'Tipo']
            custom_values = [file_name, folder_name, parent_folder]
            
            # Procesar el archivo CSV
            segments = load_and_preprocess_csv(csv_path, segment_length)
            
            for segment in segments:
                # Calcular estadísticas
                stats_df = calculate_statistics(segment)
                # Añadir columnas personalizadas
                stats_df = add_custom_columns(stats_df, custom_column_names, custom_values)
                # Almacenar el DataFrame procesado
                processed_data.append(stats_df)

    # Concatenar todos los DataFrames en uno solo
    if processed_data:
        return pd.concat(processed_data, ignore_index=True)
    else:
        return pd.DataFrame()  # Retornar un DataFrame vacío si no hay datos procesados



In [3]:
#USO
base_dir = '../bearing_fault'

# Procesar todos los CSVs en la estructura de directorios y concatenar los resultados
final_df = process_csvs_in_directory(base_dir, segment_length=500)

# Guardar el DataFrame final en un archivo CSV
final_df.to_csv('processed_data.csv', index=False)

final_df


Unnamed: 0,S1_mean,S1_median,S1_std,S1_var,S1_cv,S1_min,S1_max,S1_IQR,S2_mean,S2_median,...,S8_median,S8_std,S8_var,S8_cv,S8_min,S8_max,S8_IQR,Hz,medida,Tipo
0,0.286609,-0.454150,1.760644,3.099869,6.143024,-0.74812,4.80120,0.186630,0.021666,0.031395,...,-0.023922,0.080811,0.006530,-7.557894,-0.15725,0.34543,0.118908,12.288,0.5mm,horizontal-misalignment
1,3.455610,4.518150,2.094236,4.385825,0.606040,-0.64089,4.63560,0.092075,-0.477666,-0.431530,...,0.025264,0.115949,0.013444,3.698598,-0.16506,0.34609,0.206634,12.288,0.5mm,horizontal-misalignment
2,-0.563440,-0.608240,0.089544,0.008018,-0.158924,-0.71058,-0.18403,0.090285,-0.399980,-0.359175,...,-0.002870,0.105225,0.011072,12.542015,-0.28833,0.30622,0.164061,12.288,0.5mm,horizontal-misalignment
3,-0.617669,-0.588875,0.123700,0.015302,-0.200269,-0.96866,-0.17164,0.099337,-0.807317,-0.667040,...,0.042654,0.208275,0.043378,2.588939,-0.26358,0.76796,0.301667,12.288,0.5mm,horizontal-misalignment
4,-0.589237,-0.580280,0.109047,0.011891,-0.185065,-0.93480,-0.26924,0.095765,-0.660323,-0.560835,...,0.004480,0.197578,0.039037,3.445324,-0.27998,0.62127,0.297905,12.288,0.5mm,horizontal-misalignment
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83995,-0.482294,-0.731295,1.032127,1.065287,-2.140036,-1.18790,4.70840,0.119138,-0.062058,-0.026253,...,-0.006095,0.210738,0.044410,9.968423,-0.36460,0.59416,0.346293,61.0304,1.90mm,vertical-misalignment
83996,0.497346,-0.703255,2.181805,4.760273,4.386897,-1.15060,4.76810,0.392220,0.111686,0.086110,...,-0.006776,0.173011,0.029933,-224.655849,-0.30021,0.55831,0.302690,61.0304,1.90mm,vertical-misalignment
83997,-0.766249,-0.741715,0.095152,0.009054,-0.124179,-1.16010,-0.48600,0.083067,0.045768,0.099299,...,-0.008565,0.210502,0.044311,14.491912,-0.37432,0.71113,0.332762,61.0304,1.90mm,vertical-misalignment
83998,0.540872,-0.649505,2.154793,4.643134,3.983921,-1.16870,4.77620,0.351330,-0.093285,-0.065109,...,0.009104,0.210349,0.044247,10.582702,-0.37507,0.64408,0.354055,61.0304,1.90mm,vertical-misalignment


In [14]:
ver = final_df[final_df.Tipo == 'imbalance']
ver

Unnamed: 0,S1_mean,S1_median,S1_std,S1_var,S1_cv,S1_min,S1_max,S1_IQR,S2_mean,S2_median,...,S8_median,S8_std,S8_var,S8_cv,S8_min,S8_max,S8_IQR,Hz,medida,Tipo
8000,3.423437,4.519850,2.120827,4.497906,0.619502,-0.74536,4.625200,0.093350,-0.155627,-0.166530,...,0.006987,0.118429,0.014025,5.234659,-0.17628,0.31484,0.205524,13.9264,10g,imbalance
8001,-0.557464,-0.604010,0.090670,0.008221,-0.162647,-0.72114,-0.200080,0.087430,-0.119600,-0.016183,...,0.004699,0.102789,0.010566,11.132329,-0.16778,0.31710,0.179383,13.9264,10g,imbalance
8002,-0.629374,-0.591760,0.101226,0.010247,-0.160836,-0.99625,-0.174630,0.091898,-0.254386,-0.120050,...,0.047607,0.172402,0.029722,2.839674,-0.23805,0.50141,0.304160,13.9264,10g,imbalance
8003,-0.562279,-0.570630,0.126805,0.016079,-0.225519,-0.96048,-0.078649,0.083803,-0.157924,-0.076623,...,0.001973,0.139290,0.019402,7.651119,-0.21369,0.47040,0.214523,13.9264,10g,imbalance
8004,-0.511046,-0.545170,0.111004,0.012322,-0.217210,-0.88883,-0.107150,0.084640,0.175552,0.246915,...,0.001803,0.113894,0.012972,11.422415,-0.17219,0.27714,0.191910,13.9264,10g,imbalance
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
21995,0.464974,-0.745065,2.212850,4.896706,4.759087,-1.18140,4.765200,0.372935,0.311492,0.343465,...,-0.021246,0.183423,0.033644,-45.767297,-0.32725,0.47525,0.292730,62.0544,6g,imbalance
21996,-0.751649,-0.754560,0.121216,0.014693,-0.161267,-1.15440,-0.363880,0.095550,-0.184678,-0.176630,...,0.000768,0.240654,0.057914,8.871741,-0.42937,0.79587,0.380130,62.0544,6g,imbalance
21997,0.537143,-0.667770,2.169321,4.705952,4.038630,-1.04370,4.579900,0.393132,0.064856,0.068761,...,-0.004708,0.208255,0.043370,33.233827,-0.41452,0.76815,0.320320,62.0544,6g,imbalance
21998,-0.776595,-0.760945,0.315891,0.099787,-0.406764,-1.13710,4.643700,0.075475,0.053538,0.046412,...,-0.011692,0.216219,0.046751,37.643290,-0.41557,0.76504,0.338475,62.0544,6g,imbalance


## 2.2 con arreglo 

In [21]:
import os
import pandas as pd
import numpy as np

def load_and_preprocess_csv(csv_path, segment_length=500):
    """
    Carga un archivo CSV, inserta una fila vacía y utiliza automáticamente la primera fila del archivo.
    """
    df = pd.read_csv(csv_path)
    
    # Guardar la primera fila del archivo CSV antes de insertar la fila vacía
    first_row_values = df.iloc[0].tolist()
    
    # Insertar una fila vacía al principio
    df.loc[-1] = [np.nan] * len(df.columns)  # Crear una fila vacía
    df.index = df.index + 1  # Desplazar los índices hacia abajo
    df = df.sort_index()  # Ordenar el índice para que la fila vacía quede en la parte superior

    # Asignar los valores originales de la primera fila a la nueva primera fila
    df.iloc[0] = first_row_values

    # Definir los nombres de las columnas si es necesario
    column_names = ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8']
    df.columns = column_names[:len(df.columns)]  # Asignar nombres de columnas dinámicamente
    
    # Dividir el DataFrame en segmentos
    num_segments = len(df) // segment_length  # Número total de segmentos completos
    segments = [df.iloc[i * segment_length: (i + 1) * segment_length] for i in range(num_segments)]
    
    return segments

def calculate_statistics(df):
    """
    Calcula las estadísticas para un DataFrame y devuelve un nuevo DataFrame con los resultados.
    """
    stats = {}
    
    # Iterar por cada columna y calcular las estadísticas
    for column in df.columns:
        stats[f'{column}_mean'] = df[column].mean()
        stats[f'{column}_median'] = df[column].median()
        stats[f'{column}_std'] = df[column].std()
        stats[f'{column}_var'] = df[column].var()
        stats[f'{column}_cv'] = df[column].std() / df[column].mean()
        stats[f'{column}_min'] = df[column].min()
        stats[f'{column}_max'] = df[column].max()
        stats[f'{column}_IQR'] = df[column].quantile(0.75) - df[column].quantile(0.25)

    # Convertir el diccionario en un DataFrame de una sola fila
    return pd.DataFrame(stats, index=[0])

def add_custom_columns(df, col_names, values):
    """
    Añade columnas personalizadas al DataFrame.
    """
    for col_name, value in zip(col_names, values):
        df[col_name] = value
    return df

def process_csvs_in_directory(base_dir, segment_length=500):
    
    processed_data = []

    for root, dirs, files in os.walk(base_dir):
        # Identificar archivos CSV
        csv_files = [f for f in files if f.endswith('.csv')]
        
        for csv_file in csv_files:
            csv_path = os.path.join(root, csv_file)
            
            # Extraer información de las carpetas
            file_name = os.path.splitext(csv_file)[0]  # Nombre del archivo sin extensión
            folder_name = os.path.basename(root)      # Nombre de la carpeta actual
            parent_folder = os.path.basename(os.path.dirname(root))  # Carpeta superior
            grandparent_folder = os.path.basename(os.path.dirname(os.path.dirname(root)))  # Carpeta principal

            # Definir valores predeterminados
            custom_column_names = ['Hz', 'medida', 'Tipo']
            
            # Manejo especial para carpeta 'normal'
            if folder_name == "normal":
                custom_values = [file_name, 0, "normal"]
            
            # Manejo especial para carpetas 'overhang' y 'underhang'
            elif grandparent_folder in ["overhang", "underhang"]:
                tipo = f"{grandparent_folder}_{parent_folder}"  # Carpeta principal + subcarpeta inmediata
                medida = folder_name  # Última subcarpeta
                custom_values = [file_name, medida, tipo]

            # Para otros casos estándar
            else:
                custom_values = [file_name, folder_name, parent_folder]
            
            # Procesar el archivo CSV
            segments = load_and_preprocess_csv(csv_path, segment_length)
            
            for segment in segments:
                # Calcular estadísticas
                stats_df = calculate_statistics(segment)
                # Añadir columnas personalizadas
                stats_df = add_custom_columns(stats_df, custom_column_names, custom_values)
                # Almacenar el DataFrame procesado
                processed_data.append(stats_df)

    # Concatenar todos los DataFrames en uno solo
    if processed_data:
        return pd.concat(processed_data, ignore_index=True)
    else:
        return pd.DataFrame()  # Retornar un DataFrame vacío si no hay datos procesados


# Ejemplo de uso:

# Ruta base del directorio que contiene las carpetas y CSVs
base_dir = '../bearing_fault'

# Procesar todos los CSVs en la estructura de directorios y concatenar los resultados
final_df = process_csvs_in_directory(base_dir, segment_length=500)

# Guardar el DataFrame final en un archivo CSV
final_df.to_csv('processed_data_ultimate.csv', index=False)

# Mostrar las primeras filas del DataFrame final
print(final_df.head())


    S1_mean  S1_median    S1_std    S1_var     S1_cv   S1_min   S1_max  \
0  0.286609  -0.454150  1.760644  3.099869  6.143024 -0.74812  4.80120   
1  3.455610   4.518150  2.094236  4.385825  0.606040 -0.64089  4.63560   
2 -0.563440  -0.608240  0.089544  0.008018 -0.158924 -0.71058 -0.18403   
3 -0.617669  -0.588875  0.123700  0.015302 -0.200269 -0.96866 -0.17164   
4 -0.589237  -0.580280  0.109047  0.011891 -0.185065 -0.93480 -0.26924   

     S1_IQR   S2_mean  S2_median  ...  S8_median    S8_std    S8_var  \
0  0.186630  0.021666   0.031395  ...  -0.023922  0.080811  0.006530   
1  0.092075 -0.477666  -0.431530  ...   0.025264  0.115949  0.013444   
2  0.090285 -0.399980  -0.359175  ...  -0.002870  0.105225  0.011072   
3  0.099337 -0.807317  -0.667040  ...   0.042654  0.208275  0.043378   
4  0.095765 -0.660323  -0.560835  ...   0.004480  0.197578  0.039037   

       S8_cv   S8_min   S8_max    S8_IQR      Hz  medida  \
0  -7.557894 -0.15725  0.34543  0.118908  12.288   0.5mm   
1 

In [22]:
final_df

Unnamed: 0,S1_mean,S1_median,S1_std,S1_var,S1_cv,S1_min,S1_max,S1_IQR,S2_mean,S2_median,...,S8_median,S8_std,S8_var,S8_cv,S8_min,S8_max,S8_IQR,Hz,medida,Tipo
0,0.286609,-0.454150,1.760644,3.099869,6.143024,-0.74812,4.80120,0.186630,0.021666,0.031395,...,-0.023922,0.080811,0.006530,-7.557894,-0.15725,0.34543,0.118908,12.288,0.5mm,horizontal-misalignment
1,3.455610,4.518150,2.094236,4.385825,0.606040,-0.64089,4.63560,0.092075,-0.477666,-0.431530,...,0.025264,0.115949,0.013444,3.698598,-0.16506,0.34609,0.206634,12.288,0.5mm,horizontal-misalignment
2,-0.563440,-0.608240,0.089544,0.008018,-0.158924,-0.71058,-0.18403,0.090285,-0.399980,-0.359175,...,-0.002870,0.105225,0.011072,12.542015,-0.28833,0.30622,0.164061,12.288,0.5mm,horizontal-misalignment
3,-0.617669,-0.588875,0.123700,0.015302,-0.200269,-0.96866,-0.17164,0.099337,-0.807317,-0.667040,...,0.042654,0.208275,0.043378,2.588939,-0.26358,0.76796,0.301667,12.288,0.5mm,horizontal-misalignment
4,-0.589237,-0.580280,0.109047,0.011891,-0.185065,-0.93480,-0.26924,0.095765,-0.660323,-0.560835,...,0.004480,0.197578,0.039037,3.445324,-0.27998,0.62127,0.297905,12.288,0.5mm,horizontal-misalignment
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83995,-0.482294,-0.731295,1.032127,1.065287,-2.140036,-1.18790,4.70840,0.119138,-0.062058,-0.026253,...,-0.006095,0.210738,0.044410,9.968423,-0.36460,0.59416,0.346293,61.0304,1.90mm,vertical-misalignment
83996,0.497346,-0.703255,2.181805,4.760273,4.386897,-1.15060,4.76810,0.392220,0.111686,0.086110,...,-0.006776,0.173011,0.029933,-224.655849,-0.30021,0.55831,0.302690,61.0304,1.90mm,vertical-misalignment
83997,-0.766249,-0.741715,0.095152,0.009054,-0.124179,-1.16010,-0.48600,0.083067,0.045768,0.099299,...,-0.008565,0.210502,0.044311,14.491912,-0.37432,0.71113,0.332762,61.0304,1.90mm,vertical-misalignment
83998,0.540872,-0.649505,2.154793,4.643134,3.983921,-1.16870,4.77620,0.351330,-0.093285,-0.065109,...,0.009104,0.210349,0.044247,10.582702,-0.37507,0.64408,0.354055,61.0304,1.90mm,vertical-misalignment


In [23]:
ver = final_df[final_df.Tipo == 'imbalance']
ver

Unnamed: 0,S1_mean,S1_median,S1_std,S1_var,S1_cv,S1_min,S1_max,S1_IQR,S2_mean,S2_median,...,S8_median,S8_std,S8_var,S8_cv,S8_min,S8_max,S8_IQR,Hz,medida,Tipo
22000,2.959290,4.508600,2.382522,5.676411,0.805099,-0.99208,4.82550,5.142255,-0.185484,-0.113750,...,0.010077,0.109887,0.012075,6.213349,-0.17925,0.33903,0.192579,12.288,0,normal
22001,-0.552406,-0.599735,0.092168,0.008495,-0.166849,-0.71160,-0.13611,0.097030,-0.060016,-0.038474,...,-0.000376,0.090901,0.008263,24.143600,-0.13685,0.21147,0.159693,12.288,0,normal
22002,-0.633190,-0.596305,0.087390,0.007637,-0.138016,-1.03430,-0.47044,0.099627,-0.267587,-0.233155,...,0.034988,0.154203,0.023779,3.280045,-0.21188,0.50630,0.239867,12.288,0,normal
22003,-0.535707,-0.563020,0.116485,0.013569,-0.217442,-0.92042,-0.17545,0.100200,-0.085666,-0.050691,...,-0.001544,0.136097,0.018522,7.670252,-0.20225,0.41431,0.210983,12.288,0,normal
22004,-0.530275,-0.542080,0.123164,0.015169,-0.232264,-0.89921,-0.10289,0.093740,0.111051,0.148200,...,-0.016840,0.096576,0.009327,-23.850594,-0.16331,0.26942,0.159395,12.288,0,normal
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
23995,0.460329,-0.730125,2.177597,4.741930,4.730519,-1.15260,4.75960,0.065497,-0.081929,-0.042240,...,-0.008358,0.216018,0.046664,14.421248,-0.40661,0.66138,0.341135,61.44,0,normal
23996,-0.554922,-0.720500,0.794459,0.631165,-1.431658,-1.33730,4.83840,0.099927,0.190453,0.241065,...,-0.017534,0.185499,0.034410,-38.777733,-0.32763,0.47087,0.308555,61.44,0,normal
23997,0.391176,-0.733435,2.085717,4.350216,5.331911,-0.87053,4.48280,0.264460,-0.018859,0.017035,...,0.006269,0.223017,0.049736,10.115686,-0.39619,0.64109,0.369185,61.44,0,normal
23998,0.481279,-0.717200,2.157782,4.656021,4.483431,-1.13910,4.75290,0.212715,0.088833,0.089363,...,-0.012983,0.204288,0.041734,79.181206,-0.41564,0.67903,0.330067,61.44,0,normal


## 2.3 quitando subcarpeta over/underhang

In [None]:
def process_csvs_in_directory(base_dir, segment_length=500):
 
    processed_data = []

    for root, dirs, files in os.walk(base_dir):
        # Identificar archivos CSV
        csv_files = [f for f in files if f.endswith('.csv')]
        
        for csv_file in csv_files:
            csv_path = os.path.join(root, csv_file)
            
            # Extraer información de las carpetas
            file_name = os.path.splitext(csv_file)[0]  # Nombre del archivo sin extensión
            folder_name = os.path.basename(root)      # Nombre de la carpeta actual
            parent_folder = os.path.basename(os.path.dirname(root))  # Carpeta superior
            grandparent_folder = os.path.basename(os.path.dirname(os.path.dirname(root)))  # Carpeta principal

            # Definir valores predeterminados
            custom_column_names = ['Hz', 'medida', 'Tipo']
            
            # Manejo especial para carpeta 'normal'
            if folder_name == "normal":
                custom_values = [file_name, 0, "normal"]
            
            # Manejo especial para carpetas 'overhang' y 'underhang'
            elif grandparent_folder in ["overhang", "underhang"]:
                tipo = grandparent_folder  # Usar solo la carpeta principal
                medida = folder_name  # Última subcarpeta (como medida)
                custom_values = [file_name, medida, tipo]

            # Para otros casos estándar
            else:
                custom_values = [file_name, folder_name, parent_folder]
            
            # Procesar el archivo CSV
            segments = load_and_preprocess_csv(csv_path, segment_length)
            
            for segment in segments:
                # Calcular estadísticas
                stats_df = calculate_statistics(segment)
                # Añadir columnas personalizadas
                stats_df = add_custom_columns(stats_df, custom_column_names, custom_values)
                # Almacenar el DataFrame procesado
                processed_data.append(stats_df)

    # Concatenar todos los DataFrames en uno solo
    if processed_data:
        return pd.concat(processed_data, ignore_index=True)
    else:
        return pd.DataFrame()  # Retornar un DataFrame vacío si no hay datos procesados


# Ejemplo de uso:

# Ruta base del directorio que contiene las carpetas y CSVs
base_dir = '../bearing_fault'

# Procesar todos los CSVs en la estructura de directorios y concatenar los resultados
final_df = process_csvs_in_directory(base_dir, segment_length=500)

# Guardar el DataFrame final en un archivo CSV
final_df.to_csv('ultimate.csv', index=False)

# Mostrar las primeras filas del DataFrame final
final_df


Unnamed: 0,S1_mean,S1_median,S1_std,S1_var,S1_cv,S1_min,S1_max,S1_IQR,S2_mean,S2_median,...,S8_median,S8_std,S8_var,S8_cv,S8_min,S8_max,S8_IQR,Hz,medida,Tipo
0,0.286609,-0.454150,1.760644,3.099869,6.143024,-0.74812,4.80120,0.186630,0.021666,0.031395,...,-0.023922,0.080811,0.006530,-7.557894,-0.15725,0.34543,0.118908,12.288,0.5mm,horizontal-misalignment
1,3.455610,4.518150,2.094236,4.385825,0.606040,-0.64089,4.63560,0.092075,-0.477666,-0.431530,...,0.025264,0.115949,0.013444,3.698598,-0.16506,0.34609,0.206634,12.288,0.5mm,horizontal-misalignment
2,-0.563440,-0.608240,0.089544,0.008018,-0.158924,-0.71058,-0.18403,0.090285,-0.399980,-0.359175,...,-0.002870,0.105225,0.011072,12.542015,-0.28833,0.30622,0.164061,12.288,0.5mm,horizontal-misalignment
3,-0.617669,-0.588875,0.123700,0.015302,-0.200269,-0.96866,-0.17164,0.099337,-0.807317,-0.667040,...,0.042654,0.208275,0.043378,2.588939,-0.26358,0.76796,0.301667,12.288,0.5mm,horizontal-misalignment
4,-0.589237,-0.580280,0.109047,0.011891,-0.185065,-0.93480,-0.26924,0.095765,-0.660323,-0.560835,...,0.004480,0.197578,0.039037,3.445324,-0.27998,0.62127,0.297905,12.288,0.5mm,horizontal-misalignment
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
83995,-0.482294,-0.731295,1.032127,1.065287,-2.140036,-1.18790,4.70840,0.119138,-0.062058,-0.026253,...,-0.006095,0.210738,0.044410,9.968423,-0.36460,0.59416,0.346293,61.0304,1.90mm,vertical-misalignment
83996,0.497346,-0.703255,2.181805,4.760273,4.386897,-1.15060,4.76810,0.392220,0.111686,0.086110,...,-0.006776,0.173011,0.029933,-224.655849,-0.30021,0.55831,0.302690,61.0304,1.90mm,vertical-misalignment
83997,-0.766249,-0.741715,0.095152,0.009054,-0.124179,-1.16010,-0.48600,0.083067,0.045768,0.099299,...,-0.008565,0.210502,0.044311,14.491912,-0.37432,0.71113,0.332762,61.0304,1.90mm,vertical-misalignment
83998,0.540872,-0.649505,2.154793,4.643134,3.983921,-1.16870,4.77620,0.351330,-0.093285,-0.065109,...,0.009104,0.210349,0.044247,10.582702,-0.37507,0.64408,0.354055,61.0304,1.90mm,vertical-misalignment


In [28]:
ver = final_df[(final_df.Tipo =='overhang') ]
ver

Unnamed: 0,S1_mean,S1_median,S1_std,S1_var,S1_cv,S1_min,S1_max,S1_IQR,S2_mean,S2_median,...,S8_median,S8_std,S8_var,S8_cv,S8_min,S8_max,S8_IQR,Hz,medida,Tipo
24000,-0.580178,-0.541330,0.088068,0.007756,-0.151795,-0.92575,-0.435360,0.107023,0.237748,0.893550,...,0.001450,0.100604,0.010121,10.298396,-0.15133,0.22856,0.176912,12.9024,0g,overhang
24001,-0.488979,-0.510710,0.130014,0.016904,-0.265889,-0.90091,-0.061922,0.097058,0.559311,2.056150,...,-0.026405,0.090237,0.008143,-9.114275,-0.15016,0.21671,0.144945,12.9024,0g,overhang
24002,-0.469886,-0.491415,0.119908,0.014378,-0.255186,-0.77675,-0.075922,0.099620,-1.407891,1.319200,...,-0.011960,0.090818,0.008248,-113.048994,-0.17776,0.37191,0.149648,12.9024,0g,overhang
24003,3.546248,4.485950,1.958819,3.836971,0.552364,-0.91016,4.996500,0.082250,0.054627,1.748250,...,-0.002736,0.103439,0.010700,10.382146,-0.15274,0.33871,0.177777,12.9024,0g,overhang
24004,-0.285250,-0.599455,1.124899,1.265398,-3.943558,-0.68694,4.701400,0.146370,0.568264,2.108350,...,0.007315,0.097890,0.009583,8.244944,-0.17438,0.27402,0.178720,12.9024,0g,overhang
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
47995,-0.810939,-0.855330,0.100237,0.010048,-0.123607,-1.00300,-0.430250,0.102010,-0.243489,-0.238945,...,0.023415,0.235419,0.055422,8.360897,-0.39607,0.72352,0.402563,61.8496,6g,overhang
47996,0.550112,-0.845080,2.290644,5.247052,4.163960,-1.23240,4.544700,5.088745,0.296095,0.361520,...,-0.019700,0.218419,0.047707,83.479925,-0.37385,0.72895,0.357420,61.8496,6g,overhang
47997,-0.800748,-0.844480,0.089724,0.008050,-0.112051,-0.93115,-0.435400,0.095152,-0.230256,-0.157730,...,-0.000704,0.219117,0.048012,16.476578,-0.37955,0.67976,0.358183,61.8496,6g,overhang
47998,0.572154,-0.857895,2.313973,5.354470,4.044321,-1.12950,4.524400,5.094668,-0.149333,0.022561,...,-0.000570,0.223409,0.049912,14.917633,-0.41216,0.65394,0.369665,61.8496,6g,overhang
