# 1. Generar estadísticos

Partiendo de los csvs crudos se ha generado un csv procesado obteniedo los estadísticos de cada sensor.

## 1.1 Funciones

In [16]:
import os
import pandas as pd
import numpy as np

def load_and_preprocess_csv(csv_path, segment_length=250000):
    """
    Carga un archivo CSV, inserta una fila vacía y utiliza automáticamente la primera fila del archivo.
    """
    df = pd.read_csv(csv_path)
    
    # Guardar la primera fila del archivo CSV antes de insertar la fila vacía
    first_row_values = df.iloc[0].tolist()
    
    # Insertar una fila vacía al principio
    df.loc[-1] = [np.nan] * len(df.columns)  # Crear una fila vacía
    df.index = df.index + 1  # Desplazar los índices hacia abajo
    df = df.sort_index()  # Ordenar el índice para que la fila vacía quede en la parte superior

    # Asignar los valores originales de la primera fila a la nueva primera fila
    df.iloc[0] = first_row_values

    # Definir los nombres de las columnas si es necesario
    column_names = ['S1', 'S2', 'S3', 'S4', 'S5', 'S6', 'S7', 'S8']
    df.columns = column_names[:len(df.columns)]  # Asignar nombres de columnas dinámicamente
    
    # Dividir el DataFrame en segmentos
    num_segments = len(df) // segment_length  # Número total de segmentos completos
    segments = [df.iloc[i * segment_length: (i + 1) * segment_length] for i in range(num_segments)]
    
    return segments


## 1.2 Llamada a la función

 * **csv_path:** Ruta del archivo CSV a procesar.
 * **segment_length:** Longitud del segmento en filas (500 por defecto).


In [None]:
csv_path = pd.read_csv = '../bearing_fault/horizontal-misalignment/0.5mm/12.288.csv'
segment_length = 500
segments = load_and_preprocess_csv(csv_path, segment_length)

In [8]:
segments

[          S1       S2       S3        S4       S5        S6       S7        S8
 0   -0.54833 -1.21600 -0.35353 -0.060630  0.50864  0.195880  0.23587  0.045124
 1   -0.54833 -1.21600 -0.35353 -0.060630  0.50864  0.195880  0.23587  0.045124
 2   -0.44806  1.33940  0.28563  0.064970  0.65422  0.196410  0.37791 -0.070311
 3   -0.57483 -1.25120 -0.47459  0.016734  0.47286  0.192980  0.21212  0.124980
 4   -0.45559  0.84107  0.31498  0.124230  0.62664  0.194600  0.36869 -0.133050
 ..       ...      ...      ...       ...      ...       ...      ...       ...
 495  4.60280  0.58925 -0.10452 -0.013871 -0.34693  0.084376  0.22026  0.066858
 496  4.59080 -1.17360 -0.20568 -0.060926 -0.39292  0.083394  0.16428 -0.028157
 497  4.58330  1.14750  0.16875 -0.003023 -0.33402  0.084883  0.19970 -0.016305
 498  4.61010 -1.62430 -0.41950 -0.092770 -0.40315  0.084241  0.11698  0.073624
 499  4.56690  1.17140  0.30305  0.023600 -0.32154  0.087855  0.18007 -0.087152
 
 [500 rows x 8 columns],
           S1

## 1.3 Cálculo de estadísticos

In [5]:
def calculate_statistics(df):
    """
    Calcula las estadísticas para un DataFrame y devuelve un nuevo DataFrame con los resultados.
    """
    stats = {}
    
    # Iterar por cada columna y calcular las estadísticas
    for column in df.columns:
        stats[f'{column}_mean'] = df[column].mean()
        stats[f'{column}_median'] = df[column].median()
        stats[f'{column}_std'] = df[column].std()
        stats[f'{column}_var'] = df[column].var()
        stats[f'{column}_cv'] = df[column].std() / df[column].mean()
        stats[f'{column}_min'] = df[column].min()
        stats[f'{column}_max'] = df[column].max()
        stats[f'{column}_IQR'] = df[column].quantile(0.75) - df[column].quantile(0.25)

    # Convertir el diccionario en un DataFrame de una sola fila
    return pd.DataFrame(stats, index=[0])

In [6]:
def add_custom_columns(df, col_names, values):
    """
    Añade columnas personalizadas al DataFrame.
    """
    for col_name, value in zip(col_names, values):
        df[col_name] = value
    return df

## 1.4 Función procesado csvs

Calcula estadísticas para todos los segmentos y añade columnas personalizadas.

* **csv_path:** Ruta del archivo CSV a procesar.
* **segment_length:** Longitud del segmento en filas (500 por defecto).
* **custom_column_names:** Lista de nombres de columnas personalizadas.
* **custom_values:** Lista de valores a añadir a las columnas personalizadas.

In [7]:
def process_csv(csv_path, segment_length=500, custom_column_names=None, custom_values=None):
  
    segments = load_and_preprocess_csv(csv_path, segment_length)
    
    processed_segments = []

    for segment in segments:
       
        stats_df = calculate_statistics(segment)
        
        if custom_column_names and custom_values:
            stats_df = add_custom_columns(stats_df, custom_column_names, custom_values)
        
        processed_segments.append(stats_df)
    
    return processed_segments

In [8]:
#USO
csv_path = '../bearing_fault/horizontal-misalignment/0.5mm/12.288.csv'

custom_column_names = ['Hz', 'medida', 'Tipo']
custom_values = [
    [12.288],  
    ['0.5mm'],  
    ['horizontal_misalignment']  
]

processed_dfs = process_csv(csv_path, segment_length=500, custom_column_names=custom_column_names, custom_values=custom_values)

processed_dfs[0]

    S1_mean  S1_median    S1_std    S1_var     S1_cv   S1_min  S1_max  \
0  0.286609   -0.45415  1.760644  3.099869  6.143024 -0.74812  4.8012   

    S1_IQR   S2_mean  S2_median  ...  S8_median    S8_std   S8_var     S8_cv  \
0  0.18663  0.021666   0.031395  ...  -0.023922  0.080811  0.00653 -7.557894   

    S8_min   S8_max    S8_IQR      Hz  medida                     Tipo  
0 -0.15725  0.34543  0.118908  12.288   0.5mm  horizontal_misalignment  

[1 rows x 67 columns]


In [11]:
processed_dfs

[    S1_mean  S1_median    S1_std    S1_var     S1_cv   S1_min  S1_max  \
 0  0.286609   -0.45415  1.760644  3.099869  6.143024 -0.74812  4.8012   
 
     S1_IQR   S2_mean  S2_median  ...  S8_median    S8_std   S8_var     S8_cv  \
 0  0.18663  0.021666   0.031395  ...  -0.023922  0.080811  0.00653 -7.557894   
 
     S8_min   S8_max    S8_IQR      Hz  medida                     Tipo  
 0 -0.15725  0.34543  0.118908  12.288   0.5mm  horizontal_misalignment  
 
 [1 rows x 67 columns],
    S1_mean  S1_median    S1_std    S1_var    S1_cv   S1_min  S1_max    S1_IQR  \
 0  3.45561    4.51815  2.094236  4.385825  0.60604 -0.64089  4.6356  0.092075   
 
     S2_mean  S2_median  ...  S8_median    S8_std    S8_var     S8_cv   S8_min  \
 0 -0.477666   -0.43153  ...   0.025264  0.115949  0.013444  3.698598 -0.16506   
 
     S8_max    S8_IQR      Hz  medida                     Tipo  
 0  0.34609  0.206634  12.288   0.5mm  horizontal_misalignment  
 
 [1 rows x 67 columns],
    S1_mean  S1_median  

In [20]:
# Combinar todos los DataFrames procesados en uno solo
combined_df = pd.concat(processed_dfs, ignore_index=True)
combined_df

Unnamed: 0,S1_mean,S1_median,S1_std,S1_var,S1_cv,S1_min,S1_max,S1_IQR,S2_mean,S2_median,...,S8_median,S8_std,S8_var,S8_cv,S8_min,S8_max,S8_IQR,Hz,medida,Tipo
0,0.286609,-0.454150,1.760644,3.099869,6.143024,-0.74812,4.80120,0.186630,0.021666,0.031395,...,-0.023922,0.080811,0.006530,-7.557894,-0.15725,0.34543,0.118908,12.288,0.5mm,horizontal_misalignment
1,3.455610,4.518150,2.094236,4.385825,0.606040,-0.64089,4.63560,0.092075,-0.477666,-0.431530,...,0.025264,0.115949,0.013444,3.698598,-0.16506,0.34609,0.206634,12.288,0.5mm,horizontal_misalignment
2,-0.563440,-0.608240,0.089544,0.008018,-0.158924,-0.71058,-0.18403,0.090285,-0.399980,-0.359175,...,-0.002870,0.105225,0.011072,12.542015,-0.28833,0.30622,0.164061,12.288,0.5mm,horizontal_misalignment
3,-0.617669,-0.588875,0.123700,0.015302,-0.200269,-0.96866,-0.17164,0.099337,-0.807317,-0.667040,...,0.042654,0.208275,0.043378,2.588939,-0.26358,0.76796,0.301667,12.288,0.5mm,horizontal_misalignment
4,-0.589237,-0.580280,0.109047,0.011891,-0.185065,-0.93480,-0.26924,0.095765,-0.660323,-0.560835,...,0.004480,0.197578,0.039037,3.445324,-0.27998,0.62127,0.297905,12.288,0.5mm,horizontal_misalignment
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
495,3.444656,4.511550,2.100716,4.413008,0.609848,-0.68667,4.60160,0.084525,0.282189,0.350065,...,0.047254,0.190972,0.036470,2.757261,-0.24579,0.67204,0.299494,12.288,0.5mm,horizontal_misalignment
496,-0.630334,-0.606160,0.125946,0.015862,-0.199808,-1.04950,-0.21959,0.091795,0.251748,0.323625,...,0.020761,0.154232,0.023788,3.609713,-0.21171,0.45551,0.259878,12.288,0.5mm,horizontal_misalignment
497,-0.610148,-0.597285,0.110842,0.012286,-0.181664,-1.00650,-0.21657,0.074952,-0.078801,-0.038246,...,0.053922,0.194922,0.037995,2.777731,-0.26475,0.66362,0.318408,12.288,0.5mm,horizontal_misalignment
498,-0.520183,-0.564765,0.098844,0.009770,-0.190018,-0.73177,-0.05655,0.090313,-0.034864,-0.037494,...,0.004662,0.175164,0.030682,4.390148,-0.27969,0.58214,0.260262,12.288,0.5mm,horizontal_misalignment
