In [None]:

# Importar librerías
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Load data
# The file is parsed as whitespace-separated text without a header row, so the
# column names are supplied explicitly: unit id, time, three "op" columns and
# 21 sensor columns.
col_names = ['unit', 'time', 'op1', 'op2', 'op3'] + [f'sensor_{i}' for i in range(1, 22)]

train = pd.read_csv(
    "../data/CMAPSSData/train_FD001.txt",
    sep=r'\s+',
    header=None,
    names=col_names
)  # Path (relative to this script/notebook) where the raw data lives


# Create RUL (Remaining Useful Life)
# RUL = largest 'time' value recorded for the unit minus the row's own 'time'.
# The string alias 'max' is used instead of the builtin `max`: passing the
# builtin to transform() is deprecated in recent pandas versions.
train['RUL'] = train.groupby('unit')['time'].transform('max') - train['time']


# Feature Engineering

def create_features(df, window_sizes=(5, 10)):
    """Add per-unit difference and rolling-window features for sensor columns.

    Every column whose name contains ``'sensor_'`` is treated as a sensor
    signal. For each such column the following features are computed
    separately within each ``unit`` group, following the row order of ``df``:

    * ``<col>_diff``: difference with the previous row of the same unit.
    * ``<col>_mean_<w>`` / ``<col>_std_<w>``: rolling mean and standard
      deviation over the last ``w`` rows (``min_periods=1``), for every
      ``w`` in ``window_sizes``.

    Args:
        df: Input DataFrame containing a ``unit`` column. Its index must be
            unique because derived values are aligned back on it. The frame
            is not modified.
        window_sizes: Iterable of rolling window lengths, in rows.

    Returns:
        A new DataFrame with the original columns followed by all ``_diff``
        columns and then, per window, the ``_mean_``/``_std_`` pair of each
        sensor. The first row of every unit holds NaN in the ``_diff`` and
        ``_std_`` columns (no previous row / a single observation).

    Raises:
        KeyError: If sensor columns are present but ``df`` has no ``unit``
            column.
    """
    sensor_cols = [c for c in df.columns if 'sensor_' in c]
    if not sensor_cols:
        # Nothing to derive; still hand back an independent copy.
        return df.copy()

    per_unit = df.groupby('unit')[sensor_cols]

    # Collect every derived column first and attach them in one go: inserting
    # ~100 columns one by one fragments the frame and triggers pandas'
    # PerformanceWarning.
    derived = {}

    # Sensor differences
    diffs = per_unit.diff()
    for col in sensor_cols:
        derived[f'{col}_diff'] = diffs[col]

    # Means and standard deviations over rolling windows
    for window in window_sizes:
        rolling = per_unit.rolling(window=window, min_periods=1)
        # groupby-rolling results carry a (unit, original index) MultiIndex;
        # dropping the unit level restores the original row labels.
        means = rolling.mean().reset_index(level=0, drop=True)
        stds = rolling.std().reset_index(level=0, drop=True)
        for col in sensor_cols:
            derived[f'{col}_mean_{window}'] = means[col]
            derived[f'{col}_std_{window}'] = stds[col]

    # Passing index=df.index realigns the group-ordered rolling results with
    # the row order of the input.
    features = pd.DataFrame(derived, index=df.index)

    result = df.copy()
    # If the input already contains a derived column name (e.g. the function
    # is re-applied to its own output), overwrite it in place instead of
    # producing duplicate column labels.
    overlap = [c for c in features.columns if c in result.columns]
    if overlap:
        result[overlap] = features[overlap]
        features = features.drop(columns=overlap)

    return pd.concat([result, features], axis=1)

# Build the engineered feature table from the raw training frame
train_feat = create_features(train)

# Scaling / normalisation
# Every column whose name contains 'sensor' or 'op' (raw and derived alike)
# is passed through a StandardScaler fitted on this same data.
feature_cols = [
    name
    for name in train_feat.columns
    if any(tag in name for tag in ('sensor', 'op'))
]
scaler = StandardScaler()
scaled = scaler.fit_transform(train_feat[feature_cols])
train_feat[feature_cols] = scaled

# Save the processed dataset (row index is not written)
train_feat.to_csv(
    "../data/CMAPSSData/train_FD001_features.csv",
    index=False,
)

# Preview (displayed as the cell output when run in a notebook)
train_feat.head()
