In [None]:

# Importar librerías
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Load the data
# Column layout: unit id, cycle counter, three operating settings, sensors 1..21
col_names = ['unit', 'time', 'op1', 'op2', 'op3']
col_names.extend(f'sensor_{idx}' for idx in range(1, 22))

# The file is read as whitespace-delimited with no header row; the path below
# points at where the data is stored, relative to the working directory.
train = pd.read_csv("../data/CMAPSSData/train_FD001.txt", sep=r'\s+', header=None, names=col_names)


# Create RUL (Remaining Useful Life): for every row, the largest 'time' value
# recorded for its unit minus the row's own 'time'.
# The string alias 'max' is used instead of the builtin `max`: passing the
# builtin callable emits a pandas FutureWarning and its meaning is slated to
# change in a future pandas version.
train['RUL'] = train.groupby('unit')['time'].transform('max') - train['time']


# Feature engineering

def create_features(df, window_sizes=(5, 10)):
    """Return a copy of *df* extended with per-unit sensor features.

    For every column whose name contains ``'sensor_'`` the following columns
    are appended, all computed independently inside each ``'unit'`` group:

    * ``<col>_diff`` -- difference with the previous row of the same unit.
    * ``<col>_mean_<w>`` / ``<col>_std_<w>`` -- rolling mean and standard
      deviation over the last ``w`` rows (``min_periods=1``) for every ``w``
      in *window_sizes*.

    Column order is: all ``_diff`` columns, then for each window the
    ``_mean``/``_std`` pair of each sensor in turn.

    The first row of every unit has NaN in the ``_diff`` and ``_std``
    columns (there is no previous row, and a single-sample standard
    deviation is undefined); these NaNs are left in place for the caller to
    handle.

    Args:
        df: Input frame with a ``'unit'`` column and the sensor columns.
            It is not modified.
        window_sizes: Iterable of rolling-window lengths, in rows.

    Returns:
        A new DataFrame with the original columns followed by the features.
        If a generated name already exists in *df*, that column is replaced
        in place instead of being duplicated.
    """
    sensor_cols = [c for c in df.columns if 'sensor_' in c]
    if not sensor_cols:
        # Nothing to derive; keep the "always returns a copy" contract.
        return df.copy()

    # Group once for all sensors instead of once per column and statistic.
    by_unit = df.groupby('unit')[sensor_cols]

    # Collect every new column first and attach them in a single concat:
    # inserting ~100 columns one at a time fragments the frame and makes
    # pandas emit a PerformanceWarning.
    new_columns = {}

    diffs = by_unit.diff()
    for col in sensor_cols:
        new_columns[f'{col}_diff'] = diffs[col]

    for window in window_sizes:
        rolling = by_unit.rolling(window=window, min_periods=1)
        # The rolling result is indexed by (unit, original index) in group
        # order: drop the unit level and restore the caller's row order.
        means = rolling.mean().reset_index(level=0, drop=True).reindex(df.index)
        stds = rolling.std().reset_index(level=0, drop=True).reindex(df.index)
        for col in sensor_cols:
            new_columns[f'{col}_mean_{window}'] = means[col]
            new_columns[f'{col}_std_{window}'] = stds[col]

    features = pd.DataFrame(new_columns, index=df.index)

    base = df
    clashing = [name for name in features.columns if name in df.columns]
    if clashing:
        # Overwrite same-named columns in place rather than creating
        # duplicate column labels.
        base = df.copy()
        base[clashing] = features[clashing]
        features = features.drop(columns=clashing)

    return pd.concat([base, features], axis=1)

# Build the engineered feature table from the raw training frame
train_feat = create_features(train)

# Scaling / normalisation
# Every column whose name contains 'sensor' or 'op' is standardised;
# names such as 'unit', 'time' and 'RUL' match neither and stay unscaled.
feature_cols = [
    name for name in train_feat.columns
    if any(tag in name for tag in ('sensor', 'op'))
]
scaler = StandardScaler()
scaler.fit(train_feat[feature_cols])
train_feat[feature_cols] = scaler.transform(train_feat[feature_cols])

# Save the processed data set (without the index column)
train_feat.to_csv("../data/CMAPSSData/train_FD001_features.csv", index=False)

# Preview
train_feat.head()


  train['RUL'] = train.groupby('unit')['time'].transform(max) - train['time']
  df_feat[f'{col}_std_{window}']  = df_feat.groupby('unit')[col].rolling(window=window, min_periods=1).std().reset_index(0,drop=True)
  df_feat[f'{col}_mean_{window}'] = df_feat.groupby('unit')[col].rolling(window=window, min_periods=1).mean().reset_index(0,drop=True)
  df_feat[f'{col}_std_{window}']  = df_feat.groupby('unit')[col].rolling(window=window, min_periods=1).std().reset_index(0,drop=True)
  df_feat[f'{col}_mean_{window}'] = df_feat.groupby('unit')[col].rolling(window=window, min_periods=1).mean().reset_index(0,drop=True)
  df_feat[f'{col}_std_{window}']  = df_feat.groupby('unit')[col].rolling(window=window, min_periods=1).std().reset_index(0,drop=True)
  df_feat[f'{col}_mean_{window}'] = df_feat.groupby('unit')[col].rolling(window=window, min_periods=1).mean().reset_index(0,drop=True)
  df_feat[f'{col}_std_{window}']  = df_feat.groupby('unit')[col].rolling(window=window, min_periods=1).std().reset_

Unnamed: 0,unit,time,op1,op2,op3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_17_mean_10,sensor_17_std_10,sensor_18_mean_10,sensor_18_std_10,sensor_19_mean_10,sensor_19_std_10,sensor_20_mean_10,sensor_20_std_10,sensor_21_mean_10,sensor_21_std_10
0,1,1,-0.31598,-1.372953,0.0,0.0,-1.721725,-0.134255,-0.925936,-1.776357e-15,...,-0.944594,,0.0,,0.0,,1.610804,,1.41456,
1,1,2,0.872722,-1.03172,0.0,0.0,-1.06178,0.211528,-0.643726,-1.776357e-15,...,-0.944594,-3.945161,0.0,0.0,0.0,0.0,1.403488,-2.346314,1.441011,-3.901728
2,1,3,-1.961874,1.015677,0.0,0.0,-0.661813,-0.413166,-0.525953,-1.776357e-15,...,-1.50518,0.977408,0.0,0.0,0.0,0.0,1.219208,-1.818494,1.145448,-0.996565
3,1,4,0.32409,-0.008022,0.0,0.0,-0.661813,-1.261314,-0.784831,-1.776357e-15,...,-1.365034,0.317909,0.0,0.0,0.0,0.0,1.006133,-0.931959,1.083058,-1.460346
4,1,5,-0.864611,-0.690488,0.0,0.0,-0.621816,-1.251528,-0.301518,-1.776357e-15,...,-1.11277,0.724798,0.0,0.0,0.0,0.0,0.90593,-1.044665,1.115777,-1.775208
