In [1]:
from pathlib import Path
import sys

# Point this at the project's root directory (the one that holds the `src` folder).
# With the notebook stored inside 'notebooks/', the root is the parent of the working directory.
PROJECT_ROOT = Path.cwd().resolve().parent

# Prepend the root to the import search path, but only if it is not listed yet.
if sys.path.count(str(PROJECT_ROOT)) == 0:
    sys.path.insert(0, str(PROJECT_ROOT))

print("PROJECT_ROOT:", PROJECT_ROOT)
print("sys.path[0]:", sys.path[0])

PROJECT_ROOT: /media/franklin/Respaldo 2/Proyectos portafolio/Proyecto 1
sys.path[0]: /media/franklin/Respaldo 2/Proyectos portafolio/Proyecto 1


In [13]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import src.models as models
import importlib
import pandas as pd
from pathlib import Path

# Recompute PROJECT_ROOT for this cell (parent of the working directory; adjust if the
# notebook is stored in a different folder).
PROJECT_ROOT = Path.cwd().resolve().parent

# Load the prepared dataset and switch to the column names this notebook uses
# (unit_id -> id, time_cycles -> cycle).
train_processed = pd.read_parquet(
    PROJECT_ROOT / "data" / "processed" / "fd001_prepared.parquet"
).rename(columns={'unit_id': 'id', 'time_cycles': 'cycle'})

# NOTE(review): the "test" frame is a plain copy of the training frame, so any metric
# computed on it is an in-sample number. Load the dedicated test file here if one exists.
test_processed = train_processed.copy()

# Drop columns that are entirely NaN (to avoid errors in the scaler) and keep in the
# test frame only the columns that are still present in train.
train_processed = train_processed.dropna(axis=1, how='all')
test_processed = test_processed[train_processed.columns.intersection(test_processed.columns)].copy()

# 1. Configuration and hyperparameters
SEQUENCE_LENGTH = 30   # number of consecutive rows in one input window
BATCH_SIZE = 64
EPOCHS = 30
LEARNING_RATE = 1e-3
MAX_RUL = 125          # cap applied to the training RUL target

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device("cuda")
else:
    DEVICE = torch.device("cpu")
print(f"Entrenando en: {DEVICE}")

# 2. Data preparation (imputation, scaling and target clipping)
# Every column other than the identifiers ('id', 'cycle') and the target ('RUL')
# is used as a model input.
feature_cols = [c for c in train_processed.columns if c not in ['id', 'cycle', 'RUL']]
print("Número de features:", len(feature_cols))
print("NaNs por columna (train):")
print(train_processed[feature_cols].isna().sum())

# Working copies, so the processed frames themselves are not modified below.
train_data = train_processed.copy()
test_data  = test_processed.copy()

# Fill NaNs within each engine: forward fill, then backward fill for leading gaps.
# `transform` returns a frame aligned with the original index, so the result can be
# assigned back directly without resetting any index level.
train_data[feature_cols] = train_data.groupby('id')[feature_cols].transform(
    lambda block: block.ffill().bfill()
)
test_data[feature_cols] = test_data.groupby('id')[feature_cols].transform(
    lambda block: block.ffill().bfill()
)

# Fallback: a feature that is NaN for an entire engine survives the per-engine fill.
# Impute whatever is left with the global median, computed on train only so that no
# test statistics are used during preparation.
feature_medians = train_data[feature_cols].median()
train_data[feature_cols] = train_data[feature_cols].fillna(feature_medians)
test_data[feature_cols]  = test_data[feature_cols].fillna(feature_medians)

# Scaling: fit on train only and reuse the fitted statistics for test.
scaler = StandardScaler()
train_data[feature_cols] = scaler.fit_transform(train_data[feature_cols])
test_data[feature_cols]  = scaler.transform(test_data[feature_cols])

# Cap the training target at MAX_RUL.
train_data['RUL'] = np.minimum(train_data['RUL'], MAX_RUL)

# Función para crear secuencias
def create_sequences(df, seq_len, features, id_col='id', target_col='RUL'):
    """Build sliding windows of ``seq_len`` consecutive rows for every engine.

    Rows are taken in the order they appear in ``df``; the frame is not sorted
    here. Engines with fewer than ``seq_len`` rows contribute no windows.

    Args:
        df: DataFrame holding ``id_col``, ``target_col`` and all ``features``.
        seq_len: Number of rows per window; must be a positive integer.
        features: Column names used as model inputs.
        id_col: Column that identifies the engine (default ``'id'``).
        target_col: Column holding the target value (default ``'RUL'``).

    Returns:
        A tuple ``(sequences, labels)`` of NumPy arrays. ``sequences`` has shape
        ``(n_windows, seq_len, n_features)`` and ``labels`` has shape
        ``(n_windows,)``; each label is the target of the last row of its
        window. When no engine is long enough, the arrays are empty but keep
        those shapes.

    Raises:
        ValueError: If ``seq_len`` is smaller than 1.
    """
    if seq_len < 1:
        raise ValueError(f"seq_len must be a positive integer, got {seq_len}")

    features = list(features)
    sequences, labels = [], []

    # A single groupby pass replaces re-filtering the whole frame once per engine.
    # sort=False keeps engines in order of first appearance.
    for _, group in df.groupby(id_col, sort=False):
        n_rows = len(group)
        if n_rows < seq_len:
            continue
        data = group[features].to_numpy()
        target = group[target_col].to_numpy()
        for start in range(n_rows - seq_len + 1):
            stop = start + seq_len
            sequences.append(data[start:stop])
            labels.append(target[stop - 1])

    if not sequences:
        # Keep the documented rank even when there is nothing to return.
        return np.empty((0, seq_len, len(features))), np.empty((0,))
    return np.array(sequences), np.array(labels)

# Build the training windows and their targets from the prepared training frame.
X_train_seq, y_train_seq = create_sequences(
    df=train_data,
    seq_len=SEQUENCE_LENGTH,
    features=feature_cols,
)

# Dataset PyTorch
class RULDataset(Dataset):
    """In-memory dataset that pairs input arrays with their targets.

    Both inputs are converted to float32 tensors when the dataset is built.
    The targets get an extra axis inserted at position 1, so a 1-D target
    array of length N is stored with shape (N, 1).
    """

    def __init__(self, X, y):
        self.X = torch.tensor(X, dtype=torch.float32)
        targets = torch.tensor(y, dtype=torch.float32)
        self.y = targets[:, None]

    def __len__(self):
        """Return the number of samples (length of the first axis of ``X``)."""
        return len(self.X)

    def __getitem__(self, i):
        """Return the ``(input, target)`` pair stored at position ``i``."""
        sample, target = self.X[i], self.y[i]
        return sample, target

# Wrap the training windows in a dataset and serve them in shuffled mini-batches.
train_dataset = RULDataset(X_train_seq, y_train_seq)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# 4. Model initialisation
# Reload the module first so that edits made to src/models.py are picked up.
importlib.reload(models)
model = models.LSTMPredictor(input_dim=len(feature_cols), hidden_dim=64, num_layers=2)
model = model.to(DEVICE)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

# 5. Training loop
model.train()
for epoch in range(1, EPOCHS + 1):
    losses = []  # per-batch loss values collected during this epoch
    for batch_X, batch_y in train_loader:
        batch_X = batch_X.to(DEVICE)
        batch_y = batch_y.to(DEVICE)

        # One optimisation step: clear gradients, forward, backward, update.
        optimizer.zero_grad()
        outputs = model(batch_X)
        loss = criterion(outputs, batch_y)
        loss.backward()
        optimizer.step()
        losses.append(loss.item())

    # Report on the first epoch and then on every fifth one.
    if epoch != 1 and epoch % 5 != 0:
        continue
    print(f"Epoch [{epoch}/{EPOCHS}] - Loss: {np.mean(losses):.4f}")

# 6. Evaluation on the test frame (last window of each engine)
# NOTE(review): in this notebook test_data is derived from a copy of the training frame,
# so the RMSE printed below is an in-sample figure — confirm before reporting it.
model.eval()
test_results = []
y_true_test = []

# Inference only: gradient tracking is switched off for the whole pass.
with torch.no_grad():
    for engine_id in test_data['id'].unique():
        group = test_data.loc[test_data['id'] == engine_id]
        # Engines with fewer than SEQUENCE_LENGTH rows cannot fill a window; skip them.
        if len(group) < SEQUENCE_LENGTH:
            continue

        # Last SEQUENCE_LENGTH rows of the engine, with a leading batch axis of size 1.
        last_seq = group[feature_cols].values[-SEQUENCE_LENGTH:]
        last_seq = torch.tensor(last_seq, dtype=torch.float32).unsqueeze(0).to(DEVICE)

        pred = model(last_seq).cpu().item()
        test_results.append(max(0, pred))  # negative predictions are floored at 0
        y_true_test.append(group['RUL'].values[-1])

mse_lstm = mean_squared_error(y_true_test, test_results)
rmse_lstm = np.sqrt(mse_lstm)
print("\n--- RESULTADOS FINALES LSTM ---")
print(f"RMSE: {rmse_lstm:.2f}")

Entrenando en: cuda
Número de features: 24
NaNs por columna (train):
op_setting_1    0
op_setting_2    0
op_setting_3    0
sensor_1        0
sensor_2        0
sensor_3        0
sensor_4        0
sensor_5        0
sensor_6        0
sensor_7        0
sensor_8        0
sensor_9        0
sensor_10       0
sensor_11       0
sensor_12       0
sensor_13       0
sensor_14       0
sensor_15       0
sensor_16       0
sensor_17       0
sensor_18       0
sensor_19       0
sensor_20       0
sensor_21       0
dtype: int64
Epoch [1/30] - Loss: 6419.9264
Epoch [5/30] - Loss: 1304.0812
Epoch [10/30] - Loss: 227.5329
Epoch [15/30] - Loss: 102.7406
Epoch [20/30] - Loss: 68.4893
Epoch [25/30] - Loss: 44.1681
Epoch [30/30] - Loss: 31.2719

--- RESULTADOS FINALES LSTM ---
RMSE: 1.17


In [15]:
import joblib
import torch

# Make sure the models directory exists before writing into it.
models_dir = PROJECT_ROOT / "models"
models_dir.mkdir(parents=True, exist_ok=True)

# Model weights (state dict only).
torch.save(model.state_dict(), models_dir / "lstm_model_v1.pth")

# Fitted scaler, so that new data can be put on the same scale.
joblib.dump(scaler, models_dir / "scaler_v1.pkl")

# Feature column list, stored to keep the input columns consistent.
joblib.dump(feature_cols, models_dir / "feature_cols_v1.pkl")

print("Archivos de producción guardados en la carpeta /models/")

Archivos de producción guardados en la carpeta /models/


In [16]:
from src.inference import RULInference

# Set up the inference engine.
infer = RULInference(PROJECT_ROOT)

# Use the first engine id found in the test frame as the sample to try out.
sample_id = test_processed['id'].unique()[0]
is_sample_engine = test_processed['id'] == sample_id
sample_engine_data = test_processed[is_sample_engine]

# Run the prediction.
predicted_rul = infer.predict(sample_engine_data)

print(f"Motor ID: {sample_id}")
print(f"RUL Predicho por LSTM: {predicted_rul:.2f} ciclos")

Motor ID: 1
RUL Predicho por LSTM: 1.73 ciclos
