In [None]:
# ============================================================
# MLP com Random Forest + Suavização Exponencial – FD001
# Com identificação de janelas por motor
# ============================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error, mean_squared_error

# ============================================================
# 1) Load the data
# ============================================================
base = "/content/CMAPSSData/"

# Column layout: engine id, cycle counter, 3 operating settings, 21 sensors.
col_names = ['unit', 'cycle']
col_names += [f'op_setting_{i}' for i in range(1, 4)]
col_names += [f'sensor_{i}' for i in range(1, 22)]

# Whitespace-separated text files without a header row.
train = pd.read_csv(base + "train_FD001.txt", sep=r"\s+", header=None, names=col_names)
test = pd.read_csv(base + "test_FD001.txt", sep=r"\s+", header=None, names=col_names)
rul = pd.read_csv(base + "RUL_FD001.txt", sep=r"\s+", header=None)

# ============================================================
# 2) Compute the RUL labels
# ============================================================
# Training set: RUL is the distance to the last recorded cycle of each unit.
train['RUL'] = train.groupby('unit')['cycle'].transform('max') - train['cycle']

# Test set: the i-th row of the RUL file is paired with the i-th distinct
# unit (in order of appearance) and gives the RUL at that unit's last cycle;
# earlier cycles get that value plus their distance to the last cycle.
test['RUL'] = 0
for i, unit in enumerate(test['unit'].unique()):
    unit_rows = test['unit'] == unit
    unit_cycles = test.loc[unit_rows, 'cycle']
    final_cycle = unit_cycles.max()
    test.loc[unit_rows, 'RUL'] = rul.iloc[i, 0] + final_cycle - unit_cycles

# ============================================================
# 3) Suavização Exponencial α=0.3
# ============================================================
def exponential_smoothing(df, alpha=0.3):
    """Return a copy of ``df`` with per-unit exponentially smoothed readings.

    Every column whose name starts with ``sensor_`` or ``op_setting_`` is
    replaced by its exponentially weighted mean (``ewm(alpha=alpha).mean()``),
    computed independently inside each ``unit`` group so that one unit's
    history never leaks into another's. All other columns are left untouched
    and the input frame is not modified.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``unit`` column identifying the group of each row.
    alpha : float, default 0.3
        Smoothing factor forwarded to ``ewm``.

    Returns
    -------
    pandas.DataFrame
        Same index and column order as ``df``; smoothed columns are float.
    """
    smooth_cols = [
        col for col in df.columns
        if col.startswith(('sensor_', 'op_setting_'))
    ]
    df_smoothed = df.copy()
    if not smooth_cols:
        return df_smoothed

    # One grouped pass instead of a boolean mask per (unit, column) pair.
    smoothed = df.groupby('unit', sort=False)[smooth_cols].transform(
        lambda values: values.ewm(alpha=alpha).mean()
    )
    # Replace whole columns rather than writing floats row-by-row into
    # columns that may hold integers: partial assignment of an incompatible
    # dtype is deprecated in pandas.
    df_smoothed[smooth_cols] = smoothed
    return df_smoothed

# Smooth both splits with the same alpha.
train = exponential_smoothing(train, alpha=0.3)
test = exponential_smoothing(test, alpha=0.3)

# ============================================================
# 4) Feature selection via Random Forest
# ============================================================
# Candidate inputs: every column except the identifiers and the target.
features = [c for c in train.columns if c not in ('unit', 'cycle', 'RUL')]
rf = RandomForestRegressor(n_estimators=100, random_state=42)
rf.fit(train[features], train['RUL'])

# Rank the candidates by importance and keep the ten best.
importances = pd.Series(rf.feature_importances_, index=features)
importances = importances.sort_values(ascending=False)
top_features = importances.head(10).index.tolist()

# ============================================================
# 5) Min-Max normalization
# ============================================================
# The scaler is fitted on the training split only and then reused on test.
scaler = MinMaxScaler()
scaler.fit(train[top_features])
train[top_features] = scaler.transform(train[top_features])
test[top_features] = scaler.transform(test[top_features])

# ============================================================
# 6) Função de janelas – AGORA com identificação de motor
# ============================================================
def make_windows(df, features, window_size=30):
    """Build flattened sliding windows per unit, tagged with the unit id.

    For every distinct value of ``df['unit']`` (in order of appearance) a
    window of ``window_size`` consecutive rows is slid one row at a time over
    that unit's rows. Each window contributes the row-major flattening of its
    ``features`` values, the ``RUL`` of its last row, and the unit id. Units
    with fewer than ``window_size`` rows contribute nothing.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``unit`` and ``RUL`` columns plus the feature columns.
    features : list of str
        Columns included in each window.
    window_size : int, default 30
        Number of consecutive rows per window; must be at least 1.

    Returns
    -------
    X : numpy.ndarray
        Shape ``(n_windows, window_size * n_features)``, including when no
        window could be built (``n_windows == 0``).
    y : numpy.ndarray
        Target RUL of each window.
    units : numpy.ndarray
        Unit id of each window.

    Raises
    ------
    ValueError
        If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError("window_size must be a positive integer")

    # Pull the data out of pandas once; slicing plain arrays in the inner
    # loop avoids a label-based DataFrame lookup for every single window.
    feature_values = df.loc[:, features].values
    targets = df['RUL'].values
    unit_ids = df['unit'].values
    n_features = feature_values.shape[1] if feature_values.ndim == 2 else 1

    X, y, units = [], [], []
    for unit in df['unit'].unique():
        unit_mask = unit_ids == unit
        unit_values = feature_values[unit_mask]
        unit_targets = targets[unit_mask]
        for start in range(len(unit_values) - window_size + 1):
            stop = start + window_size
            X.append(unit_values[start:stop].flatten())
            y.append(unit_targets[stop - 1])
            units.append(unit)  # remember which unit produced this window

    X = np.array(X)
    if X.size == 0:
        # Keep the matrix 2-D even when no unit is long enough.
        X = X.reshape(0, window_size * n_features)
    return X, np.array(y), np.array(units)

# Build the windows, keeping track of the engine each one belongs to
X_train, y_train, units_train = make_windows(train, top_features, window_size=30)
X_test, y_test, units_test = make_windows(test, top_features, window_size=30)

# ============================================================
# 7) Train the MLP
# ============================================================
# Six hidden layers of 25 units each, optimized with L-BFGS.
mlp = MLPRegressor(
    hidden_layer_sizes=(25,) * 6,
    activation='relu',
    solver='lbfgs',
    max_iter=2000,
    random_state=42,
)

mlp.fit(X_train, y_train)
y_pred = mlp.predict(X_test)

# ============================================================
# 8) Metrics
# ============================================================
# Errors are computed over every test window, not only the last one per engine.
mae = mean_absolute_error(y_test, y_pred)
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

print(f"\nResultados do modelo MLP:")
print(f"MAE  = {mae:.2f} ciclos")
print(f"RMSE = {rmse:.2f} ciclos")
print(f"Top 10 atributos selecionados: {top_features}")

# ============================================================
# 9) Number of windows per engine (test set)
# ============================================================
unique, counts = np.unique(units_test, return_counts=True)
print("\nQuantidade de janelas por motor no conjunto de teste:")
for u, c in zip(unique, counts):
    print(f"Motor {u}: {c} janelas")

# ============================================================
# 10) Visualize one specific engine
# ============================================================
motor_escolhido = 23  # Change this to inspect another engine

# Positions of the test windows that belong to the chosen engine.
indices = np.where(units_test == motor_escolhido)[0]
y_true_motor = y_test[indices]
y_pred_motor = y_pred[indices]

plt.figure(figsize=(10, 5))
for series, label, marker in (
    (y_true_motor, "RUL real", "o"),
    (y_pred_motor, "RUL predita", "x"),
):
    plt.plot(series, label=label, marker=marker)

plt.title(f"RUL real vs predita – MLP (todas as janelas do motor {motor_escolhido})")
plt.xlabel("Amostra (janelas consecutivas)")
plt.ylabel("Ciclos")
plt.grid(True)
plt.legend()
plt.tight_layout()
plt.show()


 391.66475926 391.77435736 391.52783885 391.67544358 392.08436142
 392.05854247 391.73652256 392.11927423 392.38529619 391.96772488
 391.9774397  391.98422357 391.98896422 391.69193638 391.78442927
 391.84913663 391.89441334 391.92609801 391.94827286 392.26383332
 392.78473222 392.8493168  391.99448244 392.29614742 391.60728767
 391.72510323 391.80757317 391.86530166 391.90571138 391.63399694
 391.74379815 391.82065885 391.87446126 391.91212292 391.93848606
 391.95694025 391.96985818 391.97890073 391.98523051 392.28966139
 392.50276299 392.35193408 392.24635386 392.1724477  392.12071339
 392.38449937 391.96914956 391.97840469 391.98488328 391.9894183
 391.69259281 391.78481497 391.24937048 391.47455933 391.93219153
 391.65253407 391.45677385 392.2197417  391.85381919 391.89767343
 391.6283714  392.03985998 392.02790199 392.01953139 392.01367197
 391.70957038 391.49669927 391.34768949 391.54338264 391.68036785
 391.17625749 391.72338025 391.80636617 392.46445632 392.32511942
 392.227583