In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score


In [2]:
def obtain_RUL(number):
    nombres_columnas = ['RUL','unit']
    df = pd.read_csv(rf"./CMAPSSData/RUL_FD00{number}.txt",sep = " ", header = None, index_col = None)
    df.columns =   nombres_columnas
    df['unit'] = range(1, len(df) + 1)
    return df

In [3]:
def create_test_df(number):
    nombres_columnas = ['unit', 'cycle', 'op_setting_1', 'op_setting_2', 'op_setting_3']
    nombres_columnas += [f'sensor_{i}' for i in range(1, 24)]
    df = pd.read_csv(rf"./CMAPSSData/test_FD00{number}.txt",sep = " ", header = None, index_col = None)
    df.columns =   nombres_columnas
    df.dropna(how='all', axis=1, inplace = True)
    rul_df = obtain_RUL(number)
    max_cycles = df.groupby('unit')['cycle'].max().reset_index()
    max_cycles.columns = ['unit', 'max_cycle']
    temp_df = pd.merge(df[['unit', 'cycle']], max_cycles, on='unit')
    temp_df = pd.merge(temp_df, rul_df, on='unit')
    temp_df['RUL'] = temp_df['RUL'] + (temp_df['max_cycle'] - temp_df['cycle'])
    df = pd.merge(df, temp_df[['unit', 'cycle', 'RUL']], on=['unit', 'cycle'])
    return df

In [4]:
test_data = create_test_df(4)
test_data

Unnamed: 0,unit,cycle,op_setting_1,op_setting_2,op_setting_3,sensor_1,sensor_2,sensor_3,sensor_4,sensor_5,...,sensor_13,sensor_14,sensor_15,sensor_16,sensor_17,sensor_18,sensor_19,sensor_20,sensor_21,RUL
0,1,1,20.0072,0.7000,100.0,491.19,606.67,1481.04,1227.81,9.35,...,2387.78,8048.98,9.2229,0.02,362,2324,100.00,24.31,14.7007,251
1,1,2,24.9984,0.6200,60.0,462.54,536.22,1256.17,1031.48,7.05,...,2028.09,7863.46,10.8632,0.02,306,1915,84.93,14.36,8.5748,250
2,1,3,42.0000,0.8420,100.0,445.00,549.23,1340.13,1105.88,3.91,...,2387.95,8071.13,9.3960,0.02,328,2212,100.00,10.39,6.4365,249
3,1,4,42.0035,0.8402,100.0,445.00,549.19,1339.70,1107.26,3.91,...,2387.90,8078.89,9.3594,0.02,328,2212,100.00,10.56,6.2367,248
4,1,5,35.0079,0.8400,100.0,449.44,555.10,1353.04,1117.80,5.48,...,2387.87,8057.83,9.3030,0.02,333,2223,100.00,14.85,8.9326,247
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
41209,248,277,41.9991,0.8401,100.0,445.00,550.30,1364.40,1129.17,3.91,...,2388.50,8112.61,9.4427,0.02,331,2212,100.00,10.53,6.2620,30
41210,248,278,20.0026,0.7005,100.0,491.19,608.00,1494.75,1260.88,9.35,...,2388.33,8086.83,9.2772,0.02,366,2324,100.00,24.33,14.6486,29
41211,248,279,34.9988,0.8413,100.0,449.44,555.92,1370.65,1130.97,5.48,...,2388.64,8100.84,9.3982,0.02,336,2223,100.00,14.69,8.8389,28
41212,248,280,20.0027,0.7000,100.0,491.19,608.19,1489.11,1256.25,9.35,...,2388.37,8085.24,9.2727,0.03,366,2324,100.00,24.44,14.6887,27


In [5]:
mean_rul = np.mean(test_data['RUL'])
mean_rul

181.8995972242442

In [6]:
def obtain_RUL(number):
    nombres_columnas = ['RUL','unit']
    df = pd.read_csv(rf"./CMAPSSData/RUL_FD00{number}.txt",sep = " ", header = None, index_col = None)
    df.columns =   nombres_columnas
    df['unit'] = range(1, len(df) + 1)
    return df

In [7]:
rul_test = obtain_RUL(4)

In [8]:
y_pred_naive = np.full_like(rul_test, fill_value=mean_rul)

In [9]:
rmse_naive = np.sqrt(mean_squared_error(rul_test, y_pred_naive))
mae_naive = mean_absolute_error(rul_test, y_pred_naive)
r2_naive = r2_score(rul_test, y_pred_naive)
print(f"Modelo Naíf (predice media del RUL)")
print(f"Media usada como predicción: {mean_rul:.2f}")
print(f"RMSE: {rmse_naive:.2f}")
print(f"MAE: {mae_naive:.2f}")
print(f"R²: {r2_naive:.4f}")

Modelo Naíf (predice media del RUL)
Media usada como predicción: 181.90
RMSE: 100.53
MAE: 84.94
R²: -1.8117
