In [2]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error

# 데이터 불러오기'
train_df = pd.read_csv('C:/Users/Administrator/RUL2/cmapss/train_FD001.txt', sep=" ", header=None)
train_df.drop(train_df.columns[[26, 27]], axis=1, inplace=True)  # 불필요한 열 제거

# 열 이름 설정
columns = ['unit_number', 'time_in_cycles', 'operational_setting_1', 'operational_setting_2', 
           'operational_setting_3'] + [f'sensor_measurement_{i}' for i in range(1, 22)]
train_df.columns = columns

# 데이터 정규화
scaler = MinMaxScaler()
train_df.iloc[:, 2:] = scaler.fit_transform(train_df.iloc[:, 2:])

# RUL 데이터 불러오기
rul_df = pd.read_csv('C:/Users/Administrator/RUL2/cmapss/RUL_FD001.txt', header=None, names=['RUL'])

# 시퀀스 데이터 생성 함수
def create_sequences(df, sequence_length=50):
    x, y = [], []
    data = df.values
    for i in range(len(data) - sequence_length):
        x.append(data[i:i+sequence_length, 2:])  # 센서 데이터
        y.append(data[i+sequence_length, 2])  # 다음 시간의 첫번째 센서 데이터
    return np.array(x), np.array(y)

# 각 엔진 유닛별로 데이터 분리 및 시퀀스 생성
train_x, train_y = [], []
for unit_number in train_df['unit_number'].unique():
    unit_data = train_df[train_df['unit_number'] == unit_number]
    x, y = create_sequences(unit_data)
    train_x.extend(x)
    train_y.extend(y)

train_x = np.array(train_x)
train_y = np.array(train_y)

# LSTM 모델 정의
model = Sequential([
    LSTM(50, input_shape=(train_x.shape[1], train_x.shape[2]), return_sequences=True),
    LSTM(25),
    Dense(1)
])

# 모델 컴파일
model.compile(optimizer='adam', loss='mean_squared_error')

# 모델 요약 출력
model.summary()

# 모델 훈련
history = model.fit(train_x, train_y, epochs=5, batch_size=32, validation_split=0.1, verbose=1)

# 테스트 데이터 로드 및 전처리 (위와 유사)
test_df = pd.read_csv('C:/Users/Administrator/RUL2/cmapss/test_FD001.txt', sep=" ", header=None)
test_df.drop(test_df.columns[[26, 27]], axis=1, inplace=True)
test_df.columns = columns
test_df.iloc[:, 2:] = scaler.transform(test_df.iloc[:, 2:])

# 테스트 데이터 예측
test_x, _ = create_sequences(test_df)
predicted_rul = model.predict(test_x)

# 예측된 RUL과 실제 RUL 비교 (MSE 계산)
mse = mean_squared_error(rul_df['RUL'], predicted_rul.flatten())

mse


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 50, 50)            15000     
                                                                 
 lstm_1 (LSTM)               (None, 25)                7600      
                                                                 
 dense (Dense)               (None, 1)                 26        
                                                                 
Total params: 22,626
Trainable params: 22,626
Non-trainable params: 0
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


ValueError: Found input variables with inconsistent numbers of samples: [100, 13046]