In [1]:
# 전체 실험 자동화 워크플로우 
import numpy as np
import pandas as pd
import os
import joblib
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from scipy.signal import savgol_filter, butter, filtfilt, medfilt, gaussian
import random

# Experiment options: feature scalers, keyed by the scaler's class name.
# Each entry is a fresh, unfitted instance that the experiment loop fits per fold.
scaler_options = {
    scaler_cls.__name__: scaler_cls()
    for scaler_cls in (StandardScaler, MinMaxScaler, RobustScaler)
}
# Smoothing filters keyed by experiment name. Every entry takes a 1-D array
# and returns the filtered array.
#
# Kernels and filter coefficients are constants, so they are built once here
# instead of being recomputed on every filter call.
_MOVING_AVG_KERNEL = np.ones(5) / 5            # 5-tap uniform window
_BUTTER_B, _BUTTER_A = butter(3, 0.1)          # order-3 Butterworth, Wn=0.1
# 11-tap Gaussian window with std=2, normalised to unit sum. Built with NumPy
# using the same formula as SciPy's gaussian(11, 2) window, so this block does
# not rely on the `scipy.signal.gaussian` alias, which newer SciPy releases
# only expose as `scipy.signal.windows.gaussian`.
_gaussian_offsets = np.arange(11) - 5.0
_GAUSSIAN_KERNEL = np.exp(-_gaussian_offsets ** 2 / (2 * 2.0 ** 2))
_GAUSSIAN_KERNEL /= _GAUSSIAN_KERNEL.sum()

filter_options = {
    # Identity: returns the input object unchanged.
    'None': lambda x: x,
    # mode='same' keeps the output length equal to the input length;
    # values near the edges are attenuated by the implicit zero padding.
    'MovingAvg': lambda x: np.convolve(x, _MOVING_AVG_KERNEL, mode='same'),
    'Savgol': lambda x: savgol_filter(x, window_length=11, polyorder=2),
    # Forward-backward filtering (zero phase).
    'Butterworth': lambda x: filtfilt(_BUTTER_B, _BUTTER_A, x),
    'Median': lambda x: medfilt(x, kernel_size=5),
    'Gaussian': lambda x: np.convolve(x, _GAUSSIAN_KERNEL, mode='same'),
}
# Single seed for the whole experiment. It is defined before the models so
# the random forest, the global RNGs and the fold splitter all share one
# value instead of repeating a magic number.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)

# Regression models under comparison, keyed by the name used in result rows
# and in saved-model file names.
model_options = {
    'LinearRegression': LinearRegression(),
    'RandomForest': RandomForestRegressor(n_estimators=50, random_state=SEED),
}

# Load the training table; it must contain an 'RUL' column.
train = pd.read_csv('../CMaps/Data/train_FD001_with_RUL.csv')
# Impute missing values with the per-column mean. numeric_only=True keeps
# this working if the CSV ever carries a non-numeric column, where a plain
# DataFrame.mean() raises on pandas >= 2.0; such columns are left unfilled.
train = train.fillna(train.mean(numeric_only=True))
# Cap the target at 125 so every larger RUL value is treated as 125.
train['RUL'] = train['RUL'].clip(upper=125)

# Cross-validation splitter: 3 shuffled folds, seeded with SEED.
kf = KFold(n_splits=3, shuffle=True, random_state=SEED)

# Output location for the text log and the results CSV; created if missing.
log_dir = '../experiment_logs'
os.makedirs(log_dir, exist_ok=True)

# One dict per (scaler, filter, model, fold) run is appended here.
results = []

# Run every scaler x filter x model combination over all CV folds. For each
# run, record RMSE/MAE, save the fitted model and append a line to the log.

# joblib.dump does not create parent directories, so make sure the model
# directory exists before the first save.
model_dir = '../models'
os.makedirs(model_dir, exist_ok=True)
log_path = os.path.join(log_dir, 'experiment_log.txt')

# Loop-invariant: every column except the identifiers and the target.
feature_cols = [c for c in train.columns if c not in ('unit_id', 'time_cycles', 'RUL')]
# With an integer random_state the splitter yields the same folds on every
# call, so the index pairs are computed once and reused for all combinations.
fold_indices = list(kf.split(train))

# NOTE(review): the folds are shuffled at row level, so rows sharing a
# 'unit_id' can presumably land in both the training and validation parts.
# Confirm whether a group-wise split is wanted.
for scaler_name, scaler in scaler_options.items():
    for filter_name, filter_func in filter_options.items():
        for model_name, model in model_options.items():
            for fold, (train_idx, val_idx) in enumerate(fold_indices):
                # Split rows into the training and validation parts of this fold.
                train_fold = train.iloc[train_idx]
                val_fold = train.iloc[val_idx]
                # Separate features and target. Explicit float copies make the
                # in-place column write below safe: it can neither hit a
                # read-only view nor truncate into an integer array.
                X_train = train_fold[feature_cols].to_numpy(dtype=float, copy=True)
                X_val = val_fold[feature_cols].to_numpy(dtype=float, copy=True)
                y_train = train_fold['RUL'].to_numpy()
                y_val = val_fold['RUL'].to_numpy()
                # Example: only the first feature column is filtered (the
                # original note calls it "Sensor 2"; filtering every feature
                # needs custom handling).
                # NOTE(review): the filter runs over the whole fold's column,
                # not per unit. Confirm this is intended.
                X_train[:, 0] = filter_func(X_train[:, 0])
                X_val[:, 0] = filter_func(X_val[:, 0])
                # Scaling: fit on the training part only, then apply to validation.
                X_train = scaler.fit_transform(X_train)
                X_val = scaler.transform(X_val)
                # Fit the model and score it on the validation part.
                model.fit(X_train, y_train)
                y_pred = model.predict(X_val)
                rmse = np.sqrt(mean_squared_error(y_val, y_pred))
                mae = mean_absolute_error(y_val, y_pred)
                # Record the metrics for this run.
                results.append({
                    'scaler': scaler_name,
                    'filter': filter_name,
                    'model': model_name,
                    'fold': fold,
                    'rmse': rmse,
                    'mae': mae,
                    'seed': SEED
                })
                # Save the fitted model for this configuration and fold.
                model_path = f"{model_dir}/{model_name.lower()}_{scaler_name.lower()}_{filter_name.lower()}_fold{fold}.pkl"
                joblib.dump(model, model_path)
                # Append to the log after every run so progress survives an
                # interrupted experiment.
                with open(log_path, 'a', encoding='utf-8') as f:
                    f.write(f"{model_path}, RMSE: {rmse:.4f}, MAE: {mae:.4f}\n")

# Write the collected per-run metrics to a CSV file in the log directory.
results_csv_path = os.path.join(log_dir, 'experiment_results.csv')
results_df = pd.DataFrame(results)
results_df.to_csv(results_csv_path, index=False)
print('전체 실험 완료 및 결과 저장!')

전체 실험 완료 및 결과 저장!
