<a href="https://colab.research.google.com/github/UnpackJungHo/XRSimulator_Osaka/blob/Learning_AI/LSTM_TEST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error
from sklearn.model_selection import TimeSeriesSplit
from datetime import datetime, timedelta
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from xgboost import XGBRegressor
from prophet import Prophet
import warnings
warnings.filterwarnings('ignore')

class WeatherPredictor:
    def __init__(self):
        self.lstm_models = {}
        self.xgb_models = {}
        self.prophet_models = {}
        self.scalers = {}
        self.label_encoders = {}
        self.sequence_length = 24
        self.n_future = 6  # 예측 시간 단위

    def _get_time_weight(self, hour):
        """시간대별 가중치 계산"""
        if 6 <= hour < 12:  # 아침
            return 1.1
        elif 12 <= hour < 18:  # 오후
            return 1.05
        elif 18 <= hour < 24:  # 저녁
            return 0.95
        else:  # 새벽
            return 0.9

    def _get_season_weight(self, month):
        """계절별 가중치 계산"""
        if 3 <= month < 6:  # 봄
            return 1.05
        elif 6 <= month < 9:  # 여름
            return 1.1
        elif 9 <= month < 12:  # 가을
            return 1.0
        else:  # 겨울
            return 0.95

    def load_and_preprocess_data(self, training=True):
        """데이터 로드 및 전처리"""
        try:
            if training:
                df_2020 = pd.read_excel('2020_weather.xlsx', dtype={'DateTime(YYYYMMDDHHMI)': str})
                df_2021 = pd.read_excel('2021_weather.xlsx', dtype={'DateTime(YYYYMMDDHHMI)': str})
                df_2022 = pd.read_excel('2022_weather.xlsx', dtype={'DateTime(YYYYMMDDHHMI)': str})
                print("학습 데이터 로드 완료 (2020-2022)")
                df = pd.concat([df_2020, df_2021, df_2022], ignore_index=True)
            else:
                df = pd.read_excel('2023_weather.xlsx', dtype={'DateTime(YYYYMMDDHHMI)': str})
                print("검증 데이터 로드 완료 (2023)")

            df = self._basic_preprocessing(df)
            df = self._create_advanced_features(df)
            df = self._handle_outliers(df)
            df = self._handle_missing_values(df)

            return df

        except Exception as e:
            print("데이터 로드 중 오류 발생:", e)
            raise e

    def _basic_preprocessing(self, df):
        """기본 전처리 작업"""
        try:
            # DateTime 변환
            df['DateTime(YYYYMMDDHHMI)'] = pd.to_datetime(df['DateTime(YYYYMMDDHHMI)'],
                                                        format='%Y%m%d%H%M')
            df = df.set_index('DateTime(YYYYMMDDHHMI)')

            # 시간 관련 특성 추가
            df['hour'] = df.index.hour
            df['month'] = df.index.month
            df['day'] = df.index.day
            df['dayofweek'] = df.index.dayofweek
            df['dayofyear'] = df.index.dayofyear
            df['is_weekend'] = df['dayofweek'].isin([5, 6]).astype(int)

            # 계절성 특성 추가
            df['year_sin'] = np.sin(2 * np.pi * df['dayofyear']/365)
            df['year_cos'] = np.cos(2 * np.pi * df['dayofyear']/365)
            df['day_sin'] = np.sin(2 * np.pi * df['hour']/24)
            df['day_cos'] = np.cos(2 * np.pi * df['hour']/24)
            df['month_sin'] = np.sin(2 * np.pi * df['month']/12)
            df['month_cos'] = np.cos(2 * np.pi * df['month']/12)

            # 계절 추가
            df['season'] = pd.cut(df['month'],
                                bins=[0,3,6,9,12],
                                labels=['winter','spring','summer','fall'])

            # 범주형 변수 인코딩
            categorical_columns = ['WW', 'CT', 'season']
            for col in categorical_columns:
                if df[col].dtype == 'object' or df[col].dtype.name == 'category':
                    if col not in self.label_encoders:
                        self.label_encoders[col] = LabelEncoder()
                        self.label_encoders[col].fit(df[col].astype(str))

                    if col in ['WW', 'CT']:  # season은 고정된 카테고리이므로 제외
                        unique_labels = set(df[col].astype(str).unique())
                        known_labels = set(self.label_encoders[col].classes_)
                        new_labels = unique_labels - known_labels

                        if new_labels:
                            print(f"\n{col}에서 발견된 새로운 레이블: {new_labels}")
                            for new_label in new_labels:
                                df.loc[df[col].astype(str) == new_label, col] = self.label_encoders[col].classes_[0]

                    df[col] = self.label_encoders[col].transform(df[col].astype(str))

            return df

        except Exception as e:
            print("기본 전처리 중 오류 발생:", e)
            raise e

    def _create_advanced_features(self, df):
        """고급 특성 생성"""
        # 시계열 특성
        for col in ['TA', 'WS', 'HM', 'RN', 'WD']:
            # 이동평균 (다양한 윈도우 크기)
            windows = [1, 2, 3, 6, 12, 24]
            for w in windows:
                df[f'{col}_MA{w}'] = df[col].rolling(window=w).mean()

            # 변화율
            df[f'{col}_change'] = df[col].diff()
            df[f'{col}_change_rate'] = df[col].pct_change()
            df[f'{col}_change_acc'] = df[col].diff().diff()

            # 시간별/일별 통계
            df[f'{col}_hour_mean'] = df.groupby('hour')[col].transform('mean')
            df[f'{col}_hour_std'] = df.groupby('hour')[col].transform('std')

            # 시간대별 평균
            for period in ['morning', 'afternoon', 'evening', 'night']:
                if period == 'morning':
                    mask = (df['hour'] >= 6) & (df['hour'] < 12)
                elif period == 'afternoon':
                    mask = (df['hour'] >= 12) & (df['hour'] < 18)
                elif period == 'evening':
                    mask = (df['hour'] >= 18) & (df['hour'] < 24)
                else:  # night
                    mask = (df['hour'] < 6)
                df[f'{col}_{period}_mean'] = df[col][mask].mean()

            # 계절별 통계
            df[f'{col}_season_mean'] = df.groupby('season')[col].transform('mean')
            df[f'{col}_season_std'] = df.groupby('season')[col].transform('std')

        return df

    def _handle_outliers(self, df):
        """이상치 처리"""
        numerical_columns = ['TA', 'WS', 'HM', 'RN']
        for col in numerical_columns:
            Q1 = df[col].quantile(0.25)
            Q3 = df[col].quantile(0.75)
            IQR = Q3 - Q1
            lower_bound = Q1 - 1.5 * IQR
            upper_bound = Q3 + 1.5 * IQR
            df[col] = df[col].clip(lower_bound, upper_bound)
        return df

    def _handle_missing_values(self, df):
        """결측치 처리"""
        numerical_columns = ['TA', 'WS', 'HM', 'RN']
        for col in numerical_columns:
            # 시간대별, 계절별 평균으로 결측치 처리
            df[col] = df.groupby(['hour', 'season'])[col].transform(
                lambda x: x.fillna(x.mean())
            )
        df.fillna(df.mean(), inplace=True)
        return df

    def _prepare_data(self, df, target):
        """데이터 준비 및 스케일링"""
        try:
            # 인덱스 중복 확인 및 처리
            if df.index.duplicated().any():
                print("중복된 인덱스 발견. 첫 번째 값만 유지합니다.")
                df = df[~df.index.duplicated(keep='first')]

            # 특성 선택
            feature_columns = [col for col in df.columns
                              if col not in ['DateTime(YYYYMMDDHHMI)']]

            # 무한대 값 처리
            data = df[feature_columns].copy()
            data = data.replace([np.inf, -np.inf], np.nan)

            # 이상치 처리
            for col in data.columns:
                if data[col].dtype in ['int64', 'float64']:
                    Q1 = data[col].quantile(0.25)
                    Q3 = data[col].quantile(0.75)
                    IQR = Q3 - Q1
                    lower_bound = Q1 - 1.5 * IQR
                    upper_bound = Q3 + 1.5 * IQR
                    data[col] = data[col].clip(lower_bound, upper_bound)

            # 결측치 처리
            for col in data.columns:
                if data[col].dtype in ['int64', 'float64']:
                    data[col] = data.groupby(['hour', 'season'])[col].transform(
                        lambda x: x.fillna(x.mean())
                    )
            data = data.fillna(data.mean())

            # 각 변수별 개별 스케일링
            scaled_data = pd.DataFrame(index=data.index)  # 인덱스 유지
            scalers = {}

            for col in data.columns:
                if data[col].dtype in ['int64', 'float64']:
                    scaler = MinMaxScaler()
                    scaled_values = scaler.fit_transform(data[col].values.reshape(-1, 1)).flatten()
                    scaled_data[col] = scaled_values
                    scalers[col] = scaler
                else:
                    scaled_data[col] = data[col]

            self.scalers[target] = scalers
            return scaled_data

        except Exception as e:
            print(f"데이터 준비 중 오류 발생: {str(e)}")
            print(f"문제가 발생한 데이터 형태:")
            print(df.info())
            print("\n데이터 샘플:")
            print(df.head())
            raise e

    def create_sequences(self, data, target_col):
        """시계열 시퀀스 생성 (직접 예측 방식)"""
        X, y = [], []
        for i in range(len(data) - self.sequence_length - self.n_future + 1):
            X.append(data[i:(i + self.sequence_length)])
            y.append(data[target_col].iloc[i + self.sequence_length:i + self.sequence_length + self.n_future])
        return np.array(X), np.array(y)

    def create_lstm_model(self, input_shape):
        """단순화된 LSTM 모델 생성"""
        model = Sequential([
            LSTM(64, input_shape=input_shape),
            Dropout(0.2),
            Dense(32, activation='relu'),
            Dense(self.n_future)
        ])
        model.compile(optimizer='adam', loss='mse')
        return model

    def prepare_prophet_data(self, df, target):
        """Prophet용 데이터 준비"""
        prophet_df = pd.DataFrame({
            'ds': df.index,
            'y': df[target]
        })
        return prophet_df

    def train_prophet_model(self, df, target):
        """Prophet 모델 학습"""
        prophet_df = self.prepare_prophet_data(df, target)
        model = Prophet(
            yearly_seasonality=True,
            weekly_seasonality=True,
            daily_seasonality=True,
            changepoint_prior_scale=0.05
        )
        model.fit(prophet_df)
        return model

    def train_models(self, df):
        """모델 학습"""
        target_columns = ['TA', 'RN', 'WS', 'HM', 'WD']

        # 기본 전처리
        df = df.copy()
        df = df.replace([np.inf, -np.inf], np.nan)

        # 시계열 교차 검증 설정
        tscv = TimeSeriesSplit(n_splits=5)

        for target in target_columns:
            print(f"\n{target} 모델 학습 중...")

            try:
                # 데이터 준비
                scaled_data = self._prepare_data(df, target)

                # 데이터 검증
                if scaled_data.isnull().any().any():
                    print(f"경고: {target}에 대한 전처리 후에도 결측치가 존재합니다.")
                    print("결측치가 있는 열:", scaled_data.columns[scaled_data.isnull().any()].tolist())
                    scaled_data = scaled_data.fillna(scaled_data.mean())

                if np.isinf(scaled_data.values).any():
                    print(f"경고: {target}에 대한 전처리 후에도 무한대 값이 존재합니다.")
                    scaled_data = scaled_data.replace([np.inf, -np.inf], 0)

                # LSTM 데이터 준비
                X, y = self.create_sequences(scaled_data, target)

                # LSTM 모델 학습
                lstm_model = self.create_lstm_model((self.sequence_length, X.shape[2]))

                # 교차 검증을 통한 LSTM 학습
                for fold, (train_idx, val_idx) in enumerate(tscv.split(X)):
                    print(f"\nFold {fold + 1}/5")
                    X_train, X_val = X[train_idx], X[val_idx]
                    y_train, y_val = y[train_idx], y[val_idx]

                    # Early Stopping 설정
                    early_stopping = tf.keras.callbacks.EarlyStopping(
                        monitor='val_loss',
                        patience=5,
                        restore_best_weights=True
                    )

                    # LSTM 모델 학습
                    lstm_model.fit(
                        X_train, y_train,
                        validation_data=(X_val, y_val),
                        epochs=50,
                        batch_size=32,
                        callbacks=[early_stopping],
                        verbose=1
                    )

                self.lstm_models[target] = lstm_model

                # XGBoost 모델 학습
                xgb_models = []
                for i in range(self.n_future):
                    xgb_model = XGBRegressor(
                        n_estimators=100,
                        learning_rate=0.1,
                        max_depth=5,
                        objective='reg:squarederror'
                    )

                    # i+1 시점 후의 값을 예측하도록 학습
                    X_xgb = scaled_data.values[:-self.n_future]
                    y_xgb = scaled_data[target].values[i+1:-(self.n_future-i-1) if self.n_future-i-1 > 0 else None]

                    xgb_model.fit(X_xgb, y_xgb)
                    xgb_models.append(xgb_model)

                self.xgb_models[target] = xgb_models

                # Prophet 모델 학습
                prophet_model = self.train_prophet_model(df, target)
                self.prophet_models[target] = prophet_model

                # 모델 성능 평가
                self._evaluate_models(scaled_data, target)

            except Exception as e:
                print(f"\n{target} 모델 학습 중 오류 발생:")
                print(f"오류 메시지: {str(e)}")
    def _evaluate_models(self, data, target):
        """모델 성능 평가"""
        X, y = self.create_sequences(data, target)
        test_size = len(X) // 5  # 20%를 테스트 셋으로 사용

        X_test = X[-test_size:]
        y_test = y[-test_size:]

        try:
            # LSTM 예측
            lstm_pred = self.lstm_models[target].predict(X_test)

            # XGBoost 예측 (n_future 시점에 대한 예측으로 수정)
            xgb_pred = np.zeros((len(X_test), self.n_future))
            for i in range(self.n_future):
                xgb_pred[:, i] = self.xgb_models[target][i].predict(X_test[:, -1, :])

            # Prophet 예측
            prophet_pred = np.zeros((len(X_test), self.n_future))
            future_dates = pd.date_range(
                start=data.index[-1],
                periods=self.n_future + 1,
                freq='H'
            )[1:]

            for i in range(len(X_test)):
                future_df = pd.DataFrame({'ds': future_dates})
                prophet_forecast = self.prophet_models[target].predict(future_df)
                prophet_pred[i] = prophet_forecast['yhat'].values

            # 앙상블 예측 (가중 평균)
            ensemble_pred = (0.4 * lstm_pred +
                           0.4 * xgb_pred +
                           0.2 * prophet_pred)

            # 성능 평가
            mse = mean_squared_error(y_test, ensemble_pred)
            mae = mean_absolute_error(y_test, ensemble_pred)
            r2 = r2_score(y_test.reshape(-1), ensemble_pred.reshape(-1))

            print(f"\n{target} 앙상블 모델 성능:")
            print(f"MSE: {mse:.4f}")
            print(f"MAE: {mae:.4f}")
            print(f"R2 Score: {r2:.4f}")

        except Exception as e:
            print(f"\n{target} 모델 평가 중 오류 발생:")
            print(f"오류 메시지: {str(e)}")
            print(f"LSTM 예측 shape: {lstm_pred.shape}")
            print(f"XGBoost 예측 shape: {xgb_pred.shape}")
            print(f"테스트 데이터 shape: {y_test.shape}")

    def predict_and_evaluate(self, input_time, validation_data):
        """예측 수행 및 평가"""
        input_datetime = pd.to_datetime(input_time, format='%Y/%m/%d/%H:%M')

        # 입력 시퀀스 준비
        past_data = validation_data[validation_data.index <= input_datetime].tail(self.sequence_length)

        if len(past_data) < self.sequence_length:
            print(f"예측을 위해 최소 {self.sequence_length}시간의 데이터가 필요합니다.")
            return None

        predictions = []
        target_columns = ['TA', 'RN', 'WS', 'HM', 'WD']

        for hour in range(self.n_future):
            next_time = input_datetime + timedelta(hours=hour)
            hour_prediction = {'DateTime': next_time}

            for target in target_columns:
                # 데이터 준비
                scaled_data = self._prepare_data(past_data, target)

                # LSTM 예측
                X_lstm = scaled_data.values.reshape(1, self.sequence_length, -1)
                lstm_pred = self.lstm_models[target].predict(X_lstm, verbose=0)[0][hour]

                # XGBoost 예측
                xgb_pred = self.xgb_models[target][hour].predict(scaled_data.iloc[-1:].values)[0]

                # Prophet 예측
                future_df = pd.DataFrame({'ds': [next_time]})
                prophet_pred = self.prophet_models[target].predict(future_df)['yhat'].values[0]

                # 앙상블 예측 (가중 평균)
                ensemble_pred = (0.4 * lstm_pred +
                               0.4 * xgb_pred +
                               0.2 * prophet_pred)

                # 시간대/계절 가중치 적용
                time_weight = self._get_time_weight(next_time.hour)
                season_weight = self._get_season_weight(next_time.month)
                final_pred = ensemble_pred * time_weight * season_weight

                # 역스케일링
                scaler = self.scalers[target][target]
                final_pred = scaler.inverse_transform([[final_pred]])[0][0]

                hour_prediction[f'{target}_pred'] = final_pred

                # 실제값 찾기
                actual_row = validation_data[validation_data.index == next_time]
                if not actual_row.empty:
                    hour_prediction[f'{target}_actual'] = actual_row[target].iloc[0]
                else:
                    hour_prediction[f'{target}_actual'] = None

            predictions.append(hour_prediction)

        results_df = pd.DataFrame(predictions)

        # 예측 성능 평가
        print("\n예측 성능 평가:")
        for target in target_columns:
            mask = results_df[f'{target}_actual'].notna()
            if mask.any():
                mae = mean_absolute_error(
                    results_df[mask][f'{target}_actual'],
                    results_df[mask][f'{target}_pred']
                )
                mse = mean_squared_error(
                    results_df[mask][f'{target}_actual'],
                    results_df[mask][f'{target}_pred']
                )
                r2 = r2_score(
                    results_df[mask][f'{target}_actual'],
                    results_df[mask][f'{target}_pred']
                )

                print(f"\n{target} 예측 성능:")
                print(f"MAE: {mae:.4f}")
                print(f"MSE: {mse:.4f}")
                print(f"R2 Score: {r2:.4f}")

        return results_df

def main():
    # 예측기 인스턴스 생성
    predictor = WeatherPredictor()

    # 학습 데이터 로드 및 전처리
    print("학습 데이터 로드 및 전처리 중...")
    train_df = predictor.load_and_preprocess_data(training=True)

    # 검증 데이터 로드
    print("\n검증 데이터 로드 중...")
    validation_df = predictor.load_and_preprocess_data(training=False)

    # 모델 학습
    print("\n모델 학습 시작...")
    predictor.train_models(train_df)

    while True:
        # 사용자로부터 날짜 입력 받기
        input_time = input("\n예측할 날짜와 시간을 입력하세요 (형식: YYYY/MM/DD/HH:MM, 종료는 'q'): ")

        if input_time.lower() == 'q':
            break

        try:
            # 예측 수행 및 평가
            results = predictor.predict_and_evaluate(input_time, validation_df)

            if results is not None:
                print("\n예측 결과:")
                pd.set_option('display.max_columns', None)
                print(results[['DateTime'] +
                            [col for col in results.columns if 'pred' in col or 'actual' in col]])

        except Exception as e:
            print(f"오류 발생: {str(e)}")
            print("올바른 형식으로 다시 입력해주세요.")

if __name__ == "__main__":
    main()

학습 데이터 로드 및 전처리 중...
학습 데이터 로드 완료 (2020-2022)

검증 데이터 로드 중...
검증 데이터 로드 완료 (2023)

WW에서 발견된 새로운 레이블: {'19060502', '601', '400601', '4240', '190501', '19060201', '160201', '1602', '190602', '1601'}

CT에서 발견된 새로운 레이블: {'CuCs', 'CuAcCi'}

모델 학습 시작...

TA 모델 학습 중...
중복된 인덱스 발견. 첫 번째 값만 유지합니다.

Fold 1/5
Epoch 1/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 13ms/step - loss: 0.0685 - val_loss: 0.0789
Epoch 2/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 12ms/step - loss: 0.0087 - val_loss: 0.0738
Epoch 3/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - loss: 0.0061 - val_loss: 0.0691
Epoch 4/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0049 - val_loss: 0.0616
Epoch 5/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0040 - val_loss: 0.0653
Epoch 6/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0

DEBUG:cmdstanpy:input tempfile: /tmp/tmp4gj8q6rn/wp5rwv5k.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp4gj8q6rn/glvw_66b.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=5899', 'data', 'file=/tmp/tmp4gj8q6rn/wp5rwv5k.json', 'init=/tmp/tmp4gj8q6rn/glvw_66b.json', 'output', 'file=/tmp/tmp4gj8q6rn/prophet_modelt1j97svj/prophet_model-20241227120740.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
12:07:40 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
12:07:53 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step

TA 앙상블 모델 성능:
MSE: 2.9970
MAE: 1.7275
R2 Score: -77.3652

RN 모델 학습 중...
중복된 인덱스 발견. 첫 번째 값만 유지합니다.

Fold 1/5
Epoch 1/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 0.0294 - val_loss: 6.4845e-04
Epoch 2/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 8ms/step - loss: 6.5405e-04 - val_loss: 5.0082e-05
Epoch 3/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 8.9869e-05 - val_loss: 8.0801e-05
Epoch 4/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 3.7209e-05 - val_loss: 1.2997e-04
Epoch 5/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 1.4191e-05 - val_loss: 1.0558e-04
Epoch 6/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 9.1838e-06 - val_loss: 8.6743e-05
Epoch 7/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━

DEBUG:cmdstanpy:input tempfile: /tmp/tmp4gj8q6rn/97of49id.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp4gj8q6rn/ftwa0nke.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=90551', 'data', 'file=/tmp/tmp4gj8q6rn/97of49id.json', 'init=/tmp/tmp4gj8q6rn/ftwa0nke.json', 'output', 'file=/tmp/tmp4gj8q6rn/prophet_model2mj1lqjh/prophet_model-20241227122345.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
12:23:45 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
12:23:51 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

WS 앙상블 모델 성능:
MSE: 0.0864
MAE: 0.2691
R2 Score: -1.7896

HM 모델 학습 중...
중복된 인덱스 발견. 첫 번째 값만 유지합니다.

Fold 1/5
Epoch 1/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 12ms/step - loss: 0.2466 - val_loss: 0.0442
Epoch 2/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0482 - val_loss: 0.0334
Epoch 3/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0249 - val_loss: 0.0278
Epoch 4/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0195 - val_loss: 0.0129
Epoch 5/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0166 - val_loss: 0.0119
Epoch 6/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0150 - val_loss: 0.0112
Epoch 7/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0

DEBUG:cmdstanpy:input tempfile: /tmp/tmp4gj8q6rn/3l8cpv0s.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp4gj8q6rn/s3hut68b.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=1823', 'data', 'file=/tmp/tmp4gj8q6rn/3l8cpv0s.json', 'init=/tmp/tmp4gj8q6rn/s3hut68b.json', 'output', 'file=/tmp/tmp4gj8q6rn/prophet_modelb88xwdht/prophet_model-20241227123053.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
12:30:53 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
12:31:05 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step

HM 앙상블 모델 성능:
MSE: 187.2769
MAE: 13.6812
R2 Score: -5239.7957

WD 모델 학습 중...
중복된 인덱스 발견. 첫 번째 값만 유지합니다.

Fold 1/5
Epoch 1/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 9ms/step - loss: 0.1739 - val_loss: 0.0571
Epoch 2/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0561 - val_loss: 0.0517
Epoch 3/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0471 - val_loss: 0.0518
Epoch 4/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0437 - val_loss: 0.0480
Epoch 5/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0425 - val_loss: 0.0451
Epoch 6/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - loss: 0.0393 - val_loss: 0.0423
Epoch 7/50
[1m137/137[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - lo

DEBUG:cmdstanpy:input tempfile: /tmp/tmp4gj8q6rn/fwwqzduw.json
DEBUG:cmdstanpy:input tempfile: /tmp/tmp4gj8q6rn/xqs9h12d.json
DEBUG:cmdstanpy:idx 0
DEBUG:cmdstanpy:running CmdStan, num_threads: None
DEBUG:cmdstanpy:CmdStan args: ['/usr/local/lib/python3.10/dist-packages/prophet/stan_model/prophet_model.bin', 'random', 'seed=28951', 'data', 'file=/tmp/tmp4gj8q6rn/fwwqzduw.json', 'init=/tmp/tmp4gj8q6rn/xqs9h12d.json', 'output', 'file=/tmp/tmp4gj8q6rn/prophet_modeln5hl8b43/prophet_model-20241227123934.csv', 'method=optimize', 'algorithm=lbfgs', 'iter=10000']
12:39:34 - cmdstanpy - INFO - Chain [1] start processing
INFO:cmdstanpy:Chain [1] start processing
12:39:43 - cmdstanpy - INFO - Chain [1] done processing
INFO:cmdstanpy:Chain [1] done processing


[1m165/165[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

WD 앙상블 모델 성능:
MSE: 14.2862
MAE: 3.7657
R2 Score: -239.8635

예측할 날짜와 시간을 입력하세요 (형식: YYYY/MM/DD/HH:MM, 종료는 'q'): 2023/01/05/03:00

예측 성능 평가:

TA 예측 성능:
MAE: 10.8847
MSE: 119.4822
R2 Score: -298.5376

RN 예측 성능:
MAE: 1.7100
MSE: 2.9533
R2 Score: 0.0000

WS 예측 성능:
MAE: 0.7188
MSE: 0.6474
R2 Score: -1.5417

HM 예측 성능:
MAE: 360.7752
MSE: 131722.4686
R2 Score: -16637.6276

WD 예측 성능:
MAE: 123.3387
MSE: 15425.8035
R2 Score: -97.7954

예측 결과:
             DateTime    TA_pred  TA_actual  RN_pred  RN_actual   WS_pred  \
0 2023-01-05 03:00:00 -14.606250       -3.7  -10.539       -9.0  2.449450   
1 2023-01-05 04:00:00 -14.880693       -4.3  -10.539       -9.0  2.331902   
2 2023-01-05 05:00:00 -15.163409       -4.7  -10.539       -9.0  2.368934   
3 2023-01-05 06:00:00 -17.598326       -5.0  -10.881       -9.0  2.929963   
4 2023-01-05 07:00:00 -16.755048       -5.3  -10.881       -9.0  3.079317   
5 2023-01-05 08:00:00 -14.90

KeyboardInterrupt: Interrupted by user