## 딥러닝 시계열 예측: 실습 문제

### 기본 라이브러리 임포트

In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
import plotly.express as px
import plotly.graph_objects as go
import os
import warnings

# Suppress every warning category for the rest of the session
# (presumably to keep the notebook output uncluttered).
warnings.filterwarnings('ignore')

---

### 연습 문제 1 & 2: AEP 에너지 소비량 데이터 로드 및 탐색

In [None]:
# 1. Load the dataset: read the CSV at energy_url into a DataFrame.
energy_url = 'https://storage.googleapis.com/mledu-datasets/pjm_hourly_est.csv'
df_energy = pd.read_csv(filepath_or_buffer=energy_url)

# Header line and the first five rows, emitted with a single print call.
print("--- AEP 에너지 소비량 데이터셋 (처음 5행) ---", df_energy.head(), sep="\n")

# 2. Inspect the basic frame information and the summary statistics.
print("\n--- 데이터프레임 정보 ---")
df_energy.info()  # info() writes its report itself, so it is not wrapped in print()

print("\n--- 통계 요약 ---", df_energy.describe(), sep="\n")

---

### 연습 문제 3 & 4: 시계열 데이터 전처리

In [None]:
# 3. Preprocess the AEP energy dataset.
df_energy['Datetime'] = pd.to_datetime(df_energy['Datetime'])
df_energy.set_index('Datetime', inplace=True)
# Label-based date slicing is only well defined on a sorted DatetimeIndex,
# so put the rows in chronological order before filtering.
df_energy.sort_index(inplace=True)

# Keep only the 2015-2016 rows (both end labels are inclusive with .loc).
df_energy_filtered = df_energy.loc['2015-01-01':'2016-12-31'].copy()

fig_energy = px.line(df_energy_filtered, y='AEP_MW', title='AEP Energy Consumption (2015-2016)')
fig_energy.show()

# 4. (Mock) Seoul air-pollution preprocessing.
# The Apple stock CSV is used as a stand-in dataset for this exercise.
apple_url = 'https://raw.githubusercontent.com/guipsamora/pandas_exercises/master/09_Time_Series/Apple_Stock/appl_1980_2014.csv'
df_air_mock = pd.read_csv(apple_url)
df_air_mock['Date'] = pd.to_datetime(df_air_mock['Date'])
df_air_mock.set_index('Date', inplace=True)
# NOTE(review): this file appears to be stored newest-first; without sorting,
# the open-ended slice below would select the wrong end of the data.
df_air_mock.sort_index(inplace=True)
df_air_mock_filtered = df_air_mock.loc['2010':].copy()

# NOTE(review): the CSV header is believed to name the adjusted-close column
# 'Adj Close' (no period) - confirm against df_air_mock.columns.
fig_air = px.line(df_air_mock_filtered, y='Adj Close', title='(Mock) Air Pollution Index since 2010')
fig_air.show()

---

### 연습 문제 5: 특성 공학

In [None]:
# 5. Feature engineering on the AEP energy dataset.
# Work on a copy so the filtered frame keeps only its original columns.
df_energy_eng = df_energy_filtered.copy()

day_of_week = df_energy_eng.index.dayofweek
day_of_year = df_energy_eng.index.dayofyear

# Map each calendar position onto an angle, then store its sine and cosine
# so the end of a cycle sits next to the start of the following one.
week_angle = day_of_week * (2 * np.pi / 7)
year_angle = day_of_year * (2 * np.pi / 365.25)

df_energy_eng['DayOfWeek_sin'] = np.sin(week_angle)
df_energy_eng['DayOfWeek_cos'] = np.cos(week_angle)
df_energy_eng['DayOfYear_sin'] = np.sin(year_angle)
df_energy_eng['DayOfYear_cos'] = np.cos(year_angle)

print("특성 공학 후 데이터 샘플:")
print(df_energy_eng.head())

# Plot the first 168 rows (168 hours = 7 days) of the weekly encoding.
first_week = df_energy_eng.head(168)
fig_dayofweek = px.line(first_week, y=['DayOfWeek_sin', 'DayOfWeek_cos'], title='Weekly Cycle')
fig_dayofweek.show()

---

### 연습 문제 6 & 7: 데이터 분할 및 정규화

In [None]:
# 6. Split the data by position: first 80% train, next 10% validation,
# final 10% test.
n_energy = len(df_energy_eng)
train_end = int(n_energy * 0.8)
val_end = int(n_energy * 0.9)

train_energy_df = df_energy_eng.iloc[:train_end]
val_energy_df = df_energy_eng.iloc[train_end:val_end]
test_energy_df = df_energy_eng.iloc[val_end:]

# 7. Normalize. The per-column mean and standard deviation come from the
# training split only and are then applied to all three splits.
train_energy_mean = train_energy_df.mean()
train_energy_std = train_energy_df.std()

train_energy_df = train_energy_df.sub(train_energy_mean).div(train_energy_std)
val_energy_df = val_energy_df.sub(train_energy_mean).div(train_energy_std)
test_energy_df = test_energy_df.sub(train_energy_mean).div(train_energy_std)

print("정규화된 훈련 데이터 샘플:")
print(train_energy_df.head())

# Visualize the distribution of the normalized training target.
fig_hist = px.histogram(
    train_energy_df,
    x='AEP_MW',
    nbins=50,
    title='Normalized Energy Consumption Distribution',
)
fig_hist.show()

---

### 연습 문제 8 & 9: 윈도우 생성
*(WindowGenerator 클래스는 튜토리얼 본문의 코드를 그대로 복사하여 아래 셀에 정의한 뒤 사용합니다.)*

In [None]:
# WindowGenerator class definition (copied from the tutorial)
class WindowGenerator():
    """Slice consecutive rows of a time-series frame into (inputs, labels) windows.

    Every window spans ``input_width + shift`` consecutive rows. The first
    ``input_width`` rows are the model inputs and the last ``label_width``
    rows are the labels.

    Args:
        input_width: Number of time steps handed to the model as input.
        label_width: Number of time steps the model has to predict.
        shift: Number of steps between the last input step and the last
            label step.
        train_df: Training frame; its column order defines ``column_indices``.
        val_df: Validation frame.
        test_df: Test frame.
        label_columns: Optional list of column names kept in the labels.
            ``None`` keeps every column.
        batch_size: Number of windows per batch of the generated datasets.

    Raises:
        ValueError: If ``label_width`` exceeds ``input_width + shift``; the
            label slice would then not contain ``label_width`` steps.
    """

    def __init__(self, input_width, label_width, shift, train_df, val_df, test_df,
                 label_columns=None, batch_size=32):
        total_window_size = input_width + shift
        if label_width > total_window_size:
            # Without this check label_start goes negative and the mismatch
            # only surfaces later as a shape error inside the dataset map.
            raise ValueError(
                f'label_width ({label_width}) must not exceed '
                f'input_width + shift ({total_window_size})')

        # Raw data splits.
        self.train_df = train_df
        self.val_df = val_df
        self.test_df = test_df

        # Column bookkeeping: position of each label column within the label
        # tensor, and position of every column within the feature axis.
        self.label_columns = label_columns
        if label_columns is not None:
            self.label_columns_indices = {name: i for i, name in enumerate(label_columns)}
        self.column_indices = {name: i for i, name in enumerate(train_df.columns)}

        # Window geometry.
        self.input_width = input_width
        self.label_width = label_width
        self.shift = shift
        self.batch_size = batch_size
        self.total_window_size = total_window_size

        self.input_slice = slice(0, input_width)
        self.input_indices = np.arange(self.total_window_size)[self.input_slice]

        # Labels are the trailing label_width steps of the window.
        self.label_start = self.total_window_size - self.label_width
        self.labels_slice = slice(self.label_start, None)
        self.label_indices = np.arange(self.total_window_size)[self.labels_slice]

    def __repr__(self):
        """Return a multi-line summary of the window layout."""
        return '\n'.join([
            f'Total window size: {self.total_window_size}',
            f'Input indices: {self.input_indices}',
            f'Label indices: {self.label_indices}',
            f'Label column name(s): {self.label_columns}'
        ])

    def split_window(self, features):
        """Split a batch of full windows into an (inputs, labels) pair.

        Args:
            features: Rank-3 tensor indexed as (batch, time, features).

        Returns:
            Tuple ``(inputs, labels)``. ``labels`` only holds the columns in
            ``label_columns`` when that list was given.
        """
        inputs = features[:, self.input_slice, :]
        labels = features[:, self.labels_slice, :]
        if self.label_columns is not None:
            # Pick the requested columns and stack them back on the last axis.
            labels = tf.stack(
                [labels[:, :, self.column_indices[name]] for name in self.label_columns],
                axis=-1)
        # Slicing drops static shape information; restore the known time axis.
        inputs.set_shape([None, self.input_width, None])
        labels.set_shape([None, self.label_width, None])
        return inputs, labels

    def make_dataset(self, data):
        """Build a shuffled, batched dataset of (inputs, labels) windows.

        Args:
            data: Frame or array of rows ordered in time; converted to float32.

        Returns:
            Dataset yielding ``split_window`` results for windows of
            ``total_window_size`` rows taken with stride 1.
        """
        data = np.array(data, dtype=np.float32)
        ds = tf.keras.utils.timeseries_dataset_from_array(
            data=data,
            targets=None,
            sequence_length=self.total_window_size,
            sequence_stride=1,
            shuffle=True,
            batch_size=self.batch_size,
        )
        ds = ds.map(self.split_window)
        return ds

    @property
    def train(self):
        """Windowed dataset built from ``train_df`` (rebuilt on every access)."""
        return self.make_dataset(self.train_df)

    @property
    def val(self):
        """Windowed dataset built from ``val_df`` (rebuilt on every access)."""
        return self.make_dataset(self.val_df)

    @property
    def test(self):
        """Windowed dataset built from ``test_df`` (rebuilt on every access)."""
        return self.make_dataset(self.test_df)

# Both windows share the same normalized splits and the same label column.
energy_window_kwargs = dict(
    train_df=train_energy_df,
    val_df=val_energy_df,
    test_df=test_energy_df,
    label_columns=['AEP_MW'],
)

# 8. Multi-step window: 7*24 input steps, predicting the following 24 steps.
multi_step_energy_window = WindowGenerator(
    input_width=7*24, label_width=24, shift=24, **energy_window_kwargs)

print("--- 다중 스텝 에너지 예측 윈도우 ---", multi_step_energy_window, sep="\n")

# 9. Single-step window: 24 input steps, predicting the next single step.
single_step_energy_window = WindowGenerator(
    input_width=24, label_width=1, shift=1, **energy_window_kwargs)

print("\n--- 단일 스텝 에너지 예측 윈도우 ---", single_step_energy_window, sep="\n")

---

### 연습 문제 10: Dense 모델로 단일 스텝 예측

In [None]:
# 10. Build the Dense model for single-step prediction.
dense_model = tf.keras.Sequential([
    tf.keras.layers.Flatten(),  # (batch, time, features) -> (batch, time * features)
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=64, activation='relu'),
    tf.keras.layers.Dense(units=1),
    # Add the time axis back: (batch, 1) -> (batch, 1, 1). The labels from
    # single_step_energy_window are (batch, label_width=1, 1), so predictions
    # and labels now match explicitly instead of relying on Keras' implicit
    # rank adjustment inside the loss and metric.
    tf.keras.layers.Reshape([1, -1]),
])

dense_model.compile(loss=tf.keras.losses.MeanSquaredError(),
                    optimizer=tf.keras.optimizers.Adam(),
                    metrics=[tf.keras.metrics.MeanAbsoluteError()])

# Train for 10 epochs, validating on the held-out validation windows.
history = dense_model.fit(single_step_energy_window.train, epochs=10,
                          validation_data=single_step_energy_window.val)

# evaluate() returns [loss, mean_absolute_error]; index 1 is the MAE metric.
test_performance_dense = dense_model.evaluate(single_step_energy_window.test)
print(f"\nDense Model - Test MAE: {test_performance_dense[1]:.4f}")

---

### 연습 문제 11: LSTM 모델로 다중 스텝 예측

In [None]:
# 11. Build the LSTM model for multi-step prediction.
OUT_STEPS = 24  # predict 24 hours ahead
num_features = train_energy_df.shape[1]

# Assemble the network layer by layer:
#   LSTM keeps only its final state  -> (batch, 32)
#   Dense emits every output step    -> (batch, OUT_STEPS * 1)
#   Reshape restores the time axis   -> (batch, OUT_STEPS, 1)
lstm_energy_model = tf.keras.models.Sequential()
lstm_energy_model.add(tf.keras.layers.LSTM(32, return_sequences=False))
lstm_energy_model.add(tf.keras.layers.Dense(OUT_STEPS * 1))
lstm_energy_model.add(tf.keras.layers.Reshape([OUT_STEPS, 1]))

lstm_energy_model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=tf.keras.losses.MeanSquaredError(),
    metrics=[tf.keras.metrics.MeanAbsoluteError()],
)

# Train for 10 epochs on the multi-step windows, validating after each epoch.
history = lstm_energy_model.fit(
    multi_step_energy_window.train,
    epochs=10,
    validation_data=multi_step_energy_window.val,
)

# evaluate() returns [loss, mean_absolute_error]; index 1 is the MAE metric.
test_performance_lstm = lstm_energy_model.evaluate(multi_step_energy_window.test)
print(f"\nLSTM Model - Test MAE: {test_performance_lstm[1]:.4f}")