In [193]:
from IPython.display import display, HTML
display(HTML("""
<style>
div.container{width:86% !important;}
div.cell.code_cell.rendered{width:100%;}
div.CodeMirror {font-family:Consolas; font-size:12pt;}
div.output {font-size:15pt; font-weight:bold;}
div.input {font-family:Consolas; font-size:12pt;}
div.prompt {min-width:70px;}
div#toc-wrapper{padding-top:120px;}
div.text_cell_render ul li{font-size:12pt;padding:5px;}
table.dataframe{font-size:15px;}
</style>
"""))

# LSTM

In [228]:
# ─────────────────────────────────────────────────────────────────────────────
# LSTM 기반 “순환예측(Recursive Forecast)” 예제
# • 2022~2024 데이터만으로 2025년도 일별 대여량 예측
# • 기상(Weather) 없이 시계열 내재 패턴 + 날짜특성만 사용
# ─────────────────────────────────────────────────────────────────────────────

import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler
from sklearn.metrics       import mean_squared_error, mean_absolute_error

import tensorflow as tf
from tensorflow.keras.models      import Sequential
from tensorflow.keras.layers      import LSTM, Dense
from tensorflow.keras.callbacks   import EarlyStopping, ReduceLROnPlateau

# 1) 데이터 로드 & 날짜특성 생성
df = pd.read_csv("model_1/total_data.csv", parse_dates=["일시"])
df = df.sort_values(["행정구","일시"]).reset_index(drop=True)

df["month"]      = df["일시"].dt.month
df["weekday"]    = df["일시"].dt.weekday
df["is_weekend"] = (df["weekday"] >= 5).astype(int)

# 2) lag 피처 정의
LAGS = [1, 7, 30, 365]   # 과거 1·7·30·365일
def make_features(df_grp):
    dfg = df_grp.set_index("일시").sort_index()
    for d in LAGS:
        dfg[f"lag_{d}"] = dfg["총대여량"].shift(d)
    dfg = dfg.dropna(subset=[f"lag_{d}" for d in LAGS]).reset_index()
    return dfg

# 3) 전체 구별로 features 병합
feat_list = []
for gu, g in df.groupby("행정구"):
    fg = make_features(g)
    feat_list.append(fg)
df_feat = pd.concat(feat_list, ignore_index=True)

# 4) train/valid split (2024-01-01 기준)
cut = pd.to_datetime("2024-01-01")
train_df = df_feat[df_feat["일시"] <  cut]
valid_df = df_feat[df_feat["일시"] >= cut]

# 5) 입력(X)·출력(y) 배열 생성
#   X cols = [lag_1, lag_7, lag_30, lag_365, month, weekday, is_weekend]
X_cols = [f"lag_{d}" for d in LAGS] + ["month","weekday","is_weekend"]
y_col  = "총대여량"

X_tr = train_df[X_cols].values
y_tr = train_df[y_col].values
X_va = valid_df[X_cols].values
y_va = valid_df[y_col].values

# 6) 스케일링 (X와 y 별도)
scaler_X = StandardScaler().fit(X_tr)
X_tr_s   = scaler_X.transform(X_tr)
X_va_s   = scaler_X.transform(X_va)

scaler_y = StandardScaler().fit(y_tr.reshape(-1,1))
y_tr_s   = scaler_y.transform(y_tr.reshape(-1,1)).flatten()

# 7) LSTM 입력 모양으로 재구성
#    여기 timesteps=1, features = len(X_cols)
X_tr_s = X_tr_s.reshape(-1, 1, X_tr_s.shape[1])
X_va_s = X_va_s.reshape(-1, 1, X_va_s.shape[1])

# 8) 모델 정의
model = Sequential([
    LSTM(64, input_shape=(1, X_tr_s.shape[2]), activation="tanh"),
    Dense(32, activation="relu"),
    Dense(1, activation="linear")
])
model.compile(optimizer="adam", loss="mse")

# 9) 학습
es  = EarlyStopping(monitor="val_loss", patience=10, restore_best_weights=True)
rlr = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=5, verbose=1)

model.fit(
    X_tr_s, y_tr_s,
    validation_data=(X_va_s, scaler_y.transform(y_va.reshape(-1,1)).flatten()),
    epochs=100,
    batch_size=64,
    callbacks=[es, rlr],
    verbose=2
)

# 10) 검증 성능
y_pred_s = model.predict(X_va_s).flatten()
y_pred   = scaler_y.inverse_transform(y_pred_s.reshape(-1,1)).flatten()
print("▶ Valid RMSE:", np.sqrt(mean_squared_error(y_va, y_pred)))
print("▶ Valid MAE:", mean_absolute_error(y_va, y_pred))

# 11) 2025-01-01~2025-06-22 순환 Forecast 함수
def recursive_forecast(df_grp, model, scaler_X, scaler_y, start_date, end_date):
    # df_grp: 2022~2024 원본 group, '일시' idx 없이 month,weekday,is_weekend,lag_* 포함
    history = df_grp.copy().set_index("일시").sort_index()
    preds = {}
    for d in pd.date_range(start=start_date, end=end_date):
        # ① feature 빌드
        lag_vals = [history.loc[d - pd.Timedelta(days=d0),"총대여량"] 
                    for d0 in LAGS]
        month, wd = d.month, d.weekday()
        feat = lag_vals + [month, wd, int(wd>=5)]
        x_s  = scaler_X.transform(np.array(feat).reshape(1,-1))
        x_s  = x_s.reshape(1,1,-1)
        # ② 예측 & 역스케일
        y_s = model.predict(x_s).flatten()
        y   = scaler_y.inverse_transform(y_s.reshape(-1,1))[0,0]
        # ③ 기록 & history에 추가
        preds[d] = y
        history.loc[d] = {
            "총대여량": y,
            **{f"lag_{d0}": np.nan for d0 in LAGS},  # placehold
            "month": month, "weekday": wd, "is_weekend": int(wd>=5)
        }
        # shift lag_* 자동 불가 → 바로 assign 과거 대여량 읽기 OK
    return preds

# 12) 전체 구별 Forecast 실행 예
all_preds = {}
for gu, g in df.groupby("행정구"):
    sub = g.set_index("일시")[["총대여량"]].copy().reset_index()
    p = recursive_forecast(
        sub, model, scaler_X, scaler_y,
        start_date=pd.to_datetime("2025-01-01"),
        end_date  =pd.to_datetime("2025-06-22")
    )
    all_preds[gu] = p.get(pd.to_datetime("2025-06-22"), np.nan)

print("▶ 2025-06-22 예측 (LSTM w/o weather)")
for gu, val in all_preds.items():
    print(f"{gu:5s} → {val:.0f} 대")

Epoch 1/100
143/143 - 2s - loss: 0.3401 - val_loss: 0.1704 - lr: 0.0010 - 2s/epoch - 11ms/step
Epoch 2/100
143/143 - 1s - loss: 0.2068 - val_loss: 0.1601 - lr: 0.0010 - 520ms/epoch - 4ms/step
Epoch 3/100
143/143 - 1s - loss: 0.2006 - val_loss: 0.1596 - lr: 0.0010 - 522ms/epoch - 4ms/step
Epoch 4/100
143/143 - 1s - loss: 0.1969 - val_loss: 0.1593 - lr: 0.0010 - 516ms/epoch - 4ms/step
Epoch 5/100
143/143 - 1s - loss: 0.1917 - val_loss: 0.1675 - lr: 0.0010 - 523ms/epoch - 4ms/step
Epoch 6/100
143/143 - 1s - loss: 0.1897 - val_loss: 0.1623 - lr: 0.0010 - 527ms/epoch - 4ms/step
Epoch 7/100
143/143 - 0s - loss: 0.1869 - val_loss: 0.1640 - lr: 0.0010 - 497ms/epoch - 3ms/step
Epoch 8/100
143/143 - 1s - loss: 0.1860 - val_loss: 0.1593 - lr: 0.0010 - 526ms/epoch - 4ms/step
Epoch 9/100

Epoch 9: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
143/143 - 1s - loss: 0.1811 - val_loss: 0.1652 - lr: 0.0010 - 523ms/epoch - 4ms/step
Epoch 10/100
143/143 - 1s - loss: 0.1786 - val_loss:

KeyError: Timestamp('2024-02-29 00:00:00', freq='D')