# 📈 LSTM 기반 투자수익률 예측 모델 (전체 데이터셋)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

In [None]:
# ✅ Parameter settings
time_steps: int = 8  # length of each input window (rows of history per sample)
predict_steps: int = 4  # number of future target values predicted per sample
target_column: str = "투자수익률"  # indicator being forecast
# Indicators used as model inputs; the target is one of them.
feature_columns: list = [
    "임대료",
    "공실률",
    "순영업소득",
    "투자수익률",
]

In [None]:
# ✅ Load the data
# "utf-8-sig" decodes UTF-8 and drops a leading byte-order mark if present.
df = pd.read_csv(
    "/content/오피스_임대료_공실률_투자수익률_순영업소득_all.csv",
    encoding="utf-8-sig",
)

In [None]:
# ✅ Prediction function
def predict_for_region(region_name):
    """Train an LSTM on one region's indicators and forecast the target.

    Rows of the module-level ``df`` whose ``CLS_NM`` equals *region_name* are
    pivoted into one row per ``WRTTIME_DESC`` and one column per entry of
    ``feature_columns``. The table is min-max scaled, cut into sliding
    windows of ``time_steps`` rows, and used to fit an LSTM that outputs the
    next ``predict_steps`` values of ``target_column``. The final window is
    then fed to the model to produce the forecast.

    Args:
        region_name: Value of the ``CLS_NM`` column that selects the region.

    Returns:
        A ``(last_period, forecast)`` tuple: the last ``WRTTIME_DESC`` label
        of the pivoted table, and a 1-D array with ``predict_steps``
        forecast values converted back to the original scale.

    Raises:
        ValueError: If ``target_column`` is not listed in
            ``feature_columns``, or if fewer than
            ``time_steps + predict_steps`` complete rows are available.
    """
    # Locate the target by name instead of assuming it is the last of four
    # features, so editing ``feature_columns`` cannot silently retarget the
    # model. ``list.index`` raises ValueError when the name is missing.
    target_idx = feature_columns.index(target_column)

    # NOTE(review): chronological order relies on WRTTIME_DESC sorting
    # correctly as-is (e.g. lexically for strings) -- confirm the format.
    region_df = df[df["CLS_NM"] == region_name].sort_values("WRTTIME_DESC")
    pivot_df = region_df.pivot(
        index="WRTTIME_DESC", columns="지표", values="DTA_VAL"
    )[feature_columns].dropna()
    if len(pivot_df) < time_steps + predict_steps:
        raise ValueError("시계열 길이가 부족함")

    scaler = MinMaxScaler()
    scaled_data = scaler.fit_transform(pivot_df)
    n_features = scaled_data.shape[1]

    # Each sample pairs `time_steps` rows of all features with the following
    # `predict_steps` values of the target column.
    n_windows = len(scaled_data) - time_steps - predict_steps + 1
    X = np.array([scaled_data[i:i + time_steps] for i in range(n_windows)])
    y = np.array([
        scaled_data[i + time_steps:i + time_steps + predict_steps, target_idx]
        for i in range(n_windows)
    ])

    model = Sequential()
    model.add(LSTM(64, activation="relu", input_shape=(time_steps, n_features)))
    model.add(Dense(predict_steps))
    model.compile(optimizer="adam", loss="mse")
    model.fit(X, y, epochs=100, batch_size=8, verbose=0)

    # Forecast from the most recent window; silence the per-call progress bar
    # to match the silent training above.
    last_seq = np.expand_dims(scaled_data[-time_steps:], axis=0)
    pred_scaled = model.predict(last_seq, verbose=0)

    # The scaler was fitted on all features, so place the predictions in the
    # target column of a zero-filled array before inverting the scaling; the
    # other columns are placeholders and are discarded.
    padded = np.zeros((predict_steps, n_features))
    padded[:, target_idx] = pred_scaled.reshape(-1)
    future_pred = scaler.inverse_transform(padded)[:, target_idx]
    return pivot_df.index[-1], future_pred

In [None]:
# ✅ Run the forecast for every region
# Maps region name -> forecast array; regions that fail are reported and skipped.
results = {}
region_names = df["CLS_NM"].unique()
for region in region_names:
    try:
        last_date, forecast = predict_for_region(region)
        results[region] = forecast
        message = f"✅ {region} 완료 | 마지막 분기: {last_date} | 예측값: {np.round(forecast, 2)}"
        print(message)
    except Exception as e:
        # Best-effort batch run: one failing region must not stop the others.
        message = f"⚠️ {region} 예측 실패: {e}"
        print(message)

In [None]:
# ✅ Collect and save the results
# One row per region, one column per forecast step. Building the frame with
# explicit columns keeps this cell working even when `results` is empty
# (every region failed), where renaming the columns of an empty frame raises.
horizon_columns = [f"예측_{i+1}분기후" for i in range(predict_steps)]
result_df = pd.DataFrame.from_dict(results, orient="index", columns=horizon_columns)
# Rank regions by their mean forecast over the whole horizon.
result_df["예측평균"] = result_df.mean(axis=1)
result_df = result_df.sort_values("예측평균", ascending=False)
print("\n📌 향후 투자수익률 예측 결과 (상위 지역):")
print(result_df.head())
result_df.to_csv("LSTM_예측_투자수익률_지역별_all.csv", encoding="utf-8-sig")