In [None]:

# ✅ LSTM 예측 결과 시각화 (Feature 4개 버전)

import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
import os

# 1) Input locations (relative paths, resolved against the current working
#    directory).
csv_path = '../data/목적별_국적별_결측치.csv'
model_path = '../model/foreign_visitors_lstm_model.h5'

# 2) Fail fast when an input file is missing. An explicit raise is used rather
#    than `assert`, because assertions are stripped when Python runs with -O.
if not os.path.exists(csv_path):
    raise FileNotFoundError(f"CSV 파일 없음: {csv_path}")
if not os.path.exists(model_path):
    raise FileNotFoundError(f"모델 없음: {model_path}")

# 3) Load the data, forward-fill missing values, then order the rows by
#    country, purpose, year and month.
df = pd.read_csv(csv_path)
# `DataFrame.ffill()` replaces the deprecated `fillna(method='ffill')`.
# NOTE(review): the fill runs before the sort, so gaps are filled in CSV row
# order and may carry values across country/purpose groups - confirm intended.
df = df.ffill()
df = df.sort_values(['country_code', 'purpose_code', 'year', 'month'])

# Only these four columns are fed to the model; the first one
# ('visitors_num') is the column later used as the prediction target.
features = ['visitors_num', 'lag_1', 'rolling_mean_3', 'is_peak']

# Scale every feature to [0, 1] (MinMaxScaler default range).
# NOTE(review): the scaler is fitted here on the loaded data; presumably this
# matches the scaling used when the model was trained - verify.
scaler = MinMaxScaler()
scaled = scaler.fit_transform(df[features])

# 4️⃣ Sliding Window
def create_sequences(data, n_steps):
    """Build sliding-window samples from a 2-D array of time-ordered rows.

    Each sample is ``n_steps`` consecutive rows of ``data``; its target is
    column 0 of the row immediately following the window.

    Args:
        data: 2-D array-like of shape ``(n_rows, n_features)``.
        n_steps: Number of consecutive rows in each input window.

    Returns:
        A tuple ``(X, y)`` where ``X`` has shape
        ``(n_rows - n_steps, n_steps, n_features)`` and ``y`` has shape
        ``(n_rows - n_steps,)``. When ``data`` has no more than ``n_steps``
        rows, correctly shaped empty arrays are returned, so downstream code
        still sees a 3-D ``X``.
    """
    data = np.asarray(data)
    n_windows = len(data) - n_steps
    if n_windows <= 0:
        # Not enough rows for a single window: keep the trailing dimensions
        # instead of collapsing to a shape-(0,) array.
        empty_X = np.empty((0, n_steps) + data.shape[1:], dtype=data.dtype)
        empty_y = np.empty((0,), dtype=data.dtype)
        return empty_X, empty_y

    X = np.stack([data[start:start + n_steps] for start in range(n_windows)])
    # Target i is column 0 of row i + n_steps; copy so y does not alias data.
    y = data[n_steps:, 0].copy()
    return X, y

# Each model input is a window of 12 consecutive rows.
n_steps = 12
X, y = create_sequences(scaled, n_steps)

# 5) Load the saved model from disk
model = load_model(model_path)

# 6) Predict for every window; the output is still in scaled units
y_pred_scaled = model.predict(X)


def _pad_to_feature_width(first_column):
    """Append zero columns so the array has one column per entry in `features`."""
    zero_block = np.zeros((len(first_column), len(features) - 1))
    return np.hstack([first_column, zero_block])


# 7) Undo the scaling on the predictions. The scaler was fitted on all of
#    `features`, so the values are padded with zero columns before
#    inverse_transform and only column 0 is kept afterwards.
#    (assumes model.predict returns a 2-D array with one column - TODO confirm)
y_pred_full = _pad_to_feature_width(y_pred_scaled)
y_pred_inv = scaler.inverse_transform(y_pred_full)[:, 0]

# 8) Undo the scaling on the true targets in the same way
y_true_full = _pad_to_feature_width(y.reshape(-1, 1))
y_true_inv = scaler.inverse_transform(y_true_full)[:, 0]

# 9) Predicted vs. actual values for the first 500 samples
fig_cmp, ax_cmp = plt.subplots(figsize=(14, 7))
ax_cmp.plot(y_true_inv[:500], label='실제값')
ax_cmp.plot(y_pred_inv[:500], label='예측값', linestyle='--')
ax_cmp.set_title('입국자수 예측 vs 실제 (샘플 500개) - Feature 4개')
ax_cmp.set_xlabel('샘플 Index')
ax_cmp.set_ylabel('입국자수')
ax_cmp.legend()
plt.show()

# 10) Residuals (actual minus predicted) for the first 500 samples
residual = y_true_inv - y_pred_inv

fig_res, ax_res = plt.subplots(figsize=(12, 5))
ax_res.plot(residual[:500])
ax_res.set_title('예측 오차(잔차) - Feature 4개')
ax_res.set_xlabel('샘플 Index')
ax_res.set_ylabel('차이(실제-예측)')
plt.show()
