In [25]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [26]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import RobustScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input
from tensorflow.keras.callbacks import EarlyStopping
from datetime import datetime, timezone
from sklearn.metrics import mean_squared_error
import tensorflow as tf
import io
import h5py
import tempfile
from datetime import datetime

from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import LSTM, Dense, Input
from sqlalchemy import create_engine, MetaData, Table, Column, Float
from sqlalchemy import Integer, String, Text, DateTime, LargeBinary
import json
from sqlalchemy import create_engine 
from sqlalchemy.exc import SQLAlchemyError
import cryptography
# Select the font family used for all matplotlib text.
# NOTE(review): presumably chosen so Korean labels render correctly; confirm
# that this font is installed on every machine that runs the notebook.
plt.rcParams['font.family'] = 'MALGUN GOTHIC'
# Draw negative tick labels with an ASCII hyphen instead of the Unicode minus
# sign, which some fonts do not provide a glyph for.
plt.rcParams['axes.unicode_minus'] = False

In [27]:
import pandas as pd
import numpy as np
from sqlalchemy import create_engine
import json

# Load the connection settings from db-config.json. The file is required:
# open() raises FileNotFoundError when it is missing.
from urllib.parse import quote_plus

path = './db-config.json'

with open(path) as f:
    config = json.load(f)

user = config['user']
password = config['password']
host = config['host']
port = config['port']
database = config['database']

# Create the SQLAlchemy engine.
# The user name and password are percent-encoded because characters such as
# '@', ':', '/' or '%' would otherwise be parsed as URL delimiters and break
# (or silently change) the connection target.
engine = create_engine(
    f"mysql+pymysql://{quote_plus(str(user))}:{quote_plus(str(password))}"
    f"@{host}:{port}/{database}?charset=utf8mb4"
)

# Read every row of the air_pollution_dataset table into a DataFrame.
query = "SELECT * FROM air_pollution_dataset"
df = pd.read_sql(query, con=engine)

In [29]:
# Keep only the timestamp, district and the two pollutant columns that the
# rest of the notebook reads; the last expression displays the result.
df = df.loc[:, ['datetime', 'region', 'pm10', 'pm25']]
df

Unnamed: 0,datetime,region,pm10,pm25
0,2017-01-01 01:00:00,강남구,63.0,48.0
1,2017-01-01 01:00:00,강동구,68.0,53.0
2,2017-01-01 01:00:00,강북구,63.0,51.0
3,2017-01-01 01:00:00,강서구,107.0,65.0
4,2017-01-01 01:00:00,관악구,75.0,57.0
...,...,...,...,...
1401619,2023-12-31 00:00:00,영등포구,40.0,34.0
1401620,2023-12-31 00:00:00,용산구,31.0,18.0
1401621,2023-12-31 00:00:00,은평구,40.0,25.0
1401622,2023-12-31 00:00:00,중구,30.0,27.0


In [30]:
from sqlalchemy import UniqueConstraint

metadata = MetaData()

# Registry of serialized models: descriptive fields plus the raw model file
# stored in `data`.
models_table = Table(
    "models", metadata,
    Column("id", Integer, primary_key=True),
    Column("name", String(100)),
    Column("type", String(100)),
    Column("version", String(50)),
    Column("description", Text),
    # Timezone-aware "now" in UTC; datetime.utcnow is deprecated and returns
    # a naive value, unlike the timestamp written by save_model_to_db.
    Column("created_at", DateTime, default=lambda: datetime.now(timezone.utc)),
    # An unsized LargeBinary becomes a 64 KiB BLOB on MySQL, which is smaller
    # than a saved Keras model. Requesting the maximum length makes MySQL pick
    # LONGBLOB when the table is created.
    Column("data", LargeBinary(length=2**32 - 1))
)


# Daily forecast rows, one per (datetime, region).
air_quality_day_pred_table = Table(
    "air_quality_day_pred", metadata,
    Column("id", Integer, primary_key=True, autoincrement=True),
    Column("datetime", DateTime),
    Column("region", String(50)),
    Column("pm10", Float),
    Column("pm25", Float),
    # INSERT ... ON DUPLICATE KEY UPDATE only updates when a PRIMARY/UNIQUE key
    # collides. Without this constraint every rerun appends duplicate rows
    # because the auto-increment id never collides.
    UniqueConstraint("datetime", "region", name="uq_air_quality_day_pred_datetime_region")
)

# create_all() only creates tables that do not exist yet; it never alters an
# existing table, so pre-existing tables need a manual ALTER TABLE to gain the
# unique key / larger blob column.
metadata.create_all(engine)

In [33]:
def create_sequences(data, window_size=48):
    """Slice a 2-D array into overlapping windows and next-step targets.

    Args:
        data: Array-like of shape (n_steps, n_features). Column 0 is the
            value to predict.
        window_size: Number of consecutive steps in each input window.

    Returns:
        Tuple ``(X, y)`` where ``X`` has shape
        ``(n_steps - window_size, window_size, n_features)`` and ``y`` has
        shape ``(n_steps - window_size,)`` holding column 0 of the step that
        immediately follows each window. When ``data`` has ``window_size``
        rows or fewer, both arrays are empty but keep those ranks.
    """
    data = np.asarray(data)
    n_samples = len(data) - window_size

    if n_samples <= 0:
        # Not enough rows for a single window: return empty arrays that still
        # have the documented number of dimensions, so callers can rely on
        # X.shape[1:] instead of receiving a bare 1-D empty array.
        empty_X = np.empty((0, window_size) + data.shape[1:], dtype=data.dtype)
        empty_y = np.empty((0,), dtype=data.dtype)
        return empty_X, empty_y

    X = np.stack([data[start:start + window_size] for start in range(n_samples)])
    # Targets are simply column 0 shifted by one window; copy so the result
    # does not alias the caller's array.
    y = data[window_size:, 0].copy()
    return X, y


In [34]:
def prepare_series(df, region, pollutant):
    """Build a gap-filled daily-mean series for one region and pollutant.

    Args:
        df: DataFrame with at least the columns ``'datetime'``, ``'region'``
            and ``pollutant``. It is not modified.
        region: Value of the ``'region'`` column to keep.
        pollutant: Name of the column to aggregate (e.g. ``'pm10'``).

    Returns:
        Series indexed by day containing the daily mean of ``pollutant``.
        Missing days are filled by cubic-spline interpolation and any value
        that is still NaN afterwards is dropped.
    """
    # Filter first, then copy and parse: only this region's rows and the two
    # needed columns are duplicated, instead of the whole frame on every call.
    region_rows = df.loc[df['region'] == region, ['datetime', pollutant]].copy()
    region_rows['datetime'] = pd.to_datetime(region_rows['datetime'])

    daily_mean = region_rows.set_index('datetime')[pollutant].resample('D').mean()
    filled = daily_mean.interpolate(method='spline', order=3)
    return filled.dropna()


In [35]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Input
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.preprocessing import RobustScaler

def train_lstm_model(series, window_size=48):
    """Fit a two-layer LSTM that predicts the next value of a univariate series.

    Args:
        series: pandas Series of observations in time order.
        window_size: Number of past steps fed to the model for each prediction.

    Returns:
        Tuple ``(model, scaler, scaled)``: the fitted Keras model, the fitted
        ``RobustScaler``, and the whole series after scaling with shape
        ``(len(series), 1)``.

    Raises:
        ValueError: If the series is too short to produce any training window.
    """
    scaler = RobustScaler()
    scaled = scaler.fit_transform(series.values.reshape(-1, 1))

    X, y = create_sequences(scaled, window_size)
    # Only the first 80% of the windows are used for fitting; the remaining
    # windows are not used inside this function.
    split = int(len(X) * 0.8)
    if split == 0:
        raise ValueError(
            f"series has {len(series)} points, which is too few to train with "
            f"window_size={window_size}"
        )
    X_train, y_train = X[:split], y[:split]

    model = Sequential([
        Input(shape=(window_size, 1)),
        LSTM(64, return_sequences=True),
        LSTM(32),
        # Linear output: RobustScaler centres on the median and divides by the
        # IQR, so scaled targets are not confined to [-1, 1]. A tanh head
        # would cap every prediction inside that interval and make pollution
        # peaks unreachable.
        Dense(1)
    ])
    model.compile(optimizer='adam', loss='mse')
    # Stop once the validation loss has not improved for 5 epochs and roll
    # back to the best weights seen.
    early_stop = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
    model.fit(X_train, y_train, epochs=50, batch_size=32, validation_split=0.1, callbacks=[early_stop], verbose=0)

    return model, scaler, scaled

In [36]:
import tempfile
from datetime import datetime, timezone

def save_model_to_db(model, region, pollutant, description="단변량 LSTM 예측"):
    """Serialize a model to HDF5 and insert it as one row of the models table.

    Args:
        model: Object exposing ``save(path)`` (a Keras model in this notebook).
        region: Region name, used in the stored model name and description.
        pollutant: Pollutant name, used in the stored model name and description.
        description: Free text appended to the stored description.

    Returns:
        None. Database errors are caught and reported with ``print`` so that a
        failed save does not abort the caller.
    """
    # Write into a throw-away directory instead of a NamedTemporaryFile with
    # delete=False: the directory (and the .h5 file in it) is removed when the
    # block exits, so no temp file is leaked per model, and the file is never
    # opened twice at the same time (which fails on Windows).
    with tempfile.TemporaryDirectory() as tmp_dir:
        model_path = f"{tmp_dir}/model.h5"
        model.save(model_path)
        with open(model_path, "rb") as model_file:
            model_binary = model_file.read()

    with engine.connect() as conn:
        try:
            insert_stmt = models_table.insert().values(
                name=f"{region}_{pollutant}_predict_model",
                type="LSTM",
                version="v2.0",
                description=f"{region} - {pollutant}: {description}",
                created_at=datetime.now(timezone.utc),
                data=model_binary
            )
            conn.execute(insert_stmt)
            conn.commit()
            print(f"[{region} - {pollutant}] 모델 저장 완료 ✅")
        except Exception as e:
            # Best effort: report and continue so one failed save does not
            # stop the remaining regions.
            print(f"[{region} - {pollutant}] 모델 저장 실패 ❌: {e}")


In [37]:
def forecast_future(model, scaler, series, scaled, days=30, window_size=48,
                    reference_date='2025-07-03'):
    """Roll the model forward ``days`` steps, feeding each prediction back in.

    Args:
        model: Object exposing ``predict(x, verbose=0)`` that returns an array
            whose ``[0][0]`` element is the next scaled value.
        scaler: Fitted scaler exposing ``inverse_transform``.
        series: Unused; kept so existing positional calls keep working.
        scaled: Scaled history; its last ``window_size`` values seed the
            forecast.
        days: Number of steps to forecast.
        window_size: Length of the input window the model expects.
        reference_date: Day treated as "today"; the first forecast row is
            dated one day later. Defaults to the date previously hard-coded
            in this function.

    Returns:
        DataFrame with columns ``'datetime'`` and ``'predicted_value'`` (in the
        original, unscaled units), one row per forecast day.
    """
    window = scaled[-window_size:].reshape(1, window_size, 1)
    future_scaled = []

    for _ in range(days):
        next_pred = model.predict(window, verbose=0)[0][0]
        future_scaled.append(next_pred)
        # Slide the window: drop the oldest step, append the new prediction.
        window = np.append(window[:, 1:, :], [[[next_pred]]], axis=1)

    if future_scaled:
        # Single-feature column vector; float64 matches what the previous
        # zero-width hstack padding produced.
        column = np.asarray(future_scaled, dtype=np.float64).reshape(-1, 1)
        future_inv = scaler.inverse_transform(column)[:, 0]
    else:
        # days <= 0: nothing to invert, return an empty forecast.
        future_inv = np.array([], dtype=np.float64)

    first_day = pd.Timestamp(reference_date) + pd.Timedelta(days=1)
    future_dates = pd.date_range(start=first_day, periods=len(future_inv))
    return pd.DataFrame({'datetime': future_dates, 'predicted_value': future_inv})


In [38]:
from sqlalchemy.dialects.mysql import insert

def save_forecast_to_db(pm10_df, pm25_df, region):
    """Merge the PM10 and PM2.5 forecasts of one region and upsert them.

    Args:
        pm10_df: DataFrame with columns ``'datetime'`` and
            ``'predicted_value'`` for PM10, or None.
        pm25_df: Same layout for PM2.5, or None.
        region: Region name written to every row.

    Returns:
        None. Nothing is written when either input is None or when the two
        forecasts share no dates. Database errors are caught and reported
        with ``print``.
    """
    if pm10_df is None or pm25_df is None:
        print(f"[{region}] 예측 결과 없음. 저장 생략.")
        return

    # Inner join on the date, then give the suffixed columns their table names.
    forecast = pm10_df.merge(pm25_df, on='datetime', suffixes=('_pm10', '_pm25')).rename(columns={
        'predicted_value_pm10': 'pm10',
        'predicted_value_pm25': 'pm25'
    })
    forecast['region'] = region

    records = forecast[['datetime', 'region', 'pm10', 'pm25']].to_dict(orient='records')
    if not records:
        # No overlapping dates: skip the database entirely rather than open a
        # transaction that writes nothing.
        print(f"[{region}] 예측 결과 없음. 저장 생략.")
        return

    with engine.connect() as conn:
        try:
            stmt = insert(air_quality_day_pred_table)
            # The UPDATE branch only fires when a PRIMARY/UNIQUE key of the
            # target table collides with the incoming row.
            upsert = stmt.on_duplicate_key_update(
                pm10=stmt.inserted.pm10,
                pm25=stmt.inserted.pm25
            )
            # One executemany call for all rows instead of one round trip per row.
            conn.execute(upsert, records)
            conn.commit()
            print(f"[{region}] 예측 {len(records)}건 저장 완료 ✅")
        except Exception as e:
            # Best effort: report and continue with the next region.
            print(f"[{region}] 예측 저장 오류 ❌: {e}")


In [39]:
# Driver: for every distinct region in df, train one model per pollutant,
# store both models, forecast with each, and store the merged forecast.
regions = df['region'].unique()

for region in regions:
    print(f"\n📍 {region} 처리 시작")

    # Daily series for each pollutant of this region.
    pm10_series = prepare_series(df, region, 'pm10')
    pm25_series = prepare_series(df, region, 'pm25')

    # (1) Train
    pm10_model, pm10_scaler, pm10_scaled = train_lstm_model(pm10_series)
    pm25_model, pm25_scaler, pm25_scaled = train_lstm_model(pm25_series)

    # (2) Save the models
    save_model_to_db(pm10_model, region, 'pm10')
    save_model_to_db(pm25_model, region, 'pm25')

    # (3) Forecast (uses the default horizon and window of forecast_future)
    pm10_pred_df = forecast_future(pm10_model, pm10_scaler, pm10_series, pm10_scaled)
    pm25_pred_df = forecast_future(pm25_model, pm25_scaler, pm25_series, pm25_scaled)

    # (4) Save the forecasts
    save_forecast_to_db(pm10_pred_df, pm25_pred_df, region)


📍 강남구 처리 시작


E0000 00:00:1751545598.250543 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.
E0000 00:00:1751545669.474099 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[강남구 - pm10] 모델 저장 완료 ✅
[강남구 - pm25] 모델 저장 완료 ✅
[강남구] 예측 30건 저장 완료 ✅

📍 강동구 처리 시작


E0000 00:00:1751545732.660038 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[강동구 - pm10] 모델 저장 완료 ✅
[강동구 - pm25] 모델 저장 완료 ✅
[강동구] 예측 30건 저장 완료 ✅

📍 강북구 처리 시작


E0000 00:00:1751545807.014897 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[강북구 - pm10] 모델 저장 완료 ✅
[강북구 - pm25] 모델 저장 완료 ✅


E0000 00:00:1751545867.599590 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[강북구] 예측 30건 저장 완료 ✅

📍 강서구 처리 시작


E0000 00:00:1751545945.808724 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[강서구 - pm10] 모델 저장 완료 ✅
[강서구 - pm25] 모델 저장 완료 ✅
[강서구] 예측 30건 저장 완료 ✅

📍 관악구 처리 시작


E0000 00:00:1751546022.048361 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[관악구 - pm10] 모델 저장 완료 ✅
[관악구 - pm25] 모델 저장 완료 ✅
[관악구] 예측 30건 저장 완료 ✅

📍 광진구 처리 시작


E0000 00:00:1751546101.687164 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[광진구 - pm10] 모델 저장 완료 ✅
[광진구 - pm25] 모델 저장 완료 ✅


E0000 00:00:1751546168.136395 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[광진구] 예측 30건 저장 완료 ✅

📍 구로구 처리 시작


E0000 00:00:1751546254.770662 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[구로구 - pm10] 모델 저장 완료 ✅
[구로구 - pm25] 모델 저장 완료 ✅
[구로구] 예측 30건 저장 완료 ✅

📍 금천구 처리 시작


E0000 00:00:1751546339.266835 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[금천구 - pm10] 모델 저장 완료 ✅
[금천구 - pm25] 모델 저장 완료 ✅


E0000 00:00:1751546417.356113 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[금천구] 예측 30건 저장 완료 ✅

📍 노원구 처리 시작


E0000 00:00:1751546500.492598 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[노원구 - pm10] 모델 저장 완료 ✅
[노원구 - pm25] 모델 저장 완료 ✅
[노원구] 예측 30건 저장 완료 ✅

📍 도봉구 처리 시작


E0000 00:00:1751546564.687706 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[도봉구 - pm10] 모델 저장 완료 ✅
[도봉구 - pm25] 모델 저장 완료 ✅
[도봉구] 예측 30건 저장 완료 ✅

📍 동대문구 처리 시작


E0000 00:00:1751546646.500529 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[동대문구 - pm10] 모델 저장 완료 ✅
[동대문구 - pm25] 모델 저장 완료 ✅


E0000 00:00:1751546709.404602 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[동대문구] 예측 30건 저장 완료 ✅

📍 동작구 처리 시작


E0000 00:00:1751546791.406457 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[동작구 - pm10] 모델 저장 완료 ✅
[동작구 - pm25] 모델 저장 완료 ✅
[동작구] 예측 30건 저장 완료 ✅

📍 마포구 처리 시작


E0000 00:00:1751546871.918393 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[마포구 - pm10] 모델 저장 완료 ✅
[마포구 - pm25] 모델 저장 완료 ✅
[마포구] 예측 30건 저장 완료 ✅

📍 서대문구 처리 시작


E0000 00:00:1751546958.658175 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[서대문구 - pm10] 모델 저장 완료 ✅
[서대문구 - pm25] 모델 저장 완료 ✅


E0000 00:00:1751547027.174823 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[서대문구] 예측 30건 저장 완료 ✅

📍 서초구 처리 시작


E0000 00:00:1751547094.042528 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[서초구 - pm10] 모델 저장 완료 ✅
[서초구 - pm25] 모델 저장 완료 ✅


E0000 00:00:1751547161.506826 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[서초구] 예측 30건 저장 완료 ✅

📍 성동구 처리 시작


E0000 00:00:1751547238.129340 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[성동구 - pm10] 모델 저장 완료 ✅
[성동구 - pm25] 모델 저장 완료 ✅


E0000 00:00:1751547300.944687 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[성동구] 예측 30건 저장 완료 ✅

📍 성북구 처리 시작


E0000 00:00:1751547368.121025 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[성북구 - pm10] 모델 저장 완료 ✅
[성북구 - pm25] 모델 저장 완료 ✅


E0000 00:00:1751547435.537767 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[성북구] 예측 30건 저장 완료 ✅

📍 송파구 처리 시작


E0000 00:00:1751547498.573107 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.
E0000 00:00:1751547580.398891 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[송파구 - pm10] 모델 저장 완료 ✅
[송파구 - pm25] 모델 저장 완료 ✅
[송파구] 예측 30건 저장 완료 ✅

📍 양천구 처리 시작


E0000 00:00:1751547677.055273 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.
E0000 00:00:1751547775.318761 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[양천구 - pm10] 모델 저장 완료 ✅
[양천구 - pm25] 모델 저장 완료 ✅
[양천구] 예측 30건 저장 완료 ✅

📍 영등포구 처리 시작


E0000 00:00:1751547887.252354 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.
E0000 00:00:1751547951.478719 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[영등포구 - pm10] 모델 저장 완료 ✅
[영등포구 - pm25] 모델 저장 완료 ✅
[영등포구] 예측 30건 저장 완료 ✅

📍 용산구 처리 시작


E0000 00:00:1751548015.667163 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.
E0000 00:00:1751548109.679926 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[용산구 - pm10] 모델 저장 완료 ✅
[용산구 - pm25] 모델 저장 완료 ✅
[용산구] 예측 30건 저장 완료 ✅

📍 은평구 처리 시작


E0000 00:00:1751548176.028221 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.
E0000 00:00:1751548252.274330 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[은평구 - pm10] 모델 저장 완료 ✅
[은평구 - pm25] 모델 저장 완료 ✅
[은평구] 예측 30건 저장 완료 ✅

📍 종로구 처리 시작


E0000 00:00:1751548338.549985 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[종로구 - pm10] 모델 저장 완료 ✅
[종로구 - pm25] 모델 저장 완료 ✅


E0000 00:00:1751548407.270317 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[종로구] 예측 30건 저장 완료 ✅

📍 중구 처리 시작


E0000 00:00:1751548477.979665 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[중구 - pm10] 모델 저장 완료 ✅
[중구 - pm25] 모델 저장 완료 ✅


E0000 00:00:1751548544.993431 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[중구] 예측 30건 저장 완료 ✅

📍 중랑구 처리 시작


E0000 00:00:1751548627.464782 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.
E0000 00:00:1751548696.406064 25706283 meta_optimizer.cc:967] PluggableGraphOptimizer failed: INVALID_ARGUMENT: Failed to deserialize the `graph_buf`.


[중랑구 - pm10] 모델 저장 완료 ✅
[중랑구 - pm25] 모델 저장 완료 ✅
[중랑구] 예측 30건 저장 완료 ✅
