In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers


def load_data():
    """Load the two merged CSV files this notebook works from.

    Both files are read with the ``euc-kr`` encoding from fixed paths
    relative to the notebook's working directory.

    Returns:
        tuple: ``(boarding/alighting DataFrame, congestion DataFrame)``,
        in that order.
    """
    csv_paths = (
        "../../data/결과/승하차/통합/2호선_승하차인원_통합.csv",
        "../../data/결과/혼잡도/통합/2호선_혼잡도_통합.csv",
    )
    # Same reader settings for both files; unpacking keeps the return order explicit.
    ridership_df, congestion_df = (
        pd.read_csv(path, encoding="euc-kr") for path in csv_paths
    )
    return ridership_df, congestion_df

In [2]:
def build_dl_dataset(승하차, 혼잡도, 정원=2000):
    """Join boarding/alighting counts with congestion percentages into one table.

    For each row of ``승하차`` the first row of ``혼잡도`` with the same
    ``역번호``, ``평일주말`` and ``구분`` is looked up. One output record is then
    produced per time column (any ``승하차`` column whose name contains ":")
    that also exists in ``혼잡도``. Rows without a match and time columns
    missing from ``혼잡도`` are skipped.

    Args:
        승하차: DataFrame with ``역번호``, ``평일주말``, ``구분`` and time columns
            whose names start with the hour, e.g. ``"05:00"``.
        혼잡도: DataFrame with the same key columns; its time columns hold the
            congestion value that is treated as a percentage of ``정원``.
        정원: Capacity used to turn the congestion percentage into a head
            count (``target = int(congestion * 정원 / 100)``).

    Returns:
        pandas.DataFrame with columns ``역번호``, ``hour``, ``평일주말``, ``상행``,
        ``승차``, ``승하차인원``, ``혼잡도``, ``target``.
    """
    output_columns = [
        "역번호",
        "hour",
        "평일주말",
        "상행",
        "승차",
        "승하차인원",
        "혼잡도",
        "target",
    ]
    time_columns = [col for col in 승하차.columns if ":" in col]

    records = []
    for _, row in 승하차.iterrows():
        # Everything below depends only on the row, not on the time column,
        # so it is computed once per row instead of once per (row, column).
        matched = 혼잡도[
            (혼잡도["역번호"] == row["역번호"])
            & (혼잡도["평일주말"] == row["평일주말"])
            & (혼잡도["구분"] == row["구분"])
        ]
        if matched.empty:
            continue

        is_weekend = 1 if row["평일주말"] == "주말" else 0
        # NOTE(review): both flags are derived from the same "구분" value, so at
        # most one of them can be 1 for a given row — confirm which labels the
        # "구분" column actually contains.
        is_up_line = 1 if row["구분"] == "상행" else 0
        is_boarding = 1 if row["구분"] == "승차" else 0

        for col in time_columns:
            if col not in matched.columns:
                continue
            hour = int(col.split(":")[0])
            # Only the first matching congestion row is used.
            congestion = float(matched[col].values[0])
            # Target = capacity x congestion %, truncated to a whole number.
            target = int(congestion * 정원 / 100)
            records.append(
                [
                    row["역번호"],
                    hour,
                    is_weekend,
                    is_up_line,
                    is_boarding,
                    row[col],
                    congestion,
                    target,
                ]
            )
    return pd.DataFrame(records, columns=output_columns)

In [None]:
# Data preparation: load both CSVs and build the flat training table.
승하차, 혼잡도 = load_data()
df = build_dl_dataset(승하차, 혼잡도)

# Inputs (X) and target (y).
# NOTE(review): "혼잡도" is used as an input while "target" is computed from it
# in build_dl_dataset (target = int(혼잡도 * 정원 / 100)), so the target is
# recoverable from this one feature — confirm that this is intended.
X = df[["역번호", "hour", "평일주말", "상행", "승차", "승하차인원", "혼잡도"]].values
y = df["target"].values

# Scaling (standardisation) helps the network train.
from sklearn.preprocessing import StandardScaler

# Split BEFORE fitting the scaler so the held-out rows never influence the
# mean/variance used for scaling (avoids train/test leakage). The partition is
# the same as before because it depends only on the row count and random_state.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# Kept so any later cell that still reads X_scaled keeps working; it now uses
# the statistics learned from the training split only.
X_scaled = scaler.transform(X)

In [None]:
# Simple feed-forward regression network.

# Seed Python, NumPy and the backend so weight initialisation and batch
# shuffling repeat across "Restart & Run All" (the data split is already
# seeded with random_state=42).
keras.utils.set_random_seed(42)

model = keras.Sequential(
    [
        # Explicit Input layer instead of input_shape= on the first Dense
        # layer, which newer Keras versions warn about.
        keras.Input(shape=(X_train.shape[1],)),
        layers.Dense(128, activation="relu"),
        layers.Dense(64, activation="relu"),
        layers.Dense(32, activation="relu"),
        layers.Dense(1),  # single linear output: predicted head count
    ]
)
model.compile(optimizer="adam", loss="mse", metrics=["mae"])

# validation_split=0.1 holds out the last 10% of X_train (taken before
# shuffling) for per-epoch validation metrics.
history = model.fit(
    X_train, y_train, validation_split=0.1, epochs=40, batch_size=64, verbose=2
)

In [None]:
def dl_predict(역번호, hour, 평일주말, 상행, 승차, 승하차인원, 혼잡도):
    """Predict a head count for a single observation.

    The seven arguments are packed, in signature order, into a one-row array,
    transformed with the notebook-level ``scaler`` and passed to the
    notebook-level ``model``.

    Returns:
        int: the first predicted value, truncated toward zero by ``int()``.
    """
    feature_values = [역번호, hour, 평일주말, 상행, 승차, 승하차인원, 혼잡도]
    # Wrap in an outer list so the array has shape (1, 7): one sample.
    single_row = np.array([feature_values])
    prediction = model.predict(scaler.transform(single_row))
    return int(prediction[0][0])

In [None]:
def user_predict():
    """Prompt for the seven model inputs, run ``dl_predict`` and print the result.

    Each answer is converted as soon as it is read, so a malformed entry
    raises ``ValueError`` before the next prompt is shown.
    """
    # (prompt text, converter) pairs, in the positional order dl_predict expects.
    prompts = (
        ("역번호: ", int),
        ("시(hour): ", int),
        ("평일=0, 주말=1: ", int),
        ("상행=1, 하행=0: ", int),
        ("승차=1, 하차=0: ", int),
        ("해당 시간 승하차인원: ", float),
        ("혼잡도(%): ", float),
    )
    answers = [convert(input(message)) for message, convert in prompts]
    pred = dl_predict(*answers)
    print(f"딥러닝 예측 예상 인원: {pred} 명")

In [None]:
# Interactive demo: asks for the seven inputs via input() and prints the
# prediction, so this cell waits for user input when executed.
user_predict()