In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.layers import LSTM, Dense, Input
from tensorflow.keras.models import Sequential
# Relative paths to the three Line 1 ("1호선") CSV inputs:
# boarding/alighting counts, congestion, and the train timetable.
승하차_파일 = "../../data/결과/승하차/통합/1호선_승하차인원_통합.csv"
혼잡도_파일 = "../../data/결과/혼잡도/통합/1호선_혼잡도_통합.csv"
시간표_파일 = "../../data/결과/운행시간표/통합/1호선_열차운행시각표.csv"

# All three files are read with the same legacy Korean encoding.
CSV_ENCODING = "euc-kr"

승하차_df, 혼잡도_df, 시간표_df = (
    pd.read_csv(csv_path, encoding=CSV_ENCODING)
    for csv_path in (승하차_파일, 혼잡도_파일, 시간표_파일)
)

In [17]:
# Analysis target configuration.
target_station = "신도림역"
target_daytype = "평일"
target_direction = "상행"  # '상행', '하행', or whatever value the data uses
# Per-time-slot columns are recognised by the ":" in their header (e.g. "5:00").
time_cols = [col for col in 승하차_df.columns if ":" in col]


def _select_rows(df, station, daytype, kind):
    """Return the rows of `df` whose 역명 / 평일주말 / 구분 columns equal the given values."""
    return df[
        (df["역명"] == station) &
        (df["평일주말"] == daytype) &
        (df["구분"] == kind)
    ]


# The filters do not depend on the time column, so they are evaluated once
# here instead of once per loop iteration.
승차 = _select_rows(승하차_df, target_station, target_daytype, "승차")
하차 = _select_rows(승하차_df, target_station, target_daytype, "하차")
혼잡도 = _select_rows(혼잡도_df, target_station, target_daytype, target_direction)

# An empty selection falls back to 0 / NaN below. Make that visible, because
# a frame made only of fallback values gives the model nothing to learn from.
for _label, _matched in (("승차", 승차), ("하차", 하차), (target_direction, 혼잡도)):
    if len(_matched) == 0:
        print(
            f"[warning] no rows matched 역명={target_station!r}, "
            f"평일주말={target_daytype!r}, 구분={_label!r}; using fallback values"
        )

result_rows = []
for tcol in time_cols:
    # Use the first matching row; fall back to 0 when nothing matched.
    승차값 = int(승차[tcol].values[0]) if len(승차) > 0 else 0
    하차값 = int(하차[tcol].values[0]) if len(하차) > 0 else 0
    # The congestion table may not carry every time column of the ridership
    # table; treat a missing column like a missing row (NaN).
    if len(혼잡도) > 0 and tcol in 혼잡도.columns:
        혼잡도값 = float(혼잡도[tcol].values[0])
    else:
        혼잡도값 = np.nan
    result_rows.append({
        "시간": tcol,
        "승차인원": 승차값,
        "하차인원": 하차값,
        "혼잡도": 혼잡도값,
    })

final_df = pd.DataFrame(result_rows)
# Hour component of the "H:MM" label, used as the sort key.
final_df["시간_int"] = final_df["시간"].apply(lambda x: int(x.split(":")[0]))
# NOTE(review): sorting by hour places "0:00" before "5:00". If 0:00 is the
# last slot of the service day, the sequence order fed to the model is off by
# one slot - confirm against the data.
final_df = final_df.sort_values("시간_int").reset_index(drop=True)
print(final_df.shape)
print(final_df.head())


(20, 5)
     시간  승차인원  하차인원  혼잡도  시간_int
0  0:00     0     0  NaN       0
1  5:00     0     0  NaN       5
2  6:00     0     0  NaN       6
3  7:00     0     0  NaN       7
4  8:00     0     0  NaN       8


In [9]:
from sklearn.preprocessing import MinMaxScaler

# Model inputs: boarding count, alighting count and congestion per time slot,
# with missing values replaced by 0.
feature_cols = ["승차인원", "하차인원", "혼잡도"]
features = final_df[feature_cols].fillna(0).values
# Value to predict. NOTE(review): the target is left in raw units while the
# inputs are min-max scaled - confirm this is intended.
target = final_df["승차인원"].values

# Fit the scaler on the full feature matrix, then scale that same matrix.
scaler = MinMaxScaler()
scaler.fit(features)
features_scaled = scaler.transform(features)


In [10]:
def create_sequences(data, target, seq_length=4):
    """Cut `data` into overlapping windows and pair each with the next target.

    Window k consists of rows k .. k + seq_length - 1 of `data`; its label is
    `target[k + seq_length]`, i.e. the target of the row right after the window.

    Args:
        data: Row-indexable sequence (e.g. a 2-D array) of feature rows.
        target: Sequence of target values indexed like `data`.
        seq_length: Number of consecutive rows per window.

    Returns:
        Tuple `(windows, labels)` of numpy arrays, each holding
        `len(data) - seq_length` entries (both empty when there are not
        enough rows for a single window).
    """
    label_positions = range(seq_length, len(data))
    windows = [data[end - seq_length:end] for end in label_positions]
    labels = [target[end] for end in label_positions]
    return np.array(windows), np.array(labels)

# Build windows of 4 consecutive scaled feature rows; each window's label is
# the (unscaled) target of the row that follows it.
X, y = create_sequences(features_scaled, target, seq_length=4)


In [11]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense

# Small LSTM regressor: one 16-unit LSTM layer, an 8-unit ReLU layer and a
# single linear output.
model = Sequential([
    # Declare the (timesteps, features) input shape with an explicit Input
    # layer; passing `input_shape=` to the first layer of a Sequential model
    # makes recent Keras versions emit a UserWarning.
    Input(shape=(X.shape[1], X.shape[2])),
    LSTM(16),
    Dense(8, activation="relu"),
    Dense(1)
])
# Mean squared error on the raw target values, optimised with Adam.
model.compile(loss="mse", optimizer="adam")
model.fit(X, y, epochs=60, batch_size=4, verbose=1)


Epoch 1/60


  super().__init__(**kwargs)


[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - loss: 9773276.0000   
Epoch 2/60
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 9211260.0000 
Epoch 3/60
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 7778591.5000 
Epoch 4/60
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 9614016.0000  
Epoch 5/60
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 9920816.0000  
Epoch 6/60
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 9635164.0000 
Epoch 7/60
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 9330652.0000  
Epoch 8/60
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - loss: 8788386.0000 
Epoch 9/60
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - loss: 9068557.0000  
Epoch 10/60
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s

<keras.src.callbacks.history.History at 0x22e3dcb3af0>

In [16]:
def predict_station_passenger(model, scaler, df, input_time, seq_len=4):
    """Predict the boarding count for the time slot named by `input_time`.

    The `seq_len` rows that precede the requested slot in `df` are scaled
    with `scaler` and passed to `model` as one input window.

    Args:
        model: Object with a `predict(x)` method that returns an array whose
            `[0, 0]` element is the prediction.
        scaler: Object with a `transform(x)` method for the feature rows.
        df: DataFrame with the columns "시간_int", "승차인원", "하차인원" and
            "혼잡도", ordered by time slot.
        input_time: Time label such as "17:00"; only the hour part is used.
        seq_len: Number of preceding rows in the input window.

    Returns:
        A message string: the prediction on success, or a "cannot predict"
        message when the hour is not in `df` or fewer than `seq_len` rows
        precede it.
    """
    시간_int = int(input_time.split(":")[0])
    # Work with row positions rather than index labels: the window is sliced
    # from a plain numpy array, so labels are only correct for a 0..n-1 index.
    positions = np.flatnonzero((df["시간_int"] == 시간_int).to_numpy())
    if len(positions) == 0 or positions[0] < seq_len:
        return "해당 시각은 예측 불가(데이터 부족)"
    pos = int(positions[0])
    feature_rows = df[["승차인원", "하차인원", "혼잡도"]].fillna(0).values
    window = feature_rows[pos - seq_len: pos]
    # -1 lets the feature count follow the selected columns instead of
    # hard-coding it.
    x_input = scaler.transform(window).reshape(1, seq_len, -1)
    pred = model.predict(x_input)
    # Round to the nearest whole passenger (int() alone would truncate) and
    # never report a negative head count.
    예상값 = max(0, int(round(float(pred[0, 0]))))
    return f"{input_time} 기준 예상 승차인원: {예상값}명"

# Usage example: predict the boarding count for the 17:00 slot.
print(predict_station_passenger(model, scaler, final_df, "17:00"))


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23ms/step
17:00 기준 예상 승차인원: 17명
