In [1]:
# Imports

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.metrics import root_mean_squared_error, r2_score, mean_absolute_error
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import Dense, BatchNormalization, Dropout
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

from xgboost import XGBRegressor
from lightgbm import LGBMRegressor

import holidays
from datetime import date

from IPython.display import clear_output
import warnings
warnings.filterwarnings('ignore')

import gc
import random
import pickle

In [2]:
# Load the event, festival and traffic CSV exports in one pass.
# All three files are read with the cp949 codec.
df_event, df_festival, df_traffic = (
    pd.read_csv(csv_name, encoding='cp949')
    for csv_name in (
        '새만금개발청_새만금지역 공연행사_20230830.csv',
        '새만금개발청_새만금지역 축제현황_20230830.csv',
        '새만금개발청_새만금 방조제 교통량_20230831.csv',
    )
)

In [3]:
# Working copy of the traffic table with two derived columns:
#   tm            - sortable month key, year * 1000 + month (e.g. 2022001)
#   direction_b_g - True when the row departs from 부안 and arrives at 군산
df = df_traffic.assign(
    tm=lambda t: t['조사일 년'] * 1000 + t['조사월'],
    direction_b_g=lambda t: t['출발'].eq('부안') & t['도착지'].eq('군산'),
)

df.head()

Unnamed: 0,조사일 년,조사월,출발,도착지,대형 차량,소형 차량,tm,direction_b_g
0,2022,1,부안,군산,1096,32534,2022001,True
1,2022,2,부안,군산,984,29186,2022002,True
2,2022,3,부안,군산,963,27774,2022003,True
3,2022,4,부안,군산,1787,42658,2022004,True
4,2022,5,부안,군산,2210,44554,2022005,True


In [4]:
def _year_month_key(date_text):
    """Turn a Series of date strings into integer keys year * 1000 + month.

    The year is read from characters 0-3 and the month from characters 5-6,
    so the input is presumably 'YYYY-MM-DD'-style text (verify against the
    CSV). The resulting key uses the same encoding as df['tm'].
    """
    year = date_text.str[:4].astype(int)
    month = date_text.str[5:7].astype(int)
    return year * 1000 + month


# Start / end month keys for every performance event ...
df_event['tm_s'] = _year_month_key(df_event['행사시작일'])
df_event['tm_e'] = _year_month_key(df_event['행사종료일'])

# ... and for every festival.
df_festival['tm_s'] = _year_month_key(df_festival['축제시작일'])
df_festival['tm_e'] = _year_month_key(df_festival['축제종료일'])

In [5]:
# For each traffic row, count how many events / festivals have a
# [tm_s, tm_e] month range that contains the row's month key `tm`.
# The counters are reset to 0 first, so re-running the cell is safe.
for label, periods in (('event', df_event), ('festival', df_festival)):
    df[label] = 0
    for first, last in zip(periods['tm_s'], periods['tm_e']):
        # between() is inclusive on both ends by default
        df.loc[df['tm'].between(first, last), label] += 1

In [6]:
# Display the first rows of df (head() defaults to five rows).
df.head()

Unnamed: 0,조사일 년,조사월,출발,도착지,대형 차량,소형 차량,tm,direction_b_g,event,festival
0,2022,1,부안,군산,1096,32534,2022001,True,0,0
1,2022,2,부안,군산,984,29186,2022002,True,0,0
2,2022,3,부안,군산,963,27774,2022003,True,0,0
3,2022,4,부안,군산,1787,42658,2022004,True,0,0
4,2022,5,부안,군산,2210,44554,2022005,True,0,1


In [43]:
# Baseline: random forest predicting total traffic (large + small vehicles)
# from whatever columns remain in df after the drop below.
X = df.drop(columns=[
    '조사일 년', '조사월', '출발', '도착지',
    '대형 차량', '소형 차량', 'tm',
])
y = df['대형 차량'].add(df['소형 차량'])

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise with statistics learned from the training split only.
scaler = StandardScaler().fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

model = RandomForestRegressor(
    n_estimators=100, max_depth=13, min_samples_leaf=5, random_state=42,
).fit(X_train, y_train)

y_pred = model.predict(X_test)

# (MAE, R^2) on the held-out split
mean_absolute_error(y_test, y_pred), r2_score(y_test, y_pred)

(26118.208526393042, 0.4448915407191928)

In [10]:
# Neural-network regressor for total traffic (small + large vehicles),
# trained on the same features and train/test split as the other models.

# Seed Python's `random`, NumPy and TensorFlow in one call so weight
# initialisation, dropout masks and batch shuffling are repeatable across
# runs. (Some GPU kernels may still be non-deterministic.)
keras.utils.set_random_seed(42)

X = df.drop(columns=['조사일 년', '조사월', '출발', '도착지', '대형 차량', '소형 차량', 'tm'])
y = df['소형 차량'] + df['대형 차량']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardise with statistics learned from the training split only.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

model = Sequential([
    # Declare the input explicitly; passing `input_shape=` to the first
    # Dense layer is deprecated in Keras 3.
    keras.Input(shape=(X_train.shape[1],)),

    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(64, activation='relu'),
    BatchNormalization(),
    Dropout(0.3),

    Dense(32, activation='relu'),

    Dense(1)  # regression output
])

# Stop once val_loss has not improved for 10 epochs and roll back to the
# best weights seen so far.
es = EarlyStopping(monitor='val_loss', min_delta=0, patience=10, verbose=1, restore_best_weights=True)

# Compile
model.compile(optimizer=Adam(learning_rate=0.01), loss='mae', metrics=['mae'])

# Train (the last 20% of the training split is held out for validation)
history = model.fit(X_train, y_train, epochs=1000, batch_size=8, verbose=1, validation_split=0.2, callbacks=[es])

# predict() returns an array of shape (n_samples, 1)
y_pred = model.predict(X_test)

# (MAE, R^2) on the held-out split
mean_absolute_error(y_test, y_pred), r2_score(y_test, y_pred)

Epoch 1/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 117ms/step - loss: 69468.2812 - mae: 69468.2812 - val_loss: 66281.9141 - val_mae: 66281.9141
Epoch 2/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 62ms/step - loss: 69232.2266 - mae: 69232.2266 - val_loss: 66281.0859 - val_mae: 66281.0859
Epoch 3/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - loss: 80049.2656 - mae: 80049.2656 - val_loss: 66279.7500 - val_mae: 66279.7500
Epoch 4/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - loss: 80307.0469 - mae: 80307.0469 - val_loss: 66277.5312 - val_mae: 66277.5312
Epoch 5/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 61ms/step - loss: 80363.1328 - mae: 80363.1328 - val_loss: 66274.1484 - val_mae: 66274.1484
Epoch 6/1000
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step - loss: 67503.4219 - mae: 67503.4219 - val_loss: 66268.8906 - val_mae: 66268.89

(27491.62109375, 0.4392043948173523)