In [None]:
# Mount Google Drive into the Colab filesystem at /content/drive so the
# CSV files referenced by the later cells can be read and written.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import pandas as pd
import numpy as np

# Read the 2020-2023 CSV and discard the `tm_dt` column in one chained
# expression, so re-running the cell always rebuilds `df` from the file.
df = (
    pd.read_csv('/content/drive/MyDrive/Epoch 공모전/2020_2023_최종데이터.csv')
    .drop(columns=['tm_dt'])
)

In [None]:
# One-hot encode the categorical address columns.
categorical_cols = ["address_city", "address_gu", "sub_address"]
df = pd.get_dummies(df, columns=categorical_cols)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Separate the target (`call_count`) from the feature columns.
y = df["call_count"]
X = df.drop(columns=["call_count"])

# Hold out 20% of the rows as a test split; fixed seed for a repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize features: statistics are learned from the training split only
# and then applied to both splits.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

In [None]:
# Build, compile and train a small fully-connected regression network that
# predicts `call_count` from the standardized features.
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input
from tensorflow.keras.regularizers import l2

# Seed the Python, NumPy and TensorFlow RNGs so weight initialization, dropout
# and batch shuffling are repeatable across runs (the data split is already
# seeded with random_state=42).
tf.keras.utils.set_random_seed(42)

# Model definition. The input shape is declared with an explicit `Input`
# layer; passing `input_shape=` to the first Dense layer triggers a Keras
# UserWarning recommending `Input(shape)` instead.
model = Sequential([
    Input(shape=(X_train_scaled.shape[1],)),
    Dense(128, activation='relu'),
    Dropout(0.2),
    Dense(64, activation='relu'),
    Dropout(0.2),
    Dense(1)  # single linear output for regression
])

# Compile: optimize mean squared error, also report mean absolute error.
model.compile(optimizer='adam', loss='mse', metrics=['mae'])

# Callback: stop when validation loss has not improved for 5 epochs and
# roll back to the weights of the best epoch.
from tensorflow.keras.callbacks import EarlyStopping

early_stop = EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True
)

# Train for at most 100 epochs; 20% of the training data is held out by Keras
# as the validation set monitored by the early-stopping callback.
history = model.fit(
    X_train_scaled, y_train,
    epochs=100,
    validation_split=0.2,
    callbacks=[early_stop],
    verbose=1
)


Epoch 1/100


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


[1m859/859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss: 5.3607 - mae: 1.0955 - val_loss: 2.1980 - val_mae: 0.8474
Epoch 2/100
[1m859/859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2.9652 - mae: 0.9004 - val_loss: 1.9607 - val_mae: 0.8462
Epoch 3/100
[1m859/859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2.5346 - mae: 0.8731 - val_loss: 1.9276 - val_mae: 0.8107
Epoch 4/100
[1m859/859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 2.5278 - mae: 0.8567 - val_loss: 1.9589 - val_mae: 0.8120
Epoch 5/100
[1m859/859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 3ms/step - loss: 2.2554 - mae: 0.8434 - val_loss: 1.9652 - val_mae: 0.8226
Epoch 6/100
[1m859/859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 2ms/step - loss: 1.8224 - mae: 0.8282 - val_loss: 1.9221 - val_mae: 0.8018
Epoch 7/100
[1m859/859[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 2ms/step - loss:

In [None]:
# Predict `call_count` for the 2024 data with the trained model and store the
# predictions back into `valid_df`.

# Load the 2024 data.
valid_df = pd.read_csv("/content/drive/MyDrive/Epoch 공모전/2024_최종데이터.csv")

# Drop `tm_dt`, which was also removed from the training frame.
valid_df = valid_df.drop(columns=['tm_dt'])

# Feature matrix: everything except the prediction target.
X_valid = valid_df.drop(columns=['call_count'])

# One-hot encode exactly the columns that were encoded for training.
# Without an explicit column list, get_dummies only encodes object/category
# dtypes, so a categorical column parsed as numeric here would be left
# unencoded and its dummy columns silently filled with zeros by the reindex.
X_valid = pd.get_dummies(
    X_valid, columns=["address_city", "address_gu", "sub_address"]
)

# Align with the training feature layout: add dummy columns missing here
# (filled with 0), drop unseen ones, and match the column order.
X_valid = X_valid.reindex(columns=X_train.columns, fill_value=0)

# Standardize with the scaler fitted on the training split.
X_valid_scaled = scaler.transform(X_valid)

# Predict, round to whole calls, and clip at zero: the linear output layer can
# produce negative values, which are not valid counts.
y_valid_pred = model.predict(X_valid_scaled)
y_valid_pred = np.clip(np.round(y_valid_pred), 0, None).astype(int).flatten()

# Overwrite the target column with the predictions.
valid_df['call_count'] = y_valid_pred

# Show the first few predictions.
from IPython.display import display
display(valid_df[['call_count']].head(10))

In [None]:
# Read test_call119.csv (EUC-KR encoded) into `result_df`.
# NOTE(review): presumably the submission template whose rows correspond
# one-to-one with the 2024 data — confirm.
result_df = pd.read_csv('/content/drive/MyDrive/Epoch 공모전/test_call119.csv', encoding='euc-kr')

In [None]:
# Copy the predictions into the result frame by position. Assigning a Series
# aligns on index labels, so a row-count mismatch would silently produce NaN
# or drop predictions; fail loudly instead.
if len(result_df) != len(valid_df):
    raise ValueError(
        f"Row count mismatch: result_df has {len(result_df)} rows, "
        f"valid_df has {len(valid_df)} rows"
    )
result_df['call_count'] = valid_df['call_count'].to_numpy()

In [None]:
# Preview the first rows of the result frame after filling in `call_count`.
result_df.head()

In [None]:
# Write the result frame to Drive as an EUC-KR encoded CSV, without the
# DataFrame index column.
result_df.to_csv('/content/drive/MyDrive/Epoch 공모전/250028.csv', index=False, encoding='euc-kr')