In [1]:
   import tensorflow as tf
   # Report the TensorFlow version in use and whether it lists any GPU device.
   print(f"TF version: {tf.__version__}")
   gpus = tf.config.list_physical_devices('GPU')
   if not gpus:
       # Empty list: TensorFlow reported no physical GPU devices.
       print("No GPUs available in your system.")
   else:
       print("GPU is available")
       print("Num GPUs Available: ", len(gpus))

2025-06-11 00:36:21.621042: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-06-11 00:36:21.675513: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1749569781.706570   20772 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1749569781.716500   20772 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1749569781.764497   20772 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking 

TF version: 2.19.0
GPU is available
Num GPUs Available:  1


In [26]:
import pandas as pd
import numpy as np
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.callbacks import EarlyStopping

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder, MinMaxScaler
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

# Load the delivery records and discard every row that has a missing value.
df = pd.read_csv('~/Food_Delivery_Times.csv').dropna()

# Target is the delivery time; features are everything except the order
# identifier and the target column itself.
y = df['Delivery_Time_min']
X = df.drop(columns=['Order_ID', 'Delivery_Time_min'])

# Hold out 20% of the rows for testing; the fixed random_state keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)
print(f"훈련 데이터 수: {len(X_train)}개, 테스트 데이터 수: {len(X_test)}개")

# One-hot encode categorical columns (categories unseen during fit are ignored
# at transform time) and standardise the numeric ones.
categorical_transformer = OneHotEncoder(handle_unknown='ignore')
numeric_transformer = StandardScaler()

# Columns that are one-hot encoded; each transformer is named after its column.
onehot_columns = ['Traffic_Level', 'Time_of_Day', 'Vehicle_Type', 'Weather']

# (transformer name, source column) pairs for the standardised numeric columns.
scaled_columns = [
    ('Distance_km', 'Distance_km'),
    ('Preparation', 'Preparation_Time_min'),
    ('Courier', 'Courier_Experience_yrs'),
]

# Categorical entries come first, then numeric ones, so the output column
# order follows that sequence; columns not listed are passed through untouched.
preprocessor = ColumnTransformer(
    transformers=[
        *[(column, categorical_transformer, [column]) for column in onehot_columns],
        *[(label, numeric_transformer, [column]) for label, column in scaled_columns],
    ],
    remainder='passthrough',
)


# Seed Python's `random`, NumPy and the Keras backend in one call so that the
# random operations used later during training (e.g. weight initialisation)
# start from a fixed state. Without this, every run of the notebook reports a
# different MAE even though the train/test split itself is seeded.
keras.utils.set_random_seed(42)

# Small fully-connected regressor: 32 -> 16 -> 8 -> 1 units, with dropout
# after the first two hidden layers.
# NOTE(review): every hidden layer uses a linear activation, so at inference
# time (dropout disabled) the whole stack composes to a single affine map of
# the inputs. Consider a non-linear activation such as 'relu' if additional
# model capacity is actually wanted.
model = Sequential()

model.add(Dense(32, activation='linear'))
model.add(Dropout(0.2))
model.add(Dense(16, activation='linear'))
model.add(Dropout(0.2))
model.add(Dense(8, activation='linear'))
model.add(Dense(1))  # single output unit: predicted delivery time

# The loss is MAE and the same quantity is also tracked as a metric, so the
# training log shows identical `loss` and `mae` values.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001), loss='mean_absolute_error', metrics=['mae'])


# Chain the column preprocessing with the Keras network so raw feature frames
# can be handed straight to fit()/predict().
pipeline = Pipeline(steps=[('preprocessor', preprocessor),
                           ('regressor', model)])

# Stop once val_loss has not improved for 40 epochs and roll the weights back
# to the best epoch seen.
early_stopping = EarlyStopping(monitor='val_loss', patience=40, restore_best_weights=True)

# Keyword arguments prefixed with `regressor__` are forwarded by the pipeline
# to the fit() call of the 'regressor' step, i.e. to the Keras model.
pipeline.fit(X_train, y_train,
             regressor__epochs=2000,
             regressor__batch_size=64,
             regressor__validation_split=0.1,
             regressor__callbacks=[early_stopping],
             regressor__verbose=1)

# Pipeline.fit() returns the pipeline itself, not the Keras training record,
# so binding its return value to `history` would not give access to the loss
# curves. Keras keeps the History object of the most recent fit on the model.
history = model.history


model.summary()

# The bare Keras model expects the transformed feature matrix, so push the
# held-out rows through the pipeline's preprocessing step first.
X_test_processed = pipeline['preprocessor'].transform(X_test)

# The result is unpacked as the loss followed by the tracked 'mae' metric.
loss, mae = model.evaluate(
    X_test_processed,
    y_test,
    verbose=0,
)
print(f"\n{loss}")
print(f"\n테스트 데이터에 대한 최종 Mean Absolute Error (MAE): {mae:,.2f}분")
print(f"-> 모델의 예측치가 실제값과 평균적으로 {mae:,.2f}분 정도 차이남을 의미합니다.")


# Predict straight from the raw test frame; the pipeline runs the
# preprocessing step before calling the model.
predictions = pipeline.predict(X_test)

# Put actual and predicted times side by side as 1-D columns, then add the
# signed error (actual minus predicted).
results_df = pd.DataFrame({
    '실제 시간': y_test.to_numpy().ravel(),
    '예측 시간': predictions.ravel(),
})
results_df['차이'] = results_df['실제 시간'].sub(results_df['예측 시간'])

print("\n--- 실제 시간과 예측 시간 비교 (상위 5개) ---")
print(results_df.head())


훈련 데이터 수: 706개, 테스트 데이터 수: 177개
Epoch 1/2000
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 122ms/step - loss: 57.5885 - mae: 57.5885 - val_loss: 57.4229 - val_mae: 57.4229
Epoch 2/2000
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 56.0050 - mae: 56.0050 - val_loss: 57.3129 - val_mae: 57.3129
Epoch 3/2000
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 56.0626 - mae: 56.0626 - val_loss: 57.2017 - val_mae: 57.2017
Epoch 4/2000
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 55.6174 - mae: 55.6174 - val_loss: 57.0902 - val_mae: 57.0902
Epoch 5/2000
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 56.3882 - mae: 56.3882 - val_loss: 56.9776 - val_mae: 56.9776
Epoch 6/2000
[1m10/10[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 54.9879 - mae: 54.9879 - val_loss: 56.8629 - val_mae: 56.8629
Epoch 7/2000
[1m10/10[0m [3


4.97659158706665

테스트 데이터에 대한 최종 Mean Absolute Error (MAE): 4.98분
-> 모델의 예측치가 실제값과 평균적으로 4.98분 정도 차이남을 의미합니다.
[1m6/6[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step

--- 실제 시간과 예측 시간 비교 (상위 5개) ---
   실제 시간      예측 시간        차이
0     36  34.336887  1.663113
1     27  27.064228 -0.064228
2     49  45.166656  3.833344
3     89  84.603966  4.396034
4     54  51.095783  2.904217


