In [1]:
import pandas as pd
import tensorflow as tf
from datetime import datetime

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

from bayes_opt import BayesianOptimization
from bayes_opt import UtilityFunction

import matplotlib.pyplot as plt
from freeman.plt_setting import plt_settings
from freeman.evaluation import regression_evaluation, f_importances, plot_actual_pred
from freeman.aiddd.data_manager import read_data

# 한글처리 지원
plt_settings()

2023-09-17 16:21:11.005214: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
df_data = read_data('2nd pp pole-position-on-cons-1st')

In [3]:
feature_columns = df_data.select_dtypes(include=['number']).columns.tolist()
target_column = '총공사비'
feature_columns.remove(target_column)

In [4]:
df_X = df_data[feature_columns + [target_column]]
df_y = df_X.pop(target_column)

X_train, X_test, y_train, y_test = \
    train_test_split(df_X, df_y, test_size=0.1)

In [5]:
standard_scaler = StandardScaler()
X_train_scaled = standard_scaler.fit_transform(X_train)
X_test_scaled = standard_scaler.transform(X_test)

In [6]:
model_mlp = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation='relu', input_shape=(X_train.shape[1:])),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(32, activation='relu'),
    tf.keras.layers.Dense(1)
])
model_mlp.compile(
    optimizer='adam',
    loss='mean_squared_error',
    metrics=['mae']
)
model_mlp.summary()

2023-09-17 16:22:09.049929: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-09-17 16:22:09.063248: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2023-09-17 16:22:09.063438: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 128)               4224      
                                                                 
 dense_1 (Dense)             (None, 64)                8256      
                                                                 
 dense_2 (Dense)             (None, 32)                2080      
                                                                 
 dense_3 (Dense)             (None, 1)                 33        
                                                                 
Total params: 14593 (57.00 KB)
Trainable params: 14593 (57.00 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [7]:
model_mlp.fit(
    X_train_scaled, y_train, 
    epochs=200, verbose=2, validation_split=0.2
)

Epoch 1/200


2023-09-17 16:22:21.990229: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x1e373950 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2023-09-17 16:22:21.990268: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce GTX 1070, Compute Capability 6.1
2023-09-17 16:22:22.045913: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2023-09-17 16:22:22.350644: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8904
2023-09-17 16:22:22.491714: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-09-17 16:22:22.607232: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


341/341 - 3s - loss: 57255940587520.0000 - mae: 5338034.0000 - val_loss: 60902787252224.0000 - val_mae: 5457325.5000 - 3s/epoch - 10ms/step
Epoch 2/200
341/341 - 1s - loss: 49541202051072.0000 - mae: 4951820.5000 - val_loss: 42921575317504.0000 - val_mae: 4512971.0000 - 672ms/epoch - 2ms/step
Epoch 3/200
341/341 - 1s - loss: 29140216446976.0000 - mae: 3569555.2500 - val_loss: 25494638559232.0000 - val_mae: 3128543.7500 - 664ms/epoch - 2ms/step
Epoch 4/200
341/341 - 1s - loss: 20345849380864.0000 - mae: 2639769.2500 - val_loss: 20887812177920.0000 - val_mae: 2432304.7500 - 659ms/epoch - 2ms/step
Epoch 5/200
341/341 - 1s - loss: 16981911666688.0000 - mae: 2031841.6250 - val_loss: 18153365045248.0000 - val_mae: 1955061.2500 - 658ms/epoch - 2ms/step
Epoch 6/200
341/341 - 1s - loss: 15095231938560.0000 - mae: 1793065.6250 - val_loss: 16862232444928.0000 - val_mae: 1885700.8750 - 666ms/epoch - 2ms/step
Epoch 7/200
341/341 - 1s - loss: 14286011236352.0000 - mae: 1815956.5000 - val_loss: 16297

<keras.src.callbacks.History at 0x7fb1592e94b0>

In [8]:
model_mlp.evaluate(X_test_scaled, y_test, verbose=0)

[13973050097664.0, 1792704.25]

In [9]:
pred_mlp = model_mlp.predict(X_test_scaled, verbose=0)
_ = regression_evaluation(y_test, pred_mlp)

R2_SCORE: 0.527097, MAPE: 29.128152, MSE: 13973048813926.271484, RMSE: 3738054.148073, MAE: 1792704.042437
