In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

In [2]:
df = pd.read_csv('../data/06.05위험운전가속.csv')

In [3]:
acc0 = df.query('위험운전가속 == 0')

In [4]:
X_train, X_test, y_train, y_test = train_test_split(acc0, acc0, random_state=42, shuffle=True, test_size=0.0056)

In [5]:
df = df.query('위험운전가속 != 0')

In [6]:
df = pd.concat([df, X_test], axis=0)
df

Unnamed: 0,날짜,번호판,일일주행거리,누적주행거리,정보발생일시,차량속도,RPM,브레이크,GPS_X,GPS_Y,...,월,일,시,분,초,가속도max,가속도min,가속도max2,가속도,위험운전가속
709,20221201,부산70자1854,1,66445,22120105190400,11,1137,0,129.147649,35.225609,...,12,1,5,19,4,7.959340,5.401764,10.516915,6,1
829,20221201,부산70자1854,2,66446,22120105210400,13,1322,0,129.138040,35.219831,...,12,1,5,21,4,7.758123,5.290563,10.225683,6,1
833,20221201,부산70자1854,2,66446,22120105210800,26,1460,0,129.137892,35.219834,...,12,1,5,21,8,6.601077,4.589684,8.612471,5,1
1026,20221201,부산70자1854,4,66448,22120105242100,9,991,0,129.125022,35.219140,...,12,1,5,24,21,8.167955,5.513215,10.822696,6,1
1083,20221201,부산70자1854,4,66448,22120105251800,8,812,0,129.124679,35.217222,...,12,1,5,25,18,8.275195,5.568950,10.981439,6,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1384279,20221230,부산70자1854,21,72162,22123009190600,24,562,1,129.166023,35.162066,...,12,30,9,19,6,6.764248,4.693844,8.834652,-1,0
3490280,20221213,부산70자1894,19,3091803,22121309051800,1,578,1,129.174542,35.168420,...,12,13,9,5,18,9.087118,5.955880,12.218356,-2,0
1003278,20221222,부산70자1854,165,70543,22122220094000,44,915,0,129.132879,35.172475,...,12,22,20,9,40,5.317101,3.745206,6.888995,0,0
4394920,20221230,부산70자1894,120,3095428,22123016350300,31,1736,0,129.140638,35.167250,...,12,30,16,35,3,6.212917,4.337265,8.088569,2,0


In [7]:
X = df[['시', '분', 'GPS_X', 'GPS_Y']]
y = df['위험운전가속']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, shuffle=True, test_size=0.3)

In [9]:
from tensorflow.keras.utils import to_categorical

y_train_categorical = to_categorical(y_train, num_classes=4)
y_test_categorical = to_categorical(y_test, num_classes=4)

In [10]:
features = {
    '시': X_train['시'].values,
    '분': X_train['분'].values,
    'GPS_X': X_train['GPS_X'].values,
    'GPS_Y': X_train['GPS_Y'].values,
}
labels = y_train_categorical

In [11]:
predict_features = {
    '시': X_train['시'].values,
    '분': X_train['분'].values,
    'GPS_X': X_train['GPS_X'].values,
    'GPS_Y': X_train['GPS_Y'].values,
}

In [12]:
features

{'시': array([11, 20, 18, ..., 19, 23, 21], dtype=int64),
 '분': array([56,  3,  5, ..., 35,  5, 33], dtype=int64),
 'GPS_X': array([129.124649, 129.167668, 129.16751 , ..., 129.177271, 129.152031,
        129.178835]),
 'GPS_Y': array([35.213512, 35.162029, 35.174219, ..., 35.170273, 35.160333,
        35.176593])}

In [13]:
colname = ['시', '분', 'GPS_X', 'GPS_Y']
unique_values = {}
for col in colname:
    unique_values[col] = X[col].unique()
    print(f"{col}: {len(unique_values[col])}")

시: 22
분: 60
GPS_X: 36737
GPS_Y: 32869


In [18]:
df['GPS_Y'].mean()

35.181028702785625

In [20]:

inputs = {
    '시': tf.keras.Input(shape=(), dtype='int64'),
    '분': tf.keras.Input(shape=(), dtype='int64'),
    'GPS_X': tf.keras.Input(shape=(), dtype='float64'),
    'GPS_Y': tf.keras.Input(shape=(), dtype='float64')
}

hour_output = tf.keras.layers.Normalization(axis=None, mean=243, variance=22)(inputs['시'])
minute_output = tf.keras.layers.Normalization(axis=None, mean=1830, variance=60)(inputs['분'])
GPS_X_output = tf.keras.layers.Normalization(axis=None, mean=129.14, variance=36237)(inputs['GPS_X'])
GPS_Y_output = tf.keras.layers.Normalization(axis=None, mean=35.18, variance=32869)(inputs['GPS_Y'])

outputs = {
    '시': hour_output,
    '분': minute_output,
    'GPS_X': GPS_X_output,
    'GPS_Y': GPS_Y_output,
}

preprocessing_model = tf.keras.Model(inputs, outputs)
preprocessing_model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_19 (InputLayer)          [(None,)]            0           []                               
                                                                                                  
 input_20 (InputLayer)          [(None,)]            0           []                               
                                                                                                  
 input_18 (InputLayer)          [(None,)]            0           []                               
                                                                                                  
 input_17 (InputLayer)          [(None,)]            0           []                               
                                                                                              

In [16]:
# Apply the preprocessing in tf.data.Dataset.map.
dataset = tf.data.Dataset.from_tensor_slices((features, labels)).batch(1)
dataset = dataset.map(lambda x, y: (preprocessing_model(x), y),
                      num_parallel_calls=tf.data.AUTOTUNE)
# Display a preprocessed input sample.
next(dataset.take(1).as_numpy_iterator())

({'시': array([11], dtype=int64),
  '분': array([19], dtype=int64),
  'GPS_X': array([2011], dtype=int64),
  'GPS_Y': array([20918], dtype=int64)},
 array([[0., 1., 0., 0.]], dtype=float32))

In [29]:
inputs = {
    '시': tf.keras.Input(shape=(), dtype='float64'),
    '분': tf.keras.Input(shape=(), dtype='float64'),
    'GPS_X': tf.keras.Input(shape=(), dtype='float64'),
    'GPS_Y': tf.keras.Input(shape=(), dtype='float64')
}

outputs = tf.keras.layers.Concatenate()([
    tf.expand_dims(inputs['시'], 1),
    tf.expand_dims(inputs['분'], 2),
    tf.expand_dims(inputs['GPS_X'], 3),
    tf.expand_dims(inputs['GPS_Y'], 4),
])

from keras.models import Sequential, Model
from keras.layers import Input, Embedding, Dense, concatenate, Flatten, Conv1D, GlobalMaxPooling1D, LSTM

outputs = tf.expand_dims(outputs, axis=2)

# CNN Layer
cnn_output = Conv1D(filters=128, kernel_size=1, activation='relu')(outputs)  # kernel_size 수정
cnn_output = GlobalMaxPooling1D()(cnn_output)

# LSTM Layer
lstm_output = LSTM(128)(outputs)

# Dense Layer
dense_input = concatenate([cnn_output, lstm_output], axis=2)
dense_output = Dense(64, activation='relu', name='Dense_output')(dense_input)

outputs = Dense(4, activation='softmax', name="Prediction_output")(dense_output)

# outputs = tf.keras.layers.Dense(1)(outputs)
training_model = tf.keras.Model(inputs, outputs)
training_model.summary()

ValueError: Exception encountered when calling layer "tf.expand_dims_21" (type TFOpLambda).

dim 2 not in the interval [-2, 1]. for '{{node tf.expand_dims_21/ExpandDims}} = ExpandDims[T=DT_DOUBLE, Tdim=DT_INT32](Placeholder, tf.expand_dims_21/ExpandDims/dim)' with input shapes: [?], [] and with computed input tensors: input[1] = <2>.

Call arguments received by layer "tf.expand_dims_21" (type TFOpLambda):
  • input=tf.Tensor(shape=(None,), dtype=float64)
  • axis=2
  • name=None

In [20]:
outputs.shape

TensorShape([None, 4])

In [21]:
training_model.compile(loss=tf.keras.losses.CategoricalCrossentropy(from_logits=False),
                       optimizer=tf.keras.optimizers.Adam(),
                       metrics=['accuracy'])

training_model.fit(dataset, epochs=10, batch_size=32)

Epoch 1/10