#1. Mount Drive

In [None]:
from google.colab import drive
# Mount Google Drive at /gdrive so later cells can read files stored there.
drive.mount('/gdrive')

In [None]:
!ls -al '/gdrive/My Drive/data'

#2. Korean *Characters*

In [None]:
!sudo apt-get install -y fonts-nanum
!sudo fc-cache -fv
!rm ~/.cache/matplotlib -rf

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Use the NanumBarunGothic font family for matplotlib text; the plot labels
# further down are written in Korean.
plt.rc('font', family='NanumBarunGothic')

In [None]:
import pandas as pd

In [None]:
# Load the CSV stored in the "data" folder of the mounted Drive into a
# DataFrame.
file = "/gdrive/My Drive/data/data_2020_1_12.csv"
data = pd.read_csv(file)

In [None]:
data.head()

In [None]:
# Office code -> office name:
#   101 Seoul, 105 Giheung, 110 Mokcheon, 115 Daejeon, 120 Hwanggan,
#   125 Nam-Gumi, 130 Dong-Gimcheon, 135 Gyeongju, 140 Busan
# Same offices with a position figure (Busan = 0, Seoul = 406.94;
# presumably km measured from Busan -- TODO confirm the unit):
#   101 (406.94), 105 (387.19), 110 (329.91), 115 (271.94), 120 (222.38),
#   125 (167.25), 130 (192.00), 135 (68.26), 140 (0)
# Distance from Seoul = 406.94 - figure above, listed in office-code order
# (note 125 is farther from Seoul than 130):
#   0, 19.75, 77.03, 135, 184.56, 239.69, 214.94, 338.68, 406.94
# Rounded to integers, same order:
#   0, 20, 77, 135, 185, 240, 215, 339, 407
# Replace each arrival-office code with its rounded distance from Seoul.
# Codes that are not keys of the dict (101 has no entry) become NaN, because
# Series.map yields NaN for unmapped values.
data['도착영업소코드'] = data['도착영업소코드'].map({105: 20, 110: 77, 115: 135, 120: 185, 125: 240, 130: 215, 135: 339, 140: 407})

In [None]:
# The arrival-office column was overwritten with distances in the previous
# cell, so give it the matching column name.
data = data.rename(columns={"도착영업소코드": "거리"})

In [None]:
data.head()

In [None]:
# Keep only the rows whose distance value is 407.
data_destination = data.loc[data['거리'] == 407]

In [None]:
data_destination

In [None]:
# Mean of the '통행시간' column for every (집계시, 요일, 거리) combination.
group_keys = ['집계시', '요일', '거리']
data_time = data_destination.groupby(group_keys)['통행시간'].mean()

In [None]:
data_time

In [None]:
# Pivot the innermost index level ('거리') into columns; ('집계시', '요일')
# remain as the row index.
data_distance = data_time.unstack(level=-1)

In [None]:
data_destination

In [None]:
# Display a copy with NaN rows removed. The result is not assigned, so
# data_distance itself is left unchanged by this cell.
# NOTE(review): assign the result if NaN rows are meant to be excluded from
# the later steps -- confirm intent.
data_distance.dropna()

In [None]:
# Turn the index levels back into ordinary columns.
data_out = data_distance.reset_index()

In [None]:
data_out

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric
# column of data_out.
stat = data_out.describe()
print(stat)

In [None]:
# Quartiles of the values in the 407 column, used as grade thresholds.
# describe() labels its rows ('count', 'mean', 'std', 'min', '25%', '50%',
# '75%', 'max'), so look the quartiles up by label rather than by position:
# integer positions on a string-labelled Series are deprecated in pandas and
# silently break if the set of summary rows changes.
value_25 = stat.loc['25%', 407]
value_75 = stat.loc['75%', 407]
print(value_25, value_75)

In [None]:
# Add a 'Grade' column filled with 0 and display the frame.
# NOTE(review): no later cell visible here reads this column; the grades are
# collected in the separate data_grade frame instead.
data_out['Grade'] = 0
data_out

In [None]:
def get_grade(input, lower=None, upper=None):
    """Classify a value into grade 0, 1 or 2 using two thresholds.

    Args:
        input: The value to grade. (The name shadows the builtin but is kept
            so existing callers are unaffected.)
        lower: Lower threshold. Defaults to the module-level ``value_25``.
        upper: Upper threshold. Defaults to the module-level ``value_75``.

    Returns:
        0 if ``input`` is not greater than ``lower``, 2 if it is greater than
        both thresholds, otherwise 1.
    """
    if lower is None:
        lower = value_25
    if upper is None:
        upper = value_75

    # Written as "not greater than" rather than "<=" so that NaN, for which
    # every comparison is False, still falls into grade 0.
    if not input > lower:
        return 0
    return 2 if input > upper else 1

In [None]:
# Empty frame defining the (time, day, grade) columns of the training table.
data_grade = pd.DataFrame(columns=['time','day','grade'])

In [None]:
data_grade

In [None]:
# Build one (time, day, grade) record per row of data_out.
# DataFrame.append was removed in pandas 2.0 and copied the whole frame on
# every call, so the records are collected in a list and the frame is built
# once. Re-running this cell now rebuilds data_grade instead of appending
# duplicate rows to it.
grade_rows = []
for index, item in data_out.iterrows():
    print(index, item['집계시'], item['요일'])
    grade_rows.append({
        'time': item['집계시'],
        'day': item['요일'],
        'grade': get_grade(item[407]),
    })
data_grade = pd.DataFrame(grade_rows, columns=['time', 'day', 'grade'])

In [None]:
data_grade

In [None]:
# Convert the DataFrame into a list of row lists ([time, day, grade] each).
data_list = data_grade.values.tolist()

In [None]:
# Split every [time, day, grade] row into features (time, day) and an
# integer label (grade).
x_train = [[time_value, day_value] for time_value, day_value, _ in data_list]
y_train = [int(grade_value) for *_, grade_value in data_list]

In [None]:
import tensorflow as tf
# One hot encode [0, 1, 2] to [[1,0,0], [0,1,0], [0,0,1]].
# num_classes is pinned to 3 because to_categorical otherwise infers the width
# from the largest label present; if no sample had grade 2 the targets would
# be narrower than the model's 3-unit softmax output.
y_one_hot = tf.keras.utils.to_categorical(y_train, num_classes=3)

In [None]:
print(x_train[:3], y_train[:3], y_one_hot[:3])

#4. Multinomial Classification using TensorFlow

In [None]:
learning_rate = 1e-2  # step size handed to the SGD optimizer
learning_epochs = 1000  # number of epochs handed to model.fit

In [None]:
# Plain stochastic gradient descent with the configured learning rate.
sgd = tf.keras.optimizers.SGD(learning_rate=learning_rate)

# Softmax regression: a single dense layer mapping the 2 input features
# to 3 class probabilities.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(3, input_shape=(2,), activation='softmax'),
])

model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
model.summary()

In [None]:
import numpy as np

# Train on the whole dataset for learning_epochs epochs; the Python lists are
# converted to arrays for Keras. The returned History object keeps the
# per-epoch metrics read by the cells below.
history = model.fit(np.array(x_train), np.array(y_one_hot), epochs=learning_epochs)

In [None]:
# Plot the training loss recorded for each epoch by model.fit.
plt.figure(figsize=(10, 10))
plt.plot(history.history['loss'])
plt.title('비용 학습량')
plt.ylabel('비용')
plt.xlabel('학습량')
plt.show()

In [None]:
# Print a table of loss and accuracy for every 100th epoch.
header = "%20s %20s %20s" % ('학습량', '비용', '정확도')
print(header+'\n')
for step in range(learning_epochs):
    if step % 100:
        # Not a multiple of 100: nothing to report for this epoch.
        continue
    cost_val = history.history['loss'][step]
    acc_val = history.history['accuracy'][step]
    print("%20i %20.5f %20.5f" % (step, cost_val, acc_val))

In [None]:
# Colab form sliders selecting the time (0-23) and day (0-6) to look up and
# predict in the cells below.
Time = 14 #@param {type:"slider", min:0, max:23, step:1}
Day = 3 #@param {type:"slider", min:0, max:6, step:1}

In [None]:
# Show the aggregated row(s) matching the selected time and day.
time_condition = data_out['집계시'].eq(Time)
day_condition = data_out['요일'].eq(Day)
data_out.loc[time_condition & day_condition]

In [None]:
# Predict the grade for the selected (Time, Day) pair.
# The feature list is deliberately not called `input`: a global of that name
# would shadow the builtin input() for the rest of the notebook session.
features = [Time, Day]
result = model.predict(np.array([features]))

# Labels for grade 0, 1 and 2, in that order.
grade_list = ['빠름', '보통', '느림']
# The class with the highest predicted probability is the predicted grade.
grade_index = np.argmax(result[0])
grade = grade_list[grade_index]
print("%30s" % ('속도 등급')+'\n')
print("%30s" % (grade)+'\n')
print(result[0], grade_index)