In [None]:
# Optional one-time runtime setup, left disabled: install the Nanum fonts, rebuild the
# system font cache and delete matplotlib's cache directory.
# NOTE(review): presumably required on a fresh runtime so that the 'NanumBarunGothic'
# family selected with plt.rc(...) further down can be found — confirm, and restart the
# runtime after running these.
#!sudo apt-get install -y fonts-nanum
#!sudo fc-cache -fv
#!rm ~/.cache/matplotlib -rf

In [None]:
# Colab session setup: authenticate the current user, then mount Google Drive
# under /content/gdrive so the CSV files and model directory are reachable.
from google.colab import auth, drive

auth.authenticate_user()
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from datetime import datetime as dt
import warnings
 
warnings.filterwarnings("ignore")
plt.rc('font', family='NanumBarunGothic') 
  

from sklearn.model_selection import train_test_split

%matplotlib inline

  import pandas.util.testing as tm


## 데이터

In [None]:
# Call counts
delivery = pd.read_csv('./gdrive/My Drive/빅콘 대상팀/data/delivery/DELIVERY_FINAL_V1.csv', parse_dates=['일자'], encoding='cp949')
# Weather
seoul_weather = pd.read_csv('./gdrive/My Drive/빅콘 대상팀/data/일별데이터/WEATHER_SEOUL.csv',parse_dates=['일시'])
# Drop the first two columns and give the remaining five fixed names.
seoul_weather = seoul_weather.iloc[:,2:]
seoul_weather.columns = ['STD_YMD','기온','최저기온','최고기온','일강수량']
# Daily temperature range = daily max - daily min.
seoul_weather['일교차'] = seoul_weather['최고기온']-seoul_weather['최저기온']
# Fill missing values by interpolation.
seoul_weather = seoul_weather.interpolate(method='values')
# NOTE(review): presumably keeps only rows dated 2020 or later — confirm how
# DataFrame.query compares the STD_YMD column with the bare number 2020.
seoul_weather = seoul_weather.query('STD_YMD>=2020')
# Fine dust
seoul_dust = pd.read_csv('./gdrive/My Drive/빅콘 대상팀/data/일별데이터/20미세먼지.csv',encoding='cp949',parse_dates=['date'])
seoul_dust.columns=['STD_YMD','dust']
# Public holidays
seoul_holiday = pd.read_csv('./gdrive/My Drive/빅콘 대상팀/data/일별데이터/20holiday.csv',encoding='cp949',parse_dates=['date'])
# Rename the first column through the public API. Writing into `columns.values`
# mutates the Index's backing array in place, which pandas does not support and
# which can leave label lookups out of sync with the displayed column names.
seoul_holiday = seoul_holiday.rename(columns={seoul_holiday.columns[0]: 'STD_YMD'})

In [None]:
# Merge the data sets
# NOTE(review): `search` is not defined in any cell visible above this one, so on a
# fresh kernel (Restart & Run All) the next line raises NameError. The cell that
# loads `search` needs to be restored before this merge.
delivery = pd.merge(delivery, search, left_on=['일자'], right_on=['STD_YMD'])
# No `on=` is given here, so pandas joins on whatever column names the frames share
# (presumably only STD_YMD — verify against the holiday file's other columns).
other = pd.merge(pd.merge(seoul_weather, seoul_dust), seoul_holiday)
data = pd.merge(delivery, other, left_on=['일자'], right_on=['STD_YMD'])

# Drop the '시군구' column and the two suffixed STD_YMD key columns left by the merges.
data = data.drop(['시군구', 'STD_YMD_x', 'STD_YMD_y'], axis=1)

In [None]:
# 데이터 전처리 
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()

def minmax_scalar(X):
  """Min-max scale every column of a DataFrame, keeping its labels.

  A new MinMaxScaler is fitted on `X` at each call, so the scaling bounds
  come from `X` itself.

  Args:
    X: DataFrame of numeric columns.

  Returns:
    DataFrame with the same index and column labels as `X`, holding the
    values returned by MinMaxScaler.fit_transform.
  """
  scaled_values = MinMaxScaler().fit_transform(X)
  return pd.DataFrame(scaled_values, index=X.index, columns=X.columns)

In [None]:
# Average each feature per (행정동, 일자) pair. The feature columns are selected
# BEFORE aggregating so that no other column ever reaches mean(): since pandas 2.0
# GroupBy.mean() no longer silently drops non-numeric columns and raises instead.
data = data.groupby(['행정동', '일자'])[
    ['통화건수', '기온', '최저기온', '최고기온', '일강수량', '일교차', 'dust']
].mean()
# Target column: a copy of the call count appended as the last column, which is
# the column split_xy3 reads as the label (index -1).
data['y'] = data['통화건수']

In [None]:
# Final data set: move index level 0 back into a regular column, then collect the
# distinct 행정동 values in order of first appearance.
data = data.reset_index(level=0)
dong_list = [dong for dong in data['행정동'].unique()]

## 모델링

In [None]:
import keras
from keras.models import Sequential
from keras.layers import Dense, SimpleRNN, LSTM, GRU, Bidirectional
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Activation, Flatten
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import mean_absolute_error

In [None]:
# Turn a table into sliding-window time-series samples
def split_xy3(dataset, time_steps, y_column):
  """Cut a 2-D table into overlapping (input window, target) pairs.

  Window `i` uses rows `i .. i + time_steps - 1` (all columns) as input and the
  LAST column of the following `y_column` rows as target.

  Args:
    dataset: 2-D array-like (DataFrame, ndarray, list of rows).
    time_steps: number of consecutive rows in each input window.
    y_column: number of rows after the window whose last column forms the target.

  Returns:
    Tuple `(x, y)` of ndarrays; with at least one window, `x` has shape
    (n_windows, time_steps, n_columns) and `y` has shape (n_windows, y_column).
    Both are empty arrays when the table is too short for a single window.
  """
  # Convert once up front; rebuilding the array for every window made the
  # function quadratic in the number of rows.
  values = np.asarray(dataset)
  n_rows = len(dataset)
  # A window is valid while its target still fits inside the table; the start
  # index itself never exceeds the last row.
  n_windows = min(n_rows, n_rows - time_steps - y_column + 1)

  x, y = list(), list()
  for start in range(n_windows):
    x_end_number = start + time_steps
    y_end_number = x_end_number + y_column
    x.append(values[start:x_end_number, :])
    y.append(values[x_end_number:y_end_number, -1])
  return np.array(x), np.array(y)


def RMSLE_fun(origin,pred):
  """Root of sklearn's mean squared log error, computed on values shifted by +1.

  NOTE(review): mean_squared_log_error already works on log(1 + x), so with the
  extra shift the metric is effectively based on log(2 + x). The shift may be a
  deliberate guard against slightly negative predictions — confirm before changing.

  Args:
    origin: array of true values.
    pred: array of predicted values.

  Returns:
    Square root of mean_squared_log_error(origin + 1, pred + 1).
  """
  shifted_true = origin + 1
  shifted_pred = pred + 1
  return np.sqrt(mean_squared_log_error(shifted_true, shifted_pred))

# LSTM
def LSTM_fun(data, timesteps):
  """Train a single-layer LSTM on sliding windows of `data`; return the hold-out MSE.

  Args:
    data: 2-D table (DataFrame or array) whose last column is the target.
    timesteps: number of consecutive rows in each input window.

  Returns:
    Mean squared error between targets and predictions on the last 7 windows,
    in the units of `data` as passed in.
  """
  n_holdout = 7  # the last 7 windows are held out for evaluation

  # Use the arguments rather than the notebook-global `dong_data` and a fixed 7.
  X, y = split_xy3(data, timesteps, 1)
  X_train, y_train = X[:-n_holdout], y[:-n_holdout]
  X_test, y_test = X[-n_holdout:], y[-n_holdout:]
  n_features = X_train.shape[2]
  # -1 keeps one sample per held-out window; a leading dimension of 1 cannot
  # hold several windows and makes reshape raise ValueError.
  X_test = X_test.reshape(-1, timesteps, n_features)
  y_test = y_test.reshape(-1, 1)

  model = Sequential()
  model.add(LSTM(100, input_shape=(timesteps, n_features)))
  model.add(Dense(1))

  model.compile(optimizer='adam', loss='mse')
  # NOTE(review): the held-out windows are also the early-stopping validation set,
  # so the returned MSE is measured on data that steered model selection.
  early_stopping = EarlyStopping(monitor='val_loss', patience=30, mode='min', restore_best_weights=True)
  model.fit(X_train, y_train, epochs=5000, batch_size=32, verbose=0, callbacks=[early_stopping], validation_data=(X_test, y_test))

  y_pred = model.predict(X_test, batch_size=1).reshape(-1, 1)

  return np.mean((y_test - y_pred) ** 2)

# GRU
def GRU_fun(data,min,max, timesteps):
  """Train a GRU on sliding windows of `data`, save it, and score the hold-out.

  Args:
    data: 2-D table (DataFrame or array) whose last column is the target.
    min: unused; kept so existing calls keep working (shadows the builtin).
    max: unused; kept so existing calls keep working (shadows the builtin).
    timesteps: number of consecutive rows in each input window.

  Returns:
    Tuple `(mse, rmse, mae, rmsle, y_pred)` computed on the last 7 windows.
    Nothing is rescaled here, so every value is in the units of `data` as passed in.
  """
  n_holdout = 7  # the last 7 windows are held out for evaluation

  # Use the arguments rather than the notebook-global `dong_data` and a fixed 7.
  X, y = split_xy3(data, timesteps, 1)
  X_train, y_train = X[:-n_holdout], y[:-n_holdout]
  X_test, y_test = X[-n_holdout:], y[-n_holdout:]
  n_features = X_train.shape[2]
  X_test = X_test.reshape(-1, timesteps, n_features)
  y_test = y_test.reshape(-1, 1)

  model = Sequential()
  model.add(GRU(512, input_shape=(timesteps, n_features)))
  model.add(Dense(100))
  model.add(Dense(1))

  model.compile(optimizer='adam', loss='mse')
  # NOTE(review): the held-out windows are also the early-stopping validation set,
  # so the reported metrics are measured on data that steered model selection.
  early_stopping = EarlyStopping(monitor='val_loss', patience=30, mode='min', restore_best_weights=True)
  model.fit(X_train, y_train, epochs=5000, batch_size=32, verbose=0, callbacks=[early_stopping], validation_data=(X_test, y_test))
  # NOTE(review): the file name comes from the notebook globals `dong_list` and `i`,
  # so this function only works when called from a loop that defines both.
  model.save('./gdrive/My Drive/빅콘 대상팀/분석 code/GRU/delivery models/{}GRU.hdf5'.format(dong_list[i]))

  y_pred = model.predict(X_test, batch_size=1)

  mse = np.mean((y_test - y_pred) ** 2)
  rmse = np.sqrt(mse)
  mae = mean_absolute_error(y_test, y_pred)
  rmsle = RMSLE_fun(np.array(y_test), np.array(y_pred))

  return mse, rmse, mae, rmsle, y_pred

In [None]:
# Per-dong metric collectors, filled in the order of dong_list.
MSE, RMSE, MAE, RMSLE, pred = [], [], [], [], []

# `i` has to stay a module-level name: GRU_fun builds its model file name
# from dong_list[i].
for i, dong_name in enumerate(dong_list):
  # Rows of this dong only; skip the first two columns (행정동 and 통화건수 after
  # the cells above), drop incomplete rows, then min-max scale what is left.
  dong_data = minmax_scalar(
      data[data['행정동'] == dong_name].iloc[:, 2:].dropna()
  )

  # `min` / `max` are the Python builtins here; GRU_fun does not use them.
  mse, rmse, mae, rmsle, y_pred = GRU_fun(dong_data,min,max, 7)

  MSE.append(mse)
  RMSE.append(rmse)
  MAE.append(mae)
  RMSLE.append(rmsle)
  pred.append(y_pred)

In [None]:
# One row of hold-out metrics per dong.
GRU_result = pd.DataFrame({'dong':dong_list,
              'MSE':MSE,
              'RMSE':RMSE,
              'MAE':MAE,
              'RMSLE':RMSLE})
print(GRU_result)
# Average each metric over all dongs. numeric_only=True skips the string column
# 'dong'; without it DataFrame.mean() raises TypeError on pandas >= 2.0.
GRU_result.mean(numeric_only=True)

      dong       MSE      RMSE       MAE     RMSLE
0     공릉1동  0.005217  0.072231  0.062240  0.032879
1     공릉2동  0.005446  0.073795  0.064611  0.033909
2      광희동  0.000049  0.007002  0.004849  0.003514
3      다산동  0.001945  0.044100  0.038905  0.017260
4      동화동  0.003485  0.059033  0.053544  0.024270
5       명동  0.012569  0.112110  0.094814  0.045979
6    상계10동  0.003213  0.056687  0.042266  0.023660
7     상계1동  0.002612  0.051105  0.044639  0.020361
8     상계2동  0.003921  0.062615  0.048642  0.025968
9   상계3.4동  0.001765  0.042015  0.034643  0.017442
10    상계5동  0.002562  0.050617  0.045507  0.021328
11  상계6.7동  0.001891  0.043490  0.032697  0.017562
12    상계8동  0.002130  0.046147  0.034821  0.020835
13    상계9동  0.002991  0.054693  0.043025  0.021996
14     소공동  0.033452  0.182898  0.161767  0.073103
15    신당5동  0.002539  0.050385  0.042715  0.019430
16     신당동  0.004310  0.065651  0.058671  0.025800
17     약수동  0.002600  0.050989  0.038374  0.019172
18    월계1동  0.046322  0.215225 

MSE      0.012582
RMSE     0.090906
MAE      0.075291
RMSLE    0.038471
dtype: float64