In [None]:
# Mount Google Drive so the data files and saved models under 'My Drive' are reachable.
# NOTE(review): later cells read from paths relative to './gdrive', which only resolve
# to this mount point when the working directory is '/content' — confirm before
# running outside a default Colab session.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
from matplotlib import font_manager, rc
import seaborn as sns
from sklearn.model_selection import train_test_split
from datetime import datetime as dt
import warnings
import matplotlib.font_manager as fm
%config InlineBackend.figure_format = 'retina'
%matplotlib inline

In [None]:
# Load the model input table from the mounted Drive folder.
MODEL_DATA_CSV = './gdrive/My Drive/빅콘 대상팀/data/model_data.csv'
final_data = pd.read_csv(MODEL_DATA_CSV)

In [None]:
# Load the dong -> gu lookup and the sales table, keep the date, dong and GS_* columns,
# attach the gu information and restrict to Seoul rows dated after the start of 2020.
gu = pd.read_excel('./gdrive/My Drive/빅콘 대상팀/data/지역데이터/구_동.xlsx')
gs = pd.read_csv('./gdrive/My Drive/빅콘 대상팀/data/all_amt.csv',parse_dates=['STD_YMD'])

# Select by name rather than by position so an extra or reordered column in the CSV
# (for example a stray 'Unnamed: 0' index column) cannot silently shift the selection.
key_columns = ['STD_YMD', 'HDONG_NM']
gs = pd.concat([gs[key_columns], gs.filter(like='GS')], axis=1)
gs = pd.merge(gs, gu, on='HDONG_NM')

# The first two characters of HDONG_GU are compared against "서울" below.
gs['CITY'] = gs['HDONG_GU'].str[:2]
gs_seoul = gs.query('CITY == "서울" & STD_YMD > "2020"')

# One frame per product category, ordered by dong and then by date.
sort_keys = ['HDONG_NM', 'STD_YMD']
gs_eat = gs_seoul[key_columns + ['GS_식사']].sort_values(sort_keys)
gs_snack = gs_seoul[key_columns + ['GS_간식']].sort_values(sort_keys)
gs_drink = gs_seoul[key_columns + ['GS_마실거리']].sort_values(sort_keys)

In [None]:
# Final data set used for modelling: the feature table without the columns listed below.
# NOTE(review): 'covid_p1' used to appear twice in this list — confirm that no other
# column was meant to be dropped in its place.
dropped_columns = ['COVID_CNT','covid_p1','sc_m1','cj_m1','최저기온','최고기온','일강수량']
final_data4 = final_data.drop(columns=dropped_columns)

In [None]:
# Dongs to evaluate: every distinct HDONG_NM in the final data set except '상계8동'.
# NOTE(review): the reason for excluding '상계8동' is not visible in this notebook — confirm.
dong_list = final_data4['HDONG_NM'].unique().tolist()
dong_list.remove('상계8동')

In [None]:
def build_data(data,dong,cat):
  """Assemble the feature/target frame for one dong and one product category.

  Args:
    data: DataFrame containing at least the 'HDONG_NM' and 'STD_YMD' columns.
    dong: value of 'HDONG_NM' to select.
    cat: product category, one of "식사", "간식" or "마실거리".

  Returns:
    DataFrame indexed by 'STD_YMD' that holds the remaining columns of `data`
    followed by 'self_m7' (the target shifted by 7 rows) and 'y' (the target).
    'y' is the last column.

  Raises:
    ValueError: if `cat` is not one of the supported categories. Without this
      check the function returned a frame that had no 'y' column at all.
  """
  target_columns = {"식사": "GS_식사", "간식": "GS_간식", "마실거리": "GS_마실거리"}
  if cat not in target_columns:
    raise ValueError(
        "unsupported category {!r}; expected one of {}".format(cat, list(target_columns)))

  # Pick the module-level sales frame that belongs to the category.
  if cat == "식사":
    sales = gs_eat
  elif cat == "간식":
    sales = gs_snack
  else:
    sales = gs_drink

  X = data.query('HDONG_NM==@dong').reset_index(drop=True)
  target = sales.query('HDONG_NM==@dong').reset_index(drop=True)[target_columns[cat]]

  # Both sides were re-indexed to 0..n-1, so the assignments pair rows by position.
  # NOTE(review): this assumes `data` and the sales frame cover the same dates in the
  # same order for this dong — confirm.
  X['self_m7'] = target.shift(7)
  X['y'] = target

  X.index = X['STD_YMD']
  del X['STD_YMD'],X['HDONG_NM']

  return X

In [None]:
from sklearn.preprocessing import MinMaxScaler 

def minmax_scalar(X):
  """Return a min-max scaled copy of `X` (MinMaxScaler with its default settings).

  A fresh scaler is fitted on `X` itself; the index and column labels of `X`
  are carried over to the result.
  """
  scaled_values = MinMaxScaler().fit_transform(X)
  return pd.DataFrame(scaled_values, index=X.index, columns=X.columns)

In [None]:
def split_xy(dataset, time_steps, y_column):
  """Cut a 2-D table into sliding input windows and the targets that follow them.

  Args:
    dataset: 2-D array-like (rows are consecutive steps); the last column is the target.
    time_steps: number of rows in each input window.
    y_column: number of rows after each window whose last-column values form the target.

  Returns:
    (x, y): x stacks the windows, shape (n_windows, time_steps, n_columns);
    y stacks the targets, shape (n_windows, y_column). When no complete window
    fits, both are empty arrays.
  """
  # Convert once up front: converting inside the loop copied the whole data set
  # twice per iteration.
  values = np.array(dataset)
  n_rows = len(dataset)

  x, y = list(), list()
  for start in range(n_rows):
    x_end = start + time_steps
    y_end = x_end + y_column

    # Stop as soon as the target rows would run past the end of the data.
    if y_end > n_rows:
      break
    x.append(values[start:x_end, :])
    y.append(values[x_end:y_end, -1])
  return np.array(x), np.array(y)

In [None]:
import numpy as np
from sklearn.metrics import mean_squared_log_error
from sklearn.metrics import mean_absolute_error

def RMSLE_fun(origin,pred):
  """Root mean squared logarithmic error between `origin` and `pred`.

  mean_squared_log_error already evaluates log(1 + value) on both arguments, so
  the inputs are passed through unchanged; adding 1 beforehand would measure
  log(2 + value) instead of the RMSLE.

  NOTE(review): scikit-learn rejects inputs outside the domain of its log
  transform (negative values, in the versions checked) — confirm predictions
  cannot go below zero after inverse scaling.
  """
  return np.sqrt(mean_squared_log_error(origin, pred))

In [None]:
def train_test_split(n,X,y):
  """Ordered hold-out split: the last `n` rows become the test set.

  NOTE: this definition shadows `sklearn.model_selection.train_test_split`,
  which is imported at the top of the notebook.

  Args:
    n: number of trailing rows to hold out, 0 <= n <= number of rows in X.
    X: DataFrame (sliced with .iloc) or array sliceable along its first axis.
    y: sequence with one entry per row of X, sliced by position.

  Returns:
    (X_train, X_test, y_train, y_test)

  Raises:
    ValueError: if `n` is outside 0..len(X) or `y` has a different length than X.
  """
  total = X.shape[0]
  if not 0 <= n <= total:
    raise ValueError("n must be between 0 and {} (got {})".format(total, n))
  if len(y) != total:
    raise ValueError("X has {} rows but y has {} entries".format(total, len(y)))

  # Slice from an explicit split index: a negative-offset slice such as X[-n:]
  # returns the whole data set when n == 0.
  split = total - n
  rows = X.iloc if isinstance(X, pd.DataFrame) else X
  X_train, X_test = rows[:split], rows[split:]
  y_train, y_test = y[:split], y[split:]
  return X_train,X_test,y_train,y_test

In [None]:
import keras
from keras.models import Sequential, load_model
from keras.layers import Dense, SimpleRNN, LSTM, GRU
from keras.callbacks import EarlyStopping

In [None]:
def data_pipeline(data, dong, cat, time_steps, y_columns, test_size=7):
  """Build scaled, windowed train/test arrays for one dong and category.

  Steps: build_data -> min-max scaling of every column -> drop rows containing
  NaN -> sliding windows via split_xy -> hold out the last `test_size` windows.

  Args:
    data: feature table passed on to build_data.
    dong: dong name passed on to build_data.
    cat: product category passed on to build_data.
    time_steps: number of rows per input window.
    y_columns: number of target steps per window.
    test_size: number of trailing windows used as the test set (default 7).

  Returns:
    (X_train, y_train, X_test, y_test, y_min, y_max), where y_min / y_max are
    the minimum and maximum of the unscaled 'y' column, for undoing the scaling.

  Raises:
    ValueError: if `test_size` is not positive.
  """
  if test_size <= 0:
    raise ValueError("test_size must be positive (got {})".format(test_size))

  data = build_data(data,dong,cat)

  # Range of the raw target, captured before scaling.
  y_min = data['y'].min()
  y_max = data['y'].max()

  # Scale first, then drop rows with missing values (the shifted 'self_m7'
  # feature is NaN for the first rows).
  Xy = minmax_scalar(data).dropna()

  X,y = split_xy(Xy,time_steps,y_columns)

  X_train, y_train = X[:-test_size],y[:-test_size]
  X_test, y_test = X[-test_size:],y[-test_size:]

  # Force the (samples, time_steps, features) / (samples, targets) layouts.
  X_test=X_test.reshape(-1,time_steps,X_train.shape[2])
  y_test=y_test.reshape(-1,y_columns)

  return X_train,y_train,X_test,y_test,y_min,y_max

In [None]:
# Evaluate every saved GRU model (one per dong and category) on its hold-out windows
# and collect MSE / RMSE / MAE / RMSLE per category.
MSE_eat, RMSE_eat, MAE_eat, RMSLE_eat = [], [], [], []
MSE_snack, RMSE_snack, MAE_snack, RMSLE_snack = [], [], [], []
MSE_drink, RMSE_drink, MAE_drink, RMSLE_drink = [], [], [], []

# Category -> (MSE, RMSE, MAE, RMSLE) result lists; iteration follows insertion order.
metric_lists = {
  '식사': (MSE_eat, RMSE_eat, MAE_eat, RMSLE_eat),
  '간식': (MSE_snack, RMSE_snack, MAE_snack, RMSLE_snack),
  '마실거리': (MSE_drink, RMSE_drink, MAE_drink, RMSLE_drink),
}

# Window length and number of target steps handed to data_pipeline.
# NOTE(review): presumably these must match the settings the saved models were trained with — confirm.
time_steps, y_columns = 7, 1

for dong in dong_list:
  for cat, cat_lists in metric_lists.items():

    model = load_model('./gdrive/My Drive/빅콘 대상팀/분석 code/GRU/models/{}{} GRU.hdf5'.format(dong, cat))

    # Named y_min / y_max rather than min / max so the builtins are not
    # overwritten at notebook scope for every later cell.
    X_train,y_train,X_test,y_test,y_min,y_max = data_pipeline(final_data4, dong, cat, time_steps, y_columns)

    # Undo the min-max scaling so the errors are in the original units of 'y'.
    y_pred = model.predict(X_test, batch_size=1)
    y_pred = y_pred.reshape(-1, 1) *(y_max-y_min)+y_min
    y_test = y_test *(y_max-y_min)+y_min

    mse = np.mean((y_test-y_pred)**2)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    rmsle = RMSLE_fun(np.array(y_test), np.array(y_pred))

    print('{0}, {1}, {2}, {3}, {4}, {5}'.format(dong, cat, mse, rmse, mae, rmsle))

    for values, value in zip(cat_lists, (mse, rmse, mae, rmsle)):
      values.append(value)

공릉1동, 식사, 0.5004277842893995, 0.7074092056860721, 0.5551705758231051, 0.026747592510797214
공릉1동, 간식, 0.712037470790815, 0.8438231276700201, 0.6390670373099182, 0.05034097408545398
공릉1동, 마실거리, 1.2115534889988118, 1.100705904862335, 0.9922896815708698, 0.03424997964924946
공릉2동, 식사, 0.6614575821266863, 0.8133004255050444, 0.5903433750697553, 0.026091209121813175
공릉2동, 간식, 1.0129546385739931, 1.0064564762442503, 0.8442487378801624, 0.04614075419334631
공릉2동, 마실거리, 1.4126749825565297, 1.1885600458355183, 1.0754755467006123, 0.031451234910119565
광희동, 식사, 0.8776774230812814, 0.9368443964081129, 0.8309238455636171, 0.032466394046993516
광희동, 간식, 1.7854769399509103, 1.3362173999581468, 1.2166565660749167, 0.07163627864403804
광희동, 마실거리, 3.2413146172986598, 1.8003651344376395, 1.3783821280343196, 0.045444016935788964
다산동, 식사, 0.49156363548019455, 0.7011159928857668, 0.6174699990408763, 0.02243504728046846
다산동, 간식, 2.746751048145634, 1.6573325098318785, 1.2769052690778457, 0.06263219217864915
다산동, 마

In [None]:
# Gather the per-dong metrics into one table: one row per dong and one column
# per category/metric pair, in the order listed here.
result_columns = [
  ('동', dong_list),
  ('식사_MSE', MSE_eat),
  ('식사_RMSE', RMSE_eat),
  ('식사_MAE', MAE_eat),
  ('식사_RMSLE', RMSLE_eat),
  ('간식_MSE', MSE_snack),
  ('간식_RMSE', RMSE_snack),
  ('간식_MAE', MAE_snack),
  ('간식_RMSLE', RMSLE_snack),
  ('마실거리_MSE', MSE_drink),
  ('마실거리_RMSE', RMSE_drink),
  ('마실거리_MAE', MAE_drink),
  ('마실거리_RMSLE', RMSLE_drink),
]
gru_result = pd.DataFrame(dict(result_columns))

In [None]:
# Average every metric over all dongs. numeric_only=True skips the '동' label
# column, which recent pandas versions no longer drop silently.
gru_result.mean(numeric_only=True)

식사_MSE        5.197275
식사_RMSE       1.883504
식사_MAE        1.570157
식사_RMSLE      0.051581
간식_MSE        3.135837
간식_RMSE       1.588339
간식_MAE        1.313508
간식_RMSLE      0.060718
마실거리_MSE      7.154878
마실거리_RMSE     2.410009
마실거리_MAE      1.981455
마실거리_RMSLE    0.053843
dtype: float64