In [None]:
from google.colab import drive  
# Mount Google Drive under /content/gdrive so the data files below can be read.
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [None]:
import pandas as pd
import numpy as np
# 결측값 확인 패키지
import missingno as msno

from matplotlib import pyplot as plt
import seaborn as sns

import warnings
warnings.filterwarnings("ignore")

import math # POW함수 

from urllib.request import  urlopen, Request
from urllib.parse import urlencode,quote_plus,unquote
import requests
#XML 파일 읽기
import xml.etree.ElementTree as el

from matplotlib import font_manager, rc
plt.style.use('fivethirtyeight')


In [None]:
# Folder on the mounted drive that holds train.csv, test.csv and sample_submission.csv.
PATH='/content/gdrive/MyDrive/개인 공부/dacon/dacon_energy/dacon_energy'

In [None]:
# Read the three competition tables (all files are decoded as CP949).
train = pd.read_csv(f'{PATH}/train.csv', header=0, encoding='cp949')
mis_test = pd.read_csv(f'{PATH}/test.csv', header=0, encoding='cp949')
sub = pd.read_csv(f'{PATH}/sample_submission.csv', encoding='cp949')

In [None]:
# Assign short column names. Both frames share the same trailing columns;
# only the training frame carries the target 'power(kWh)'.
shared_columns = ['tempe(°C)','wind(m/s)','hum(%)','rain(mm)','sol(hr)','ne_cool','sol_energy']
train.columns = ['num','datetime','power(kWh)'] + shared_columns
mis_test.columns = ['num','datetime'] + shared_columns

In [None]:
# For every building, look up whether it runs non-electric cooling equipment
# ('ne_cool') and whether it has solar power ('sol_energy'); these per-building
# flags are used to fill the missing values of the test set.
# The lookup is keyed by the actual building number ('num') rather than by a
# running counter over evenly spaced row positions, so it stays correct even if
# the buildings do not all have the same number of rows or are not sorted.
building_flags = train.groupby('num')[['ne_cool','sol_energy']].first()
ice = building_flags['ne_cool'].to_dict()      # building number -> ne_cool flag
hot = building_flags['sol_energy'].to_dict()   # building number -> sol_energy flag

In [None]:
# Fill the per-building flags of the test set from the lookup tables built on
# the training set. A vectorised map replaces the row-by-row .loc assignment;
# a building number missing from the lookup yields NaN instead of a KeyError.
mis_test['ne_cool'] = mis_test['num'].map(ice)
mis_test['sol_energy'] = mis_test['num'].map(hot)

In [None]:
# Add hour-of-day and day-of-week variables.
def time(x):
    """Return the last two characters of the timestamp string ``x`` as an int (the hour)."""
    hour_text = x[-2:]
    return int(hour_text)

# Derive the 'time' (hour) column of both frames from their timestamp strings.
for frame in (train, mis_test):
    frame['time'] = frame['datetime'].apply(time)

# weekday(): day of the week as an integer
# (0: Mon, 1: Tue, 2: Wed, 3: Thu, 4: Fri, 5: Sat, 6: Sun)
def weekday(x):
    """Return the weekday number of the date held in the first ten characters of ``x``."""
    date_part = x[:10]
    parsed = pd.to_datetime(date_part)
    return parsed.weekday()

# Derive the 'weekday' column of both frames from their timestamp strings.
for frame in (train, mis_test):
    frame['weekday'] = frame['datetime'].apply(weekday)

In [None]:
# Fill the gaps in the test set by interpolation, one building at a time.
# Interpolating the whole frame in one call lets the missing rows at the end of
# one building be blended with the first readings of the next building; grouping
# by 'num' keeps every building's series separate. Trailing gaps inside a
# building are filled with that building's last known value.
# NOTE(review): a gap at the very start of a building now stays NaN instead of
# being filled from the previous building - confirm none exist in the data.
numeric_cols = [c for c in mis_test.select_dtypes(include='number').columns if c != 'num']
test = mis_test.copy()
test[numeric_cols] = (
    mis_test.groupby('num')[numeric_cols]
    .transform(lambda col: col.interpolate(method='values'))
)
test

Unnamed: 0,num,datetime,tempe(°C),wind(m/s),hum(%),rain(mm),sol(hr),ne_cool,sol_energy,time,weekday
0,1,2020-08-25 00,27.800000,1.500000,74.000000,0.0,0.000000,0.0,0.0,0,1
1,1,2020-08-25 01,27.633333,1.366667,75.333333,0.0,0.000000,0.0,0.0,1,1
2,1,2020-08-25 02,27.466667,1.233333,76.666667,0.0,0.000000,0.0,0.0,2,1
3,1,2020-08-25 03,27.300000,1.100000,78.000000,0.0,0.000000,0.0,0.0,3,1
4,1,2020-08-25 04,26.900000,1.166667,79.666667,0.0,0.000000,0.0,0.0,4,1
...,...,...,...,...,...,...,...,...,...,...,...
10075,60,2020-08-31 19,28.633333,3.566667,66.000000,0.0,0.533333,1.0,1.0,19,0
10076,60,2020-08-31 20,28.266667,3.833333,67.000000,0.0,0.266667,1.0,1.0,20,0
10077,60,2020-08-31 21,27.900000,4.100000,68.000000,0.0,0.000000,1.0,1.0,21,0
10078,60,2020-08-31 22,27.900000,4.100000,68.000000,0.0,0.000000,1.0,1.0,22,0


In [None]:
# Discomfort index (T: air temperature, H: relative humidity)
def discomfort(data, T, H, humidity_in_percent=True):
  """Add a 'discomfort' column (discomfort index) to ``data`` in place.

  The index is ``1.8*T - 0.55*(1 - RH)*(1.8*T - 26) + 32`` where ``RH`` is the
  relative humidity expressed as a fraction between 0 and 1.

  Args:
    data: DataFrame that receives the new 'discomfort' column.
    T: air temperature values (the callers pass the 'tempe(°C)' column).
    H: relative humidity values (the callers pass the 'hum(%)' column).
    humidity_in_percent: when True (default) ``H`` is given in percent and is
      divided by 100 before use; pass False if ``H`` is already a fraction.

  Returns:
    None. ``data`` is modified in place.
  """
  # Feeding a percentage (e.g. 92.0) straight into (1 - RH) makes the factor
  # hugely negative and inflates the index to several hundred.
  rh = H / 100 if humidity_in_percent else H
  data['discomfort'] = 1.8 * T - 0.55 * (1 - rh) * (1.8 * T - 26) + 32

# The sensible-temperature calculation needs the wind speed raised to the power 0.15
def get_pow(series):
    """Return the single number ``series`` raised to the power 0.15."""
    exponent = 0.15
    return math.pow(series, exponent)

# Sensible temperature (T: air temperature, V: wind speed)
def sensible(data, T, V):
  """Add a 'sensible' column to ``data`` in place, computed from ``T`` and ``V``.

  ``V`` is raised element-wise to the power 0.15 via ``get_pow`` before it
  enters the formula. Returns None.
  """
  # NOTE(review): the commonly published wind-chill formula with these
  # coefficients uses exponent 0.16 and wind speed in km/h - confirm that
  # 0.15 and the m/s input used by the callers are intentional.
  wind_term = V.apply(get_pow)
  data['sensible'] = 13.12 + 0.6215 * T - 11.37 * wind_term + 0.3965 * wind_term * T

In [None]:
# Add the discomfort-index and sensible-temperature columns to the training frame.
train_temp, train_hum, train_wind = train['tempe(°C)'], train['hum(%)'], train['wind(m/s)']
discomfort(train, train_temp, train_hum)
sensible(train, train_temp, train_wind)

In [None]:
# Add the discomfort-index and sensible-temperature columns to the test frame.
test_temp, test_hum, test_wind = test['tempe(°C)'], test['hum(%)'], test['wind(m/s)']
discomfort(test, test_temp, test_hum)
sensible(test, test_temp, test_wind)

In [None]:
# Print the row count of each frame, one per line.
for frame in (train, test):
    print(len(frame))

122400
10080


# 코로나 확진자 수 파생변수 생성


In [None]:
import os

# Service URL
serviceUrl = 'http://openapi.data.go.kr/openapi/service/rest/Covid19/getCovid19InfStateJson'
# Authentication key.
# SECURITY: a credential should not live in the notebook. Set the environment
# variable DATA_GO_KR_SERVICE_KEY to override the literal below; the literal is
# kept only so existing runs keep working and should be rotated and removed.
serviceKey = os.environ.get(
    'DATA_GO_KR_SERVICE_KEY',
    'go23rIXCgAhqPLaRenUm7PbVOVandRtKNaHZWoquQGF0bYKkiG8oJELCQdzvH7kUttrNa1pDH3rmDLJSjwVLIA==',
)
serviceKey_decode=unquote(serviceKey)

pageNo = '1'
numOfRows = '10'
startCreateDt = '20200531'
endCreateDt = '20200831'
# Set the query parameters as described in the API documentation.
parameters = {"serviceKey":serviceKey_decode,"pageNo":pageNo,"numOfRows":numOfRows,"startCreateDt":startCreateDt,"endCreateDt":endCreateDt}

# Send the GET request; the timeout keeps the notebook from hanging forever
# when the service does not answer.
response = requests.get(serviceUrl, params=parameters, timeout=30)
# The body comes back as XML text.
print(response.text)

<?xml version="1.0" encoding="UTF-8" standalone="yes"?><response><header><resultCode>00</resultCode><resultMsg>NORMAL SERVICE.</resultMsg></header><body><items><item><accDefRate>1.061062989</accDefRate><accExamCnt>1937689</accExamCnt><createDt>2020-08-31 00:00:00.000</createDt><deathCnt>324</deathCnt><decideCnt>19946</decideCnt><seq>225</seq><stateDt>20200831</stateDt><stateTime>00:00</stateTime><updateDt>2021-10-07 10:30:51.51</updateDt></item><item><accDefRate>1.055542725</accDefRate><accExamCnt>1924170</accExamCnt><createDt>2020-08-30 00:00:00.000</createDt><deathCnt>323</deathCnt><decideCnt>19698</decideCnt><seq>224</seq><stateDt>20200830</stateDt><stateTime>00:00</stateTime><updateDt>2021-10-07 10:30:51.51</updateDt></item><item><accDefRate>1.045608675</accDefRate><accExamCnt>1909329</accExamCnt><createDt>2020-08-29 00:00:00.000</createDt><deathCnt>321</deathCnt><decideCnt>19399</decideCnt><seq>223</seq><stateDt>20200829</stateDt><stateTime>00:00</stateTime><updateDt>2021-10-07 10

In [None]:
# Turn the XML response into a DataFrame with one row per <item> element.
if response.status_code==200:
    # Parse the XML text into an element tree.
    tree=el.fromstring(response.text)

    # Collect plain records first and build the DataFrame once at the end:
    # DataFrame.append was removed in pandas 2.0 and re-copies the frame on
    # every call. Local names avoid shadowing the builtins `str` and `iter`.
    records = []
    messages = []

    # Walk over every <item> element and read the two fields that are needed.
    for element in tree.iter(tag="item"):
        created_text = element.find('createDt').text    # report date/time
        decided_text = element.find('decideCnt').text   # confirmed-case count

        records.append({'date' : created_text, 'decideCnt' : decided_text})
        messages.append("날짜 : {}  확진자 수 : {}".format(created_text, decided_text))

    corona_data = pd.DataFrame(records, columns=['date', 'decideCnt'])

    print(''.join(messages))
else:
    # Fail here with a clear message instead of leaving `corona_data` undefined
    # for the cells that follow.
    raise RuntimeError(
        "COVID-19 API request failed with HTTP status {}".format(response.status_code)
    )

날짜 : 2020-08-31 00:00:00.000  확진자 수 : 19946날짜 : 2020-08-30 00:00:00.000  확진자 수 : 19698날짜 : 2020-08-29 00:00:00.000  확진자 수 : 19399날짜 : 2020-08-28 00:00:00.000  확진자 수 : 19076날짜 : 2020-08-27 00:00:00.000  확진자 수 : 18705날짜 : 2020-08-26 00:00:00.000  확진자 수 : 18264날짜 : 2020-08-25 00:00:00.000  확진자 수 : 17944날짜 : 2020-08-24 00:00:00.000  확진자 수 : 17664날짜 : 2020-08-23 00:00:00.000  확진자 수 : 17398날짜 : 2020-08-22 00:00:00.000  확진자 수 : 17002날짜 : 2020-08-21 00:00:00.000  확진자 수 : 16670날짜 : 2020-08-20 00:00:00.000  확진자 수 : 16346날짜 : 2020-08-19 00:00:00.000  확진자 수 : 16058날짜 : 2020-08-18 00:00:00.000  확진자 수 : 15761날짜 : 2020-08-17 00:00:00.000  확진자 수 : 15515날짜 : 2020-08-16 00:00:00.000  확진자 수 : 15318날짜 : 2020-08-15 00:00:00.000  확진자 수 : 15039날짜 : 2020-08-14 00:00:00.000  확진자 수 : 14873날짜 : 2020-08-13 00:00:00.000  확진자 수 : 14770날짜 : 2020-08-12 00:00:00.000  확진자 수 : 14714날짜 : 2020-08-11 00:00:00.000  확진자 수 : 14660날짜 : 2020-08-10 00:00:00.000  확진자 수 : 14626날짜 : 2020-08-09 00:00:00.000  확진자 수 : 14598날짜 : 2020-0

In [None]:
# Order the rows by the 'date' column and renumber them; reset_index() keeps
# the previous index as an extra 'index' column.
corona_data = corona_data.sort_values(['date']).reset_index()

In [None]:
# Daily new cases: difference between each row's count and the previous row's.
counts = corona_data['decideCnt']
corona_join = [
    int(counts[k + 1]) - int(counts[k])
    for k in range(len(corona_data) - 1)
]

In [None]:
# Wrap the list of daily differences in a one-column DataFrame named 'corona_count'.
corona_join = pd.DataFrame(corona_join, columns = ["corona_count"])

In [None]:
# Display the daily-difference frame.
corona_join

Unnamed: 0,corona_count
0,35
1,38
2,49
3,39
4,39
...,...
87,441
88,371
89,323
90,299


In [None]:
# Drop the first row (it has no preceding row to difference against) and keep
# only the date and count columns, renumbered from 0.
# NOTE: this rebinds corona_data to a slice of itself, so re-running the cell
# removes one more row each time.
corona_data = corona_data[1:][['date','decideCnt']].reset_index()

In [None]:
# Place the daily differences next to the date / count columns (column-wise
# concat; rows are matched on their index labels).
corona_result = pd.concat([corona_data[['date','decideCnt']], pd.DataFrame(corona_join)],axis = 1)

In [None]:
from datetime import datetime,date

def _as_day_string(value):
    """Parse ``value`` with pandas and format it as 'YYYY-MM-DD'."""
    return datetime.strftime(pd.to_datetime(value), "%Y-%m-%d")

# Reduce the report timestamps to plain day strings.
corona_result['date'] = corona_result['date'].apply(_as_day_string)

In [None]:
# Preview the first rows of the per-day COVID table.
corona_result.head()

Unnamed: 0,date,decideCnt,corona_count
0,2020-06-01,11503,35
1,2020-06-02,11541,38
2,2020-06-03,11590,49
3,2020-06-04,11629,39
4,2020-06-05,11668,39


In [None]:
# Inspect the timestamp strings of the training frame.
train['datetime']

0         2020-06-01 00
1         2020-06-01 01
2         2020-06-01 02
3         2020-06-01 03
4         2020-06-01 04
              ...      
122395    2020-08-24 19
122396    2020-08-24 20
122397    2020-08-24 21
122398    2020-08-24 22
122399    2020-08-24 23
Name: datetime, Length: 122400, dtype: object

In [None]:
# Add a 'date' column ('YYYY-MM-DD') that serves as the key for joining the
# per-day COVID counts. The timestamps look like '2020-06-01 00', so the whole
# column is parsed in one vectorised call with an explicit format instead of
# calling pd.to_datetime once per row; this also removes the dependency on the
# `datetime` name imported in an earlier cell.
train['date'] = pd.to_datetime(train['datetime'], format='%Y-%m-%d %H').dt.strftime('%Y-%m-%d')
test['date'] = pd.to_datetime(test['datetime'], format='%Y-%m-%d %H').dt.strftime('%Y-%m-%d')

In [None]:
# Display the training frame, now including the 'date' column.
train

Unnamed: 0,num,datetime,power(kWh),tempe(°C),wind(m/s),hum(%),rain(mm),sol(hr),ne_cool,sol_energy,time,weekday,discomfort,sensible,date
0,1,2020-06-01 00,8179.056,17.6,2.5,92.0,0.8,0.0,0.0,0.0,0,0,347.964,19.019755,2020-06-01
1,1,2020-06-01 01,8135.640,17.7,2.9,91.0,0.3,0.0,0.0,0.0,1,0,353.930,19.014988,2020-06-01
2,1,2020-06-01 02,8107.128,17.5,3.2,91.0,0.0,0.0,0.0,0.0,2,0,335.750,18.720323,2020-06-01
3,1,2020-06-01 03,8048.808,17.1,3.2,91.0,0.0,0.0,0.0,0.0,3,0,299.390,18.282891,2020-06-01
4,1,2020-06-01 04,8043.624,17.0,3.3,92.0,0.0,0.0,0.0,0.0,4,0,292.830,18.148033,2020-06-01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122395,60,2020-08-24 19,4114.368,27.8,2.3,68.0,0.0,0.7,1.0,1.0,19,0,967.914,30.004183,2020-08-24
122396,60,2020-08-24 20,3975.696,27.3,1.2,71.0,0.0,0.0,1.0,1.0,20,0,972.030,29.526274,2020-08-24
122397,60,2020-08-24 21,3572.208,27.3,1.8,71.0,0.0,0.0,1.0,1.0,21,0,972.030,29.491116,2020-08-24
122398,60,2020-08-24 22,3299.184,27.1,1.8,74.0,0.0,0.0,1.0,1.0,22,0,995.397,29.280207,2020-08-24


In [None]:
# Attach the per-day COVID columns to every hourly row via the 'date' key.
# An inner join drops rows whose date has no match in corona_result.
train = train.merge(corona_result, how='inner', on='date')
test = test.merge(corona_result, how='inner', on='date')

In [None]:
# Print both row counts again to confirm the inner join dropped nothing.
for frame in (train, test):
    print(len(frame))

122400
10080


In [None]:
# Display the training frame after the merge.
train

Unnamed: 0,num,datetime,power(kWh),tempe(°C),wind(m/s),hum(%),rain(mm),sol(hr),ne_cool,sol_energy,time,weekday,discomfort,sensible,date,decideCnt,corona_count
0,1,2020-06-01 00,8179.056,17.6,2.5,92.0,0.8,0.0,0.0,0.0,0,0,347.964,19.019755,2020-06-01,11503,35
1,1,2020-06-01 01,8135.640,17.7,2.9,91.0,0.3,0.0,0.0,0.0,1,0,353.930,19.014988,2020-06-01,11503,35
2,1,2020-06-01 02,8107.128,17.5,3.2,91.0,0.0,0.0,0.0,0.0,2,0,335.750,18.720323,2020-06-01,11503,35
3,1,2020-06-01 03,8048.808,17.1,3.2,91.0,0.0,0.0,0.0,0.0,3,0,299.390,18.282891,2020-06-01,11503,35
4,1,2020-06-01 04,8043.624,17.0,3.3,92.0,0.0,0.0,0.0,0.0,4,0,292.830,18.148033,2020-06-01,11503,35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122395,60,2020-08-24 19,4114.368,27.8,2.3,68.0,0.0,0.7,1.0,1.0,19,0,967.914,30.004183,2020-08-24,17664,266
122396,60,2020-08-24 20,3975.696,27.3,1.2,71.0,0.0,0.0,1.0,1.0,20,0,972.030,29.526274,2020-08-24,17664,266
122397,60,2020-08-24 21,3572.208,27.3,1.8,71.0,0.0,0.0,1.0,1.0,21,0,972.030,29.491116,2020-08-24,17664,266
122398,60,2020-08-24 22,3299.184,27.1,1.8,74.0,0.0,0.0,1.0,1.0,22,0,995.397,29.280207,2020-08-24,17664,266


In [None]:
# Remove the join key, the raw count and the timestamp string from the training frame.
train = train.drop(columns=['date','decideCnt','datetime'])

In [None]:
# Remove the join key, the raw count and the timestamp string from the test frame.
test = test.drop(columns=['date','decideCnt','datetime'])

In [None]:
# Row count of the test frame after dropping columns.
len(test)

10080

In [None]:
# Row count of the training frame after dropping columns.
len(train)

122400

In [None]:
# Display the final training frame.
train

Unnamed: 0,num,power(kWh),tempe(°C),wind(m/s),hum(%),rain(mm),sol(hr),ne_cool,sol_energy,time,weekday,discomfort,sensible,corona_count
0,1,8179.056,17.6,2.5,92.0,0.8,0.0,0.0,0.0,0,0,347.964,19.019755,35
1,1,8135.640,17.7,2.9,91.0,0.3,0.0,0.0,0.0,1,0,353.930,19.014988,35
2,1,8107.128,17.5,3.2,91.0,0.0,0.0,0.0,0.0,2,0,335.750,18.720323,35
3,1,8048.808,17.1,3.2,91.0,0.0,0.0,0.0,0.0,3,0,299.390,18.282891,35
4,1,8043.624,17.0,3.3,92.0,0.0,0.0,0.0,0.0,4,0,292.830,18.148033,35
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
122395,60,4114.368,27.8,2.3,68.0,0.0,0.7,1.0,1.0,19,0,967.914,30.004183,266
122396,60,3975.696,27.3,1.2,71.0,0.0,0.0,1.0,1.0,20,0,972.030,29.526274,266
122397,60,3572.208,27.3,1.8,71.0,0.0,0.0,1.0,1.0,21,0,972.030,29.491116,266
122398,60,3299.184,27.1,1.8,74.0,0.0,0.0,1.0,1.0,22,0,995.397,29.280207,266


In [None]:
# Stack the training rows on top of the test rows. The test frame has no
# 'power(kWh)' column, so that column is NaN for the test rows. The original
# index labels are kept (not renumbered).
All = pd.concat([train,test], axis = 0)

# BI-LSTM

### time step = 5

In [None]:
#Library Imports
import numpy as np
import pandas as pd
import math
import os
import matplotlib.pyplot as plt

from sklearn.metrics import mean_absolute_error

#######딥러닝 라이브러리##########
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Reshape, GRU, RNN
from tensorflow.keras.layers import Dense, Embedding, Bidirectional, LSTM, Concatenate, Dropout
import keras.backend as K 
from tensorflow.keras import Input, Model
from keras.layers import Dense, Activation, Flatten
from keras.callbacks import EarlyStopping
tf.keras.backend.set_floatx('float64')

In [None]:
# Train one bidirectional LSTM per building and predict that building's
# unlabelled (test) hours. Each sample is a window of WINDOW consecutive hourly
# feature rows; its target is the power reading of the hour right after the window.
WINDOW = 5

all_pred = []
for i in range(1,61):
  print("---------------------------------------------")
  print(i)
  All_1 = All[All['num'] == i].reset_index(drop = True)

  X = All_1.drop(['power(kWh)','num'], axis = 1)
  y = All_1['power(kWh)']

  # Build the windows as NumPy arrays rather than nested Python lists, which
  # Keras may interpret as a list of separate model inputs.
  features = X.to_numpy(dtype='float64')
  data_X = np.stack([features[j:j + WINDOW] for j in range(len(features) - WINDOW)])
  data_y = y.to_numpy(dtype='float64')[WINDOW:]

  # The labelled rows come first in `All`, so the first (labelled rows - WINDOW)
  # windows have a known target; the remaining windows are the ones to predict.
  # This replaces the hard-coded split index 2035.
  # NOTE(review): assumes 'power(kWh)' has no missing values among the
  # training rows - confirm.
  n_train = int(y.notna().sum()) - WINDOW

  train_X, train_y = data_X[:n_train], data_y[:n_train]
  test_X = data_X[n_train:]

  K.clear_session()
  # Input shape follows the data instead of the literal (5, 12).
  visible = Input(shape=(WINDOW, features.shape[1]))
  forward_layer = LSTM(10, activation='linear', return_sequences=True)
  backward_layer = LSTM(10, activation='linear', return_sequences=True,
                        go_backwards=True)
  hidden1 = Bidirectional(forward_layer, backward_layer=backward_layer)(visible)
  hidden2 = Flatten()(hidden1)
  output = Dense(1,activation='linear')(hidden2)

  model = Model(inputs=visible, outputs=output)

  model.compile(loss='mse', optimizer='adam')

  # Stop when the training loss has not improved for 3 epochs.
  early_stop = EarlyStopping(monitor='loss', patience=3, verbose=1)

  model.fit(train_X, train_y, epochs=100, batch_size=8, verbose=1, callbacks=[early_stop])

  pred = model.predict(test_X)

  # Keep one single-element list per predicted hour; the following cell
  # flattens this list of lists.
  all_pred.extend(pred.tolist())

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 12: early stopping
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 11: early stopping
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 29: early stopping
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 8: early stopping
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/

In [None]:
# Expected length: 10080 (one prediction per test row).
len(all_pred)

10080

In [None]:
# Flatten the list of single-element lists into a flat list of predictions.
# A comprehension is linear, whereas sum(list_of_lists, []) copies the
# accumulated list on every step.
predict_1 = [value for row in all_pred for value in row]

In [None]:
# Write the predictions into the 'answer' column of the submission frame
# (lengths must match, otherwise pandas raises).
sub['answer'] = predict_1

In [None]:
# Save the BI-LSTM submission to Google Drive without the index column.
sub.to_csv('/content/gdrive/MyDrive/개인 공부/dacon/dacon_energy/LSTM_baseline_submission1.csv', index=False)

# BI-GRU

In [None]:
#Library Imports
import numpy as np
import pandas as pd
import math
import os
import matplotlib.pyplot as plt

from sklearn.metrics import mean_absolute_error

#######딥러닝 라이브러리##########
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Reshape, GRU, RNN
from tensorflow.keras.layers import Dense, Embedding, Bidirectional, LSTM, Concatenate, Dropout
import keras.backend as K 
from tensorflow.keras import Input, Model
from keras.layers import Dense, Activation, Flatten
from keras.callbacks import EarlyStopping
tf.keras.backend.set_floatx('float64')

In [None]:
# Train one bidirectional GRU per building and predict that building's
# unlabelled (test) hours. Each sample is a window of WINDOW consecutive hourly
# feature rows; its target is the power reading of the hour right after the window.
WINDOW = 5

all_pred = []
for i in range(1,61):
  print("---------------------------------------------")
  print(i)
  All_1 = All[All['num'] == i].reset_index(drop = True)

  X = All_1.drop(['power(kWh)','num'], axis = 1)
  y = All_1['power(kWh)']

  # Build the windows as NumPy arrays rather than nested Python lists, which
  # Keras may interpret as a list of separate model inputs.
  features = X.to_numpy(dtype='float64')
  data_X = np.stack([features[j:j + WINDOW] for j in range(len(features) - WINDOW)])
  data_y = y.to_numpy(dtype='float64')[WINDOW:]

  # The labelled rows come first in `All`, so the first (labelled rows - WINDOW)
  # windows have a known target; the remaining windows are the ones to predict.
  # This replaces the hard-coded split index 2035.
  # NOTE(review): assumes 'power(kWh)' has no missing values among the
  # training rows - confirm.
  n_train = int(y.notna().sum()) - WINDOW

  train_X, train_y = data_X[:n_train], data_y[:n_train]
  test_X = data_X[n_train:]

  K.clear_session()
  # Input shape follows the data instead of the literal (5, 12).
  visible = Input(shape=(WINDOW, features.shape[1]))
  forward_layer = GRU(10, activation='linear', return_sequences=True)
  backward_layer = GRU(10, activation='linear', return_sequences=True,
                        go_backwards=True)
  hidden1 = Bidirectional(forward_layer, backward_layer=backward_layer)(visible)
  hidden2 = Flatten()(hidden1)
  output = Dense(1,activation='linear')(hidden2)

  model = Model(inputs=visible, outputs=output)

  model.compile(loss='mse', optimizer='adam')

  # Stop when the training loss has not improved for 3 epochs.
  early_stop = EarlyStopping(monitor='loss', patience=3, verbose=1)

  model.fit(train_X, train_y, epochs=100, batch_size=8, verbose=1, callbacks=[early_stop])

  pred = model.predict(test_X)

  # Keep one single-element list per predicted hour; the following cell
  # flattens this list of lists.
  all_pred.extend(pred.tolist())

---------------------------------------------
1
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 25: early stopping
---------------------------------------------
2
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 17: early stopping
---------------------------------------------
3
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
E

In [None]:
# Expected length: 10080 (one prediction per test row).
len(all_pred)

10080

In [None]:
# Flatten the list of single-element lists into a flat list of predictions.
# A comprehension is linear, whereas sum(list_of_lists, []) copies the
# accumulated list on every step.
predict_1 = [value for row in all_pred for value in row]

In [None]:
# Write the predictions into the 'answer' column of the submission frame
# (lengths must match, otherwise pandas raises).
sub['answer'] = predict_1

In [None]:
# Save the BI-GRU submission to Google Drive without the index column.
sub.to_csv('/content/gdrive/MyDrive/개인 공부/dacon/dacon_energy/GRU_baseline_submission2.csv', index=False)

# BI-RNN

In [None]:
#Library Imports
import numpy as np
import pandas as pd
import math
import os
import matplotlib.pyplot as plt

from sklearn.metrics import mean_absolute_error

#######딥러닝 라이브러리##########
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Reshape, GRU, RNN
from tensorflow.keras.layers import Dense, Embedding, Bidirectional, LSTM, Concatenate, Dropout, SimpleRNN
import keras.backend as K 
from tensorflow.keras import Input, Model
from keras.layers import Dense, Activation, Flatten
from keras.callbacks import EarlyStopping
tf.keras.backend.set_floatx('float64')

In [None]:
# Train one bidirectional SimpleRNN per building and predict that building's
# unlabelled (test) hours. Each sample is a window of WINDOW consecutive hourly
# feature rows; its target is the power reading of the hour right after the window.
WINDOW = 5

all_pred = []
for i in range(1,61):
  print("---------------------------------------------")
  print(i)
  All_1 = All[All['num'] == i].reset_index(drop = True)

  X = All_1.drop(['power(kWh)','num'], axis = 1)
  y = All_1['power(kWh)']

  # Build the windows as NumPy arrays rather than nested Python lists, which
  # Keras may interpret as a list of separate model inputs.
  features = X.to_numpy(dtype='float64')
  data_X = np.stack([features[j:j + WINDOW] for j in range(len(features) - WINDOW)])
  data_y = y.to_numpy(dtype='float64')[WINDOW:]

  # The labelled rows come first in `All`, so the first (labelled rows - WINDOW)
  # windows have a known target; the remaining windows are the ones to predict.
  # This replaces the hard-coded split index 2035.
  # NOTE(review): assumes 'power(kWh)' has no missing values among the
  # training rows - confirm.
  n_train = int(y.notna().sum()) - WINDOW

  train_X, train_y = data_X[:n_train], data_y[:n_train]
  test_X = data_X[n_train:]

  K.clear_session()
  # Input shape follows the data instead of the literal (5, 12).
  visible = Input(shape=(WINDOW, features.shape[1]))
  forward_layer = SimpleRNN(10, activation='linear', return_sequences=True)
  backward_layer = SimpleRNN(10, activation='linear', return_sequences=True,
                        go_backwards=True)
  hidden1 = Bidirectional(forward_layer, backward_layer=backward_layer)(visible)
  hidden2 = Flatten()(hidden1)
  output = Dense(1,activation='linear')(hidden2)

  model = Model(inputs=visible, outputs=output)

  model.compile(loss='mse', optimizer='adam')

  # Stop when the training loss has not improved for 3 epochs.
  early_stop = EarlyStopping(monitor='loss', patience=3, verbose=1)

  model.fit(train_X, train_y, epochs=100, batch_size=8, verbose=1, callbacks=[early_stop])

  pred = model.predict(test_X)

  # Keep one single-element list per predicted hour; the following cell
  # flattens this list of lists.
  all_pred.extend(pred.tolist())

---------------------------------------------
1
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 26: early stopping
---------------------------------------------
2
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 24: early stopping
---------------------------------------------
3
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
E

In [None]:
# Expected length: 10080 (one prediction per test row).
len(all_pred)

10080

In [None]:
# Flatten the list of single-element lists into a flat list of predictions.
# A comprehension is linear, whereas sum(list_of_lists, []) copies the
# accumulated list on every step.
predict_1 = [value for row in all_pred for value in row]

In [None]:
# Write the predictions into the 'answer' column of the submission frame
# (lengths must match, otherwise pandas raises).
sub['answer'] = predict_1

In [None]:
# Save the BI-RNN submission to Google Drive without the index column.
sub.to_csv('/content/gdrive/MyDrive/개인 공부/dacon/dacon_energy/RNN_baseline_submission2.csv', index=False)