### **RNNを実装してみる**

## **必要な関数・ライブラリ等の準備**

In [31]:
# データ加工・処理・分析モジュール
import numpy as np
import pandas as pd

import tensorflow as tf

In [32]:
def set_time(dataframe, col_name):
    '''
    Pre-process a timestamp column so that it can be fed to pd.to_datetime.

    Every value of ``dataframe[col_name]`` is passed through transform_time
    and the column is overwritten with the results. The frame is modified
    in place and the same object is returned.
    '''
    converted = dataframe[col_name].map(transform_time)
    dataframe[col_name] = converted
    return dataframe

In [33]:
def transform_time(x):
    '''
    Convert a compact ``YYYYMMDDhhmm`` timestamp to ``'YYYY-MM-DD hh:mm'``.

    Helper used inside set_time. pd.to_datetime does not support hour 24,
    so a value whose hour field is "24" is rewritten as hour "00" of the
    *following* calendar day (24:00 on one date is the same instant as
    00:00 on the next date). All other values are only re-punctuated.

    Parameters
    ----------
    x : any
        Value whose ``str()`` form holds the timestamp digits,
        e.g. ``201201010120``.

    Returns
    -------
    str
        The timestamp formatted as ``'YYYY-MM-DD hh:mm'``.

    Raises
    ------
    ValueError
        If the hour field is "24" and the first eight characters are not a
        valid ``YYYYMMDD`` calendar date.
    '''
    # Local import: keeps the block self-contained without touching the
    # file-level imports.
    from datetime import datetime, timedelta

    digits = str(x)
    hour = digits[8:10]
    minute = digits[10:12]

    if hour == '24':
        # Roll over to the next day; simply replacing "24" with "00" would
        # move the reading 24 hours back in time.
        next_day = datetime.strptime(digits[0:8], '%Y%m%d') + timedelta(days=1)
        return next_day.strftime('%Y-%m-%d') + ' 00:' + minute

    return digits[0:4] + '-' + digits[4:6] + '-' + digits[6:8] + ' ' + hour + ':' + minute

## **データの準備**

In [52]:
# Load the power-output data.
# Read the already-processed file; if it is not there, build it from the raw data.
try:
    output_data = pd.read_csv('data/processed_data/out_put.tsv', delimiter = '\t')

except (IOError, OSError):
    # Only a missing/unreadable cache triggers a rebuild; any other error
    # (including KeyboardInterrupt) is allowed to propagate.
    #
    # If train_kwh is opened in Excel or similar, datetime may be rewritten in
    # exponent notation. Parsing then fails, so make sure the values still look
    # like 201201010120.
    output_data = pd.read_csv('data/raw_data/train_kwh.tsv', delimiter = '\t')

    # Turn the datetime column into pd.Timestamp values.
    output_data = set_time(output_data, 'datetime')
    output_data['datetime'] = pd.to_datetime(output_data['datetime'])

    # Sum the readings into 30-minute bins.
    # pd.Grouper is used because pd.TimeGrouper has been removed from pandas.
    output_data = output_data.set_index('datetime').groupby(pd.Grouper(freq='1800s', closed='left')).sum()

    output_data.to_csv('data/processed_data/out_put.tsv', sep='\t')

    # NOTE(review): on this branch 'datetime' is the index of output_data,
    # whereas the cached file above is read back with 'datetime' as a regular
    # column -- confirm that later cells cope with both layouts.

output_data.head()

Unnamed: 0,datetime,SOLA01,SOLA02,SOLA03
0,2012-01-01 00:00:00,0,0.0,
1,2012-01-01 00:30:00,0,0.0,
2,2012-01-01 01:00:00,0,0.0,
3,2012-01-01 01:30:00,0,0.0,
4,2012-01-01 02:00:00,0,0.0,


In [54]:
# Load the AMeDAS weather data.
# Read the already-processed file; if it is not there, build it from the raw data.
try:
    amd_data = pd.read_csv('data/processed_data/amd_data.tsv', delimiter='\t')

except (IOError, OSError):
    # Only a missing/unreadable cache triggers a rebuild; any other error
    # (including KeyboardInterrupt) is allowed to propagate.
    #
    # amd_46061 holds the data of the Hiyoshi AMeDAS station, the one closest
    # to the Ukishima power plant.
    # Each amdid is listed in amd_master.
    amd_data = pd.read_csv('data/raw_data/amd_46061.tsv', delimiter = '\t')

    # Turn the datetime column into pd.Timestamp values.
    amd_data = set_time(amd_data, 'datetime')
    amd_data['datetime'] = pd.to_datetime(amd_data['datetime'])

    # Average the readings over 30-minute bins.
    # pd.Grouper is used because pd.TimeGrouper has been removed from pandas.
    amd_data = amd_data.set_index('datetime').groupby(pd.Grouper(freq='1800s', closed='left')).mean()

    amd_data.to_csv('data/processed_data/amd_data.tsv', sep='\t')

    # NOTE(review): on this branch 'datetime' is the index of amd_data,
    # whereas the cached file above is read back with 'datetime' as a regular
    # column -- confirm that later cells cope with both layouts.

amd_data.head()

Unnamed: 0,datetime,pr,f_pr,max_ws,f_max_ws,ave_wv,f_ave_wv,ave_ws,f_ave_ws,max_tp,f_max_tp,min_tp,f_min_tp,sl,f_sl,sd,f_sd,dsd,f_dsd
0,2012-01-01 00:00:00,0.0,0.0,,56.0,,56.0,,56.0,,56.0,,56.0,,56.0,,56.0,,56.0
1,2012-01-01 00:30:00,0.0,0.0,,56.0,,56.0,,56.0,,56.0,,56.0,,56.0,,56.0,,56.0
2,2012-01-01 01:00:00,0.0,0.0,,56.0,,56.0,,56.0,,56.0,,56.0,,56.0,,56.0,,56.0
3,2012-01-01 01:30:00,0.0,0.0,,56.0,,56.0,,56.0,,56.0,,56.0,,56.0,,56.0,,56.0
4,2012-01-01 02:00:00,0.0,0.0,,56.0,,56.0,,56.0,,56.0,,56.0,,56.0,,56.0,,56.0
