# **各時点の気象情報で発電量が予測できるかの検証**

各時点の気象情報の予測を行なってからその予測に基づいて書く時点の発電量を予測するというモデルを組みたい

モデルを組む前に書く時点の気象情報の予測力を検証する

### **必要な関数・モジュール**

In [3]:
# データ加工・処理・分析モジュール
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

%matplotlib inline

In [5]:
def set_time(dataframe, col_name):
    '''
    to_datetimeを使うための前処理
    '''
    dataframe[col_name] = dataframe[col_name].map(lambda x : transform_time(x))
    return dataframe

In [6]:
def transform_time(x):
    '''
    set_time内で使う関数
    to_datetimeで24時をサポートしないので00に変更する処理
    '''
    str_x = str(x)
    res = ''
    if str(x)[8:10] == '24':
        res = str_x[0:4] + '-' + str_x[4:6] + '-' + str_x[6:8] + ' 00:'+str_x[10:12] 
    else:
        res = str_x[0:4] + '-' + str_x[4:6] + '-' + str_x[6:8] + ' '+ str_x[8:10] +':'+str_x[10:12]
    return res

### **データの準備**

In [7]:
# 10分単位の発電量のデータを取ってくる
output_data = pd.read_csv('data/raw_data/train_kwh.tsv', delimiter = '\t')

# datetimeの行をpd.Timestampのインスタンスに変更
output_data = set_time(output_data, 'datetime')
output_data['datetime'] = output_data['datetime'].map(lambda x : pd.to_datetime(x))
    
output_data.head()

Unnamed: 0,datetime,SOLA01,SOLA02,SOLA03
0,2012-01-01 00:10:00,0,0.0,
1,2012-01-01 00:20:00,0,0.0,
2,2012-01-01 00:30:00,0,0.0,
3,2012-01-01 00:40:00,0,0.0,
4,2012-01-01 00:50:00,0,0.0,


In [9]:
# 予測する発電所番号を決める
target_place = 2

In [11]:
# アメダスデータの読み込み

if target_place == 1 or target_place == 2:
    # 横浜アメダスのデータを使って予測する, amd_46106
    # 各amdidはamd_masterに記載されている
    amd_data = pd.read_csv('data/raw_data/amd_46106.tsv', delimiter = '\t')
    amd_data = set_time(amd_data, 'datetime')
    amd_data['datetime'] = amd_data['datetime'].map(lambda x : pd.to_datetime(x))
elif target_place == 3:
    # 甲府アメダスのデータを使って予測する, amd_49142
    # 各amdidはamd_masterに記載されている
    amd_data = pd.read_csv('data/raw_data/amd_49142.tsv', delimiter = '\t')
    amd_data = set_time(amd_data, 'datetime')
    amd_data['datetime'] = amd_data['datetime'].map(lambda x : pd.to_datetime(x))
else:
    raise ValueError("invalid input target_place_num")
    
amd_data.head()

Unnamed: 0,datetime,pr,f_pr,max_ws,f_max_ws,ave_wv,f_ave_wv,ave_ws,f_ave_ws,max_tp,f_max_tp,min_tp,f_min_tp,sl,f_sl,sd,f_sd,dsd,f_dsd
0,2012-01-01 00:10:00,0,0,48.0,0,36.0,0,32.0,0,57.0,0,56.0,0,0.0,2,0.0,0,0.0,0
1,2012-01-01 00:20:00,0,0,48.0,0,36.0,0,35.0,0,56.0,0,55.0,0,0.0,2,0.0,0,0.0,0
2,2012-01-01 00:30:00,0,0,49.0,0,36.0,0,32.0,0,56.0,0,55.0,0,0.0,2,0.0,0,0.0,0
3,2012-01-01 00:40:00,0,0,49.0,0,35.0,0,35.0,0,56.0,0,54.0,0,0.0,2,0.0,0,0.0,0
4,2012-01-01 00:50:00,0,0,61.0,0,35.0,0,34.0,0,55.0,0,53.0,0,0.0,2,0.0,0,0.0,0


In [59]:
input_data = amd_data[['datetime', 'pr', 'max_ws', 'ave_wv', 'ave_ws', 'max_tp', 'min_tp', 'sl', 'sd', 'dsd']]

In [67]:
# SettingWithCopyWarningで予期せぬ結果を得ないようにassignを使う
input_data.assign(
    hour = input_data['datetime'].map(lambda x : int(x.hour))
)

input_data.assign(
    minute = input_data['datetime'].map(lambda x : x)
)

input_data.head()

Unnamed: 0,datetime,pr,max_ws,ave_wv,ave_ws,max_tp,min_tp,sl,sd,dsd
0,2012-01-01 00:10:00,0,48.0,36.0,32.0,57.0,56.0,0.0,0.0,0.0
1,2012-01-01 00:20:00,0,48.0,36.0,35.0,56.0,55.0,0.0,0.0,0.0
2,2012-01-01 00:30:00,0,49.0,36.0,32.0,56.0,55.0,0.0,0.0,0.0
3,2012-01-01 00:40:00,0,49.0,35.0,35.0,56.0,54.0,0.0,0.0,0.0
4,2012-01-01 00:50:00,0,61.0,35.0,34.0,55.0,53.0,0.0,0.0,0.0


In [63]:
input_data.assign(
    hour_cos = input_data['hour'].map(lambda x: np.cos(np.pi * x / 24)),
    hour_sin = input_data['hour'].map(lambda x: np.sin(np.pi * x / 24))
#    min_cos = input_data['minute'].map(lambda x: np.cos(np.pi * x / 60)),
 #   min_sin = input_data['minute'].map(lambda x: np.sin(np.pi * x / 60))
)

KeyError: 'hour'

In [47]:
input_data['hour'] = amd_data['datetime'].map(lambda x : int(x.hour))
# input_data['minute'] = amd_data['datetime'].map(lambda x : int(x.minute))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':


In [25]:
input_data['hour_cos'] = input_data['hour'].map(lambda x: np.cos(np.pi * x / 24))
input_data['hour_sin'] = input_data['hour'].map(lambda x: np.sin(np.pi * x / 24))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  if __name__ == '__main__':
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app


In [33]:
input_data.head()

Unnamed: 0,datetime,pr,max_ws,ave_wv,ave_ws,max_tp,min_tp,sl,sd,dsd,hour,hour_cos,hour_sin,minute
0,2012-01-01 00:10:00,0,48.0,36.0,32.0,57.0,56.0,0.0,0.0,0.0,0,1.0,0.0,10
1,2012-01-01 00:20:00,0,48.0,36.0,35.0,56.0,55.0,0.0,0.0,0.0,0,1.0,0.0,20
2,2012-01-01 00:30:00,0,49.0,36.0,32.0,56.0,55.0,0.0,0.0,0.0,0,1.0,0.0,30
3,2012-01-01 00:40:00,0,49.0,35.0,35.0,56.0,54.0,0.0,0.0,0.0,0,1.0,0.0,40
4,2012-01-01 00:50:00,0,61.0,35.0,34.0,55.0,53.0,0.0,0.0,0.0,0,1.0,0.0,50
