# **シンプルなブレンディングを試してみる**

### **必要な関数・ライブラリ**

In [44]:
# データ加工・処理・分析モジュール
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
import os
import random

%matplotlib inline

import time

In [2]:
def set_time(dataframe, col_name):
    '''
    to_datetimeを使うための前処理
    '''
    dataframe[col_name] = dataframe[col_name].map(lambda x : transform_time(x))
    return dataframe

In [3]:
def transform_time(x):
    '''
    set_time内で使う関数
    to_datetimeで24時をサポートしないので00に変更する処理
    '''
    str_x = str(x)
    res = ''
    if str(x)[8:10] == '24':
        res = str_x[0:4] + '-' + str_x[4:6] + '-' + str_x[6:8] + ' 00:'+str_x[10:12] 
    else:
        res = str_x[0:4] + '-' + str_x[4:6] + '-' + str_x[6:8] + ' '+ str_x[8:10] +':'+str_x[10:12]
    return res

In [37]:
def drop_nan(X, Y):
    '''
    正解データがnanであるデータの組を削除
    '''
    mask = np.isnan(Y)
    X = X[~mask]
    Y = Y[~mask]
    return X, Y

### **浮島発電所について**

In [5]:
target_place = 1

In [11]:
# 発電量データ
output_30 = pd.read_csv('data/processed_data/out_put.tsv', delimiter = '\t')
output_30['datetime'] = output_30['datetime'].map(lambda x : pd.to_datetime(x))
output_30 = output_30[['datetime', 'SOLA0'+str(target_place)]]

In [60]:
# いろんなモデルの予測値の集合(学習用)
train_prediction = pd.read_csv("data/predicted_data/predict_train_SOLA0"+str(target_place)+".tsv", delimiter="\t")
train_prediction['datetime'] = train_prediction['datetime'].map(lambda x : pd.to_datetime(x))
train_prediction.head()

Unnamed: 0,datetime,targetplace_1_kwh_yokohama_one_layer_1000,targetplace_1_kwh_yokohama_nagoya_one_layer_1000,targetplace_1_kwh_yokohama_nagoya_one_layer_2000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_one_layer_2000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_one_layer_3000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_one_layer_4000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_one_layer_5000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_osaka_one_layer_2000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_osaka_one_layer_3000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_osaka_one_layer_4000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_2000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_3000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_4000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_5000
0,2012-01-02 00:00:00,3.105392,0.584295,0.366131,0.507623,0.526732,0.435298,0.42589,0.282983,0.494249,0.431781,0.870295,0.619958,0.655971,0.210968
1,2012-01-02 00:30:00,3.13196,0.618539,0.508954,0.36,0.351915,0.446544,0.478751,0.468173,0.552132,0.605183,0.762069,0.471676,0.808759,0.426646
2,2012-01-02 01:00:00,3.085109,0.429691,0.251736,0.580159,0.575437,0.674362,0.445777,0.457296,0.758317,0.560383,0.969751,0.901004,0.940726,0.510415
3,2012-01-02 01:30:00,2.128919,0.458611,0.525926,0.247995,0.601296,0.574019,0.248612,0.487184,0.715846,0.616522,1.034894,0.884828,0.795637,0.541487
4,2012-01-02 02:00:00,3.173604,0.618225,0.500653,0.613033,0.419641,0.688143,0.491308,0.454776,0.709843,0.561034,0.821815,0.886088,0.67307,0.180117


In [61]:
# いろんなモデルの予測値の集合(ほんちゃん)
test_prediction = pd.read_csv("data/predicted_data/predict_SOLA0"+str(target_place)+".tsv", delimiter="\t")
test_prediction['datetime'] = test_prediction['datetime'].map(lambda x : pd.to_datetime(x))
test_prediction.head()

Unnamed: 0,datetime,targetplace_1_kwh_yokohama_one_layer_1000,targetplace_1_kwh_yokohama_nagoya_one_layer_1000,targetplace_1_kwh_yokohama_nagoya_one_layer_2000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_one_layer_2000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_one_layer_3000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_one_layer_4000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_one_layer_5000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_osaka_one_layer_2000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_osaka_one_layer_3000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_osaka_one_layer_4000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_2000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_3000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_4000,targetplace_1_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_5000
0,2016-01-01 00:00:00,3.102139,0.616103,0.290069,0.483874,0.535722,0.472097,0.452454,0.30998,0.406369,0.410427,0.90789,0.560007,0.632275,0.242667
1,2016-01-01 00:30:00,3.135696,0.662252,0.430663,0.33326,0.358028,0.503266,0.519345,0.517439,0.451588,0.571756,0.77449,0.432968,0.772846,0.484909
2,2016-01-01 01:00:00,3.135677,0.4542,0.193504,0.556736,0.578685,0.746731,0.460922,0.50444,0.648091,0.522546,0.997072,0.832968,0.917066,0.56973
3,2016-01-01 01:30:00,2.133753,0.497315,0.434302,0.23198,0.61062,0.647444,0.254992,0.546575,0.615026,0.575646,1.073786,0.809754,0.735941,0.605107
4,2016-01-01 02:00:00,3.182955,0.658555,0.413559,0.576666,0.424812,0.780053,0.539888,0.496472,0.623379,0.535032,0.853515,0.793363,0.632229,0.201132


In [51]:
t_s_datetime = train_prediction['datetime'][0]
t_e_datetime = train_prediction['datetime'][len(train_prediction)-1]
train_y_s_idx = output_30[output_30['datetime'] == t_s_datetime].index[0]
train_y_e_idx = output_30[output_30['datetime'] == t_e_datetime].index[0]

X = np.array(train_prediction.drop(["datetime"], axis=1))
Y = output_30["SOLA0"+str(target_place)][train_y_s_idx:train_y_e_idx+1]
X_test = np.array(test_prediction.drop(["datetime"], axis=1))

In [41]:
X, Y = drop_nan(X, Y)

In [42]:
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=int((X.shape[0] * 0.3)))

In [48]:
# ランダムフォレストリグレッサー
rf = RandomForestRegressor()
rf.fit(X_train, Y_train)
Y_train_pred = rf.predict(X_train)
Y_val_pred = rf.predict(X_val)
print("train_mae : ", np.abs(Y_train_pred - Y_train).mean())
print("val_mae : ", np.abs(Y_val_pred - Y_val).mean())

train_mae :  39.7222256197
test_mae :  99.7048183029


In [52]:
Y_test_pred = rf.predict(X_test)

In [62]:
s_idx = test_prediction[test_prediction['datetime'] == pd.to_datetime('2016/01/01 00:00')].index[0]
e_idx = test_prediction[test_prediction['datetime'] == pd.to_datetime('2017/3/31 23:30')].index[0]
predict_data = pd.DataFrame({"datetime":test_prediction['datetime'][s_idx:e_idx+1]})
predict_data.index = np.arange(len(predict_data))

In [64]:
# 2016/01/01 00:00 ~ 2017/3/31 23:50の予測データを書き出す
predict_data["blending_"+str(target_place)] = Y_test_pred

### **扇島発電所について**

In [95]:
target_place = 2

In [96]:
# 発電量データ
output_30 = pd.read_csv('data/processed_data/out_put.tsv', delimiter = '\t')
output_30['datetime'] = output_30['datetime'].map(lambda x : pd.to_datetime(x))
output_30 = output_30[['datetime', 'SOLA0'+str(target_place)]]

In [97]:
# いろんなモデルの予測値の集合(学習用)
train_prediction = pd.read_csv("data/predicted_data/predict_train_SOLA0"+str(target_place)+".tsv", delimiter="\t")
train_prediction['datetime'] = train_prediction['datetime'].map(lambda x : pd.to_datetime(x))
train_prediction.head()

Unnamed: 0,datetime,targetplace_2_kwh_yokohama_one_layer_1000,targetplace_2_kwh_yokohama_nagoya_one_layer_1000,targetplace_2_kwh_yokohama_nagoya_one_layer_2000,targetplace_2_kwh_yokohama_nagoya_one_layer_3000,targetplace_2_kwh_yokohama_nagoya_one_layer_4000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_one_layer_2000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_one_layer_3000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_one_layer_4000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_one_layer_5000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_one_layer_2000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_one_layer_3000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_one_layer_4000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_2000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_3000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_4000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_5000
0,2012-01-02 00:00:00,4.312512,1.350127,1.846533,2.056616,4.138687,0.709933,1.152182,0.965924,0.330774,1.427932,0.61941,1.041507,0.477801,1.367784,1.62929,0.628233
1,2012-01-02 00:30:00,4.220214,1.468282,1.934622,3.296697,4.504558,2.015995,1.389139,0.944138,0.257641,1.450741,0.945623,1.116966,0.748659,1.420782,1.43725,0.704795
2,2012-01-02 01:00:00,3.452633,0.717685,1.14563,3.536475,3.663627,1.709559,1.401737,0.758929,0.691827,1.271705,0.878071,1.077916,1.013752,0.992279,1.143461,0.766413
3,2012-01-02 01:30:00,4.541975,1.357778,1.724712,3.772834,2.691621,0.950184,1.323959,0.796854,0.47731,1.422236,0.902087,1.067585,0.537741,1.337654,0.668815,1.040341
4,2012-01-02 02:00:00,4.84034,1.140907,1.766148,3.062171,3.79795,1.980675,1.345151,0.958233,0.623899,1.105628,0.538818,0.855381,1.123988,1.366375,1.305309,1.153907


In [98]:
# いろんなモデルの予測値の集合(ほんちゃん)
test_prediction = pd.read_csv("data/predicted_data/predict_SOLA0"+str(target_place)+".tsv", delimiter="\t")
test_prediction['datetime'] = test_prediction['datetime'].map(lambda x : pd.to_datetime(x))
test_prediction.head()

Unnamed: 0,datetime,targetplace_2_kwh_yokohama_one_layer_1000,targetplace_2_kwh_yokohama_nagoya_one_layer_1000,targetplace_2_kwh_yokohama_nagoya_one_layer_2000,targetplace_2_kwh_yokohama_nagoya_one_layer_3000,targetplace_2_kwh_yokohama_nagoya_one_layer_4000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_one_layer_2000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_one_layer_3000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_one_layer_4000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_one_layer_5000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_one_layer_2000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_one_layer_4000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_2000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_3000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_4000,targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_5000
0,2016-01-01 00:00:00,3.160053,1.493951,1.410255,2.013175,2.504576,0.605936,1.068827,0.864294,0.319165,1.541367,1.13697,0.487081,1.389242,1.408321,0.623524
1,2016-01-01 00:30:00,3.126663,1.604819,1.469316,3.272986,2.374758,1.629168,1.245419,0.883028,0.253979,1.503825,1.217671,0.761375,1.388189,1.286681,0.701457
2,2016-01-01 01:00:00,2.770603,0.79734,0.902198,3.369431,2.327004,1.334273,1.202851,0.680596,0.68422,1.395249,1.189887,0.999203,0.998005,0.982205,0.773481
3,2016-01-01 01:30:00,3.617577,1.482394,1.339579,3.653701,1.720477,0.812151,1.157099,0.725437,0.453027,1.501916,1.172815,0.544221,1.325145,0.606077,1.019055
4,2016-01-01 02:00:00,3.764788,1.281863,1.294433,2.87315,2.300791,1.658397,1.16131,0.887921,0.621787,1.193376,0.926496,1.135794,1.362948,1.170286,1.119554


In [99]:
train_prediction.columns

Index(['datetime', 'targetplace_2_kwh_yokohama_one_layer_1000',
       'targetplace_2_kwh_yokohama_nagoya_one_layer_1000',
       'targetplace_2_kwh_yokohama_nagoya_one_layer_2000',
       'targetplace_2_kwh_yokohama_nagoya_one_layer_3000',
       'targetplace_2_kwh_yokohama_nagoya_one_layer_4000',
       'targetplace_2_kwh_yokohama_nagoya_hamamatsu_one_layer_2000',
       'targetplace_2_kwh_yokohama_nagoya_hamamatsu_one_layer_3000',
       'targetplace_2_kwh_yokohama_nagoya_hamamatsu_one_layer_4000',
       'targetplace_2_kwh_yokohama_nagoya_hamamatsu_one_layer_5000',
       'targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_one_layer_2000',
       'targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_one_layer_3000',
       'targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_one_layer_4000',
       'targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_2000',
       'targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_3000',
       'targetplace_2_kwh_yokohama_

In [100]:
test_prediction.columns

Index(['datetime', 'targetplace_2_kwh_yokohama_one_layer_1000',
       'targetplace_2_kwh_yokohama_nagoya_one_layer_1000',
       'targetplace_2_kwh_yokohama_nagoya_one_layer_2000',
       'targetplace_2_kwh_yokohama_nagoya_one_layer_3000',
       'targetplace_2_kwh_yokohama_nagoya_one_layer_4000',
       'targetplace_2_kwh_yokohama_nagoya_hamamatsu_one_layer_2000',
       'targetplace_2_kwh_yokohama_nagoya_hamamatsu_one_layer_3000',
       'targetplace_2_kwh_yokohama_nagoya_hamamatsu_one_layer_4000',
       'targetplace_2_kwh_yokohama_nagoya_hamamatsu_one_layer_5000',
       'targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_one_layer_2000',
       'targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_one_layer_4000',
       'targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_2000',
       'targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_3000',
       'targetplace_2_kwh_yokohama_nagoya_hamamatsu_osaka_hannou_one_layer_4000',
       'targetplace_2_kwh_yo

In [91]:
t_s_datetime = train_prediction['datetime'][0]
t_e_datetime = train_prediction['datetime'][len(train_prediction)-1]
train_y_s_idx = output_30[output_30['datetime'] == t_s_datetime].index[0]
train_y_e_idx = output_30[output_30['datetime'] == t_e_datetime].index[0]

X = np.array(train_prediction.drop(["datetime"], axis=1))
Y = output_30["SOLA0"+str(target_place)][train_y_s_idx:train_y_e_idx+1]
X_test = np.array(test_prediction.drop(["datetime"], axis=1))

In [92]:
X, Y = drop_nan(X, Y)

In [93]:
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=int((X.shape[0] * 0.3)))

In [94]:
# ランダムフォレストリグレッサー
rf = RandomForestRegressor()
rf.fit(X_train, Y_train)
Y_train_pred = rf.predict(X_train)
Y_val_pred = rf.predict(X_val)
print("train_mae : ", np.abs(Y_train_pred - Y_train).mean())
print("val_mae : ", np.abs(Y_val_pred - Y_val).mean())

train_mae :  48.2200689935
val_mae :  118.279277146


In [74]:
Y_test_pred = rf.predict(X_test)

ValueError: Number of features of the model must match the input. Model n_features is 16 and input n_features is 15 

In [62]:
s_idx = test_prediction[test_prediction['datetime'] == pd.to_datetime('2016/01/01 00:00')].index[0]
e_idx = test_prediction[test_prediction['datetime'] == pd.to_datetime('2017/3/31 23:30')].index[0]
predict_data = pd.DataFrame({"datetime":test_prediction['datetime'][s_idx:e_idx+1]})
predict_data.index = np.arange(len(predict_data))

In [64]:
# 2016/01/01 00:00 ~ 2017/3/31 23:50の予測データを書き出す
predict_data["blending_"+str(target_place)] = Y_test_pred

### **米倉山発電所について**

In [75]:
target_place = 3

In [76]:
# 発電量データ
output_30 = pd.read_csv('data/processed_data/out_put.tsv', delimiter = '\t')
output_30['datetime'] = output_30['datetime'].map(lambda x : pd.to_datetime(x))
output_30 = output_30[['datetime', 'SOLA0'+str(target_place)]]

In [77]:
# いろんなモデルの予測値の集合(学習用)
train_prediction = pd.read_csv("data/predicted_data/predict_train_SOLA0"+str(target_place)+".tsv", delimiter="\t")
train_prediction['datetime'] = train_prediction['datetime'].map(lambda x : pd.to_datetime(x))
train_prediction.head()

Unnamed: 0,datetime,targetplace_3_kwh_kouhu_one_layer_1000,targetplace_3_kwh_kouhu_nagoya_one_layer_1000,targetplace_3_kwh_kouhu_nagoya_one_layer_2000,targetplace_3_kwh_kouhu_nagoya_one_layer_3000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_one_layer_2000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_one_layer_3000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_one_layer_4000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_one_layer_5000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_osaka_one_layer_3000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_osaka_one_layer_4000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_osaka_one_layer_5000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_osaka_hannou_one_layer_2000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_osaka_hannou_one_layer_3000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_osaka_hannou_one_layer_4000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_osaka_hannou_one_layer_5000
0,2012-01-02 00:00:00,2.308487,0.559252,1.127626,1.683913,0.642713,1.078388,0.618694,0.337617,0.800982,0.906205,0.696676,0.71265,0.607263,1.389687,0.421978
1,2012-01-02 00:30:00,4.94489,0.55134,1.00129,0.795333,0.660754,0.877083,0.9168,0.934619,0.922812,0.897853,0.798927,0.767619,0.666228,0.525429,0.701459
2,2012-01-02 01:00:00,2.790131,0.676289,1.0264,1.528497,0.480383,1.064998,1.232166,0.958891,0.693139,0.854016,0.52334,0.7276,0.656375,1.331395,0.821308
3,2012-01-02 01:30:00,3.450849,0.548826,1.068072,1.470731,0.446712,1.11794,1.058437,0.857397,0.886272,0.894049,0.79723,0.61614,0.563783,1.362975,0.395647
4,2012-01-02 02:00:00,3.413586,0.385758,1.034849,1.722281,0.361472,1.099302,1.225836,0.932935,0.450891,0.929036,0.745994,0.699741,1.056772,0.970795,0.812383


In [78]:
# いろんなモデルの予測値の集合(ほんちゃん)
test_prediction = pd.read_csv("data/predicted_data/predict_SOLA0"+str(target_place)+".tsv", delimiter="\t")
test_prediction['datetime'] = test_prediction['datetime'].map(lambda x : pd.to_datetime(x))
test_prediction.head()

Unnamed: 0,datetime,targetplace_3_kwh_kouhu_one_layer_1000,targetplace_3_kwh_kouhu_nagoya_one_layer_1000,targetplace_3_kwh_kouhu_nagoya_one_layer_2000,targetplace_3_kwh_kouhu_nagoya_one_layer_3000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_one_layer_2000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_one_layer_3000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_one_layer_4000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_one_layer_5000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_osaka_one_layer_3000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_osaka_one_layer_4000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_osaka_one_layer_5000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_osaka_hannou_one_layer_2000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_osaka_hannou_one_layer_3000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_osaka_hannou_one_layer_4000,targetplace_3_kwh_kouhu_nagoya_hamamatsu_osaka_hannou_one_layer_5000
0,2016-01-01 00:00:00,2.71169,0.501146,1.003754,1.598509,0.684491,1.096679,0.815955,0.340564,0.991975,0.868828,0.825216,0.761345,0.6251,1.534002,0.42791
1,2016-01-01 00:30:00,6.042439,0.479045,0.894748,0.788822,0.742885,0.871565,1.220302,0.902435,1.087348,0.863249,0.90538,0.833394,0.687537,0.577708,0.712118
2,2016-01-01 01:00:00,3.3435,0.601717,0.9262,1.48235,0.511361,1.079379,1.709084,0.933322,0.866358,0.829707,0.589989,0.78658,0.679824,1.504481,0.834292
3,2016-01-01 01:30:00,4.078022,0.491593,0.944126,1.43635,0.500788,1.129413,1.406878,0.82807,1.054116,0.865883,0.917643,0.66113,0.583295,1.572956,0.401139
4,2016-01-01 02:00:00,4.038239,0.313213,0.92977,1.670874,0.395283,1.074867,1.669039,0.901087,0.514698,0.89551,0.866784,0.757641,1.091079,1.067387,0.826559


In [79]:
t_s_datetime = train_prediction['datetime'][0]
t_e_datetime = train_prediction['datetime'][len(train_prediction)-1]
train_y_s_idx = output_30[output_30['datetime'] == t_s_datetime].index[0]
train_y_e_idx = output_30[output_30['datetime'] == t_e_datetime].index[0]

X = np.array(train_prediction.drop(["datetime"], axis=1))
Y = output_30["SOLA0"+str(target_place)][train_y_s_idx:train_y_e_idx+1]
X_test = np.array(test_prediction.drop(["datetime"], axis=1))

In [80]:
X, Y = drop_nan(X, Y)

In [81]:
X_train, X_val, Y_train, Y_val = train_test_split(X, Y, test_size=int((X.shape[0] * 0.3)))

In [82]:
# ランダムフォレストリグレッサー
rf = RandomForestRegressor()
rf.fit(X_train, Y_train)
Y_train_pred = rf.predict(X_train)
Y_val_pred = rf.predict(X_val)
print("train_mae : ", np.abs(Y_train_pred - Y_train).mean())
print("val_mae : ", np.abs(Y_val_pred - Y_val).mean())

train_mae :  47.6942978896
val_mae :  118.310448232


In [83]:
Y_test_pred = rf.predict(X_test)

In [85]:
# 2016/01/01 00:00 ~ 2017/3/31 23:50の予測データを書き出す
predict_data["blending_"+str(target_place)] = Y_test_pred

In [86]:
predict_data

Unnamed: 0,datetime,blending_3
0,2016-01-01 00:00:00,0.0
1,2016-01-01 00:30:00,0.0
2,2016-01-01 01:00:00,0.0
3,2016-01-01 01:30:00,0.0
4,2016-01-01 02:00:00,0.0
5,2016-01-01 02:30:00,0.0
6,2016-01-01 03:00:00,0.0
7,2016-01-01 03:30:00,0.0
8,2016-01-01 04:00:00,0.0
9,2016-01-01 04:30:00,0.0
