In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from math import sqrt

# model itself
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from sklearn.metrics import mean_absolute_error, mean_squared_error

Using TensorFlow backend.


In [0]:
# Load 'pvdaq_2012_2014_hourly.csv', parsing 'Date-Time' and using it as the row index.
df = pd.read_csv(
    'pvdaq_2012_2014_hourly.csv',
    header=0,
    parse_dates=['Date-Time'],
    index_col=['Date-Time'],
    infer_datetime_format=True,
)

In [0]:
# Keep 'Date-Time' as a parsed datetime index: the feature-selection cell below
# reads df.index.hour and df.index.month, which a plain re-read (default integer
# index) would break on a top-to-bottom run.
df = pd.read_csv('pvdaq_2012_2014_hourly.csv', header=0, parse_dates=['Date-Time'], index_col=['Date-Time'])

In [3]:
# List the dtype of every column in the loaded frame.
df.dtypes

SiteID                 float64
ac_current             float64
ac_power               float64
ac_voltage             float64
ambient_temp           float64
dc_current             float64
dc_power               float64
dc_voltage             float64
inverter_error_code    float64
inverter_temp          float64
module_temp            float64
poa_irradiance         float64
power_factor           float64
relative_humidity      float64
wind_direction         float64
wind_speed             float64
dtype: object

In [0]:
# Feature columns used as model inputs.
cols = [
    'ambient_temp',
    'inverter_temp',
    'module_temp',
    'poa_irradiance',
    'relative_humidity',
    'wind_direction',
    'wind_speed',
]
# Calendar features read from the frame's index: hour and month of each row.
time_indexes = [df.index.hour, df.index.month]
# Column to forecast (DC power output).
target = ['dc_power']

In [0]:
# array stacking
def create_sequence(df, cols, target, time_indexes=None):
  """Stack feature columns, calendar features and the target into one 2-D array.

  Column order of the result: one column per name in ``cols``, then one column
  per entry of ``time_indexes``, then the target as the last column.

  Parameters
  ----------
  df : pandas.DataFrame
      Source frame. When ``time_indexes`` is omitted, its index must be a
      ``DatetimeIndex``.
  cols : list of str
      Names of the feature columns; may be empty.
  target : str or single-element list of str
      Target column. Negative target values are replaced by 0 in the output;
      the frame itself is left unchanged.
  time_indexes : sequence of array-like, optional
      Extra per-row feature vectors, each of length ``len(df)``. Defaults to
      ``[df.index.hour, df.index.month]`` computed from the frame passed in.

  Returns
  -------
  numpy.ndarray
      Array of shape ``(len(df), len(cols) + len(time_indexes) + 1)``.

  Raises
  ------
  TypeError
      If ``time_indexes`` is omitted and ``df.index`` is not a DatetimeIndex.
  """
  n_rows = len(df)

  if time_indexes is None:
    # Derive the calendar features from the frame actually passed in rather
    # than from a notebook-level variable built from some other frame.
    if not isinstance(df.index, pd.DatetimeIndex):
      raise TypeError(
          "create_sequence needs a DatetimeIndex on df (or explicit "
          "time_indexes) to build the hour/month features")
    time_indexes = [df.index.hour, df.index.month]

  seqs = [df[col].values.reshape((n_rows, 1)) for col in cols]
  seqs.extend(np.asarray(index).reshape((n_rows, 1)) for index in time_indexes)

  # np.array() copies, so the clamp below never writes into the frame's own
  # (possibly read-only) buffer.
  seq = np.array(df[target].values).reshape((n_rows, 1))
  # Negative readings are clamped to zero; NaN compares False and is kept.
  seq[seq < 0] = 0
  seqs.append(seq)

  return np.hstack(seqs)

# Build the combined feature/target matrix used by the windowing helpers below.
dataset = create_sequence(df, cols, target)

In [54]:
# Shape of the stacked array returned by create_sequence: (rows, columns).
dataset.shape

(26304, 10)

In [0]:
# single step multivariate sequence
def split_sequence(sequence, n_steps):
  """Cut a 2-D array into sliding input windows with a one-step-ahead label.

  Each window holds ``n_steps`` consecutive rows (all columns); its label is
  the last-column value of the row immediately after the window.

  Returns a tuple ``(X, y)`` of numpy arrays built from the windows and labels.
  """
  last_row = len(sequence) - 1
  # A start position is usable only while its label row (start + n_steps) exists.
  starts = [start for start in range(len(sequence)) if start + n_steps <= last_row]
  windows = [sequence[start:start + n_steps, :] for start in starts]
  labels = [sequence[start + n_steps, -1] for start in starts]
  return np.array(windows), np.array(labels)

In [14]:
# Window the dataset into 4-step inputs, each with a single next-step label.
X, y = split_sequence(sequence=dataset, n_steps=4)
print(X.shape, y.shape)

(26300, 4, 10) (26300,)


In [36]:
# Print the first row of df alongside element 11 of y.
print(df.head(1), y[11])

            SiteID  ac_current  ac_power  ac_voltage  ambient_temp  \
Date-Time                                                            
2012-01-01  1277.0         0.0    -150.0       285.5      4.437502   

            dc_current  dc_power  dc_voltage  inverter_error_code  \
Date-Time                                                           
2012-01-01        -1.0    -100.0        0.25                  0.0   

            inverter_temp  module_temp  poa_irradiance  power_factor  \
Date-Time                                                              
2012-01-01           16.5     0.208335             0.0           0.0   

            relative_humidity  wind_direction  wind_speed  
Date-Time                                                  
2012-01-01             44.628        306.7035    0.565841   [23400. 22800. 18025. 11475.  3850.   325.]


In [0]:
def split_sequence_multi(sequence, n_steps, n_steps_out):
  """Cut a 2-D array into sliding input windows with multi-step labels.

  Each window holds ``n_steps`` consecutive rows (all columns); its label is
  the last-column values of the ``n_steps_out`` rows that follow the window.

  Returns a tuple ``(X, y)`` of numpy arrays built from the windows and labels.
  """
  total = len(sequence)
  span = n_steps + n_steps_out
  # Keep only the starts whose input window plus forecast horizon fit in the data.
  starts = [start for start in range(total) if start + span <= total]
  windows = [sequence[start:start + n_steps, :] for start in starts]
  horizons = [sequence[start + n_steps:start + span, -1] for start in starts]
  return np.array(windows), np.array(horizons)
    

In [7]:
# Window the dataset into 12-step inputs, each labelled with the next 6 target values.
X, y = split_sequence_multi(sequence=dataset, n_steps=12, n_steps_out=6)
print(X.shape, y.shape)

(26287, 12, 10) (26287, 6)


In [8]:
# First time step of the first input window, and the first target row.
X[0][0], y[0]

(array([4.43750176e+00, 1.65000000e+01, 2.08334750e-01, 0.00000000e+00,
        4.46280000e+01, 3.06703500e+02, 5.65840880e-01, 0.00000000e+00,
        1.00000000e+00, 0.00000000e+00]),
 array([22600., 18175., 13400.,  3400.,   125.,     0.]))

In [9]:
# n_steps: number of past time steps in each input sample.
# n_steps_out: number of future time steps the model has to forecast.
n_steps, n_steps_out = 24, 6
X, y = split_sequence_multi(dataset, n_steps, n_steps_out)
# Number of features per time step, read from the windows built just above
# rather than from whatever X an earlier cell left in the kernel.
n_features = X.shape[2]

# Hold out the last 20 windows for evaluation; everything before them is training data.
train_X, train_y = X[:-20,:], y[:-20,:]
test_X, test_y = X[-20:,:], y[-20:,:]

# Two stacked LSTM layers followed by a dense layer with one output per forecast step.
model = Sequential()
model.add(LSTM(300, activation='relu', return_sequences=True, input_shape=(n_steps, n_features)))
model.add(LSTM(200, activation='relu'))
model.add(Dense(n_steps_out))
model.compile(optimizer='adam', loss='mse')

model.fit(train_X, train_y, epochs = 20)

predictions = model.predict(test_X)



Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
# Shapes of the windowed inputs X and targets y.
X.shape, y.shape

((26275, 24, 10), (26275, 6))

In [13]:
# Error of the fitted model on the windows it was trained on.
predictions1 = model.predict(train_X)
mae = mean_absolute_error(train_y, predictions1)
mse = mean_squared_error(train_y, predictions1)
rmse = sqrt(mse)
print(mse, rmse, mae)

14457035.265728503 3802.240821637749 2192.9482805505477


In [11]:
# Aggregate error on the held-out windows.
mae = mean_absolute_error(test_y, predictions)
mse = mean_squared_error(test_y, predictions)
rmse = sqrt(mse)
print(mse, rmse, mae)
# Per-window listing, one line per forecast step:
# absolute error, actual value, predicted value (each truncated to int).
for i, actual_row in enumerate(test_y):
  print("prediction" + str(i))
  predicted_row = predictions[i]
  for j in range(n_steps_out):
    actual, predicted = actual_row[j], predicted_row[j]
    print(int(abs(actual - predicted)), int(actual), int(predicted))


33705334.64578794 5805.6295649815565 3910.296704999606
prediction0
66 0 66
80 0 80
96 0 96
147 0 147
148 0 148
26 0 26
prediction1
238 0 -238
89 0 -89
79 0 -79
56 0 -56
155 0 -155
182 0 -182
prediction2
496 0 -496
14 0 -14
141 0 141
118 0 118
95 0 -95
182 0 -182
prediction3
340 0 -340
89 0 89
97 0 97
9 0 -9
268 0 -268
244 0 -244
prediction4
761 0 -761
397 0 397
735 0 735
590 0 590
47 0 47
296 150 -146
prediction5
734 0 734
1256 0 1256
1107 0 1107
798 0 798
157 150 307
1473 1700 226
prediction6
1462 0 1462
1799 0 1799
1655 0 1655
1658 150 1808
451 1700 2151
293 2850 3143
prediction7
598 0 598
1854 0 1854
3258 150 3408
3814 1700 5514
4944 2850 7794
6526 3800 10326
prediction8
2116 0 2116
3853 150 4003
4437 1700 6137
5753 2850 8603
6931 3800 10731
7135 5575 12710
prediction9
4027 150 4177
5366 1700 7066
6914 2850 9764
8506 3800 12306
8117 5575 13692
8992 5400 14392
prediction10
6436 1700 8136
8512 2850 11362
9677 3800 13477
9385 5575 14960
9424 5400 14824
9879 3975 13854
prediction11
1097