In [1]:
from math import sqrt
from numpy import split
from numpy import array
from pandas import read_csv
from sklearn.metrics import mean_squared_error
from matplotlib import pyplot
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import LSTM
from keras.layers import RepeatVector , BatchNormalization ,MaxPooling3D
from keras.layers import TimeDistributed
from keras.layers import ConvLSTM2D
from keras.utils.vis_utils import plot_model
from tensorflow import TensorShape, Variable, int32

In [2]:
# split a univariate dataset into train/test sets
def split_dataset(data):
    # split into standard weeks
    train, test = data[1:-328], data[-328:-6]
    # restructure into windows of weekly data
    train = array(split(train, len(train)/7))
    test = array(split(test, len(test)/7))
    return train, test

In [3]:
def evaluate_forecasts(actual, predicted):
    scores = list()
    # calculate an RMSE score for each day
    for i in range(actual.shape[1]):
        # calculate mse
        mse = mean_squared_error(actual[:, i], predicted[:, i])
        # calculate rmse
        rmse = sqrt(mse)
        # store
        scores.append(rmse)
        # calculate overall RMSE
    s = 0
    for row in range(actual.shape[0]):
        for col in range(actual.shape[1]):
            s += (actual[row, col] - predicted[row, col])**2
    score = sqrt(s / (actual.shape[0] * actual.shape[1]))
    return score, scores

In [4]:
# summarize scores
def summarize_scores(name, score, scores):
    s_scores = ', '.join(['%.1f' % s for s in scores])
    print('%s: [%.3f] %s' % (name, score, s_scores))

In [5]:
# convert history into inputs and outputs
def to_supervised(train, n_input, n_out=7):
    # flatten data
    data = train.reshape((train.shape[0]*train.shape[1], train.shape[2]))
    X, y = list(), list()
    in_start = 0
    # step over the entire history one time step at a time
    for _ in range(len(data)):
        # define the end of the input sequence
        in_end = in_start + n_input
        out_end = in_end + n_out
        # ensure we have enough data for this instance
        if out_end < len(data):
            # x_input = data[in_start:in_end, 0]
            # x_input = x_input.reshape((len(x_input), 1))
            X.append(data[in_start:in_end, :])
            y.append(data[in_end:out_end, 0])
        # move along one time step
        in_start += 1
    return array(X), array(y)

In [9]:
# train the model
def build_model(train, n_steps, n_length, n_input):
    # prepare data
    train_x, train_y = to_supervised(train, n_input)
    # define parameters
    verbose, epochs, batch_size = 0, 20, 16
    n_features, n_outputs = train_x.shape[4], train_y.shape[1]
    # reshape into subsequences [samples, timesteps, rows, cols, channels]
    train_x = train_x.reshape((train_x.shape[0], n_steps, 1, n_length, n_features))
    # reshape output into [samples, timesteps, features]
    train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))
    
    
    # define model 1 
    model = Sequential()
    model.add(ConvLSTM2D(filters=64, kernel_size=(1,3), activation='relu',
    input_shape=(n_steps, 1, n_length, n_features)))
    model.add(Flatten())
    model.add(RepeatVector(n_outputs))
    model.add(LSTM(200, activation='relu', return_sequences=True))
    model.add(TimeDistributed(Dense(100, activation='relu')))
    model.add(TimeDistributed(Dense(1)))
    model.compile(loss='mse', optimizer='adam')

    # define model 2 
    # model = Sequential()
    # model.add(ConvLSTM2D(filters=64, kernel_size=(1,3), activation='tanh',input_shape=(n_steps, 1, n_length, n_features)))
    # model.add(BatchNormalization())
    # model.add(MaxPooling3D(pool_size=(1,2,2)))
    # # model.add(ConvLSTM2D(filters=32, kernel_size=(1,2), activation='tanh'))
    # # model.add(BatchNormalization())
    # model.add(Flatten())
    # model.add(RepeatVector(n_outputs))
    # model.add(LSTM(200, activation='relu', return_sequences=True))
    # model.add(TimeDistributed(Dense(100, activation='relu')))
    # model.add(TimeDistributed(Dense(1)))
    # model.compile(loss='mse', optimizer='adam')
    
    


    
    plot_model(model, to_file='model_ConvLSTM_Multivatiable_plot.png', show_shapes=True, show_layer_names=True)

    # fit network
    model.fit(train_x, train_y, epochs=epochs, batch_size=batch_size, verbose=verbose)
    return model

In [11]:
# make a forecast
def forecast(model, history, n_steps, n_length, n_input):
    # flatten data
    data = array(history)
    data = data.reshape((data.shape[0]*data.shape[1], data.shape[2]))
    # retrieve last observations for input data
    input_x = data[-n_input:, 0]
    # reshape into [samples, timesteps, rows, cols, channels]
    input_x = input_x.reshape (  Variable(TensorShape(None),dtype=int32) , Variable(TensorShape(None),dtype=int32) ,1, 7, 8) #(None, None, 1, 7, 8) instead of (1, n_steps, 1, n_length, 1)  -- (  Variable(TensorShape(None),dtype=int32)  , Variable(TensorShape(None),dtype=int32)   , 1, n_length, 1)
    # forecast the next week
    yhat = model.predict(input_x, verbose=0)
    # we only want the vector forecast
    yhat = yhat[0]
    return yhat

In [12]:
# evaluate a single model
def evaluate_model(train, test, n_steps, n_length, n_input):
    # fit model
    model = build_model(train, n_steps, n_length, n_input)
    model.summary()
    # history is a list of weekly data
    history = [x for x in train]
    # walk-forward validation over each week
    predictions = list()
    for i in range(len(test)):
        # predict the week
        yhat_sequence = forecast(model, history, n_steps, n_length, n_input)
        # store the predictions
        predictions.append(yhat_sequence)
        # get real observation and add to history for predicting the next week
        history.append(test[i, :])
    # evaluate predictions days for each week
    predictions = array(predictions)
    score, scores = evaluate_forecasts(test[:, :, 0], predictions)
    return score, scores

In [13]:
# load the new file
dataset = read_csv('household_power_consumption_days.csv', header=0,
infer_datetime_format=True, parse_dates=['datetime'], index_col=['datetime'])
# split into train and test
train, test = split_dataset(dataset.values)
# define the number of subsequences and the length of subsequences
n_steps, n_length = 2, 7
# define the total days to use as input
n_input = n_length * n_steps
score, scores = evaluate_model(train, test, n_steps, n_length, n_input)
# summarize scores
summarize_scores('lstm', score, scores)
# plot scores
days = ['sun', 'mon', 'tue', 'wed', 'thr', 'fri', 'sat']
pyplot.plot(days, scores, marker='o', label='lstm')
pyplot.show()

IndexError: tuple index out of range

In [14]:
train_x, train_y = to_supervised(train, n_input)
# define parameters
verbose, epochs, batch_size = 0, 20, 16
n_features, n_outputs = train_x.shape[2], train_y.shape[1]
# reshape into subsequences [samples, timesteps, rows, cols, channels]
train_x = train_x.reshape((train_x.shape[0], n_steps, 1, n_length, n_features))
# reshape output into [samples, timesteps, features]
train_y = train_y.reshape((train_y.shape[0], train_y.shape[1], 1))

In [44]:
train[0]

array([[3.39046000e+03, 2.26006000e+02, 3.45725320e+05, 1.43986000e+04,
        2.03300000e+03, 4.18700000e+03, 1.33410000e+04, 3.69466667e+04],
       [2.20382600e+03, 1.61792000e+02, 3.47373640e+05, 9.24720000e+03,
        1.06300000e+03, 2.62100000e+03, 1.40180000e+04, 1.90284333e+04],
       [1.66619400e+03, 1.50942000e+02, 3.48479010e+05, 7.09400000e+03,
        8.39000000e+02, 7.60200000e+03, 6.19700000e+03, 1.31319000e+04],
       [2.22574800e+03, 1.60998000e+02, 3.48923610e+05, 9.31300000e+03,
        0.00000000e+00, 2.64800000e+03, 1.40630000e+04, 2.03848000e+04],
       [1.72328800e+03, 1.44434000e+02, 3.47096410e+05, 7.26640000e+03,
        1.76500000e+03, 2.69200000e+03, 1.04560000e+04, 1.38084667e+04],
       [2.34133800e+03, 1.86906000e+02, 3.47305750e+05, 9.89700000e+03,
        3.15100000e+03, 3.50000000e+02, 1.11310000e+04, 2.43903000e+04],
       [4.77338600e+03, 2.21470000e+02, 3.45795950e+05, 2.02004000e+04,
        2.66900000e+03, 4.25000000e+02, 1.47260000e+04, 6.

In [15]:
test.shape

(46, 7, 8)

In [78]:
train_x[0]

array([[[[3.39046000e+03, 2.26006000e+02, 3.45725320e+05,
          1.43986000e+04, 2.03300000e+03, 4.18700000e+03,
          1.33410000e+04, 3.69466667e+04],
         [2.20382600e+03, 1.61792000e+02, 3.47373640e+05,
          9.24720000e+03, 1.06300000e+03, 2.62100000e+03,
          1.40180000e+04, 1.90284333e+04],
         [1.66619400e+03, 1.50942000e+02, 3.48479010e+05,
          7.09400000e+03, 8.39000000e+02, 7.60200000e+03,
          6.19700000e+03, 1.31319000e+04],
         [2.22574800e+03, 1.60998000e+02, 3.48923610e+05,
          9.31300000e+03, 0.00000000e+00, 2.64800000e+03,
          1.40630000e+04, 2.03848000e+04],
         [1.72328800e+03, 1.44434000e+02, 3.47096410e+05,
          7.26640000e+03, 1.76500000e+03, 2.69200000e+03,
          1.04560000e+04, 1.38084667e+04],
         [2.34133800e+03, 1.86906000e+02, 3.47305750e+05,
          9.89700000e+03, 3.15100000e+03, 3.50000000e+02,
          1.11310000e+04, 2.43903000e+04],
         [4.77338600e+03, 2.21470000e+02, 3.45

In [67]:
# test[:,:,:].shape
test[6,:,:]

array([[1.68829400e+03, 1.24660000e+02, 3.50126410e+05, 6.95780000e+03,
        1.22300000e+03, 3.20000000e+02, 1.32080000e+04, 1.33872334e+04],
       [2.09713200e+03, 1.31270000e+02, 3.50563360e+05, 8.70100000e+03,
        3.46800000e+03, 3.32000000e+02, 1.60650000e+04, 1.50872000e+04],
       [1.79775600e+03, 1.19606000e+02, 3.51005420e+05, 7.41360000e+03,
        1.94500000e+03, 3.17000000e+02, 1.28200000e+04, 1.48806000e+04],
       [2.05456000e+03, 1.19560000e+02, 3.48979420e+05, 8.55540000e+03,
        2.29400000e+03, 6.63600000e+03, 1.01010000e+04, 1.52116667e+04],
       [1.76115600e+03, 1.40000000e+02, 3.50767000e+05, 7.25500000e+03,
        9.65000000e+02, 3.85000000e+02, 1.48840000e+04, 1.31186000e+04],
       [1.95357200e+03, 1.35150000e+02, 3.48058660e+05, 8.14200000e+03,
        2.12400000e+03, 3.97000000e+02, 1.45430000e+04, 1.54955333e+04],
       [1.98372800e+03, 1.51248000e+02, 3.48176110e+05, 8.27360000e+03,
        3.74800000e+03, 3.82000000e+02, 1.44700000e+04, 1.

In [102]:
import matplotlib.pyplot as plt 
import numpy as np
for i in range(10) :
    plt.plot(,test[i,:,0])


AttributeError: 'numpy.ndarray' object has no attribute 'index'

In [107]:
plt.plot(np.arange(0,46),test[:,0,0])

TypeError: '(slice(None, None, None), 0, 0)' is an invalid key

In [103]:
train, test = dataset[1:-328], dataset[-328:-6]

In [106]:
len(dataset)

1442

In [94]:
train = array(split(train, len(train)/7))
test = array(split(test, len(test)/7))

In [95]:
test.shape

(46, 7, 8)

In [101]:
test[1,:,:]

array([[2.50846600e+03, 2.27170000e+02, 3.51130490e+05, 1.04060000e+04,
        3.56400000e+03, 2.03200000e+03, 1.41080000e+04, 2.21037667e+04],
       [1.51868400e+03, 1.56206000e+02, 3.54339520e+05, 6.24260000e+03,
        1.25100000e+03, 3.40000000e+02, 1.02790000e+04, 1.34414000e+04],
       [1.99579600e+03, 2.02538000e+02, 3.52730520e+05, 8.26500000e+03,
        1.80300000e+03, 3.64300000e+03, 1.25090000e+04, 1.53082667e+04],
       [1.99579600e+03, 2.02538000e+02, 3.52730520e+05, 8.26500000e+03,
        1.80300000e+03, 3.64300000e+03, 1.25090000e+04, 1.53082667e+04],
       [2.11622400e+03, 2.39598000e+02, 3.52565090e+05, 8.80460000e+03,
        2.44200000e+03, 3.72700000e+03, 1.24900000e+04, 1.66114000e+04],
       [2.19676000e+03, 2.18290000e+02, 3.50965100e+05, 9.12240000e+03,
        1.95700000e+03, 2.95200000e+03, 1.35460000e+04, 1.81576666e+04],
       [2.15011200e+03, 2.42518000e+02, 3.49985140e+05, 8.95640000e+03,
        2.60500000e+03, 2.41500000e+03, 1.36520000e+04, 1.