In [None]:
import glob
import math
import random
from math import sqrt

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
import tensorflow as tf
from keras import backend as K
from keras.callbacks import ModelCheckpoint, EarlyStopping
from keras.layers import Dense
from keras.layers import LSTM
from keras.models import Sequential
from pandas import read_csv
from sklearn import metrics
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.graphics.tsaplots import plot_pacf
from statsmodels.tsa.arima_model import ARIMA

In [None]:
#Basic

def basic_avg(df):
    """Baseline forecast: predict the training mean for every test step.

    The first 33600 rows of ``df`` (by position) form the training set and all
    remaining rows form the test window. The prediction is the mean of the
    training ``'energy(kWh/hh)'`` values, repeated once per test row.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'tstp'`` column parseable with the format
        ``'%d/%m/%Y %H:%M'`` and an ``'energy(kWh/hh)'`` column.
        Side effect: ``df['tstp']`` is converted to datetime in place.

    Returns
    -------
    list
        The training mean repeated ``len(df) - 33600`` times (1488 values for
        a 35088-row frame).
    """
    target = 'energy(kWh/hh)'
    train_size = 33600

    # This baseline never reads 'tstp'; the in-place conversion is kept only
    # because callers may rely on the column being datetime afterwards.
    df['tstp'] = pd.to_datetime(df['tstp'], format='%d/%m/%Y %H:%M')

    # Separate into train and test (positional split)
    print('separating into train and test')
    train = df.iloc[:train_size]
    test = df.iloc[train_size:]
    y_test = test[target].to_list()

    # The "model" is simply the mean of the training target
    print('Predicting')
    mean = train[target].mean()
    # Size the forecast from the test window instead of a hard-coded 1488 so
    # that y_pred and y_test always have the same length.
    y_pred = [mean] * len(y_test)

    # Metrics
    # NOTE(review): both metrics are computed but neither is returned nor
    # printed; `rmsle` is expected to be defined elsewhere in the notebook.
    print('Calculating metric')
    rmsle_i = rmsle(y_test, y_pred)
    rmse_i = sqrt(mean_squared_error(y_test, y_pred))

    return y_pred

In [None]:
#Weekly

def average_week(df):
    """Forecast each test step with the training mean of its weekly time slot.

    Rows 0..33647 (by position) are the training set and rows 33648..34991 are
    the test window. A "weekly profile" is built as the mean of
    ``'energy(kWh/hh)'`` per (day of week, hour, minute) over the training
    rows; every test row is then predicted with the profile value of its own
    (day of week, hour, minute).

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``'tstp'`` column parseable with the format
        ``'%d/%m/%Y %H:%M'`` and an ``'energy(kWh/hh)'`` column.
        Side effects: ``df['tstp']`` is converted to datetime in place and a
        ``'year'`` column is added to ``df``.

    Returns
    -------
    list
        One prediction per test row, in test-row order. A test slot that never
        occurs in the training rows yields NaN.
    """
    target = 'energy(kWh/hh)'
    slot_keys = ['day_of_week', 'hour', 'minute']

    # Caller-visible mutations kept for backward compatibility.
    df['tstp'] = pd.to_datetime(df['tstp'], format='%d/%m/%Y %H:%M')
    df['year'] = pd.DatetimeIndex(df['tstp']).year

    # Separate into train and test (positional split)
    print('Separating into train and test')
    train = df.iloc[:33648]
    test = df.iloc[33648:34992]
    y_test = test[target].to_list()

    def _time_slots(frame):
        # Build the grouping keys in a fresh frame rather than writing new
        # columns into a slice of df (which triggers SettingWithCopyWarning).
        stamps = pd.DatetimeIndex(frame['tstp'])
        return pd.DataFrame({
            'day_of_week': stamps.dayofweek,
            'hour': stamps.hour,
            'minute': stamps.minute,
        })

    # Predict
    print('Predicting')
    train_slots = _time_slots(train)
    train_slots[target] = train[target].to_numpy()
    # Aggregate only the target column: averaging every column of the frame
    # fails on non-numeric columns with recent pandas versions.
    weekly_profile = train_slots.groupby(slot_keys, as_index=False)[target].mean()

    # Look up each test timestamp's slot instead of tiling the profile four
    # times; tiling is only aligned when the test window starts on Monday
    # 00:00 and every slot is present. A left merge keeps test-row order.
    y_pred = (
        _time_slots(test)
        .merge(weekly_profile, on=slot_keys, how='left')[target]
        .to_list()
    )

    # Metrics
    # NOTE(review): both metrics are computed but neither is returned nor
    # printed; `rmsle` is expected to be defined elsewhere in the notebook.
    print('Calculating metrics')
    rmsle_i = rmsle(y_test, y_pred)
    rmse_i = sqrt(mean_squared_error(y_test, y_pred))

    return y_pred

In [None]:
#ARIMA

def arima_loop(dataset):
    """Fit an ARIMA(2, 0, 0) model on the training rows and forecast the rest.

    The first 33600 rows of ``dataset`` (by position) are the training set and
    all remaining rows are the test window. The model is fitted on the
    ``'energy(kWh/hh)'`` column and asked to predict positions
    ``33600 .. len(dataset) - 1``.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain an ``'energy(kWh/hh)'`` column.

    Returns
    -------
    list
        The out-of-sample predictions, one per test row (1488 values for a
        35088-row frame).
    """
    target = 'energy(kWh/hh)'
    train_size = 33600

    # split into train and test sets
    print('Separating into train and test')
    train = dataset.iloc[:train_size]
    test = dataset.iloc[train_size:]
    y_test = test[target].to_list()

    # Create Model
    # Fit on the same column that is used for evaluation, so extra columns in
    # `dataset` cannot end up in the endogenous series.
    print('Creating model')
    arima_model = ARIMA(train[target], order=(2, 0, 0)).fit()

    # Predict and save results
    print('Prediciting and saving results')
    # Derive the forecast range from the data instead of hard-coding
    # 33600..35087, so the prediction always covers exactly the test rows.
    forecast_start = len(train)
    forecast_end = len(dataset) - 1
    ARIMA_prediction = arima_model.predict(start=forecast_start, end=forecast_end, typ='levels')
    y_pred = ARIMA_prediction.to_list()

    # Metrics
    # NOTE(review): both metrics are computed but neither is returned nor
    # printed; `rmsle` is expected to be defined elsewhere in the notebook.
    print('Calculating metrics')
    rmsle_i = rmsle(y_test, y_pred)
    rmse_i = sqrt(mean_squared_error(y_test, y_pred))

    return y_pred

In [None]:
# LSTM Univariate
# convert an array of values into a dataset matrix
def create_dataset(dataset, look_back=1):
    """Turn a series into (window, next value) pairs for supervised learning.

    Sample ``i`` of X holds ``dataset[i : i + look_back, 0]`` and sample ``i``
    of Y holds ``dataset[i + look_back, 0]``, i.e. the value that immediately
    follows the window.

    Parameters
    ----------
    dataset : 2-D array
        Must support ``dataset[i:j, 0]`` indexing; only column 0 is read.
    look_back : int, default 1
        Number of consecutive values in each input window.

    Returns
    -------
    tuple of numpy.ndarray
        ``(X, Y)`` with X of shape ``(n_samples, look_back)`` and Y of shape
        ``(n_samples,)``, where ``n_samples = max(len(dataset) - look_back, 0)``.
    """
    # Every index i with i + look_back < len(dataset) is a valid sample; the
    # previous bound of len(dataset) - look_back - 1 dropped the last one.
    n_samples = max(len(dataset) - look_back, 0)
    dataX = [dataset[i:i + look_back, 0] for i in range(n_samples)]
    dataY = [dataset[i + look_back, 0] for i in range(n_samples)]
    # The explicit reshape keeps X two-dimensional even when there are zero
    # samples, so callers can still read X.shape[1].
    return np.array(dataX).reshape(n_samples, look_back), np.array(dataY)

def lstm_univariate_loop(dataset):
    """Train a one-layer LSTM on a single series and forecast the test window.

    The series is min-max scaled to [0, 1], split positionally at row 33600,
    and turned into windows of ``look_back = 3`` previous values with
    ``create_dataset``. A ``Sequential`` model (LSTM with 200 units followed by
    ``Dense(1)``) is fitted with early stopping on the validation loss, the
    training curves are plotted, and the test windows are predicted.

    Parameters
    ----------
    dataset : pandas.DataFrame or array-like
        The series to model, convertible to float32. A 1-D input is treated as
        a single column. The scaler and the inverse transform assume exactly
        one column.

    Returns
    -------
    numpy.ndarray
        Test predictions on the original (un-scaled) scale, shape
        ``(n_test_windows, 1)``.
    """
    # Seed every RNG in play, not just Python's `random`.
    random.seed(1)
    np.random.seed(1)
    # Guarded because older TensorFlow releases may not expose this function.
    set_tf_seed = getattr(tf.random, 'set_seed', None)
    if set_tf_seed is not None:
        set_tf_seed(1)

    # Use the argument that was passed in; the previous code read a global
    # `df` and silently ignored `dataset`.
    values = np.asarray(dataset, dtype='float32')
    if values.ndim == 1:
        values = values.reshape(-1, 1)

    # normalize the dataset
    # NOTE(review): the scaler is fitted on the full series, test rows
    # included; fit on the training rows only for a leakage-free evaluation.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaled = scaler.fit_transform(values)

    # split into train and test sets
    print('Spliting into train and test')
    train_size = 33600
    train, test = scaled[:train_size, :], scaled[train_size:, :]

    # reshape into X=t and Y=t+1
    look_back = 3
    trainX, trainY = create_dataset(train, look_back)
    testX, testY = create_dataset(test, look_back)
    # reshape input to be [samples, time steps, features]
    trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

    # create and fit the LSTM network
    print('Creating and fitting the LSTM network')

    model = Sequential()
    model.add(LSTM(200, input_shape=(1, look_back)))
    model.add(Dense(1))
    model.compile(loss=rmsle_loss, optimizer='adam', metrics=[rmsle_loss])
    # simple early stopping
    es = EarlyStopping(monitor='val_loss', mode='min', verbose=1)
    history = model.fit(trainX, trainY, epochs=15, batch_size=70, validation_split=0.04,
                        verbose=1, shuffle=False, callbacks=[es])
    plt.plot(history.history['loss'])
    plt.plot(history.history['val_loss'])
    plt.ylabel('loss')
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()

    # make predictions (train-set predictions were computed before but never
    # used, so only the test windows are predicted)
    print('Making predictions')
    testPredict = model.predict(testX)

    # invert predictions and targets back to the original scale
    testPredict = scaler.inverse_transform(testPredict)
    testY = scaler.inverse_transform(testY.reshape(-1, 1)).ravel()

    # Metrics
    # Compare like with like: both sides are on the original scale and 1-D.
    # The previous code scored un-scaled predictions against scaled targets.
    # NOTE(review): both metrics are computed but neither is returned nor
    # printed; `rmsle` is expected to be defined elsewhere in the notebook.
    print('Calculating metrics')
    rmsle_i = rmsle(testY, testPredict.ravel())
    rmse_i = sqrt(mean_squared_error(testY, testPredict.ravel()))

    return testPredict

In [None]:
#LSTM Multivariate

In [None]:
#Household

