In [None]:
# Libraries
import numpy as np
import pandas as pd
import math
import time
from datetime import datetime

from scipy.signal import savgol_filter

import xgboost as xgb
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.ensemble import GradientBoostingRegressor

from tensorflow import keras
from tensorflow.keras import layers
from keras.models import Sequential
from keras.layers.core import Dense
from keras.optimizers import RMSprop
from keras import backend as K
from keras.engine.topology import Layer
from sklearn.cluster import KMeans
from keras.layers import LSTM


from keras.initializers import RandomUniform, Initializer, Constant


In [None]:
# Mount Drive - Google Drive
# Colab-only cell: mounts Google Drive at /content/drive and copies the raw
# meter-reading CSV to 'data.csv', the path the data-loading cell reads.
# NOTE(review): the copy source is a relative path, so this presumably relies
# on the working directory being /content - confirm before running elsewhere.
from google.colab import drive
drive.mount('/content/drive')
!cp 'drive/My Drive/C32_15Feb2015_28Mar2015.csv' 'data.csv'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Structure data
# Configuration consumed when the dataReady pipeline is instantiated below.

# CSV file holding the raw meter readings.
dataPath = 'data.csv'
# Rows whose day offset from the last recorded day is below this value are
# held out as 'Validation'; all other rows are 'Feed' (training) data.
validationDays = 7

class dataReady():
  '''Load one site's meter readings from CSV and derive the model features.

  The stages chain backwards: ``dataExport`` calls ``dataClean``, which calls
  ``dataShifted`` -> ``movingAvarage`` -> ``meanSections`` -> ``addSections``.
  Every stage rewrites ``self.data``, and ``smothedData`` drops the
  ``dayMinute`` column it reads, so the chain can run only once per instance;
  build a new ``dataReady`` to recompute.
  '''

  # Target followed by the engineered features, in export column order.
  _exportColumns = ['meter_reading',
                    'weekDaySection',
                    'daySection',
                    'mad',
                    'mean',
                    'EMV1',
                    'EMV2',
                    'EMV3',
                    'EMV4',
                    'MV1',
                    'MV2',
                    'MV3',
                    'MV4',
                    ]

  def __init__(self,dataPath,houseNum,validationDays):
    '''Read the CSV and tag each row with calendar fields and a split label.

    Args:
      dataPath: path of the CSV; its first row is skipped and the three
        columns are read as ``date``, ``time`` and ``meter_reading``.
      houseNum: identifier stored unchanged in the ``siteId`` column.
      validationDays: rows whose ``dayNum`` (days before the last recorded
        day) is below this value get status 'Validation', the rest 'Feed'.

    Raises:
      Whatever ``pd.to_datetime`` raises for an unparseable timestamp
      (``errors='raise'``).
    '''
    data = pd.read_csv(dataPath, names=['date', 'time', 'meter_reading'], skiprows=1)

    stamps = data['date'] + " " + data['time'].astype(str)
    parseOptions = {'errors': 'raise'}
    # pandas >= 2.0 infers one consistent format by default and deprecates the
    # flag; older versions need it to get that same behaviour.
    if int(pd.__version__.split('.')[0]) < 2:
      parseOptions['infer_datetime_format'] = True
    data['timestamp'] = pd.to_datetime(stamps, **parseOptions)

    data['siteId'] = houseNum
    data['weekDay'] = data['timestamp'].dt.weekday
    data['dayHour'] = data['timestamp'].dt.hour
    data['dayMinute'] = data['timestamp'].dt.hour * 60 + data['timestamp'].dt.minute

    # dayNum counts whole calendar days back from the last day in the file.
    calendarDay = data['timestamp'].dt.normalize()
    data['dayNum'] = (calendarDay.max() - calendarDay).dt.days

    data = data.drop(['time', 'date'], axis=1)

    # Separate Feed & Validation data
    data['status'] = data['dayNum'].lt(validationDays).map({True: 'Validation', False: 'Feed'})

    self.data = data


  def exportFeedRawData(self):
    '''Return the rows of the current ``self.data`` whose status is 'Feed'.'''
    return self.data[self.data['status']=='Feed']

  def weekSection(self):
    '''Add ``weekDaySection``: 'AW' (at work) for weekday index < 5, else 'AH' (at home).'''
    self.data['weekDaySection'] = self.data['weekDay'].lt(5).map({True: 'AW', False: 'AH'})

    return self.data

  def daySection(self):
    '''Add ``daySection``: 'S' sleep, 'L' leisure or 'W' working, by hour of day.'''
    def DSection(dayHour):
      '''Map an hour (0-23) to its day-section label.'''
      if dayHour < 7 or dayHour >= 22:
        return 'S' # Sleep
      if dayHour < 9 or dayHour >= 17:
        return 'L' # Leisure: 07-09 and 17-22
      return 'W' # Working

    self.data['daySection'] = self.data['dayHour'].apply(DSection)

    return self.data

  def smothedData(self):
    '''Add smoothed per-minute-of-day ``mad`` and ``mean`` profiles, then drop ``dayMinute``.

    Both profiles are computed from Feed rows only, smoothed with a
    Savitzky-Golay filter (window 13, polynomial order 3) and mapped onto
    every row through ``dayMinute``. Minutes absent from the Feed rows map
    to NaN.
    NOTE(review): savgol_filter presumably rejects profiles shorter than the
    window, i.e. fewer than 13 distinct minutes of day - confirm.
    '''
    window = 13
    poly = 3
    feed = self.data[self.data['status']=='Feed']
    byMinute = feed.groupby('dayMinute')['meter_reading']

    profiles = {
      # Mean absolute deviation around the group mean, spelled out because
      # the 'mad' aggregation no longer exists in pandas >= 2.0.
      'mad': byMinute.agg(lambda readings: (readings - readings.mean()).abs().mean()),
      'mean': byMinute.mean(),
    }
    for column, profile in profiles.items():
      smoothed = pd.Series(savgol_filter(profile.to_numpy(dtype=float), window, poly),
                           index=profile.index)
      self.data[column] = self.data['dayMinute'].map(smoothed)

    self.data = self.data.drop(['dayMinute'], axis=1)

    return self.data

  def addSections(self):
    '''Run ``weekSection``, ``daySection`` and ``smothedData`` in that order.'''
    self.data = self.weekSection()
    self.data = self.daySection()
    self.data = self.smothedData()

    return self.data

  def meanSections(self):
    '''Replace each section label with the mean Feed reading of that section.'''
    data = self.addSections()
    # Filter feed data to leave validation data untouched
    feedData = data[data['status']=='Feed']
    for column in ('daySection', 'weekDaySection'):
      sectionMeans = feedData.groupby(column)['meter_reading'].mean()
      data[column] = data[column].map(sectionMeans)
    self.data = data

    return self.data


  def movingAvarage(self):
    '''Add exponential (EMV1-4) and simple (MV1-4) moving averages of ``meter_reading``.'''
    data = self.meanSections()
    readings = data['meter_reading']
    for span in range(1, 5):
      data['EMV%d' % span] = readings.ewm(span=span, adjust=False).mean()
    for span in range(1, 5):
      data['MV%d' % span] = readings.rolling(window=span).mean()

    self.data = data

    return self.data


  def dataShifted(self):
    '''Shift every moving-average column down one row so each row only sees earlier readings.'''
    data = self.movingAvarage()
    lagged = ['EMV1', 'EMV2', 'EMV3', 'EMV4', 'MV1', 'MV2', 'MV3', 'MV4']
    data[lagged] = data[lagged].shift(1)

    self.data = data

    return self.data

  def dataClean(self):
    '''Drop the leading rows for which the shifted 4-step moving average is undefined.

    The rows are removed with ``dropna`` rather than a string sentinel so the
    numeric columns keep their float dtype.
    '''
    data = self.dataShifted()
    data = data.dropna(subset=['MV4'])
    self.data = data

    return self.data


  def dataExport(self):
    '''Run the whole pipeline and split the result.

    Returns:
      (dataFeed, dataValidation, validationIndex): the Feed and Validation
      frames restricted to the export columns, and the ``timestamp`` Series of
      the Validation rows only, aligned row for row with ``dataValidation``.
    '''
    data = self.dataClean()
    columns = list(self._exportColumns)

    dataFeed = data[data['status']=='Feed']
    dataValidation = data[data['status']=='Validation']
    # Take the timestamps from the validation subset so they line up with it.
    validationIndex = dataValidation['timestamp']

    return dataFeed[columns] , dataValidation[columns] ,validationIndex
  

# Build the feature pipeline for site "C32" from the configured CSV.
data = dataReady(dataPath,"C32",validationDays)

# dataExport() runs every preprocessing stage and returns the training frame,
# the held-out frame and a timestamp Series used later as the plot x-axis.
Feed , Validation , validationIndex = data.dataExport()

# Persist the training frame to 'me.csv' in the working directory.
Feed.to_csv('me.csv')

In [None]:
# Columns used as model inputs. The simple moving averages (MV1-MV4) are
# exported by the pipeline but are currently left out of the feature set.
features = [
    'weekDaySection',
    'daySection',
    'mad',
    'mean',
    'EMV1',
    'EMV2',
    'EMV3',
    'EMV4',
    # 'MV1',
    # 'MV2',
    # 'MV3',
    # 'MV4',
]
featuresLen = len(features)
print(featuresLen)

# Design matrices and targets for the training (Feed) and held-out
# (Validation) splits.
Xtrain = Feed[features].values
ytrain = Feed['meter_reading'].values
Xtest = Validation[features].values
ytest = Validation['meter_reading'].values

8


In [None]:
# Baseline: gradient-boosted regression trees fitted on the Feed split.
# random_state is fixed so the fit is repeatable.
reg = GradientBoostingRegressor(random_state=1, learning_rate=0.03,max_depth=4)
reg.fit(Xtrain, ytrain)
# Predictions for the held-out validation split.
y_pred=reg.predict(Xtest)

# Error on the validation split.
print('MSE:',mean_squared_error(y_true=ytest ,y_pred=y_pred))
print('MAE:',mean_absolute_error(y_true=ytest ,y_pred=y_pred))


In [None]:
class InitCentersKMeans(Initializer):
    """Weight initializer that returns k-means cluster centers fitted on X.

    Args:
        X: sample matrix the clustering is fitted on; its second dimension
            must equal the second dimension of the requested weight shape.
        max_iter: iteration cap handed to KMeans.
    """

    def __init__(self, X, max_iter=100):
        self.max_iter = max_iter
        self.X = X

    def __call__(self, shape, dtype=None):
        """Fit KMeans with ``shape[0]`` clusters and return its centers.

        ``dtype`` is accepted for interface compatibility and is not used.
        """
        samples = self.X
        assert shape[1] == samples.shape[1]

        clusterer = KMeans(n_clusters=shape[0], max_iter=self.max_iter, verbose=0)
        return clusterer.fit(samples).cluster_centers_

class InitCentersRandom(Initializer):
    """Weight initializer that returns randomly chosen rows of X.

    Args:
        X: sample matrix to draw rows from; its second dimension must equal
            the second dimension of the requested weight shape.
    """

    def __init__(self, X):
        self.X = X

    def __call__(self, shape, dtype=None):
        """Return ``shape[0]`` rows of X picked by uniformly drawn row indices.

        Indices are drawn independently, so a row can be picked more than
        once. ``dtype`` is accepted for interface compatibility and unused.
        """
        pool = self.X
        assert shape[1] == pool.shape[1]
        picks = np.random.randint(pool.shape[0], size=shape[0])
        return pool[picks, :]


class RBFLayer(Layer):
    """Radial-basis-function layer with trainable centers and widths.

    Each of the ``output_dim`` units outputs
    ``exp(-beta * ||x - center||^2)`` for its own center and beta.

    Args:
        output_dim: number of RBF units.
        initializer: initializer for the centers; when falsy, centers are
            drawn with ``RandomUniform(0.0, 1.0)``.
        betas: initial value assigned to every unit's beta.
        **kwargs: forwarded to the base ``Layer`` constructor
            (e.g. ``input_shape``).
    """

    def __init__(self, output_dim, initializer=None, betas=1.0, **kwargs):
        self.output_dim = output_dim
        self.init_betas = betas
        if not initializer:
            self.initializer = RandomUniform(0.0, 1.0)
        else:
            self.initializer = initializer
        super(RBFLayer, self).__init__(**kwargs)

    def build(self, input_shape):
        """Create the trainable ``centers`` and ``betas`` weights."""
        # One center per unit, each as wide as the input feature dimension.
        self.centers = self.add_weight(name='centers',
                                       shape=(self.output_dim, input_shape[1]),
                                       initializer=self.initializer,
                                       trainable=True)
        # One beta per unit, all starting from the constructor's value.
        self.betas = self.add_weight(name='betas',
                                     shape=(self.output_dim,),
                                     initializer=Constant(
                                         value=self.init_betas),
                                     trainable=True)

        super(RBFLayer, self).build(input_shape)

    def call(self, x):
        """Return the RBF activations for ``x``.

        NOTE(review): assumes ``x`` is 2-D (batch, features) - confirm.
        """
        # Broadcast the centers against the transposed input so H holds every
        # per-feature difference between each sample and each center.
        C = K.expand_dims(self.centers)
        H = K.transpose(C-K.transpose(x))
        # Summing the squares over axis 1 gives the squared distance from
        # each sample to each center.
        return K.exp(-self.betas * K.sum(H**2, axis=1))

    def compute_output_shape(self, input_shape):
        """Output keeps the first input dimension and has ``output_dim`` columns."""
        return (input_shape[0], self.output_dim)

    def get_config(self):
        """Return the constructor arguments needed to rebuild the layer.

        ``betas`` is included so a layer recreated from its config starts
        from the same initial beta instead of silently reverting to 1.0.
        The centers initializer is not serialized; centers are expected to
        come from the saved weights.
        """
        # have to define get_config to be able to use model_from_json
        config = {
            'output_dim': self.output_dim,
            'betas': self.init_betas,
        }
        base_config = super(RBFLayer, self).get_config()
        return dict(list(base_config.items()) + list(config.items()))

In [None]:
# Model with RBF kernel
model = Sequential()
# The input width is taken from the training matrix instead of a hard-coded
# number: InitCentersKMeans asserts that the centers' width equals
# Xtrain.shape[1], so a stale literal fails as soon as the feature list changes.
rbflayer = RBFLayer(20,
                    initializer=InitCentersKMeans(Xtrain),
                    betas=1.0,
                    input_shape=(Xtrain.shape[1],))
model.add(rbflayer)
# Single linear output unit for the regression target.
model.add(Dense(1))

model.compile(loss='mean_squared_error',
              optimizer=RMSprop())

model.fit(Xtrain, ytrain,
          batch_size=21,
          epochs=100,
          verbose=1)

y_pred_train = model.predict(Xtrain)
y_pred_Validation = model.predict(Xtest)

# Errors on the training split.
print('MSE Train:',mean_squared_error(y_true=ytrain ,y_pred=y_pred_train))
print('MAE Train:',mean_absolute_error(y_true=ytrain ,y_pred=y_pred_train))

# Errors on the held-out validation split.
print('MSE Test:',mean_squared_error(y_true=ytest ,y_pred=y_pred_Validation))
print('MAE Test:',mean_absolute_error(y_true=ytest ,y_pred=y_pred_Validation))


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

KeyboardInterrupt: ignored

In [None]:
import plotly.graph_objects as go


# Flatten the model output to 1-D so it can be plotted as a single trace.
y_pred_Validation = np.array(y_pred_Validation).ravel()

# Overlay the true and predicted validation readings on one time axis.
# NOTE(review): both traces use validationIndex as x, so it has to contain
# exactly one timestamp per element of ytest - verify the lengths match.
fig = go.Figure()
fig.add_trace(go.Scatter(x=validationIndex, y=ytest,
                    mode='lines',
                    name='True'))
fig.add_trace(go.Scatter(x=validationIndex, y=y_pred_Validation,
                    mode='lines+markers',
                    name='Predicted'))

fig.show()