In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import joblib
from tensorflow import keras 
from tensorflow.keras.models import Model
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler


In [2]:
def multivariate_data(dataset, target, start_index, end_index, history_size,
                      target_size, step, single_step=False):
    """Slice a series into history windows and their matching labels.

    For every position ``i`` in ``[start_index + history_size, end_index)`` the
    window is ``dataset[range(i - history_size, i, step)]``. The label is
    ``target[i + target_size]`` when ``single_step`` is true, otherwise the
    slice ``target[i:i + target_size]``.

    Args:
        dataset: Indexable holding the input rows (indexed with a ``range``).
        target: Indexable holding the values to predict.
        start_index: Position of the first row that may appear in a window.
        end_index: Exclusive upper bound for ``i``; ``None`` means
            ``len(dataset) - target_size``.
        history_size: Number of positions spanned by each window.
        target_size: Look-ahead offset (single step) or label length.
        step: Stride between the positions sampled inside a window.
        single_step: Select one future value instead of a slice as the label.

    Returns:
        Tuple ``(windows, labels)`` of NumPy arrays.
    """
    first = start_index + history_size
    last = len(dataset) - target_size if end_index is None else end_index
    positions = range(first, last)

    # One window per position, sampled every `step` rows.
    windows = [dataset[range(pos - history_size, pos, step)]
               for pos in positions]

    if single_step:
        labels = [target[pos + target_size] for pos in positions]
    else:
        labels = [target[pos:pos + target_size] for pos in positions]

    return np.array(windows), np.array(labels)

In [3]:
# Load the cleaned dataset and parse the timestamp column.
df = pd.read_csv("../data/data_processed_cleaned_fake.csv")
df['date'] = pd.to_datetime(df['date'], format='%Y/%m/%d %H:%M')

# Windowing configuration used by the prediction cell.
past_history = 96    # rows of history in each model input window
future_target = 4    # number of forecast steps stored per row
STEP = 1             # stride between rows sampled inside a window

# Placeholder columns for the forecasts, one production/consumption pair per
# step. They are created as floats (0.0, not 0) because the model output
# written into them later is floating point; filling an integer column with
# floats element by element relies on silent upcasting, which pandas has
# deprecated.
for i in range(0,future_target):
    df["{}_step_prod".format(i+1)] = 0.0
    df["{}_step_cons".format(i+1)] = 0.0
    
# Model input features, in the column order handed to the scalers.
features_considered = [
                    'price', 'isWeekend', 
                    'isHoliday',  'temp', 'wind','day_cos', 'day_sin',
                    'month_cos', 'month_sin'
                    ]


In [4]:
# Preview the first rows, including the placeholder *_step_prod / *_step_cons columns.
df.head()

Unnamed: 0,date,shops,office_building,industrial_park,fve,mve,wpg,price,isWeekend,isHoliday,...,month_cos,month_sin,1_step_prod,1_step_cons,2_step_prod,2_step_cons,3_step_prod,3_step_cons,4_step_prod,4_step_cons
0,2020-01-01 00:00:00,16.0236,44.616,19.844,0.0,243.738,8.1108,33.2,0,1,...,0.999963,-0.008601,0,0,0,0,0,0,0,0
1,2020-01-01 00:15:00,11.664,37.908,15.4188,0.0,264.5154,8.0586,33.2,0,1,...,0.999965,-0.008422,0,0,0,0,0,0,0,0
2,2020-01-01 00:30:00,11.646,45.9888,14.6196,0.0,263.1303,7.3188,33.2,0,1,...,0.999966,-0.008243,0,0,0,0,0,0,0,0
3,2020-01-01 00:45:00,11.79,37.6272,14.3676,0.0,320.5708,8.2379,33.2,0,1,...,0.999967,-0.008064,0,0,0,0,0,0,0,0
4,2020-01-01 01:00:00,16.02,37.6272,23.0252,0.0,265.8654,6.6474,30.4,0,1,...,0.999969,-0.007884,0,0,0,0,0,0,0,0


In [31]:
%%time
pred_model = keras.models.load_model('../models/CNN_LSTM.h5')
x_StandardScaler = joblib.load('../models/StandardScaler_alldata.save')
x_MinMaxScaler = joblib.load('../models/MinMaxScaler_alldata.save')

df_scaled = df[features_considered].copy()
df_scaled = x_StandardScaler.transform(df_scaled.values)
df_scaled = x_MinMaxScaler.fit_transform(df_scaled)

for index, row in df.iterrows():
    if len(df) > index+2*past_history:
        x, y1 = multivariate_data(df_scaled,
                                  df['production_usage'].values, 
                                  index,
                                  index+past_history+1,
                                  past_history,
                                  future_target,
                                  STEP,
                                  single_step=True)
        pred = pred_model.predict(x)
        pred_production = pred[0].flatten()
        pred_consumption = pred[1].flatten()
        for i in range(0,future_target):
            df.at[index,"{}_step_prod".format(i+1)] = pred_production[i]
            df.at[index,"{}_step_cons".format(i+1)] = pred_consumption[i]

CPU times: user 12min 30s, sys: 2min 42s, total: 15min 12s
Wall time: 14min 19s


192

In [30]:
# The last 2 * past_history rows are skipped by the prediction cell, so they
# are trimmed before the results are written out.
df.iloc[:-(2 * past_history)].to_csv("../data/data_fake_withPredictions.csv")