### Introduction

Data Set Information:

This archive contains 2075259 measurements gathered between December 2006 and November 2010 (47 months).
Notes:
1. `(global_active_power*1000/60 - sub_metering_1 - sub_metering_2 - sub_metering_3)` represents the active energy consumed every minute (in watt-hours) in the household by electrical equipment not measured in sub-meterings 1, 2 and 3.

2. The dataset contains some missing values in the measurements (nearly 1.25% of the rows). All calendar timestamps are present in the dataset, but for some timestamps the measurement values are missing: a missing value is represented by the absence of a value between two consecutive semicolon attribute separators. For instance, the dataset shows missing values on April 28, 2007.

Attribute Information:
1.date: Date in format dd/mm/yyyy

2.time: time in format hh:mm:ss

3. `global_active_power`: household global minute-averaged active power (in kilowatt)

4. `global_reactive_power`: household global minute-averaged reactive power (in kilowatt)

5.voltage: minute-averaged voltage (in volt)

6.global_intensity: household global minute-averaged current intensity (in ampere)

7. `sub_metering_1`: energy sub-metering No. 1 (in watt-hours of active energy). It corresponds to the kitchen, containing mainly a dishwasher, an oven and a microwave (hot plates are not electric but gas powered).

8. `sub_metering_2`: energy sub-metering No. 2 (in watt-hours of active energy). It corresponds to the laundry room, containing a washing machine, a tumble dryer, a refrigerator and a light.

9. `sub_metering_3`: energy sub-metering No. 3 (in watt-hours of active energy). It corresponds to an electric water heater and an air conditioner.

### Importing Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt 
import seaborn as sns
import warnings
warnings.filterwarnings('ignore') # suppress every warning raised from here on

In [None]:
# Load the raw semicolon-separated file; 'nan' and '?' are read as missing values.
df = pd.read_csv(
    '../input/electric-power-consumption-data-set/household_power_consumption.txt',
    sep=';',
    low_memory=False,
    na_values=['nan', '?'],
)

# Merge the Date (dd/mm/yyyy) and Time (hh:mm:ss) columns into a single
# timestamp index named 'date_time'. The format is spelled out explicitly so
# day-first dates are never left to inference, and so the load does not depend
# on the deprecated `infer_datetime_format` / dict-style `parse_dates` options.
df['date_time'] = pd.to_datetime(
    df.pop('Date') + ' ' + df.pop('Time'),
    format='%d/%m/%Y %H:%M:%S',
)
df = df.set_index('date_time')

In [None]:
# Preview the first rows of the loaded frame.
df.head()

In [None]:
# Print the column dtypes and non-null counts.
df.info()

In [None]:
# Summary statistics for each numeric column.
df.describe()

In [None]:
# (rows, columns) of the loaded frame.
df.shape

In [None]:
# Count the missing values in each column.
df.isnull().sum()

There are some missing values in this dataset. The missing values can be handled as below:

In [None]:
def fill_missing(data, one_day=24 * 60):
    """Fill NaN cells in place with the value recorded one day earlier.

    Args:
        data: 2-D float NumPy array, one row per sample and one column per
            feature. It is modified in place.
        one_day: Number of rows that make up one day. Defaults to ``24 * 60``
            (one row per minute).

    Returns:
        None. ``data`` is updated in place.

    Notes:
        A NaN located in the first ``one_day`` rows has no earlier day to copy
        from and is left as NaN, rather than wrapping around to the end of the
        array through a negative index.
    """
    # ``np.argwhere`` yields the NaN positions in row-major order, so a NaN
    # whose source cell was itself missing still picks up the value that was
    # filled into that earlier row.
    for row, col in np.argwhere(np.isnan(data)):
        if row >= one_day:
            data[row, col] = data[row - one_day, col]

In [None]:
# Fill the gaps on an explicit, writable float copy and rebuild the frame from
# it. Filling `df.values` directly only works when that array happens to be a
# writable view of the frame's storage, which pandas does not guarantee.
values = df.to_numpy(dtype=float, copy=True)
fill_missing(values)
df = pd.DataFrame(values, index=df.index, columns=df.columns)


In [None]:
# Re-count the missing values per column after the fill step.
df.isnull().sum()

In [None]:
# Persist the cleaned frame, then load it back with 'date_time' parsed as the
# datetime index.
df.to_csv('clean_data.csv')
df = pd.read_csv(
    'clean_data.csv',
    index_col='date_time',
    parse_dates=['date_time'],
)
df.head()

### Data Visualization


In [None]:
# Aggregate the readings into one row per calendar day by summing each column.
day_data = df.resample(rule='D').sum()
day_data.head()

In [None]:
# Plot every daily-aggregated column in its own panel, stacked vertically.
# The panel grid is created once by `plt.subplots`, so no extra axes is left
# behind the panels.
fig, axes = plt.subplots(nrows=len(day_data.columns), ncols=1,
                         figsize=(15, 15), squeeze=False)

for ax, name in zip(axes[:, 0], day_data.columns):
    ax.plot(day_data[name])
    ax.set_title(name, y=0, loc='right')
    ax.set_yticks([])  # absolute scale is not needed for an overview

# The layout has to be tightened before the figure is shown to have any effect.
fig.tight_layout()
plt.show()

In [None]:
# Plot the daily Global_active_power for each year in its own panel and show
# the yearly total in the panel title.
years = ['2007', '2008', '2009', '2010']

# One row per plotted year (not one per data column).
fig, axes = plt.subplots(nrows=len(years), ncols=1,
                         figsize=(15, 15), squeeze=False)

for ax, year in zip(axes[:, 0], years):
    # `.loc` with a year string selects that year's rows on the datetime
    # index; plain `day_data[year]` would be looked up as a column label.
    active_power_data = day_data.loc[year, 'Global_active_power']
    ax.plot(active_power_data)
    ax.set_title(f"{year} > {np.round(active_power_data.sum(), 1)}",
                 y=0, loc='left')

# The layout has to be tightened before the figure is shown to have any effect.
fig.tight_layout()
plt.show()

In [None]:
# Total Global_active_power over 2010. Rows are selected through `.loc`,
# because `day_data['2010']` would be looked up as a column label.
day_data.loc['2010', 'Global_active_power'].sum()

In [None]:
def normalize_series(data, min, max):
    """Shift ``data`` by ``min`` and then divide it by ``max``.

    Computes ``(data - min) / max``. The divisor is ``max`` itself, not the
    range ``max - min``, so the result is not rescaled to exactly ``[0, 1]``
    unless ``min`` is zero.

    Args:
        data: Scalar or array of values to scale.
        min: Offset subtracted from ``data`` (scalar or broadcastable array).
        max: Divisor applied after the shift (scalar or broadcastable array).

    Returns:
        The shifted and scaled values; ``data`` itself is not modified.
    """
    return (data - min) / max

def windowed_dataset(series, batch_size, past_value, future_value, shift=1):
    """Build a batched dataset of (past, future) windows from ``series``.

    Args:
        series: Data sliced along its first axis, one element per step.
        batch_size: Number of (past, future) pairs per batch.
        past_value: Number of leading steps of each window used as input.
        future_value: Number of trailing steps of each window used as target.
        shift: Offset between the starts of consecutive windows.

    Returns:
        A ``tf.data.Dataset`` yielding batches of ``(past, future)`` pairs,
        with a prefetch buffer of one batch.
    """
    window_len = past_value + future_value
    windows = tf.data.Dataset.from_tensor_slices(series).window(
        size=window_len, shift=shift, drop_remainder=True
    )
    # Each window is itself a dataset; batching it at full length turns it
    # into a single tensor.
    flat = windows.flat_map(lambda win: win.batch(window_len))
    # Split every window into its input part and its target part.
    pairs = flat.map(lambda win: (win[:past_value], win[past_value:]))
    return pairs.batch(batch_size).prefetch(1)


def model_forecast(model, series, window_size, batch_size):
    """Run ``model.predict`` over every sliding window of ``series``.

    Args:
        model: Object exposing ``predict`` that accepts a ``tf.data.Dataset``.
        series: Data sliced along its first axis, one element per step.
        window_size: Number of consecutive steps in each input window.
        batch_size: Number of windows per prediction batch.

    Returns:
        Whatever ``model.predict`` returns for the windowed dataset. Windows
        that do not fill a complete batch are dropped before prediction.
    """
    windows = tf.data.Dataset.from_tensor_slices(series).window(
        window_size, shift=1, drop_remainder=True
    )
    # Turn each window (a nested dataset) into one tensor of length window_size.
    flat = windows.flat_map(lambda win: win.batch(window_size))
    batches = flat.batch(batch_size, drop_remainder=True).prefetch(1)
    return model.predict(batches)

In [None]:
# Scale each column of the daily totals with that column's own minimum and
# maximum.
data = day_data.to_numpy()
data = normalize_series(data, data.min(axis=0), data.max(axis=0))

In [None]:
# Number of features: one per column of the frame.
feature_value = df.shape[1]
feature_value

# Chronological split: the first 60% of the rows are used for training and
# the remaining rows for validation.
time_split = int(0.6 * len(data))
x_train, x_valid = data[:time_split], data[time_split:]

In [None]:
import tensorflow as tf

# Reset the Keras global state and fix the TensorFlow random seed.
tf.keras.backend.clear_session()
tf.random.set_seed(42)

# Windowing settings: each sample pairs `past_value` input steps with the
# `future_value` steps that follow them.
batch_size = 64
past_value = 21
future_value = 21
shift = 1
window_size = past_value + future_value

train_set = windowed_dataset(x_train, batch_size, past_value, future_value,
                             shift=shift)
valid_set = windowed_dataset(x_valid, batch_size, past_value, future_value,
                             shift=shift)

In [None]:
# Sequence-to-sequence model: a causal 1-D convolution, followed by two
# bidirectional LSTM layers that both return full sequences, and a Dense layer
# that emits `feature_value` outputs at every step.
model = tf.keras.models.Sequential([
    tf.keras.layers.Conv1D(filters=21, kernel_size=5,
                               strides=1, padding='causal',
                               activation='relu',
                               input_shape = (past_value, feature_value)),
        
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(24, return_sequences=True)),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(24, return_sequences=True)),
    tf.keras.layers.Dense(feature_value)
    ])

# `summary()` prints the table itself and returns None, so wrapping it in
# print() would emit a stray "None" line.
model.summary()

In [None]:
# SGD with momentum, minimizing the Huber loss and reporting MAE as a metric.
optimizer = tf.keras.optimizers.SGD(momentum=0.9, learning_rate=1e-1)
model.compile(
    optimizer=optimizer,
    loss=tf.keras.losses.Huber(),
    metrics=['mae'],
)

In [None]:
# Train for 200 epochs. `valid_set` is passed as validation data so the loss
# and MAE on the held-out windows are reported after every epoch; it does not
# take part in the weight updates.
model.fit(train_set, validation_data=valid_set, epochs=200)

In [None]:
# Predict on every `past_value`-long sliding window of the full normalized
# series; entry i of the result belongs to the window that starts at row i.
rnn_forecast = model_forecast(model, data, past_value, batch_size)
# Keep the windows starting at row `time_split - past_value` and later (minus
# the final one), and take only output step 0 of each prediction, for all
# features. Training targets are the steps right after each input window, so
# output step 0 of the window starting at `time_split - past_value` lines up
# with row `time_split`, i.e. the first validation row.
# NOTE(review): `model_forecast` batches with drop_remainder=True, so the
# forecast may stop short of the end of the series — confirm this is intended.
rnn_forecast = rnn_forecast[time_split - past_value:-1, 0, :]


In [None]:
# Trim the validation rows so there is exactly one per forecast row.
x_valid = x_valid[:len(rnn_forecast)]

In [None]:
# Mean absolute error between the validation rows and the forecasts, averaged
# into a single number. Both arrays are in normalized units, so the score is
# too.
tf.keras.metrics.mean_absolute_error(x_valid, rnn_forecast).numpy().mean()