<a href="https://colab.research.google.com/github/daisyKim12/Tensorflow_Study/blob/main/Lecture_C5_House_Electricity.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Category 5

`Individual Household Electric Power Consumption Dataset`

The original 'Individual Household Electric Power Consumption Dataset'
contains measurements of electric power consumption in one household at
a one-minute sampling rate over a period of almost 4 years.

Different electrical quantities and some sub-metering values are available.

For the purpose of the examination we have provided a subset containing
the data for the first 60 days in the dataset. We have also cleaned the
dataset beforehand to remove missing values. The dataset is provided as a
csv file in the project.

The dataset has a total of 7 features ordered by time.

Your neural network must have a **validation MAE of approximately 0.055** or less on the normalized validation dataset for top marks.

# Import

In [None]:
import urllib
import os
import zipfile
import pandas as pd

import tensorflow as tf
from tensorflow.keras.layers import Dense, Conv1D, LSTM, Bidirectional
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint

In [None]:
def download_and_extract_data():
  """Download the household power archive and unpack it in the working directory.

  Saves the archive from the fixed URL below as ``household_power.zip`` in the
  current working directory, then extracts all of its members next to it.
  Takes no arguments and returns nothing.
  """
  # `import urllib` alone does not load the `request` submodule; import it
  # explicitly so this function does not depend on some other library having
  # imported it first.
  import urllib.request

  url = 'https://storage.googleapis.com/download.tensorflow.org/data/certificate/household_power.zip'
  urllib.request.urlretrieve(url, 'household_power.zip')
  with zipfile.ZipFile('household_power.zip', 'r') as zip_ref:
    zip_ref.extractall()



In [None]:
# Fetch the dataset archive and unpack it into the current working directory.
download_and_extract_data()

In [None]:
# Load the CSV with the 'datetime' column as the index. `parse_dates=True`
# converts that index to real timestamps; the previous `infer_datetime_format`
# flag is deprecated in pandas 2.x and had no effect without `parse_dates`.
df = pd.read_csv(
    'household_power_consumption.csv',
    sep=',',
    header=0,
    index_col='datetime',
    parse_dates=True,
)
df.head(10)

Unnamed: 0_level_0,Global_active_power,Global_reactive_power,Voltage,Global_intensity,Sub_metering_1,Sub_metering_2,Sub_metering_3
datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
2006-12-16 17:24:00,4.216,0.418,234.84,18.4,0.0,1.0,17.0
2006-12-16 17:25:00,5.36,0.436,233.63,23.0,0.0,1.0,16.0
2006-12-16 17:26:00,5.374,0.498,233.29,23.0,0.0,2.0,17.0
2006-12-16 17:27:00,5.388,0.502,233.74,23.0,0.0,1.0,17.0
2006-12-16 17:28:00,3.666,0.528,235.68,15.8,0.0,1.0,17.0
2006-12-16 17:29:00,3.52,0.522,235.02,15.0,0.0,2.0,17.0
2006-12-16 17:30:00,3.702,0.52,235.09,15.8,0.0,1.0,17.0
2006-12-16 17:31:00,3.7,0.52,235.22,15.8,0.0,1.0,17.0
2006-12-16 17:32:00,3.668,0.51,233.99,15.8,0.0,1.0,17.0
2006-12-16 17:33:00,3.662,0.51,233.86,15.8,0.0,2.0,16.0


In [None]:
def normalize_series(data, min, max):
  """Shift `data` by `min`, then scale it by `max`.

  Computes ``(data - min) / max``. Note that the divisor is `max` itself, not
  the range ``max - min``, so a value equal to `max` maps to
  ``(max - min) / max`` rather than to 1.

  Args:
    data: Values to normalize (anything supporting ``-`` and ``/``).
    min: Offset subtracted from `data`.
    max: Divisor applied after the shift.

  Returns:
    The shifted and scaled values; `data` itself is not modified in place.
  """
  return (data - min) / max

In [None]:
# Number of columns in the frame; reused later as the width of the model's
# final Dense layer.
N_FEATURES = len(df.columns)
data = df.values
# axis=0 takes the min/max of each column, so every feature is normalized
# with its own statistics.
data = normalize_series(data, data.min(axis=0), data.max(axis=0))
data

array([[0.43377912, 0.47826087, 0.04036551, ..., 0.        , 0.01282051,
        0.85      ],
       [0.55716135, 0.49885584, 0.0355582 , ..., 0.        , 0.01282051,
        0.8       ],
       [0.55867127, 0.56979405, 0.03420739, ..., 0.        , 0.02564103,
        0.85      ],
       ...,
       [0.03710095, 0.        , 0.05983313, ..., 0.        , 0.        ,
        0.        ],
       [0.03559103, 0.        , 0.06515693, ..., 0.        , 0.        ,
        0.        ],
       [0.03774806, 0.        , 0.06730234, ..., 0.        , 0.01282051,
        0.        ]])

In [None]:
# Summary statistics of the normalized array, one column per feature.
pd.DataFrame(data).describe()

Unnamed: 0,0,1,2,3,4,5,6
count,86400.0,86400.0,86400.0,86400.0,86400.0,86400.0,86400.0
mean,0.156411,0.147141,0.064697,0.152278,0.01695,0.024085,0.375711
std,0.14404,0.134578,0.0139,0.139343,0.086787,0.097022,0.433595
min,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,0.021786,0.0,0.055344,0.024752,0.0,0.0,0.0
50%,0.131795,0.132723,0.065713,0.123762,0.0,0.0,0.0
75%,0.239431,0.224256,0.074652,0.227723,0.0,0.012821,0.85
max,0.979077,1.0,0.10735,0.980198,1.0,1.0,1.0


In [None]:
# Row index at 80% of the data; used below as the train/validation boundary.
split_time = int(len(data) * 0.8)

In [None]:
# Split by position without shuffling: rows before split_time are used for
# training, the remaining rows for validation.
x_train = data[:split_time]
x_valid = data[split_time:]

In [None]:
def windowed_dataset(series, batch_size, n_past=24, n_future=24, shift=1):
    """Build a batched tf.data pipeline of (past, future) window pairs.

    A window of ``n_past + n_future`` consecutive rows is slid over `series`,
    advancing `shift` rows at a time and dropping incomplete trailing windows.
    The windows are shuffled, then each one is split into its first `n_past`
    rows and the rows that follow them.

    Args:
        series: Row-ordered data to slice into windows.
        batch_size: Number of (past, future) pairs per batch.
        n_past: Rows at the start of each window used as the input part.
        n_future: Rows after the input part used as the target part.
        shift: Number of rows between the starts of consecutive windows.

    Returns:
        A dataset yielding batches of ``(past, future)`` pairs, with one batch
        prefetched.
    """
    window_len = n_past + n_future

    def split_window(window):
        # Leading n_past rows are the inputs; everything after is the target.
        return window[:n_past], window[n_past:]

    windows = tf.data.Dataset.from_tensor_slices(series).window(
        size=window_len, shift=shift, drop_remainder=True
    )
    # Each window is itself a small dataset; batching it by its full length
    # collapses it into a single tensor.
    windows = windows.flat_map(lambda sub: sub.batch(window_len))
    windows = windows.shuffle(len(series))
    pairs = windows.map(split_window)
    return pairs.batch(batch_size).prefetch(1)

In [None]:
# Batching and windowing hyperparameters.
# BATCH_SIZE is passed to windowed_dataset when the datasets are built.
BATCH_SIZE = 32
# N_PAST is used as the model's input sequence length. N_PAST, N_FUTURE and
# SHIFT have the same values as windowed_dataset's defaults (24, 24, 1).
N_PAST = 24
N_FUTURE = 24
SHIFT = 1

In [None]:
# Pass the windowing constants explicitly instead of relying on
# windowed_dataset's defaults happening to match them, so changing N_PAST,
# N_FUTURE or SHIFT actually changes the datasets.
train_set = windowed_dataset(series=x_train,
                             batch_size=BATCH_SIZE,
                             n_past=N_PAST,
                             n_future=N_FUTURE,
                             shift=SHIFT,
                             )
valid_set = windowed_dataset(series=x_valid,
                             batch_size=BATCH_SIZE,
                             n_past=N_PAST,
                             n_future=N_FUTURE,
                             shift=SHIFT,
                             )

# Model, Checkpoint, Compile, Fit, Load, Evaluate

In [None]:
# Conv1D front end -> two stacked bidirectional LSTMs -> per-time-step Dense head.
# Both LSTMs keep return_sequences=True, so the network emits one
# N_FEATURES-wide vector for each of the N_PAST input steps.
# NOTE(review): the targets span N_FUTURE steps, so this presumably relies on
# N_PAST == N_FUTURE — confirm before changing either constant.
model = Sequential([
    # The input width uses N_FEATURES (not a literal 7) so it stays in sync
    # with the output layer and the data.
    Conv1D(filters=32, kernel_size=3, padding='causal', activation='relu',
           input_shape=[N_PAST, N_FEATURES]),
    Bidirectional(LSTM(32, return_sequences=True)),
    Bidirectional(LSTM(32, return_sequences=True)),
    Dense(32, activation='relu'),
    Dense(16, activation='relu'),
    Dense(8, activation='relu'),
    Dense(N_FEATURES),
])

In [None]:
# Print each layer's output shape and parameter count.
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 24, 32)            704       
                                                                 
 bidirectional (Bidirectiona  (None, 24, 64)           16640     
 l)                                                              
                                                                 
 bidirectional_1 (Bidirectio  (None, 24, 64)           24832     
 nal)                                                            
                                                                 
 dense (Dense)               (None, 24, 32)            2080      
                                                                 
 dense_1 (Dense)             (None, 24, 16)            528       
                                                                 
 dense_2 (Dense)             (None, 24, 8)             1

In [None]:
# Keras 3 (shipped with recent TensorFlow releases) rejects weight-only
# checkpoints whose path does not end in '.weights.h5'. Older Keras versions
# accept this suffix too and write the weights as HDF5.
checkpoint_path = 'model/my_checkpoint.weights.h5'

# Save only the weights, and only when the monitored validation loss improves,
# so the file always holds the best weights seen so far.
checkpoint = ModelCheckpoint(checkpoint_path,
                             save_weights_only=True,
                             save_best_only=True,
                             monitor='val_loss',
                             verbose=1,
                             )

In [None]:
# Adam with a fixed learning rate. MAE is used both as the training loss and
# as the reported metric.
optimizer = tf.keras.optimizers.Adam(learning_rate=5e-4)

model.compile(
    loss='mae',
    metrics=['mae'],
    optimizer=optimizer,
)

In [None]:
# Train for 30 epochs, validating on valid_set; the checkpoint callback writes
# the weights to disk whenever val_loss improves.
model.fit(train_set, validation_data = valid_set, epochs = 30, callbacks=[checkpoint])

Epoch 1/30
   2159/Unknown - 56s 13ms/step - loss: 0.0646 - mae: 0.0646
Epoch 1: val_loss improved from inf to 0.04423, saving model to model/my_checkpoint.ckpt
Epoch 2/30
Epoch 2: val_loss improved from 0.04423 to 0.04212, saving model to model/my_checkpoint.ckpt
Epoch 3/30
Epoch 3: val_loss improved from 0.04212 to 0.04128, saving model to model/my_checkpoint.ckpt
Epoch 4/30
Epoch 4: val_loss improved from 0.04128 to 0.04056, saving model to model/my_checkpoint.ckpt
Epoch 5/30
Epoch 5: val_loss improved from 0.04056 to 0.03994, saving model to model/my_checkpoint.ckpt
Epoch 6/30
Epoch 6: val_loss did not improve from 0.03994
Epoch 7/30
Epoch 7: val_loss improved from 0.03994 to 0.03992, saving model to model/my_checkpoint.ckpt
Epoch 8/30
Epoch 8: val_loss improved from 0.03992 to 0.03966, saving model to model/my_checkpoint.ckpt
Epoch 9/30
Epoch 9: val_loss did not improve from 0.03966
Epoch 10/30
Epoch 10: val_loss improved from 0.03966 to 0.03945, saving model to model/my_checkpoin

<keras.callbacks.History at 0x7bddbff26ce0>

In [None]:
# Restore the weights written by the checkpoint callback (the best val_loss
# seen during training) rather than keeping the last-epoch weights.
model.load_weights(checkpoint_path)

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x7bdd3aaec5b0>

In [None]:
# Returns [loss, mae] on the validation set; both entries are MAE because the
# model was compiled with loss='mae' and metrics=['mae'].
model.evaluate(valid_set)



[0.03858674690127373, 0.03858671337366104]