In [2]:
import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_datasets as tfds
import urllib
import zipfile

tf.__version__

'2.10.0'

In [2]:
def download_and_extract_data():
    url = 'https://storage.googleapis.com/download.tensorflow.org/data/certificate/household_power.zip'
    urllib.request.urlretrieve(url, 'household_power.zip')
    with zipfile.ZipFile('household_power.zip', 'r') as zip_ref:
        zip_ref.extractall()

In [3]:
download_and_extract_data()

In [4]:
df = pd.read_csv('household_power_consumption.csv', sep=',',
                     infer_datetime_format=True, index_col='datetime', header=0)

In [5]:
N_FEATURES = len(df.columns)

In [6]:
data = df.values
data[:10]

array([[  4.216,   0.418, 234.84 ,  18.4  ,   0.   ,   1.   ,  17.   ],
       [  5.36 ,   0.436, 233.63 ,  23.   ,   0.   ,   1.   ,  16.   ],
       [  5.374,   0.498, 233.29 ,  23.   ,   0.   ,   2.   ,  17.   ],
       [  5.388,   0.502, 233.74 ,  23.   ,   0.   ,   1.   ,  17.   ],
       [  3.666,   0.528, 235.68 ,  15.8  ,   0.   ,   1.   ,  17.   ],
       [  3.52 ,   0.522, 235.02 ,  15.   ,   0.   ,   2.   ,  17.   ],
       [  3.702,   0.52 , 235.09 ,  15.8  ,   0.   ,   1.   ,  17.   ],
       [  3.7  ,   0.52 , 235.22 ,  15.8  ,   0.   ,   1.   ,  17.   ],
       [  3.668,   0.51 , 233.99 ,  15.8  ,   0.   ,   1.   ,  17.   ],
       [  3.662,   0.51 , 233.86 ,  15.8  ,   0.   ,   2.   ,  16.   ]])

In [7]:
def normalize_series(data, min, max):
    data = data - min
    data = data / max
    return data

In [8]:
data = normalize_series(data, data.min(axis=0), data.max(axis=0))

In [9]:
data[:10]

array([[0.43377912, 0.47826087, 0.04036551, 0.43564356, 0.        ,
        0.01282051, 0.85      ],
       [0.55716135, 0.49885584, 0.0355582 , 0.54950495, 0.        ,
        0.01282051, 0.8       ],
       [0.55867127, 0.56979405, 0.03420739, 0.54950495, 0.        ,
        0.02564103, 0.85      ],
       [0.56018119, 0.57437071, 0.03599523, 0.54950495, 0.        ,
        0.01282051, 0.85      ],
       [0.37446074, 0.60411899, 0.04370282, 0.37128713, 0.        ,
        0.01282051, 0.85      ],
       [0.35871441, 0.597254  , 0.04108065, 0.35148515, 0.        ,
        0.02564103, 0.85      ],
       [0.3783434 , 0.59496568, 0.04135876, 0.37128713, 0.        ,
        0.01282051, 0.85      ],
       [0.3781277 , 0.59496568, 0.04187525, 0.37128713, 0.        ,
        0.01282051, 0.85      ],
       [0.37467645, 0.58352403, 0.03698848, 0.37128713, 0.        ,
        0.01282051, 0.85      ],
       [0.37402934, 0.58352403, 0.03647199, 0.37128713, 0.        ,
        0.02564103, 0.8

In [10]:
SPLIT_TIME = int(len(data) * 0.5)

In [11]:
x_train = data[:SPLIT_TIME]
x_valid = data[SPLIT_TIME:]

In [12]:
x_train.shape, x_valid.shape

((43200, 7), (43200, 7))

In [13]:
BATCH_SIZE = 32
N_PAST = 24
N_FUTURE = 24
SHIFT = 1

In [14]:
def windowed_dataset(series, batch_size, n_past=24, n_future=24, shift=1):
    ds = tf.data.Dataset.from_tensor_slices(series)
    ds = ds.window(size=n_past + n_future, shift=shift, drop_remainder=True)
    ds = ds.flat_map(lambda w: w.batch(n_past + n_future))
    ds = ds.map(lambda w: (w[:n_past], w[n_past:]))
    return ds.batch(batch_size).prefetch(1)

In [15]:
train_set = windowed_dataset(series=x_train, batch_size=BATCH_SIZE,
                                 n_past=N_PAST, n_future=N_FUTURE,
                                 shift=SHIFT)

In [16]:
valid_set = windowed_dataset(series=x_valid, batch_size=BATCH_SIZE,
                                 n_past=N_PAST, n_future=N_FUTURE,
                                 shift=SHIFT)

In [17]:
tf.random.set_seed(42)

inputs = tf.keras.layers.Input(shape=(N_PAST, N_FEATURES))
x = tf.keras.layers.LSTM(128, activation="relu", return_sequences=True)(inputs)
x = tf.keras.layers.LSTM(128, activation="relu")(x)
x = tf.keras.layers.Flatten()(x)
x = tf.keras.layers.Dense(N_FUTURE * N_FEATURES)(x)
outputs = tf.keras.layers.Reshape((N_FUTURE, N_FEATURES))(x)
model_1 = tf.keras.Model(inputs = inputs, outputs = outputs, name = "model_1")

In [18]:
model_1.compile(loss = "mae",
                optimizer = tf.keras.optimizers.Adam(),
                metrics=["accuracy"])

In [19]:
model_1.fit(train_set,
            epochs=10,
            validation_data=valid_set,
            callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=100, restore_best_weights=True)])

Epoch 1/10


2023-05-23 23:26:33.189902: W tensorflow/core/platform/profile_utils/cpu_utils.cc:128] Failed to get CPU frequency: 0 Hz


Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x29901bd90>

## TEST CODE

In [27]:
def mae(y_true, y_pred):
    return np.mean(abs(y_true.ravel() - y_pred.ravel()))

In [28]:
def model_forecast(model, series, window_size, batch_size):
    ds = tf.data.Dataset.from_tensor_slices(series)
    ds = ds.window(window_size, shift=1, drop_remainder=True)
    ds = ds.flat_map(lambda w: w.batch(window_size))
    ds = ds.batch(batch_size, drop_remainder=True).prefetch(1)
    forecast = model.predict(ds)
    return forecast

In [29]:
rnn_forecast = model_forecast(model_1, data, N_PAST, BATCH_SIZE)



In [30]:
rnn_forecast = rnn_forecast[SPLIT_TIME - N_PAST:-1, 0, :]

In [31]:
x_valid = x_valid[:rnn_forecast.shape[0]]

In [32]:
result = mae(x_valid, rnn_forecast)
result

0.023344217102081195

## Model 2

In [33]:
tf.random.set_seed(42)

model_2 = tf.keras.Sequential([
    tf.keras.layers.Conv1D(filters=128, kernel_size=5, padding="causal", activation="relu", input_shape=(N_PAST, N_FEATURES)),
    tf.keras.layers.MaxPool1D(),
    tf.keras.layers.Conv1D(filters=128, kernel_size=5, padding="causal", activation="relu"),
    tf.keras.layers.MaxPool1D(),
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(N_FUTURE*N_FEATURES),
    tf.keras.layers.Reshape((N_FUTURE, N_FEATURES))
])

model_2.compile(loss = "mae",
                optimizer = tf.keras.optimizers.Adam(),
                metrics=["accuracy"])

In [34]:
model_2.fit(train_set,
            epochs=10,
            validation_data=valid_set,
            callbacks=[tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=100, restore_best_weights=True)])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.History at 0x2998dd190>

## Test Code 2

In [35]:
rnn_forecast = model_forecast(model_2, data, N_PAST, BATCH_SIZE)



In [36]:
rnn_forecast = rnn_forecast[SPLIT_TIME - N_PAST:-1, 0, :]

In [37]:
x_valid = x_valid[:rnn_forecast.shape[0]]

In [38]:
result = mae(x_valid, rnn_forecast)
result

0.03615179452921788

In [7]:
x = tf.random.uniform(shape=(4, 6), minval=1, maxval=1000, dtype=tf.int32)
x

<tf.Tensor: shape=(4, 6), dtype=int32, numpy=
array([[670, 372, 454, 275, 844,  55],
       [120, 197, 583, 520, 337,  44],
       [227, 158, 672, 268, 682, 808],
       [359, 457, 650, 633, 895, 165]], dtype=int32)>

In [12]:
tf.reduce_mean(x, axis=-1)

<tf.Tensor: shape=(4,), dtype=int32, numpy=array([445, 300, 469, 526], dtype=int32)>