<a href="https://colab.research.google.com/github/denmau/tensorflow-practice/blob/main/Time_Series_Week_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [7]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Record which TensorFlow release this notebook was executed with.
print(tf.__version__)

2.4.1


In [2]:
# Build a dataset holding the integers 0..9 and print one element per line.
dataset = tf.data.Dataset.range(10)
for element in dataset.as_numpy_iterator():
  print(element)

0
1
2
3
4
5
6
7
8
9


In [69]:
# Slide a 5-element window over 0..9, advancing one element at a time.
# drop_remainder is False by default, so the trailing windows are shorter than 5.
dataset = tf.data.Dataset.range(10).window(5, shift=1)
for window_dataset in dataset:
  # Each window is itself a Dataset; print its values on one line, each followed by a space.
  print("".join(f"{item} " for item in window_dataset.as_numpy_iterator()))

0 1 2 3 4 
1 2 3 4 5 
2 3 4 5 6 
3 4 5 6 7 
4 5 6 7 8 
5 6 7 8 9 
6 7 8 9 
7 8 9 
8 9 
9 


In [64]:
# Same sliding window, but drop_remainder=True discards the windows with fewer than 5 elements.
dataset = tf.data.Dataset.range(10).window(5, shift=1, drop_remainder=True)
for window_dataset in dataset:
  for item in window_dataset.as_numpy_iterator():
    print(item, end=" ")
  print()

0 1 2 3 4 
1 2 3 4 5 
2 3 4 5 6 
3 4 5 6 7 
4 5 6 7 8 
5 6 7 8 9 


In [65]:
# Window the range, then flatten: batching each 5-element window into a single
# tensor turns the dataset of datasets into a dataset of length-5 tensors.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(5))
)
for batch in dataset.as_numpy_iterator():
  print(batch)

[0 1 2 3 4]
[1 2 3 4 5]
[2 3 4 5 6]
[3 4 5 6 7]
[4 5 6 7 8]
[5 6 7 8 9]


Split the data

In [66]:
# Window + flatten as before, then split every window into
# (everything but the last element, the last element kept as a length-1 slice).
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(5))
    .map(lambda w: (w[:-1], w[-1:]))
)
for x, y in dataset.as_numpy_iterator():
  print(x, y)

[0 1 2 3] [4]
[1 2 3 4] [5]
[2 3 4 5] [6]
[3 4 5 6] [7]
[4 5 6 7] [8]
[5 6 7 8] [9]


Shuffle the data before training to prevent sequence bias

In [5]:
# Window + flatten, split each window into input x (all but the last element)
# and label y (the last element itself, indexed with [-1] so it is a scalar),
# then shuffle the resulting (x, y) pairs.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(5))
    .map(lambda w: (w[:-1], w[-1]))
    .shuffle(buffer_size=10)  # buffer is at least as large as the number of pairs
)

for x, y in dataset.as_numpy_iterator():
  print(x, y)

[0 1 2 3] 4
[2 3 4 5] 6
[3 4 5 6] 7
[5 6 7 8] 9
[1 2 3 4] 5
[4 5 6 7] 8


Batch the data (into sets of 2)

In [3]:
# Full pipeline: window, flatten, split into (x, y), shuffle, then group the
# pairs into batches of 2. prefetch(1) keeps one batch ready ahead of the consumer.
dataset = (
    tf.data.Dataset.range(10)
    .window(5, shift=1, drop_remainder=True)
    .flat_map(lambda w: w.batch(5))
    .map(lambda w: (w[:-1], w[-1]))
    .shuffle(buffer_size=10)  # buffer is at least as large as the number of pairs
    .batch(2)
    .prefetch(1)
)

for x, y in dataset.as_numpy_iterator():
  print('x = ', x)
  print('y = ', y)

x =  [[4 5 6 7]
 [5 6 7 8]]
y =  [8 9]
x =  [[1 2 3 4]
 [2 3 4 5]]
y =  [5 6]
x =  [[0 1 2 3]
 [3 4 5 6]]
y =  [4 7]


# Single layer Neural Network

In [None]:
def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
  """Turn a series into shuffled, batched (window, next value) pairs.

  Args:
    series: values handed to `tf.data.Dataset.from_tensor_slices`.
    window_size: number of consecutive values that form one model input.
    batch_size: number of (input, label) pairs per emitted batch.
    shuffle_buffer: buffer size passed to `Dataset.shuffle`.

  Returns:
    A `tf.data.Dataset` of `(inputs, labels)` batches. Each input row holds
    `window_size` consecutive values; its label is the value that follows them.
  """
  span = window_size + 1  # the inputs plus the one value used as the label
  return (
      tf.data.Dataset.from_tensor_slices(series)
      # fixed-size windows shifted by one step; incomplete tail windows are dropped
      .window(span, shift=1, drop_remainder=True)
      # flatten each window dataset into a single tensor of length `span`
      .flat_map(lambda chunk: chunk.batch(span))
      .shuffle(shuffle_buffer)
      # split into inputs (all but the last value) and label (the last value)
      .map(lambda chunk: (chunk[:-1], chunk[-1]))
      .batch(batch_size)
      .prefetch(1)
  )


In [None]:
# Split the series into a training set and a validation set.
split_time = 1000  # index at which the series is cut

# Training data: everything before split_time.
time_train = time[:split_time]
x_train = series[:split_time]

# Validation data: everything from split_time onward, so it does not overlap
# with the training data.
time_valid = time[split_time:]
x_valid = series[split_time:]


## our Series

In [None]:
# TODO: define `time` and `series` here; the split, training and forecasting cells all depend on them


In [None]:
# Linear Regression
window_size = 20
batch_size = 32
shuffle_buffer_size = 1000

# Build the training dataset from the training split only, so the points later
# used for validation are never seen during fitting.
dataset = windowed_dataset(x_train, window_size, batch_size, shuffle_buffer_size)

# A single dense unit over the window: a linear regression on the last
# `window_size` values. Kept in `l0` so its weights can be inspected afterwards.
l0 = tf.keras.layers.Dense(1, input_shape=[window_size])

# Model: Sequential takes a list of layers.
model = tf.keras.models.Sequential([l0])

# Compile with mean squared error loss and stochastic gradient descent.
# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model.compile(loss="mse", optimizer=tf.keras.optimizers.SGD(learning_rate=1e-6, momentum=0.9))
model.fit(dataset, epochs=100, verbose=0)



In [None]:
# Show the parameters held by the single dense layer after training.
layer_weights = l0.get_weights()
print(f"Layer weights {layer_weights}")

In [None]:
# Print 20 consecutive items from the series.
print(series[1:21])
# Predict the value that follows them. The model expects a batch of windows,
# so np.newaxis adds the leading batch axis: shape (20,) becomes (1, 20).
model.predict(series[1:21][np.newaxis])

# our model says, when it sees 20 values like this, the predicted value is ____________

In [None]:
# Forecast every point of the series from the `window_size` values before it
# (our window size is 20).
forecast = []
# The loop index is named `start` rather than `time`, so it does not overwrite
# the `time` array that the train/validation split cell slices.
for start in range(len(series) - window_size):
  # Predict the value following each window and collect the prediction.
  forecast.append(model.predict(series[start:start + window_size][np.newaxis]))

# Keep only the forecasts that fall after the split time. Entry i predicts
# series[i + window_size], hence the offset by window_size.
forecast = forecast[split_time - window_size:]
# Each prediction has shape (1, 1); take the scalar from each for charting.
results = np.array(forecast)[:, 0, 0]

# chart results

In [None]:
# measure mean abs error

# Using a DNN

In [None]:
 # The data pipeline is the same as for the single-layer model; only the model differs.
dataset = windowed_dataset(x_train, window_size, batch_size, shuffle_buffer_size)

# Three dense layers: two hidden layers of 10 ReLU units each, then a single
# output neuron that produces the predicted value.
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(10, input_shape=[window_size], activation="relu"),
    tf.keras.layers.Dense(10, activation="relu"),
    tf.keras.layers.Dense(1),
])

# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
model.compile(loss="mse", optimizer=tf.keras.optimizers.SGD(learning_rate=1e-6, momentum=0.9))
model.fit(dataset, epochs=100, verbose=0)

In [None]:
# DNN with Callbacks
dataset = windowed_dataset(x_train, window_size, batch_size, shuffle_buffer_size)

model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(10, input_shape=[window_size], activation="relu"),
    tf.keras.layers.Dense(10, activation="relu"),
    tf.keras.layers.Dense(1),
])

# The callback sets the learning rate from the epoch number: it starts at 1e-8
# and is multiplied by 10 every 20 epochs.
lr_schedule = tf.keras.callbacks.LearningRateScheduler(lambda epoch: 1e-8 * 10**(epoch / 20))

# `learning_rate` is the supported argument name; `lr` is a deprecated alias.
optimizer = tf.keras.optimizers.SGD(learning_rate=1e-8, momentum=0.9)

model.compile(loss="mse", optimizer=optimizer)
# Keep the returned History so the per-epoch loss is available for charting.
history = model.fit(dataset, epochs=100, callbacks=[lr_schedule])

# Chart the loss per epoch against the learning rate per epoch.
# We can then pick the lowest point of the chart where the loss is still
# relatively stable, and use the learning rate at that point for training.