<a href="https://colab.research.google.com/github/doantronghieu/DEEP-LEARNING/blob/main/CSR_TF_Dev_Pro_Cert/C4/C4_W4_Lab_2_Sunspots.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import csv
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import tensorflow as tf
import tensorflow.keras as tfk
from tensorflow.keras import layers, optimizers, losses, models, callbacks, metrics
import seaborn as sns
# Switch on seaborn's default theme so the matplotlib figures below use it.
sns.set()

In [None]:
def plot_series(time, series, format = '-', start = 0, end = None, label = None):
    """Draw one segment of a series on the current matplotlib axes.

    Args:
        time: X-axis values; must support slicing.
        series: Y-axis values, sliced with the same bounds as `time`.
        format: Matplotlib format string forwarded to `plt.plot`.
        start: Index of the first point to draw.
        end: Index one past the last point to draw; `None` runs to the end.
        label: Legend entry for the line. The legend is only drawn when
            this is truthy.
    """
    segment = slice(start, end)
    plt.plot(time[segment], series[segment], format, label = label)

    # Decorate whichever axes are current; these calls are order-independent.
    plt.grid(True)
    plt.xlabel('Time')
    plt.ylabel('Value')

    if not label:
        return
    plt.legend(fontsize = 14)

In [None]:
# Sunspots.csv
!gdown --id 1bLnqPgwoSh6rHz_DKDdDeQyAyl8_nqT5

In [None]:
# Path the CSV is read from. NOTE(review): this assumes a Colab runtime whose
# working directory is /content -- adjust when running elsewhere.
sunspots_csv = '/content/Sunspots.csv'

time_step = []
sunspots  = []

# newline = '' is what the csv module asks for, so that the reader (not the
# file object) decides how line endings are interpreted.
with open(sunspots_csv, newline = '') as csvfile:
    reader = csv.reader(csvfile, delimiter = ',')
    next(reader)  # skip the first row (presumably the column headers)

    for row in reader:
        time_step.append(int  (row[0]))  # column 0 -> integer time index
        sunspots .append(float(row[2]))  # column 2 -> value to forecast

series = np.array(sunspots)
time   = np.array(time_step)

# Everything before split_time is used for training, the rest for validation.
split_time = 3000
assert len(series) > split_time, (
    f'expected more than {split_time} rows in {sunspots_csv}, got {len(series)}'
)

time_train = time  [:split_time]
time_valid = time  [split_time:]
x_train    = series[:split_time]
x_valid    = series[split_time:]

# Initial hyper-parameters. The modelling cells further down reassign
# window_size and batch_size before building their datasets.
window_size         = 30
batch_size          = 32
shuffle_buffer_size = 1000

plt.figure(figsize = (22, 7))
plot_series(time, series)

In [None]:
def windowed_dataset(series, window_size, batch_size, shuffle_buffer):
    """Turn a series into shuffled, batched (features, target) examples.

    Each run of `window_size + 1` consecutive values becomes one example:
    all but the last value are the features, and the last value alone is
    the target.

    Args:
        series: Values to window. A trailing axis of length 1 is appended
            before slicing.
        window_size: Number of values in each feature window.
        batch_size: Number of examples per emitted batch.
        shuffle_buffer: Buffer size handed to `Dataset.shuffle`.

    Returns:
        A `tf.data.Dataset` yielding `(features, target)` batches, with one
        batch prefetched.
    """
    span = window_size + 1

    def as_tensor(window):
        # Collapse the nested window dataset into a single tensor.
        return window.batch(span)

    def split_features_target(window):
        return window[:-1], window[-1]

    values   = tf.expand_dims(series, axis = -1)
    examples = tf.data.Dataset.from_tensor_slices(values)
    examples = examples.window(span, shift = 1, drop_remainder = True)
    examples = examples.flat_map(as_tensor)
    examples = examples.shuffle(shuffle_buffer)
    examples = examples.map(split_features_target)
    examples = examples.batch(batch_size)
    return examples.prefetch(1)

In [None]:
def model_forecast(model, series, window_size, batch_size = 32):
    """Run `model` over every sliding window of `series`.

    Windows are taken with a shift of 1 and incomplete trailing windows are
    dropped, so a series of length N yields N - window_size + 1 windows, in
    order (no shuffling).

    Args:
        model: Object exposing `predict(dataset)`, e.g. a Keras model.
        series: Values to slice into windows along the first axis.
        window_size: Number of consecutive values in each window.
        batch_size: Number of windows per prediction batch. Defaults to 32,
            the value that used to be hard-coded here.

    Returns:
        Whatever `model.predict` returns for the batched windows.
    """
    dataset = tf.data.Dataset.from_tensor_slices(series)
    dataset = dataset.window(window_size, shift = 1, drop_remainder = True)
    dataset = dataset.flat_map(lambda window: window.batch(window_size))
    dataset = dataset.batch(batch_size).prefetch(1)

    return model.predict(dataset)

In [None]:
# Learning-rate sweep: train briefly while the learning rate grows
# exponentially each epoch, then plot loss against learning rate.
tf.keras.backend.clear_session()
tf.random.set_seed(51)
np.random.seed(51)

window_size = 64
batch_size  = 256
train_set   = windowed_dataset(x_train, window_size, batch_size, shuffle_buffer_size)

print(train_set, '\n')
print(x_train.shape, '\n')

model = models.Sequential([
    layers.Conv1D(filters = 32, kernel_size = 5, strides = 1,
                  padding = 'causal', activation = 'relu',
                  input_shape = [None, 1]),
    layers.LSTM(64, return_sequences = True),
    layers.LSTM(64, return_sequences = True),
    layers.Dense(30, activation = 'relu'),
    layers.Dense(10, activation = 'relu'),
    layers.Dense(1),
    # Multiply the network output by a fixed factor of 400.
    layers.Lambda(lambda x: x * 400.0)
])

sweep_epochs = 100

def sweep_lr(epoch):
    """Learning rate for `epoch`: starts at 1e-8 and grows 10x every 20 epochs."""
    return 1e-8 * 10 ** (epoch / 20)

lr_schedule = callbacks.LearningRateScheduler(sweep_lr)

model.compile(loss = losses.Huber(),
              optimizer = optimizers.SGD(learning_rate = 1e-8, momentum = 0.9),
              metrics = ['mae'])

history = model.fit(train_set, epochs = sweep_epochs, callbacks = [lr_schedule], verbose = 0)

# Rebuild the x-axis from the schedule itself instead of reading it back from
# the history dict: the key under which Keras logs the learning rate differs
# between versions ('lr' vs 'learning_rate'), the schedule does not.
sweep_loss = history.history['loss']
sweep_lrs  = [sweep_lr(epoch) for epoch in range(len(sweep_loss))]

plt.figure(figsize = (22, 7))
plt.semilogx(sweep_lrs, sweep_loss)
plt.axis([1e-8, 1e-4, 0, 60])
plt.xlabel('Learning rate')
plt.ylabel('Loss')
plt.show()

In [None]:
# Fresh Keras state and fixed seeds for the full training run.
tf.keras.backend.clear_session()
np.random.seed(51)
tf.random.set_seed(51)

window_size, batch_size = 60, 100
train_set = windowed_dataset(x_train, window_size, batch_size, shuffle_buffer_size)

print(train_set, '\n')
print(x_train.shape, '\n')

# Same stack as a list-constructed Sequential, assembled one layer at a time:
# causal Conv1D -> two sequence-returning LSTMs -> dense head -> fixed scaling.
model = models.Sequential()
model.add(layers.Conv1D(filters = 60, kernel_size = 5, strides = 1,
                        padding = 'causal', activation = 'relu',
                        input_shape = [None, 1]))
model.add(layers.LSTM(60, return_sequences = True))
model.add(layers.LSTM(60, return_sequences = True))
model.add(layers.Dense(30, activation = 'relu'))
model.add(layers.Dense(10, activation = 'relu'))
model.add(layers.Dense(1))
model.add(layers.Lambda(lambda x: x * 400.0))

huber_loss   = losses.Huber()
sgd_momentum = optimizers.SGD(learning_rate = 1e-5, momentum = 0.9)
model.compile(loss = huber_loss, optimizer = sgd_momentum, metrics = ['mae'])

history = model.fit(train_set, epochs = 500, verbose = 0)

In [None]:
# Predict over every window of the full series (a trailing channel axis is
# added to match the model's [None, 1] input).
rnn_forecast = model_forecast(model, series[..., np.newaxis], window_size)

# Window i covers series[i : i + window_size], so its prediction is for
# series[i + window_size]. Start at the window whose target is the first
# validation point, and drop the final window, whose target lies past the end
# of the data. The model returns a full sequence per window; keep only the
# last time step of the single output channel.
rnn_forecast = rnn_forecast[split_time - window_size:-1, -1, 0]

assert rnn_forecast.shape == x_valid.shape, (
    f'forecast shape {rnn_forecast.shape} does not line up with '
    f'validation shape {x_valid.shape}'
)

plt.figure(figsize = (22, 7))
# Label both curves so the legend tells them apart.
plot_series(time_valid, x_valid, label = 'Actual')
plot_series(time_valid, rnn_forecast, label = 'Forecast')

print(f'Forecast: {rnn_forecast}\n')
print(f'Error: {metrics.mean_absolute_error(x_valid, rnn_forecast).numpy()}')

In [None]:
# Per-epoch training metrics recorded by model.fit. Only training values
# exist here: no validation data was passed to fit.
mae  = history.history['mae']
loss = history.history['loss']
epochs = range(len(loss)) # Number of epochs

# The second panel skips the first epochs so the later, flatter part of the
# curves is readable.
zoom_start = 200
epochs_zoom = epochs[zoom_start:]
mae_zoom    = mae[zoom_start:]
loss_zoom   = loss[zoom_start:]

fig, axs = plt.subplots(1, 2, figsize = (22, 10))

# Both curves are error measures, so the y-axis is labelled as such.
axs[0].plot(epochs, mae, 'r',  label = 'MAE')
axs[0].plot(epochs, loss, 'b', label = 'Loss')
axs[0].set(xlabel = 'Epochs', ylabel = 'Error', title = 'MAE and Loss')
axs[0].legend()

axs[1].plot(epochs_zoom, mae_zoom, 'r', label = 'MAE')
axs[1].plot(epochs_zoom, loss_zoom, 'b', label = 'Loss')
axs[1].set(xlabel = 'Epochs', ylabel = 'Error',
           title = f'MAE and Loss (from epoch {zoom_start})')
axs[1].legend()

plt.tight_layout()
plt.show()