In [43]:
import numpy as np
import pandas as pd
import pickle 
import sklearn 
import matplotlib.pyplot as plt 

In [44]:
# Load the pickled training split. Later cells index it with .iloc and ['pollution'],
# so it is presumably a pandas DataFrame with a 'pollution' column — TODO confirm.
# NOTE(review): pickle.load can execute arbitrary code; only open files from a trusted source.
with open('../data/train_data.pickle', 'rb') as f:
    train_data = pickle.load(f)

In [45]:
# Load the pickled test split. Later cells index it with .iloc and ['pollution'],
# so it is presumably a pandas DataFrame with a 'pollution' column — TODO confirm.
# NOTE(review): pickle.load can execute arbitrary code; only open files from a trusted source.
with open('../data/test_data.pickle', 'rb') as f:
    test_data = pickle.load(f)

In [46]:
def generate_sequence(df, N, window_size):
    """Slice the first ``N`` rows of ``df`` into overlapping windows and next-step targets.

    Sample ``i`` consists of rows ``i .. i + window_size - 1`` (all columns); its
    target is the ``'pollution'`` value of row ``i + window_size``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns can be cast to float64 and that has a ``'pollution'`` column.
    N : int
        Number of leading rows to use. Must not exceed ``len(df)`` when at least
        one sample is produced.
    window_size : int
        Number of consecutive rows in each input window.

    Returns
    -------
    X : numpy.ndarray
        float64 array of shape ``(max(N - window_size, 0), window_size, n_columns)``.
    y : numpy.ndarray
        float64 array of shape ``(max(N - window_size, 0), 1)``.

    Raises
    ------
    IndexError
        If samples are requested beyond the last row of ``df`` (``N > len(df)``).
    """
    values = df.to_numpy(dtype='float64')
    targets = df['pollution'].to_numpy(dtype='float64')

    n_samples = N - window_size
    if n_samples <= 0:
        # Keep the rank of the outputs stable even when no window fits, so
        # downstream code can still read .shape[1] / .shape[2].
        return (np.empty((0, window_size, values.shape[1]), dtype='float64'),
                np.empty((0, 1), dtype='float64'))

    if N > len(values):
        raise IndexError(
            f"N={N} exceeds the number of rows in df ({len(values)})"
        )

    # row_index[i, j] == i + j: one integer-array lookup gathers every window at once
    # instead of building a pandas object per sample.
    row_index = np.arange(n_samples)[:, None] + np.arange(window_size)
    X = values[row_index]
    # copy() so the returned targets never alias the frame's own buffer.
    y = targets[window_size:N].reshape(-1, 1).copy()
    return X, y

In [47]:
# Number of past time steps fed to the model for each prediction.
window_size = 10

# Row counts of each split; generate_sequence uses them as the upper bound for windowing.
N, M = len(train_data), len(test_data)

# Multivariate windows: every column of the frame is an input feature,
# and the target is the following row's 'pollution' value.
X_train_mv, y_train_mv = generate_sequence(train_data, N, window_size)
X_test_mv, y_test_mv = generate_sequence(test_data, M, window_size)

# Sanity-check the resulting array shapes.
print(X_train_mv.shape, y_train_mv.shape)
print(X_test_mv.shape, y_test_mv.shape)

(39410, 10, 8) (39410, 1)
(4370, 10, 8) (4370, 1)


In [48]:
# Univariate inputs: keep only the 'pollution' column of each split.
train_data_uv, test_data_uv = train_data['pollution'], test_data['pollution']

In [49]:
def create_sequences(data, sequence_length):
    """Turn a series into sliding windows and the value that follows each window.

    Parameters
    ----------
    data : pandas.Series
        Ordered observations (any pandas object exposing ``to_numpy`` works).
    sequence_length : int
        Number of consecutive observations in each window.

    Returns
    -------
    sequences : numpy.ndarray
        Shape ``(max(len(data) - sequence_length, 0), sequence_length)``;
        row ``i`` holds observations ``i .. i + sequence_length - 1``.
    labels : numpy.ndarray
        Shape ``(max(len(data) - sequence_length, 0),)``; element ``i`` is
        observation ``i + sequence_length``.
    """
    values = data.to_numpy()

    n_samples = len(values) - sequence_length
    if n_samples <= 0:
        # Preserve the (samples, sequence_length) rank when no window fits so
        # callers can still rely on .shape[1].
        return (np.empty((0, sequence_length) + values.shape[1:], dtype=values.dtype),
                np.empty((0,) + values.shape[1:], dtype=values.dtype))

    # window_index[i, j] == i + j: gather all windows with a single indexing
    # operation instead of one pandas slice per sample.
    window_index = np.arange(n_samples)[:, None] + np.arange(sequence_length)
    sequences = values[window_index]
    # copy() so the labels never alias the series' own buffer.
    labels = values[sequence_length:].copy()
    return sequences, labels

# Univariate windows over the 'pollution' series. Reuse window_size (defined with the
# multivariate windows) rather than repeating the literal, so both datasets always
# share the same look-back length.
X_train_uv, y_train_uv = create_sequences(train_data_uv, window_size)
X_test_uv, y_test_uv = create_sequences(test_data_uv, window_size)

# Sanity-check the resulting array shapes.
print(f"X_train shape: {X_train_uv.shape}, y_train shape: {y_train_uv.shape}")
print(f"X_test shape: {X_test_uv.shape}, y_test shape: {y_test_uv.shape}")

X_train shape: (39410, 10), y_train shape: (39410,)
X_test shape: (4370, 10), y_test shape: (4370,)


In [50]:
# Dimensions of the multivariate problem, read from the windowed arrays:
# axis 1 of X is the window length, axis 2 the number of input columns,
# and axis 1 of y the number of predicted values per sample.
n_steps, n_features = X_train_mv.shape[1:3]
n_outputs = y_train_mv.shape[1]



In [51]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout, Input
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.losses import MeanSquaredError
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from numpy import concatenate
from sklearn.metrics import mean_squared_error
from math import sqrt
from keras.layers import BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping
from keras.layers import Dense
from keras.layers import Flatten
from keras.layers import ConvLSTM2D
import time 

In [52]:
# Fix TensorFlow's global random seed before any layer is created.
tf.random.set_seed(7)

# Multivariate model: two stacked LSTM layers followed by a linear regression head.
# The input shape comes from the windowed training data (n_steps, n_features) rather
# than hard-coded literals, so changing the window length or the column set does not
# require touching the model definition.
MV_LSTM = Sequential()
MV_LSTM.add(Input(shape=(n_steps, n_features)))
# return_sequences=True: the next LSTM layer needs the full output sequence.
MV_LSTM.add(LSTM(32, return_sequences=True))
MV_LSTM.add(Dropout(0.1))  # Prevent overfitting
# return_sequences=False: only the final step's output feeds the Dense head.
MV_LSTM.add(LSTM(16, return_sequences=False))
MV_LSTM.add(Dense(n_outputs, activation='linear'))

MV_LSTM.compile(optimizer=Adam(learning_rate=0.001), loss='mse', metrics=[RootMeanSquaredError()])

# Save the initial weights of the model right after it is defined and compiled,
# so later runs can be reset to the same starting point.
mv_weights = MV_LSTM.get_weights()

MV_LSTM.summary()

In [53]:
# Univariate model: a single LSTM layer followed by a one-unit regression head.
# The number of time steps is read from the windowed data instead of being hard-coded,
# so it follows whatever sequence length was used to build X_train_uv.
# NOTE(review): X_train_uv is 2-D (samples, steps) while the model declares
# (steps, 1); fit() appears to rely on Keras adding the trailing feature axis —
# confirm, or reshape X_train_uv explicitly.
UV_LSTM = Sequential()
UV_LSTM.add(Input(shape=(X_train_uv.shape[1], 1)))
# return_sequences=False: only the final step's output is passed on, which is the
# usual choice for a non-stacked LSTM that predicts a single next value.
UV_LSTM.add(LSTM(units=50, return_sequences=False))
UV_LSTM.add(Dense(1))
UV_LSTM.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error', metrics=[RootMeanSquaredError()])

# Snapshot of the untrained weights, used to reset the model before each timed run.
uv_weights = UV_LSTM.get_weights()

UV_LSTM.summary()

In [54]:
def _timed_fit(model, initial_weights, device, X, y, **fit_kwargs):
    """Reset ``model`` to ``initial_weights`` and return the seconds ``fit`` takes under ``device``."""
    with tf.device(device):
        # Restore the untrained weights so every run starts from the same parameters.
        # NOTE(review): set_weights restores layer weights only; optimizer state
        # (e.g. Adam moments) from a previous run appears to carry over — confirm
        # whether that matters for this comparison.
        model.set_weights(initial_weights)
        # perf_counter is monotonic and high-resolution, unlike the wall clock.
        started = time.perf_counter()
        model.fit(X, y, **fit_kwargs)
        return time.perf_counter() - started


def _benchmark_devices(experiments, devices):
    """Time every experiment on every device.

    ``experiments`` is a list of ``(label, model, initial_weights, X, y, fit_kwargs)``
    tuples and ``devices`` a list of ``(label, tf_device_name)`` pairs. Returns
    ``{device_label: {experiment_label: seconds}}``.
    """
    results = {device_label: {} for device_label, _ in devices}
    for run_label, model, initial_weights, X, y, fit_kwargs in experiments:
        for device_label, device_name in devices:
            results[device_label][run_label] = _timed_fit(
                model, initial_weights, device_name, X, y, **fit_kwargs
            )
    return results


# Train each model once per device, always starting from its saved initial weights.
# NOTE(review): if no GPU is visible, the '/GPU:0' runs may fall back to CPU or fail
# depending on TensorFlow's soft device placement — confirm the hardware before
# trusting the GPU column.
times = _benchmark_devices(
    experiments=[
        ("Multivariate", MV_LSTM, mv_weights, X_train_mv, y_train_mv,
         dict(epochs=25, validation_split=0.1, batch_size=100, shuffle=False)),
        ("Univariate", UV_LSTM, uv_weights, X_train_uv, y_train_uv,
         dict(epochs=30, batch_size=128, validation_split=0.1, shuffle=False)),
    ],
    devices=[("CPU", "/CPU:0"), ("GPU", "/GPU:0")],
)

# Print timings for reference
print(f"Timings (in seconds):{times}")


Epoch 1/25
[1m355/355[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 9ms/step - loss: 0.0073 - root_mean_squared_error: 0.0843 - val_loss: 0.0377 - val_root_mean_squared_error: 0.1943
Epoch 2/25
[1m355/355[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - loss: 0.0036 - root_mean_squared_error: 0.0598 - val_loss: 0.0315 - val_root_mean_squared_error: 0.1776
Epoch 3/25
[1m355/355[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - loss: 0.0028 - root_mean_squared_error: 0.0530 - val_loss: 0.0257 - val_root_mean_squared_error: 0.1603
Epoch 4/25
[1m355/355[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 8ms/step - loss: 0.0022 - root_mean_squared_error: 0.0473 - val_loss: 0.0220 - val_root_mean_squared_error: 0.1485
Epoch 5/25
[1m355/355[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 10ms/step - loss: 0.0020 - root_mean_squared_error: 0.0441 - val_loss: 0.0197 - val_root_mean_squared_error: 0.1403
Epoch 6/25
[1m355/355[0m [32m━━━━━━