To compare the performance of LSTM, GRU, RNN, and Transformer-based models, we'll use a publicly available dataset suitable for sequence prediction. The Air Quality UCI dataset is a good candidate, as it contains time series data on air quality measurements. The task will be to predict the air quality index (AQI) based on historical data.

Overview of the Plan
- Dataset Selection: Use the Air Quality UCI dataset.
- Data Preprocessing: Prepare the data for each model.
- Model Implementation: Implement RNN, GRU, LSTM, and Transformer models.
- Training and Inference: Measure training time, inference time, and evaluation metrics.
- Results Comparison: Compare the performance of each model.

#### Dataset Selection
We'll use the Air Quality UCI dataset, which can be downloaded from UCI Machine Learning Repository (https://archive.ics.uci.edu/dataset/360/air+quality). 

```bash
pip install pandas numpy seaborn matplotlib
```

#### Data Preprocessing
Here’s a quick script to preprocess the dataset and prepare it for modeling.

In [9]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
import zipfile
import requests
import os

# Define URL and download ZIP file
url = "https://archive.ics.uci.edu/static/public/360/air+quality.zip"
zip_file = "air_quality.zip"
csv_file = "AirQualityUCI.csv"

# Download the ZIP file
response = requests.get(url)
with open(zip_file, 'wb') as f:
    f.write(response.content)

# Extract the CSV file from the ZIP
with zipfile.ZipFile(zip_file, 'r') as zip_ref:
    zip_ref.extract(csv_file)

# Load dataset
df = pd.read_csv(csv_file, sep=';')  # Use ';' as separator due to the dataset format

# Data preprocessing
# Keep only the relevant columns
df = df.iloc[:, [0, 1, 2, 3, 4, 5, 6, 7]]  # Adjust column indices as necessary
df.columns = ['Date', 'Time', 'CO', 'C6H6', 'NOx', 'NO2', 'O3', 'T']

# Replace periods with colons in the Time column
df['Time'] = df['Time'].str.replace('.', ':', regex=False)

# Combine Date and Time and convert to datetime
df['Datetime'] = pd.to_datetime(df['Date'] + ' ' + df['Time'], dayfirst=True)

df = df.set_index('Datetime')

# Drop irrelevant columns
df = df[['CO', 'C6H6', 'NOx', 'NO2', 'O3', 'T']]

# Replace commas with periods for numeric conversion
df = df.replace(',', '.', regex=True)

# Convert columns to numeric
df = df.apply(pd.to_numeric, errors='coerce')

# Fill missing values (interpolation)
df = df.interpolate(method='linear')

# Normalize data
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(df.values)

# Prepare sequences for time series forecasting
def create_dataset(data, time_step=1):
    X, y = [], []
    for i in range(len(data) - time_step - 1):
        X.append(data[i:(i + time_step), :])
        y.append(data[i + time_step, 0])  # Predicting CO
    return np.array(X), np.array(y)

# Create datasets
time_step = 10
X, y = create_dataset(scaled_data, time_step)

# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Print shapes of the datasets
print("Shapes of the datasets:")
print("X_train shape:", X_train.shape)
print("X_test shape:", X_test.shape)
print("y_train shape:", y_train.shape)
print("y_test shape:", y_test.shape)


Shapes of the datasets:
X_train shape: (7568, 10, 6)
X_test shape: (1892, 10, 6)
y_train shape: (7568,)
y_test shape: (1892,)


#### Implementing Models in TensorFlow and PyTorch
Next, I will provide implementations for RNN, GRU, LSTM, and Transformer using both TensorFlow and PyTorch.

#### TensorFlow Implementation
RNN, GRU, LSTM, and Transformer in TensorFlow

In [10]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, SimpleRNN, Dense, Dropout, Flatten, Input

# Function to create RNN model
def create_rnn_model(input_shape):
    model = Sequential()
    model.add(SimpleRNN(50, activation='relu', input_shape=input_shape))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model

# Function to create GRU model
def create_gru_model(input_shape):
    model = Sequential()
    model.add(GRU(50, activation='relu', input_shape=input_shape))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model

# Function to create LSTM model
def create_lstm_model(input_shape):
    model = Sequential()
    model.add(LSTM(50, activation='relu', input_shape=input_shape))
    model.add(Dense(1))
    model.compile(optimizer='adam', loss='mse')
    return model

# Function to create Transformer model
def create_transformer_model(input_shape):
    inputs = Input(shape=input_shape)
    x = Flatten()(inputs)
    x = Dense(128, activation='relu')(x)
    x = Dropout(0.1)(x)
    x = Dense(64, activation='relu')(x)
    x = Dropout(0.1)(x)
    x = Dense(32, activation='relu')(x)
    outputs = Dense(1)(x)
    model = tf.keras.Model(inputs, outputs)
    model.compile(optimizer='adam', loss='mse')
    return model

# Split data into training and testing sets
train_size = int(len(X) * 0.8)
X_train, X_test = X[:train_size], X[train_size:]
y_train, y_test = y[:train_size], y[train_size:]

# Prepare input shapes
input_shape = (X_train.shape[1], X_train.shape[2])

# Function to train and evaluate models
def train_and_evaluate(model, X_train, y_train, X_test, y_test):
    model.fit(X_train, y_train, epochs=50, batch_size=32, verbose=0)
    predictions = model.predict(X_test)
    mse = np.mean((predictions - y_test.reshape(-1, 1)) ** 2)
    return mse

# Train and evaluate models in TensorFlow
models_tf = {
    "RNN": create_rnn_model(input_shape),
    "GRU": create_gru_model(input_shape),
    "LSTM": create_lstm_model(input_shape),
    "Transformer": create_transformer_model((X_train.shape[1], X_train.shape[2])),
}

results_tf = {}
for model_name, model in models_tf.items():
    mse = train_and_evaluate(model, X_train, y_train, X_test, y_test)
    results_tf[model_name] = mse

print("TensorFlow Model Results (MSE):")
print(results_tf)


ModuleNotFoundError: No module named 'tensorflow'

#### PyTorch Implementation
RNN, GRU, LSTM, and Transformer in PyTorch

In [14]:
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import time
from sklearn.metrics import mean_squared_error

# Define the RNN Model
class RNNModel(nn.Module):
    def __init__(self, input_size):
        super(RNNModel, self).__init__()
        self.rnn = nn.RNN(input_size, 50, batch_first=True)
        self.fc = nn.Linear(50, 1)

    def forward(self, x):
        out, _ = self.rnn(x)
        out = self.fc(out[:, -1, :])  # Use the last output
        return out

# Define the GRU Model
class GRUModel(nn.Module):
    def __init__(self, input_size):
        super(GRUModel, self).__init__()
        self.gru = nn.GRU(input_size, 50, batch_first=True)
        self.fc = nn.Linear(50, 1)

    def forward(self, x):
        out, _ = self.gru(x)
        out = self.fc(out[:, -1, :])
        return out

# Define the LSTM Model
class LSTMModel(nn.Module):
    def __init__(self, input_size):
        super(LSTMModel, self).__init__()
        self.lstm = nn.LSTM(input_size, 50, batch_first=True)
        self.fc = nn.Linear(50, 1)

    def forward(self, x):
        out, _ = self.lstm(x)
        out = self.fc(out[:, -1, :])
        return out

# Define a simple Transformer Model
class TransformerModel(nn.Module):
    def __init__(self, input_size):
        super(TransformerModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 128)
        self.fc2 = nn.Linear(128, 64)
        self.fc3 = nn.Linear(64, 32)
        self.fc_out = nn.Linear(32, 1)

    def forward(self, x):
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = torch.dropout(x, p=0.1, train=self.training)
        x = torch.relu(self.fc2(x))
        x = torch.dropout(x, p=0.1, train=self.training)
        x = torch.relu(self.fc3(x))
        return self.fc_out(x)

# Function to train and evaluate models in PyTorch
def train_and_evaluate_pytorch(model, X_train, y_train, X_test, y_test):
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Convert numpy arrays to PyTorch tensors
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(-1, 1)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)

    # Training
    start_time = time.time()
    model.train()
    for epoch in range(50):
        optimizer.zero_grad()
        outputs = model(X_train_tensor)
        loss = criterion(outputs, y_train_tensor)
        loss.backward()
        optimizer.step()
    training_time = time.time() - start_time

    # Evaluation
    model.eval()
    with torch.no_grad():
        start_inference = time.time()
        predictions = model(X_test_tensor)
        inference_time = time.time() - start_inference

    # Calculate RMSE
    mse = criterion(predictions, torch.tensor(y_test, dtype=torch.float32).view(-1, 1)).item()
    rmse = mse ** 0.5

    return training_time, inference_time, rmse

# Prepare input size for models
input_size = X_train.shape[2]

# Train and evaluate models in PyTorch
models_pt = {
    "RNN": RNNModel(input_size),
    "GRU": GRUModel(input_size),
    "LSTM": LSTMModel(input_size),
    "Transformer": TransformerModel(input_size * time_step),  # Flatten for transformer
}

results_pt = []
for model_name, model in models_pt.items():
    training_time, inference_time, rmse = train_and_evaluate_pytorch(model, X_train, y_train, X_test, y_test)
    results_pt.append({
        'Model': model_name,
        'Training Time (s)': training_time,
        'Average Inference Time (s)': inference_time,
        'RMSE': rmse
    })

# Convert results to DataFrame
results_df = pd.DataFrame(results_pt)

# Display results
print("PyTorch Model Results:")
print(results_df)


PyTorch Model Results:
         Model  Training Time (s)  Average Inference Time (s)      RMSE
0          RNN          19.172516                    0.069543  0.242882
1          GRU         138.019992                    0.853577  0.261966
2         LSTM         150.027126                    0.317735  0.258679
3  Transformer           9.036658                    0.054682  0.185528
