In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.preprocessing.sequence import TimeseriesGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, GRU, SimpleRNN, Dense, Bidirectional

In [None]:
# Step 1: Generate Synthetic Time Series Data
# I'll start by generating a synthetic time series dataset with a mix of trend, seasonality, and noise.

In [None]:
def generate_synthetic_data(n_points=1000, seed=None):
    """Build a synthetic 1-D series: linear trend + sine seasonality + Gaussian noise.

    Parameters
    ----------
    n_points : int, default 1000
        Number of samples to generate.
    seed : int or None, default None
        When given, the noise is drawn from a private
        ``np.random.default_rng(seed)``, so the same seed always yields the
        same series regardless of global random state. When ``None`` the noise
        comes from NumPy's global random state (``np.random.randn``), so
        ``np.random.seed(...)`` still controls it.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_points,)``.
    """
    # Integer time index 0 .. n_points-1, one entry per sample
    t = np.arange(0, n_points)

    # Slow upward drift: +0.01 per step
    trend = 0.01 * t

    # Sine wave with amplitude 0.5 and angular frequency 0.1 rad/step
    seasonal = 0.5 * np.sin(0.1 * t)

    # Standard-normal draws; the global-state path is kept for seed=None so
    # callers that rely on np.random.seed() see no change in behaviour.
    if seed is None:
        draws = np.random.randn(n_points)
    else:
        draws = np.random.default_rng(seed).standard_normal(n_points)
    noise = 0.2 * draws

    # Combined series
    series = trend + seasonal + noise
    return series

In [None]:
# Generate initial time series data with 1000 points.
# Seed NumPy's global random state first so that Restart & Run All produces
# the same synthetic data every time. This only fixes the data; model weight
# initialisation is not seeded here.
SEED = 42
np.random.seed(SEED)
n_points = 1000
series = generate_synthetic_data(n_points)

In [None]:
# Plot the generated time series data on an explicit Axes, with axis labels
# so the figure can be read on its own.
fig, ax = plt.subplots()
ax.plot(series)
ax.set_title("Synthetic Time Series")
ax.set_xlabel("Time step")
ax.set_ylabel("Value")
plt.show()

In [None]:
# Step 2: Normalize and Prepare Data for Modeling
# I'll normalize the data and prepare it for time series modeling by creating sequences with a specified window size.

In [None]:
# Normalize the series using min/max statistics from the training portion only.
# Fitting the scaler on the full series would leak the held-out segment's range
# into training. Values outside the fitted range (e.g. the trending tail) are
# not clipped and may therefore fall outside [0, 1].
scaler = MinMaxScaler()
train_fraction = 0.8  # must match the fraction used by the train/test split of series_scaled
n_fit = int(len(series) * train_fraction)
scaler.fit(series[:n_fit].reshape(-1, 1))
series_scaled = scaler.transform(series.reshape(-1, 1)).flatten()

In [None]:
# Windowing hyper-parameters: each sample is a window of `sequence_length`
# consecutive points, and windows are served in batches of `batch_size`.
sequence_length, batch_size = 50, 32

In [None]:
# Chronological split: the first 80% of the scaled series is used for
# training, the remaining tail for testing.
split_index = int(len(series_scaled) * 0.8)
train_series, test_series = np.split(series_scaled, [split_index])

In [None]:
# Wrap each split in a generator of (window, next value) batches. The series
# is passed as both data and targets, so each window of `sequence_length`
# points is paired with the point that follows it.
train_generator = TimeseriesGenerator(
    data=train_series,
    targets=train_series,
    length=sequence_length,
    batch_size=batch_size,
)
test_generator = TimeseriesGenerator(
    data=test_series,
    targets=test_series,
    length=sequence_length,
    batch_size=batch_size,
)

In [None]:
# Step 3: Define Different RNN Models
# Now, I'll define four recurrent architectures to compare: an LSTM, a GRU, a bidirectional LSTM, and a two-layer stacked SimpleRNN (the "Deep RNN").

In [None]:
# LSTM baseline
def create_lstm_model():
    """Return a compiled single-layer LSTM regressor.

    Architecture: ``LSTM(50, relu)`` over inputs of shape
    ``(sequence_length, 1)`` followed by ``Dense(1)``. Compiled with the Adam
    optimizer and mean-squared-error loss. ``sequence_length`` is read from
    the notebook's global scope at call time.
    """
    recurrent = LSTM(50, activation='relu', input_shape=(sequence_length, 1))
    head = Dense(1)
    net = Sequential([recurrent, head])
    net.compile(loss='mse', optimizer='adam')
    return net

In [None]:
# GRU baseline
def create_gru_model():
    """Return a compiled single-layer GRU regressor.

    Architecture: ``GRU(50, relu)`` over inputs of shape
    ``(sequence_length, 1)`` followed by ``Dense(1)``. Compiled with the Adam
    optimizer and mean-squared-error loss. ``sequence_length`` is read from
    the notebook's global scope at call time.
    """
    recurrent = GRU(50, activation='relu', input_shape=(sequence_length, 1))
    head = Dense(1)
    net = Sequential([recurrent, head])
    net.compile(loss='mse', optimizer='adam')
    return net

In [None]:
# Bidirectional LSTM
def create_bidirectional_model():
    """Return a compiled bidirectional-LSTM regressor.

    Architecture: ``Bidirectional(LSTM(50, relu))`` over inputs of shape
    ``(sequence_length, 1)`` followed by ``Dense(1)``. Compiled with the Adam
    optimizer and mean-squared-error loss. ``sequence_length`` is read from
    the notebook's global scope at call time.
    """
    inner_lstm = LSTM(50, activation='relu')
    recurrent = Bidirectional(inner_lstm, input_shape=(sequence_length, 1))
    head = Dense(1)
    net = Sequential([recurrent, head])
    net.compile(loss='mse', optimizer='adam')
    return net

In [None]:
# Two-layer stacked SimpleRNN ("Deep RNN")
def create_deep_rnn_model():
    """Return a compiled two-layer SimpleRNN regressor.

    Architecture: ``SimpleRNN(50, relu, return_sequences=True)`` over inputs
    of shape ``(sequence_length, 1)``, a second ``SimpleRNN(50, relu)``, then
    ``Dense(1)``. Compiled with the Adam optimizer and mean-squared-error
    loss. ``sequence_length`` is read from the notebook's global scope at
    call time.
    """
    # The first layer emits its full output sequence so the second recurrent
    # layer receives one vector per time step.
    first_rnn = SimpleRNN(
        50,
        activation='relu',
        return_sequences=True,
        input_shape=(sequence_length, 1),
    )
    second_rnn = SimpleRNN(50, activation='relu')
    head = Dense(1)
    net = Sequential([first_rnn, second_rnn, head])
    net.compile(loss='mse', optimizer='adam')
    return net

In [None]:
# Step 4: Train and Evaluate Each Model on Initial Dataset
# I'll now train each model on the initial dataset and record the test loss.

In [None]:
# Shared fit-then-score routine used for every architecture
def train_and_evaluate_model(model, train_gen, test_gen, epochs=10):
    """Fit ``model`` on ``train_gen`` and return its score on ``test_gen``.

    Parameters
    ----------
    model
        Object exposing ``fit`` and ``evaluate``; it is trained in place.
    train_gen
        Training data passed as the first argument to ``model.fit``.
    test_gen
        Held-out data, used both as ``validation_data`` during fitting and as
        the input to the final ``model.evaluate`` call.
    epochs : int, default 10
        Number of epochs forwarded to ``model.fit``.

    Returns
    -------
    Whatever ``model.evaluate(test_gen)`` returns.
    """
    fit_options = {"epochs": epochs, "validation_data": test_gen}
    model.fit(train_gen, **fit_options)
    return model.evaluate(test_gen)

In [None]:
# Instantiate one untrained model per architecture, keyed by display name
models = {
    label: build()
    for label, build in (
        ("LSTM", create_lstm_model),
        ("GRU", create_gru_model),
        ("BiDirectional RNN", create_bidirectional_model),
        ("Deep RNN", create_deep_rnn_model),
    )
}

In [None]:
# Fit every architecture on the initial dataset and keep its final test loss
results_initial = {}
for name in models:
    model = models[name]
    print(f"Training {name} model on initial dataset")
    loss = train_and_evaluate_model(
        model=model,
        train_gen=train_generator,
        test_gen=test_generator,
    )
    results_initial[name] = loss

In [None]:
# Step 5: Increase Dataset Size by 3x and Repeat Experiments
# I’ll create a new dataset that’s three times larger, repeating the data generation process with more points.

In [None]:
# Generate a larger dataset with 3000 points
series_large = generate_synthetic_data(n_points * 3)

# Scale with a dedicated scaler fitted on the training portion only (leading
# 80%, matching the split applied to series_large_scaled). This avoids leaking
# the test segment's range into training and leaves the scaler fitted on the
# initial dataset untouched. Values outside the fitted range are not clipped.
scaler_large = MinMaxScaler()
n_fit_large = int(len(series_large) * 0.8)
scaler_large.fit(series_large[:n_fit_large].reshape(-1, 1))
series_large_scaled = scaler_large.transform(series_large.reshape(-1, 1)).flatten()

In [None]:
# Chronological split of the larger series: first 80% for training, the
# remaining tail for testing.
split_index_large = int(len(series_large_scaled) * 0.8)
train_series_large, test_series_large = np.split(
    series_large_scaled, [split_index_large]
)

In [None]:
# Windowed batch generators for the larger dataset, using the same window
# length and batch size as the initial experiment.
train_generator_large = TimeseriesGenerator(
    data=train_series_large,
    targets=train_series_large,
    length=sequence_length,
    batch_size=batch_size,
)
test_generator_large = TimeseriesGenerator(
    data=test_series_large,
    targets=test_series_large,
    length=sequence_length,
    batch_size=batch_size,
)

In [None]:
# Train freshly initialised models on the larger dataset and record performance.
# Iterating over `models` here would continue training networks that were
# already fitted on the initial dataset, so the two sets of results would not
# be a like-for-like comparison.
model_builders = {
    "LSTM": create_lstm_model,
    "GRU": create_gru_model,
    "BiDirectional RNN": create_bidirectional_model,
    "Deep RNN": create_deep_rnn_model,
}
models_large = {}
results_large = {}
for name, build_model in model_builders.items():
    print(f"Training {name} model on larger dataset")
    model = build_model()
    loss = train_and_evaluate_model(model, train_generator_large, test_generator_large)
    models_large[name] = model  # keep the trained network for later inspection
    results_large[name] = loss

In [None]:
# Step 6: Compare Results
# Finally, I'll print the results to compare how each model performed on the smaller and larger datasets.

In [None]:
# Report the test loss each model reached on the initial dataset
print("Results on Initial Dataset (1000 points):")
for name in results_initial:
    loss = results_initial[name]
    print(f"{name}: Loss = {loss:.4f}")

In [None]:
# Report the test loss each model reached on the larger dataset
print("\nResults on Larger Dataset (3000 points):")
for name in results_large:
    loss = results_large[name]
    print(f"{name}: Loss = {loss:.4f}")