<a href="https://colab.research.google.com/github/basugautam/Reproducibility-Challenge-Project/blob/main/2_Constrained_Learning_Framework.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Load the dataset. The original notebook hard-coded a Windows-only path, which
# fails with FileNotFoundError on every other machine. The LCTSF_DATASET
# environment variable can now override the location; the original path is
# kept as the default so existing setups keep working.
import os

dataset_path = os.environ.get(
    "LCTSF_DATASET",
    r"C:\LCTSF\Dataset\timeseries_dataset.csv",
)
try:
    df = pd.read_csv(dataset_path)
except FileNotFoundError as err:
    raise FileNotFoundError(
        f"Dataset not found at {dataset_path!r}. Set the LCTSF_DATASET "
        "environment variable to the location of timeseries_dataset.csv."
    ) from err

# Show the first few rows. A bare `df.head()` is only rendered when it is the
# last expression of a notebook cell, so print it explicitly.
print(df.head())

# The forecasting target is the single time series column 'value'
if 'value' not in df.columns:
    raise KeyError(
        f"Expected a 'value' column in {dataset_path!r}; "
        f"found columns: {list(df.columns)}"
    )
data = df['value'].values

# Scale the data with MinMaxScaler for LSTM/GRU input (recommended for neural
# networks). The scaler is fitted on the chronologically first 80% of the
# series only, so that the min/max of the held-out period do not leak into
# training. This mirrors the unshuffled test_size=0.2 split performed further
# down; keep the two fractions in sync. Values from the later period may
# therefore fall slightly outside [0, 1] after transformation.
train_fraction = 0.8
n_fit = max(int(len(data) * train_fraction), 1)
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(data[:n_fit].reshape(-1, 1))
data_scaled = scaler.transform(data.reshape(-1, 1))

# Turn a series into (input window, next value) samples for one-step forecasting
def create_sequences(data, seq_length):
    """
    Build sliding-window inputs and next-step targets from a time series.

    Args:
        data: Array-like of shape (n_steps, n_features) or (n_steps,). Only
            the first feature column is used.
        seq_length: Number of consecutive time steps in each input window.

    Returns:
        Tuple ``(X, y)`` of NumPy arrays. ``X`` has shape
        ``(n_samples, seq_length)`` and ``y`` has shape ``(n_samples,)``,
        where ``n_samples = max(n_steps - seq_length, 0)``. ``X[i]`` holds
        steps ``i .. i + seq_length - 1`` and ``y[i]`` is step
        ``i + seq_length``.

    Raises:
        TypeError: If ``seq_length`` is not an integer.
        ValueError: If ``seq_length`` is negative.
    """
    if not isinstance(seq_length, (int, np.integer)):
        raise TypeError(
            f"seq_length must be an integer, got {type(seq_length).__name__}"
        )
    if seq_length < 0:
        # A negative length would silently index from the end of the series.
        raise ValueError(f"seq_length must be non-negative, got {seq_length}")

    series = np.asarray(data)
    if series.ndim == 1:
        # Accept a plain 1-D series as a single-feature column.
        series = series.reshape(-1, 1)
    values = series[:, 0]

    n_samples = max(len(values) - seq_length, 0)

    # Row i of the index matrix is [i, i + 1, ..., i + seq_length - 1]. Fancy
    # indexing keeps the (n_samples, seq_length) shape even when there are no
    # samples, so downstream reshapes do not fail on a too-short series.
    window_index = np.arange(n_samples)[:, None] + np.arange(seq_length)[None, :]
    X = values[window_index]
    y = values[seq_length:seq_length + n_samples].copy()
    return X, y

# Number of past time steps fed to the model to predict the next step
seq_length = 50

# Build the supervised (window -> next value) samples from the scaled series
X, y = create_sequences(data_scaled, seq_length)

# Chronological split: shuffle=False keeps the last 20% of samples as the test
# set so the model is never evaluated on data older than what it trained on
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, shuffle=False)

# Reshape to the (samples, time steps, features) layout expected by LSTM/GRU layers
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)

# Build the LSTM-based model: one recurrent layer, dropout, and a single
# linear output unit for the one-step-ahead forecast
model = Sequential()
model.add(LSTM(units=64, return_sequences=False, input_shape=(X_train.shape[1], 1)))
model.add(Dropout(0.2))
model.add(Dense(units=1))

# Compile the model
model.compile(optimizer=Adam(learning_rate=0.001), loss='mean_squared_error', metrics=['mae'])

# Train the model. The test set is passed as validation data purely to
# monitor the loss curve; nothing is tuned on it here.
history = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test))

# Plot the loss curves for training and validation
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss During Training')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Make predictions on the test set
y_pred = model.predict(X_test)

# Invert the scaling for the predictions and actual values. The unscaled
# targets get their own name: `y_test` must stay in scaled units so that it
# remains a valid model target (and is not inverse-transformed twice) in any
# later training or evaluation step.
y_pred = scaler.inverse_transform(y_pred)
y_test_actual = scaler.inverse_transform(y_test.reshape(-1, 1))

# Plot the actual vs predicted values
plt.figure(figsize=(10,6))
plt.plot(y_test_actual, label='Actual Values')
plt.plot(y_pred, label='Predicted Values')
plt.title('Time Series Forecasting')
plt.xlabel('Time Steps')
plt.ylabel('Value')
plt.legend()
plt.show()

# Evaluate the model's performance in the original units of the series
from sklearn.metrics import mean_absolute_error, mean_squared_error
mae = mean_absolute_error(y_test_actual, y_pred)
mse = mean_squared_error(y_test_actual, y_pred)

print(f'Mean Absolute Error: {mae}')
print(f'Mean Squared Error: {mse}')

# Custom loss that penalises forecast errors exceeding an upper bound
def bounded_loss(y_true, y_pred, upper_bound=0.1):
    """
    Mean absolute error with an extra linear penalty above `upper_bound`.

    Every absolute error contributes itself plus the amount by which it
    exceeds `upper_bound`, so the part of an error above the bound counts
    twice. This is a soft penalty: it discourages, but does not guarantee,
    errors below the bound.

    Args:
        y_true: Ground-truth values.
        y_pred: Predicted values, broadcastable against `y_true`.
        upper_bound: Error threshold, in the same units as `y_true` and
            `y_pred` (scaled units when the model is trained on scaled data).

    Returns:
        Scalar tensor: the mean of ``error + max(0, error - upper_bound)``
        over all elements.
    """
    # Follow the convention of the built-in Keras losses: bring both inputs to
    # the prediction dtype so the function also works when called directly
    # with, for example, float64 targets and float32 predictions.
    y_pred = tf.convert_to_tensor(y_pred)
    y_true = tf.cast(y_true, y_pred.dtype)

    # Absolute error at each time step
    error = tf.abs(y_true - y_pred)

    # Portion of each error that lies above the bound (zero when within it)
    penalty = tf.maximum(0.0, error - upper_bound)

    # Mean of the error plus its above-bound excess
    return tf.reduce_mean(error + penalty)

# Recompile the model with the custom bounded loss. This keeps the weights
# learned so far and resets the optimizer, so the following fit continues
# training the same network under the new objective.
model.compile(optimizer=Adam(learning_rate=0.001), loss=bounded_loss, metrics=['mae'])

# Take the scaled test targets straight from `y` instead of from `y_test`.
# With the unshuffled split the test samples are exactly the tail of `y`, and
# `y` is never modified, so this section is correct regardless of whether
# `y_test` has been converted back to original units in the meantime.
y_test_scaled = y[len(y) - len(X_test):]

# Retrain the model with the custom loss function to enforce error constraints
history_bounded = model.fit(X_train, y_train, epochs=50, batch_size=32, validation_data=(X_test, y_test_scaled))

# Plot the loss curves for training and validation with bounded loss
plt.plot(history_bounded.history['loss'], label='Training Loss (Bounded)')
plt.plot(history_bounded.history['val_loss'], label='Validation Loss (Bounded)')
plt.title('Model Loss with Bounded Error')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()

# Make predictions again using the retrained model
y_pred_bounded = model.predict(X_test)

# Invert the scaling exactly once for both predictions and actual values
y_pred_bounded = scaler.inverse_transform(y_pred_bounded)
y_test_bounded = scaler.inverse_transform(y_test_scaled.reshape(-1, 1))

# Plot the actual vs predicted values with bounded error
plt.figure(figsize=(10,6))
plt.plot(y_test_bounded, label='Actual Values')
plt.plot(y_pred_bounded, label='Predicted Values (Bounded)')
plt.title('Time Series Forecasting with Error Constraints')
plt.xlabel('Time Steps')
plt.ylabel('Value')
plt.legend()
plt.show()

# Evaluate the model's performance with bounded loss, in original units
mae_bounded = mean_absolute_error(y_test_bounded, y_pred_bounded)
mse_bounded = mean_squared_error(y_test_bounded, y_pred_bounded)

print(f'Mean Absolute Error (Bounded): {mae_bounded}')
print(f'Mean Squared Error (Bounded): {mse_bounded}')


FileNotFoundError: [Errno 2] No such file or directory: 'C:\\LCTSF\\Dataset\\timeseries_dataset.csv'