<a href="https://colab.research.google.com/github/basugautam/Reproducibility-Challenge-Project/blob/Architecture-Files/7_error_distribution_shaping.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Import Necessary Libraries
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import mean_absolute_error, mean_squared_error
from sklearn.preprocessing import MinMaxScaler

# 🔹 Step 1: Load Dataset
# Windows path used by the original author. On any other machine (e.g. Colab)
# point the LCTSF_DATASET_PATH environment variable at the CSV instead of
# editing this file.
DEFAULT_DATASET_PATH = r"C:\LCTSF\Dataset\timeseries_dataset.csv"
dataset_path = os.environ.get("LCTSF_DATASET_PATH", DEFAULT_DATASET_PATH)
try:
    df = pd.read_csv(dataset_path)
except FileNotFoundError as err:
    # Same exception type as before, but with a hint on how to fix it.
    raise FileNotFoundError(
        f"Dataset not found at {dataset_path!r}. Set the LCTSF_DATASET_PATH "
        "environment variable to the location of timeseries_dataset.csv."
    ) from err

# 🔹 Step 2: Data Preprocessing
# Single-column 2-D array, the layout MinMaxScaler expects.
data = df['value'].values.reshape(-1, 1)  # Extract time-series values
# NOTE(review): the scaler is fit on the full series, i.e. before the
# train/test split performed further down, so the test range influences the
# min/max used for scaling -- confirm this is intended.
scaler = MinMaxScaler(feature_range=(0, 1))  # Normalize data
data_scaled = scaler.fit_transform(data)

# 🔹 Step 3: Create Sequences for Forecasting
def generate_sequences(data, window_size):
    """Slice a single-feature series into sliding windows and next-step targets.

    Args:
        data: 2-D array-like of shape (n_samples, n_features); only column 0
            is read.
        window_size: Number of consecutive values in each input window.

    Returns:
        A tuple ``(X, y)``: ``X[i]`` holds ``data[i:i + window_size, 0]`` and
        ``y[i]`` holds the value that immediately follows that window,
        ``data[i + window_size, 0]``. ``X`` has shape
        ``(n_windows, window_size)`` and ``y`` has shape ``(n_windows,)``.
        When the series is not longer than the window, ``n_windows`` is 0 and
        the arrays are empty but keep those shapes.
    """
    series = np.asarray(data)
    n_windows = len(series) - window_size

    if n_windows <= 0:
        # Keep X two-dimensional so callers can still read X.shape[1];
        # np.array([]) would collapse it to shape (0,).
        return (
            np.empty((0, window_size), dtype=series.dtype),
            np.empty((0,), dtype=series.dtype),
        )

    X = np.array([series[i:i + window_size, 0] for i in range(n_windows)])
    y = np.array([series[i + window_size, 0] for i in range(n_windows)])
    return X, y

# Number of past observations handed to the model for each prediction
sequence_length = 50
X, y = generate_sequences(data_scaled, sequence_length)

# Chronological 80/20 split: the earliest windows train, the latest ones test
split_idx = int(0.8 * len(X))
X_train, X_test = np.split(X, [split_idx])
y_train, y_test = np.split(y, [split_idx])

# Append a trailing feature axis of size 1 for the LSTM input
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

# 🔹 Step 4: Define the LSTM Model
def create_lstm_model():
    """Build an uncompiled stacked-LSTM model.

    The network is a 64-unit LSTM that returns the full sequence, a 32-unit
    LSTM that returns only its last output, and a Dense layer with one unit.
    The input shape is ``(sequence_length, 1)``, where ``sequence_length`` is
    read from module scope.

    Returns:
        The ``tf.keras.Sequential`` model; the caller is responsible for
        compiling and fitting it.
    """
    model = tf.keras.Sequential()
    model.add(
        tf.keras.layers.LSTM(
            64,
            activation='relu',
            return_sequences=True,
            input_shape=(sequence_length, 1),
        )
    )
    model.add(tf.keras.layers.LSTM(32, activation='relu', return_sequences=False))
    model.add(tf.keras.layers.Dense(1))
    return model

# Baseline Model - Traditional LSTM
# Reference network trained with plain mean squared error
baseline_model = create_lstm_model()
baseline_model.compile(
    loss='mse',
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
)
# The held-out windows are passed as validation data for per-epoch monitoring
baseline_model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_test, y_test),
)

# 🔹 Step 5: Define a Constrained LSTM Model for Error Shaping
class ErrorShapingLoss(tf.keras.losses.Loss):
    """
    Mean squared error plus a penalty on the spread of the absolute errors.

    For a batch the loss is
    ``mean(error ** 2) + lambda_weight * std(|error|)``, where both statistics
    are taken over every element of the batch, so ``call`` returns a scalar.
    A larger ``lambda_weight`` pushes the model towards errors of similar
    magnitude across the batch.

    Args:
        lambda_weight: Multiplier applied to the standard-deviation penalty.
        **kwargs: Forwarded to ``tf.keras.losses.Loss`` (e.g. ``name``,
            ``reduction``). Accepting them lets ``from_config`` rebuild the
            loss from the dictionary produced by ``get_config``.
    """

    def __init__(self, lambda_weight=0.1, **kwargs):
        super().__init__(**kwargs)
        self.lambda_weight = lambda_weight

    @staticmethod
    def _align_ranks(y_true, y_pred):
        """Drop a trailing size-1 axis from whichever tensor has one extra rank."""
        true_rank, pred_rank = y_true.shape.rank, y_pred.shape.rank
        if true_rank is None or pred_rank is None:
            # Unknown static rank: nothing can be decided here.
            return y_true, y_pred
        if pred_rank == true_rank + 1 and y_pred.shape[-1] == 1:
            y_pred = tf.squeeze(y_pred, axis=-1)
        elif true_rank == pred_rank + 1 and y_true.shape[-1] == 1:
            y_true = tf.squeeze(y_true, axis=-1)
        return y_true, y_pred

    def call(self, y_true, y_pred):
        """Return the scalar loss for one batch of targets and predictions."""
        y_pred = tf.convert_to_tensor(y_pred)
        y_true = tf.cast(y_true, y_pred.dtype)
        # Without this, targets of shape (batch,) and predictions of shape
        # (batch, 1) would broadcast to a (batch, batch) error matrix.
        y_true, y_pred = self._align_ranks(y_true, y_pred)

        error = tf.abs(y_true - y_pred)
        base_loss = tf.reduce_mean(tf.square(error))  # Mean Squared Error

        # Standard deviation of the errors, computed as sqrt(variance). The
        # gradient of sqrt is infinite at 0, which turns into NaN gradients
        # when every error in the batch is identical (e.g. a one-sample
        # batch). Routing the zero-variance case around sqrt keeps the value
        # unchanged and the gradient finite.
        variance = tf.math.reduce_variance(error)
        has_spread = variance > 0
        safe_variance = tf.where(has_spread, variance, tf.ones_like(variance))
        standard_dev = tf.where(
            has_spread, tf.sqrt(safe_variance), tf.zeros_like(variance)
        )

        return base_loss + self.lambda_weight * standard_dev  # Penalizing error variations

    def get_config(self):
        """Return the constructor arguments needed to re-create this loss."""
        config = super().get_config()
        config["lambda_weight"] = self.lambda_weight
        return config

# Constrained Model
# Same architecture as the baseline, trained with the error-shaping loss
constrained_model = create_lstm_model()
constrained_model.compile(
    loss=ErrorShapingLoss(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
)
# The held-out windows are passed as validation data for per-epoch monitoring
constrained_model.fit(
    X_train,
    y_train,
    batch_size=32,
    epochs=50,
    validation_data=(X_test, y_test),
)

# 🔹 Step 6: Evaluate Error Distribution
# Forecast the held-out windows with both models
y_pred_baseline = baseline_model.predict(X_test)
y_pred_constrained = constrained_model.predict(X_test)

# Undo the min-max scaling; the targets need a column axis for the scaler
y_test_column = y_test.reshape(-1, 1)
y_test_rescaled = scaler.inverse_transform(y_test_column)
y_pred_baseline_rescaled = scaler.inverse_transform(y_pred_baseline)
y_pred_constrained_rescaled = scaler.inverse_transform(y_pred_constrained)

# Absolute error of each forecast, in the original units of the series
error_baseline = np.absolute(y_test_rescaled - y_pred_baseline_rescaled)
error_constrained = np.absolute(y_test_rescaled - y_pred_constrained_rescaled)

std_baseline_


FileNotFoundError: [Errno 2] No such file or directory: 'C:\\LCTSF\\Dataset\\timeseries_dataset.csv'