In [None]:
import numpy as np
import pandas as pd

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau

In [None]:
# Notebook-wide configuration constants.
SEQUENCE_LENGTH = 24 * 2  # 48; reported further down as the number of time steps per input sequence
STEP_SIZE = 24  # not referenced by any cell in this notebook — presumably the window stride used during preprocessing; TODO confirm
FORECAST_HORIZON = 1  # number of units in the model's output layer
BATCH_SIZE = 32  # mini-batch size passed to model.fit
NUM_EPOCHS = 10  # upper bound on epochs for the initial training run (early stopping may end it sooner)
SELECT_MODEL = 0  # suffix selecting which ./data/finalized/*_feature_{SELECT_MODEL}.npy files are loaded

In [None]:
# Load preprocessed feature data
# Reads the train / validation / test inputs (X_*) and targets (Y_*) for the
# feature set selected by SELECT_MODEL from ./data/finalized/.
# NOTE(review): X is presumably shaped (samples, sequence_length, 1) so that it
# matches the model's Input layer — confirm against the preprocessing step.
X_train_feature = np.load(f"./data/finalized/X_train_feature_{SELECT_MODEL}.npy")
Y_train_feature = np.load(f"./data/finalized/Y_train_feature_{SELECT_MODEL}.npy")
X_val_feature = np.load(f"./data/finalized/X_val_feature_{SELECT_MODEL}.npy")
Y_val_feature = np.load(f"./data/finalized/Y_val_feature_{SELECT_MODEL}.npy")
X_test_feature = np.load(f"./data/finalized/X_test_feature_{SELECT_MODEL}.npy")
Y_test_feature = np.load(f"./data/finalized/Y_test_feature_{SELECT_MODEL}.npy")

In [None]:
# Sanity check: number of test targets that were loaded
len(Y_test_feature)

In [None]:
def create_cnn_model(seq_length, forecast_horizon):
    """
    Build an uncompiled 1D CNN for time-series regression with TensorFlow/Keras.

    Layer stack:
        Input -> Conv1D -> Dropout -> MaxPool -> Flatten -> Dense -> Dropout -> Dense

    Args:
        seq_length: Number of time steps per input window (one channel per step).
        forecast_horizon: Number of units in the final (linear) Dense layer.

    Returns:
        A `Sequential` model; compiling it is left to the caller.
    """
    # Convolutional front end operating on (seq_length, 1) windows.
    feature_extractor = [
        layers.Input(shape=(seq_length, 1)),
        layers.Conv1D(filters=64, kernel_size=5, activation='relu', padding='valid'),
        layers.Dropout(0.25),
        layers.MaxPooling1D(pool_size=2),
        layers.Flatten(),
    ]
    # Fully connected head producing `forecast_horizon` values.
    regression_head = [
        layers.Dense(50, activation='relu'),
        layers.Dropout(0.25),
        layers.Dense(forecast_horizon),
    ]
    return models.Sequential(feature_extractor + regression_head)

In [None]:
def evaluate_model(model, X_test, Y_test):
    """Run silent predictions on `X_test` and pair them with the targets.

    Returns:
        Tuple `(y_true, y_pred)`: `y_true` is `Y_test` itself, `y_pred` is
        whatever `model.predict(X_test, verbose=0)` returned.
    """
    predictions = model.predict(X_test, verbose=0)
    return Y_test, predictions

def print_metrics(y_true, y_pred, model_name="Model"):
    """Print regression, spread and range metrics for a set of predictions.

    Both inputs are flattened to 1-D before any metric is computed, so
    `(n,)` and `(n, 1)` arrays (or plain nested lists) can be mixed freely.

    Args:
        y_true: Ground-truth values (array-like).
        y_pred: Predicted values (array-like) with the same number of elements.
        model_name: Label used in the printed header.

    Returns:
        Tuple `(y_true, y_pred, mse, mae, rmse, r2)`; `y_true` and `y_pred`
        are the objects that were passed in, unchanged.
    """
    # Flatten once up front instead of creating a fresh copy for every metric;
    # np.asarray also lets callers pass lists rather than ndarrays.
    true_flat = np.asarray(y_true).ravel()
    pred_flat = np.asarray(y_pred).ravel()

    # Calculate metrics
    mse = mean_squared_error(true_flat, pred_flat)
    mae = mean_absolute_error(true_flat, pred_flat)
    rmse = np.sqrt(mse)
    r2 = r2_score(true_flat, pred_flat)

    # Spread metrics. Note: the value printed as "Variance Ratio" is the ratio
    # of standard deviations; it is 0 when the targets are constant.
    true_std = np.std(true_flat)
    pred_std = np.std(pred_flat)
    variance_ratio = pred_std / true_std if true_std > 0 else 0

    # Range metrics (max - min); ratio is 0 when the targets are constant.
    true_range = np.max(true_flat) - np.min(true_flat)
    pred_range = np.max(pred_flat) - np.min(pred_flat)
    range_ratio = pred_range / true_range if true_range > 0 else 0

    print(f"\n{model_name} Regression Metrics:")
    print(f"Mean Squared Error (MSE): {mse:.4f}")
    print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
    print(f"Mean Absolute Error (MAE): {mae:.4f}")
    print(f"R² Score: {r2:.4f}")
    print(f"\nVariance Analysis:")
    print(f"Actual Std Dev: {true_std:.4f}")
    print(f"Predicted Std Dev: {pred_std:.4f}")
    print(f"Variance Ratio (pred/actual): {variance_ratio:.4f}")
    print(f"\nRange Analysis:")
    print(f"Actual Range: {true_range:.4f}")
    print(f"Predicted Range: {pred_range:.4f}")
    print(f"Range Ratio (pred/actual): {range_ratio:.4f}")

    return y_true, y_pred, mse, mae, rmse, r2

def evaluate_and_print_metrics(model, X_test, Y_test, model_name="Model"):
    """Predict on the test inputs and immediately print the metrics.

    Returns:
        The tuple produced by `print_metrics`:
        `(y_true, y_pred, mse, mae, rmse, r2)`.
    """
    targets_and_predictions = evaluate_model(model, X_test, Y_test)
    return print_metrics(*targets_and_predictions, model_name)

In [None]:
def mse_with_variance_loss(alpha=0.15):
    """
    Build a Keras loss that adds a spread-matching penalty to plain MSE.

    The penalty is the squared difference between the standard deviation of
    the targets and that of the predictions, each computed over every element
    of the batch tensor. It discourages the model from collapsing its
    predictions into a range narrower than the targets.

    Args:
        alpha: Weight of the spread penalty (0.1-0.2 recommended).
               Larger alpha = more emphasis on matching the spread.

    Returns:
        Loss function compatible with Keras model.compile(). It is registered
        as a Keras serializable under package 'Custom' with a name that embeds
        `alpha`.
    """
    registered_name = f'mse_variance_loss_alpha_{alpha}'

    @tf.keras.utils.register_keras_serializable(package='Custom', name=registered_name)
    def loss(y_true, y_pred):
        # Accuracy term: element-wise squared error, averaged below
        squared_error = tf.square(y_true - y_pred)

        # Spread term: gap between the batch std of targets and of predictions
        std_gap = tf.math.reduce_std(y_true) - tf.math.reduce_std(y_pred)

        # Weighted combination of both terms
        return tf.reduce_mean(squared_error) + alpha * tf.square(std_gap)

    return loss

In [None]:
# Build the TensorFlow/Keras CNN model
# The sequence length is taken from the loaded data (axis 1 of X_train_feature)
# rather than from the SEQUENCE_LENGTH constant.
model = create_cnn_model(X_train_feature.shape[1], FORECAST_HORIZON)

# Compile the model with custom loss function to prevent dampening
# (MAE is tracked as an additional metric; it does not influence training.)
model.compile(
    optimizer='adam',
    loss=mse_with_variance_loss(alpha=0.15),  # Custom loss to match variance
    metrics=['mae']
)

model.summary()
print("\nModel Architecture:")
print("Using MSE + Variance Loss (alpha=0.15) to prevent prediction dampening")

In [None]:
# Define callbacks
# - EarlyStopping: stop after 5 epochs without val_loss improvement and restore
#   the weights of the best epoch.
# - ReduceLROnPlateau: halve the learning rate after 3 epochs without val_loss
#   improvement, never going below 1e-6.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=3, min_lr=1e-6)
callbacks = [early_stopping, reduce_lr]

# Train the model
# `history` is kept because the next cell plots its 'loss' / 'val_loss' curves.
history = model.fit(
    X_train_feature,
    Y_train_feature,
    validation_data=(X_val_feature, Y_val_feature),
    epochs=NUM_EPOCHS,
    batch_size=BATCH_SIZE,
    callbacks=callbacks,
    verbose=1
)

In [None]:
# Plot training and validation loss
import matplotlib.pyplot as plt

train_losses = history.history['loss']
val_losses = history.history['val_loss']

plt.figure(figsize=(10, 6))
plt.plot(range(1, len(train_losses) + 1), train_losses, label='Training Loss', marker='o')
plt.plot(range(1, len(val_losses) + 1), val_losses, label='Validation Loss', marker='s')
plt.xlabel('Epoch')
# The model is compiled with MSE plus the variance-matching penalty, so the
# curves are not plain MSE; label the axis accordingly.
plt.ylabel('Loss (MSE + variance penalty)')
plt.title('Training vs Validation Loss')
plt.legend()
plt.grid(True, alpha=0.3)
plt.tight_layout()
plt.show()

# Check for overfitting
# These are the values of the LAST epoch that ran. Because early stopping
# restores the best weights, they may differ from the loss of the weights
# the model actually ends up with.
final_train_loss = train_losses[-1]
final_val_loss = val_losses[-1]
loss_diff = final_val_loss - final_train_loss
# Guard the ratio: a training loss of exactly 0 would raise ZeroDivisionError.
loss_ratio = final_val_loss / final_train_loss if final_train_loss != 0 else float('inf')

print(f"\nFinal Training Loss: {final_train_loss:.4f}")
print(f"Final Validation Loss: {final_val_loss:.4f}")

print(f"Difference (Val - Train): {loss_diff:.4f}")
print(f"Ratio (Val / Train): {loss_ratio:.2f}")

In [None]:
# Evaluate the model
# Predicts on the test split with the trained (unpruned) model and keeps the
# targets, predictions and scaled-space metrics for the later comparison cells.
print("\n--- Evaluating Model Performance ---")
y_true, y_pred, mse, mae, rmse, r2 = evaluate_and_print_metrics(model, X_test_feature, Y_test_feature)

In [None]:
# Scaler parameters (hard-coded here, not loaded from disk).
# NOTE(review): presumably the mean / std used to standardise the target during
# preprocessing — confirm they match the scaler that produced the .npy files.
MEAN = 40.00946858359476
STD = 4.8101438894455

# Unscale the predictions and actual values
# Flatten first so (n,) targets and (n, 1) predictions line up element-wise.
y_true_flat = np.array(y_true).reshape(-1)
y_pred_flat = np.array(y_pred).reshape(-1)

# Inverse of standardisation: x * STD + MEAN
y_true_unscaled = (y_true_flat * STD) + MEAN
y_pred_unscaled = (y_pred_flat * STD) + MEAN

# Calculate unscaled MSE / RMSE / MAE (in the target's original units)
unscaled_mse = mean_squared_error(y_true_unscaled, y_pred_unscaled)
unscaled_rmse = np.sqrt(unscaled_mse)
unscaled_mae = mean_absolute_error(y_true_unscaled, y_pred_unscaled)

print(f"\nUnscaled Metrics:")
print(f"MSE: {unscaled_mse:.4f} °C²")
print(f"RMSE: {unscaled_rmse:.4f} °C")
print(f"MAE: {unscaled_mae:.4f} °C")

plt.figure(figsize=(14, 6))

# Cap the number of points drawn in the line plot at 10,000
num_samples = min(10000, len(y_true_unscaled))
if num_samples == 0:
    print("No test samples available to plot.")
else:
    # Plot the first `num_samples` points in their original order (no random
    # sampling). `indices` is also reused by the next cell.
    indices = np.arange(num_samples)

    # Left panel: actual vs predicted over sample index
    plt.subplot(1, 2, 1)
    plt.plot(indices, y_true_unscaled[indices], 'b-', label='Actual', alpha=0.7)
    plt.plot(indices, y_pred_unscaled[indices], 'r--', label='Predicted', alpha=0.7)
    plt.xlabel('Sample Index')
    plt.ylabel('Temperature (°C)')
    plt.title(f'Predictions vs Actual (Unscaled)\n{SELECT_MODEL}')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Right panel: scatter of ALL test points with the y = x reference line
    plt.subplot(1, 2, 2)
    plt.scatter(y_true_unscaled, y_pred_unscaled, alpha=0.5)
    mn = min(y_true_unscaled.min(), y_pred_unscaled.min())
    mx = max(y_true_unscaled.max(), y_pred_unscaled.max())
    plt.plot([mn, mx], [mn, mx], 'r--', lw=2)
    plt.xlabel('Actual Temperature (°C)')
    plt.ylabel('Predicted Temperature (°C)')
    plt.title('Prediction Scatter Plot')
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()
    
    print(f"\nUnscaled value ranges:")
    print(f"Actual: [{y_true_unscaled.min():.2f}°C, {y_true_unscaled.max():.2f}°C]")
    print(f"Predicted: [{y_pred_unscaled.min():.2f}°C, {y_pred_unscaled.max():.2f}°C]")

In [None]:
# Get the X values (input sequences) associated with the predictions
# `indices` comes from the previous (unscale / plot) cell, so that cell must
# have run and found at least one test sample.
X_associated = X_test_feature[indices]

print(f"Shape of associated input sequences: {X_associated.shape}")
print(f"Number of samples: {len(X_associated)}")
print(f"Sequence length: {X_associated.shape[1]}")
print(f"\nFirst associated input sequence (sample {indices[0]}):")
print(X_associated[0].flatten())
print(f"\nCorresponding actual value: {y_true_unscaled[indices[0]]:.2f}°C")
print(f"Corresponding predicted value: {y_pred_unscaled[indices[0]]:.2f}°C")

# Optionally visualize a few input sequences (up to six, on a 2x3 grid)
fig, axes = plt.subplots(2, 3, figsize=(15, 8))
axes = axes.flatten()

for i in range(min(6, len(X_associated))):
    ax = axes[i]
    sequence = X_associated[i].flatten()
    
    # Unscale the input sequence with the same MEAN / STD used for the targets
    # NOTE(review): assumes inputs and targets share one scaler — confirm.
    sequence_unscaled = (sequence * STD) + MEAN
    
    # Input window as a line; actual and predicted next value as horizontal lines
    ax.plot(sequence_unscaled, 'b-', linewidth=1.5)
    ax.axhline(y=y_true_unscaled[indices[i]], color='g', linestyle='--', label=f'Actual: {y_true_unscaled[indices[i]]:.1f}°C')
    ax.axhline(y=y_pred_unscaled[indices[i]], color='r', linestyle='--', label=f'Pred: {y_pred_unscaled[indices[i]]:.1f}°C')
    ax.set_title(f'Sample {indices[i]} - Input Sequence')
    ax.set_xlabel('Time Step')
    ax.set_ylabel('Temperature (°C)')
    ax.legend(fontsize=8)
    ax.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

# These messages print the SEQUENCE_LENGTH constant, not X_associated.shape[1];
# the two agree only if the data was built with that window length.
print(f"\nNote: Each input sequence contains {SEQUENCE_LENGTH} time steps")
print(f"The model uses these {SEQUENCE_LENGTH} historical values to predict the next value")

# Pruning the Model

In [None]:
import tf2onnx
from onnxruntime.quantization import quantize_dynamic, quantize_static, CalibrationDataReader, QuantType
import onnx
import tempfile
import os

In [None]:
def magnitude_prune_model(model, target_sparsity=0.5):
    """
    Manual magnitude-based pruning for TensorFlow/Keras models.

    A fresh model is built with `create_cnn_model`, the trained weights are
    copied into it, and in every layer that exposes a `kernel` the
    smallest-magnitude kernel entries are set to zero. The threshold is
    computed per layer as the `target_sparsity` percentile of |kernel|.
    Entries equal to the threshold are kept, so the achieved sparsity can be
    slightly below the target. Biases are left untouched.

    The zeros are plain weight values: nothing stops later training from
    moving them away from zero unless the caller re-applies a mask.

    Args:
        model: Trained Keras model to prune (must match the architecture
            produced by `create_cnn_model`).
        target_sparsity: Fraction of kernel weights to prune, in [0, 1]
            (0.5 = 50%).

    Returns:
        Pruned, uncompiled model with sparse kernels. `model` is not modified.

    Raises:
        ValueError: If `target_sparsity` is outside [0, 1].
    """
    # Fail before building anything; np.percentile would raise the same
    # exception type later, but with a less helpful message.
    if not 0.0 <= target_sparsity <= 1.0:
        raise ValueError(f"target_sparsity must be in [0, 1], got {target_sparsity}")

    # Create a new model with the same architecture but without cloning
    # This avoids the serialization issue with custom loss functions
    pruned_model = create_cnn_model(model.input_shape[1], model.output_shape[1])
    pruned_model.set_weights(model.get_weights())

    total_params = 0
    pruned_params = 0

    for layer in pruned_model.layers:
        # Only layers with a kernel (Conv, Dense) are pruned
        if not hasattr(layer, 'kernel'):
            continue
        weights = layer.get_weights()
        if len(weights) == 0:
            continue

        kernel = weights[0]

        # Per-layer magnitude threshold (np.percentile flattens the array itself)
        threshold = np.percentile(np.abs(kernel), target_sparsity * 100)

        # Binary mask (1 = keep, 0 = prune)
        mask = (np.abs(kernel) >= threshold).astype(np.float32)

        # Apply the mask and write the kernel back; other weights (bias) unchanged
        weights[0] = kernel * mask
        layer.set_weights(weights)

        # Count zeros once and reuse the value for the totals and the report
        layer_zeros = int(mask.size - np.count_nonzero(mask))
        total_params += mask.size
        pruned_params += layer_zeros

        layer_sparsity = (layer_zeros / mask.size) * 100
        print(f"Layer '{layer.name}': {layer_sparsity:.1f}% sparse ({layer_zeros}/{mask.size} zeros)")

    # A model without any kernel layers has nothing to prune; report 0% instead
    # of dividing by zero.
    overall_sparsity = (pruned_params / total_params) * 100 if total_params else 0.0
    print(f"\nOverall Sparsity Achieved: {overall_sparsity:.2f}%")
    print(f"  Total parameters: {total_params:,}")
    print(f"  Pruned (zero) parameters: {pruned_params:,}")
    print(f"  Active parameters: {total_params - pruned_params:,}")

    return pruned_model

# Apply magnitude-based pruning to your trained model
# (zeroes the smaller-magnitude half of each kernel in a copy of `model`)
pruned_model = magnitude_prune_model(model, target_sparsity=0.5)

# Compile the pruned model with the same custom loss
# magnitude_prune_model returns an uncompiled model, so it must be compiled
# before it can be trained or evaluated with Keras' fit / evaluate.
pruned_model.compile(
    optimizer='adam',
    loss=mse_with_variance_loss(alpha=0.15),
    metrics=['mae']
)

In [None]:
# Fine-tune the pruned model to recover accuracy
print("\n=== Fine-tuning Pruned Model ===")
print("Training for 5 epochs to recover from pruning...\n")


class _KeepPrunedWeightsZero(tf.keras.callbacks.Callback):
    """Re-apply the pruning masks after every training batch.

    Ordinary gradient updates also change the weights that pruning set to
    zero, so without this callback fine-tuning turns the sparse kernels dense
    again. The masks are captured once, from the kernels' zero pattern at
    construction time, and multiplied back in after each optimizer step.
    """

    def __init__(self, target_model):
        super().__init__()
        self._masked_kernels = []
        for layer in target_model.layers:
            if hasattr(layer, 'kernel'):
                kernel_values = layer.get_weights()[0]
                # 1 where the weight is currently non-zero, 0 where it was pruned;
                # same dtype as the stored kernel so the product needs no cast.
                keep_mask = (kernel_values != 0).astype(kernel_values.dtype)
                self._masked_kernels.append((layer.kernel, keep_mask))

    def on_train_batch_end(self, batch, logs=None):
        for kernel, keep_mask in self._masked_kernels:
            kernel.assign(kernel * keep_mask)


# Use lower learning rate for fine-tuning
pruned_model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),  # Lower LR for fine-tuning
    loss=mse_with_variance_loss(alpha=0.15),  # Keep the custom loss
    metrics=['mae']
)

# Define callbacks
# The mask callback comes first; it acts at batch end, so the weights that
# EarlyStopping snapshots at epoch end are already sparse.
callbacks_pruned = [
    _KeepPrunedWeightsZero(pruned_model),
    tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
    tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-7)
]

# Fine-tune
history_pruned = pruned_model.fit(
    X_train_feature,
    Y_train_feature,
    validation_data=(X_val_feature, Y_val_feature),
    epochs=5,  # Just a few epochs to recover
    batch_size=BATCH_SIZE,
    callbacks=callbacks_pruned,
    verbose=1
)

In [None]:
def check_model_sparsity(model):
    """Measure how many kernel weights of `model` are exactly zero.

    Only the first weight array (the kernel) of layers that expose a `kernel`
    attribute is inspected; biases and kernel-less layers are ignored.

    Args:
        model: Object with a `layers` iterable whose kernel layers implement
            `get_weights()`.

    Returns:
        Tuple `(sparsity_pct, total_weights, zero_weights)`. For a model
        without any kernel weights the result is `(0.0, 0, 0)`.
    """
    total_weights = 0
    zero_weights = 0

    for layer in model.layers:
        if hasattr(layer, 'kernel'):
            kernel = layer.get_weights()[0]
            total_weights += kernel.size
            # int(...) keeps the running count a plain Python int
            zero_weights += int(kernel.size - np.count_nonzero(kernel))

    # Nothing to measure: report 0% rather than raising ZeroDivisionError.
    if total_weights == 0:
        return 0.0, 0, 0

    sparsity_pct = (zero_weights / total_weights) * 100
    return sparsity_pct, total_weights, zero_weights

# Evaluate the pruned model
# Same test split and metrics as for the original model; results are stored
# under *_pruned names for the comparison cells further down.
print("\n=== Evaluating Pruned Model Performance ===")
y_true_pruned, y_pred_pruned, mse_pruned, mae_pruned, rmse_pruned, r2_pruned = evaluate_and_print_metrics(
    pruned_model, X_test_feature, Y_test_feature, "Pruned Model"
)

# Verify sparsity
# Counts exact zeros in the kernels as they are now, i.e. after fine-tuning.
sparsity, total, zeros = check_model_sparsity(pruned_model)
print(f"\nVerified Sparsity: {sparsity:.2f}% ({zeros:,} / {total:,} weights are zero)")
# Alias used by the saving / quantization cells below
final_pruned_model = pruned_model

In [None]:
# Save the pruned model to a temporary .h5 file.
# The whole model (architecture + weights) is written; include_optimizer=False
# leaves the optimizer state out of the file.
os.makedirs('./model', exist_ok=True)  # mkstemp fails if the directory is missing

# mkstemp returns an OPEN OS-level file descriptor together with the path.
# Only the path is needed, so close the descriptor right away: leaving it open
# leaks a handle and, on Windows, blocks the later os.remove of this file.
temp_fd, temp_model_path = tempfile.mkstemp(suffix='.h5', dir='./model')
os.close(temp_fd)

final_pruned_model.save(temp_model_path, save_format='h5', include_optimizer=False)
print(f"Pruned model saved temporarily at: {temp_model_path}")

# Dynamic Quantization

In [None]:
### Tensorflow Lite Quantization
# Create a representative dataset generator for quantization
def representative_dataset_gen():
    """
    Yield calibration inputs for the TFLite converter, one sample at a time.

    Uses the first min(2000, len(X_train_feature)) training windows. Each
    yielded item is a one-element list holding a float32 array that keeps a
    leading batch dimension of 1.
    """
    # Slicing caps the sample count at 2000 without a separate min() call
    calibration_inputs = X_train_feature[:2000]
    for row in range(len(calibration_inputs)):
        yield [calibration_inputs[row:row + 1].astype(np.float32)]

print("\nRepresentative dataset generator created for quantization.")

# Convert the pruned model to TensorFlow Lite.
# Optimize.DEFAULT combined with a representative dataset makes the converter
# calibrate activation ranges and quantize to 8-bit integers where it can,
# keeping float fallbacks for ops it cannot quantize. This is NOT dynamic range
# quantization (that mode is Optimize.DEFAULT without a representative dataset).
# inference_input_type / inference_output_type are left at their defaults.
# (Previously a first converter was configured and then discarded, and the
# messages described a fallback that never happened.)
converter = tf.lite.TFLiteConverter.from_keras_model(final_pruned_model)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset_gen
quantized_tflite_model = converter.convert()
print("Post-training integer quantization (float fallback) successful!")

print(f"\n--- Saving Prune + Quantized Model ---")

# Save tensorflow lite model
quantized_model_path = './model/pruned_quantized_model.tflite'
with open(quantized_model_path, 'wb') as f:
    f.write(quantized_tflite_model)

print(f"Location: {quantized_model_path}")
print(f"Size: {len(quantized_tflite_model) / 1024:.2f} KB")

In [None]:
### Onnx Model Quantization
# Temporarily export the pruned model to ONNX (float32) as input for the quantizer.
# A dedicated variable is used on purpose: rebinding `temp_model_path` here
# would lose the path of the temporary .h5 file created earlier, so that file
# would never be cleaned up and the later size comparison would read the ONNX
# file instead.
pruned_onnx_path = "./model/pruned_model.onnx"
spec = (tf.TensorSpec(final_pruned_model.inputs[0].shape, tf.float32, name="input"),)
final_pruned_model.output_names = ["output"]
model_proto, _ = tf2onnx.convert.from_keras(final_pruned_model, input_signature=spec, opset=13)

with open(pruned_onnx_path, "wb") as f:
    f.write(model_proto.SerializeToString())

# Turn the ONNX model into a quantized version using dynamic quantization
print("\n--- Quantizing ONNX Model ---")
quantized_onnx_path = "./model/pruned_quantized_model.onnx"
quantize_dynamic(
    model_input=pruned_onnx_path,
    model_output=quantized_onnx_path,
    weight_type=QuantType.QUInt8  # or QInt8
)

print(f"\nDynamic quantization complete!")
print(f"Location: {quantized_onnx_path}")
print(f"Size: {os.path.getsize(quantized_onnx_path) / 1024:.2f} KB")

# Compare sizes
original_onnx_size = os.path.getsize(pruned_onnx_path)
quantized_onnx_size = os.path.getsize(quantized_onnx_path)
compression_ratio = original_onnx_size / quantized_onnx_size

print(f"\nONNX Model Compression:")
print(f"  Original: {original_onnx_size / 1024:.2f} KB")
print(f"  Quantized: {quantized_onnx_size / 1024:.2f} KB")
print(f"  Compression: {compression_ratio:.2f}x smaller ({(1 - quantized_onnx_size/original_onnx_size)*100:.1f}% reduction)")

# The float32 ONNX export was only an intermediate; remove it now that its size
# has been recorded.
os.remove(pruned_onnx_path)

In [None]:
# Evaluate the quantized TFLite model
print("\n--- Evaluating Quantized Model ---")

# Load the TFLite model
interpreter = tf.lite.Interpreter(model_path=quantized_model_path)
interpreter.allocate_tensors()

# Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

print(f"Input details: {input_details[0]['shape']}, dtype: {input_details[0]['dtype']}")
print(f"Output details: {output_details[0]['shape']}, dtype: {output_details[0]['dtype']}")

# Tensor metadata does not change between invocations, so read it once.
input_index = input_details[0]['index']
input_dtype = input_details[0]['dtype']
output_index = output_details[0]['index']
output_dtype = output_details[0]['dtype']

# 8-bit tensors carry (scale, zero_point) quantization parameters.
input_is_quantized = input_dtype in (np.int8, np.uint8)
output_is_quantized = output_dtype in (np.int8, np.uint8)
if input_is_quantized:
    input_scale, input_zero_point = input_details[0]['quantization']
    input_limits = np.iinfo(input_dtype)
if output_is_quantized:
    output_scale, output_zero_point = output_details[0]['quantization']

# Make predictions on test set (the interpreter is fed one sample at a time)
quantized_predictions = []

for i in range(len(X_test_feature)):
    sample = X_test_feature[i:i+1]

    if input_is_quantized:
        # Quantize in float space FIRST, then round, clip and cast. Casting the
        # raw floats to the integer dtype before applying scale / zero point
        # (as was done previously) truncates them and destroys the input.
        scaled = np.round(sample.astype(np.float32) / input_scale + input_zero_point)
        input_data = np.clip(scaled, input_limits.min, input_limits.max).astype(input_dtype)
    else:
        input_data = sample.astype(input_dtype)

    interpreter.set_tensor(input_index, input_data)
    interpreter.invoke()

    # Get output
    output_data = interpreter.get_tensor(output_index)

    # Map quantized outputs back to real values
    if output_is_quantized:
        output_data = (output_data.astype(np.float32) - output_zero_point) * output_scale

    quantized_predictions.append(output_data[0])

quantized_predictions = np.array(quantized_predictions)

# Calculate metrics
y_true_flat_quant = Y_test_feature.flatten()
y_pred_flat_quant = quantized_predictions.flatten()

# The returned arrays go into dedicated names so the `y_true` / `y_pred`
# results of the original model are not overwritten by this cell.
y_true_quant, y_pred_quant, mse_quantized, mae_quantized, rmse_quantized, r2_quantized = print_metrics(
    y_true_flat_quant, y_pred_flat_quant, "Quantized Model"
)

In [None]:
# Compare model sizes
import os

# Get original model size
# mkstemp returns an OPEN file descriptor plus the path. Only the path is
# needed, so close the descriptor immediately: otherwise it leaks and, on
# Windows, the os.remove below fails.
original_fd, original_model_path = tempfile.mkstemp(suffix='.h5', dir="./model")
os.close(original_fd)
print(original_model_path)

# Save without optimizer to avoid custom loss serialization issues
model.save(original_model_path, save_format='h5', include_optimizer=False)
original_size = os.path.getsize(original_model_path)

# Size of the pruned-model file that `temp_model_path` refers to
# (written by an earlier cell)
pruned_size = os.path.getsize(temp_model_path)

# Get quantized model size (in-memory TFLite flatbuffer)
quantized_size = len(quantized_tflite_model)

print("\n=== Model Size Comparison ===")
print(f"Original Model: {original_size / 1024:.2f} KB")
print(f"Pruned Model: {pruned_size / 1024:.2f} KB ({(1 - pruned_size/original_size)*100:.1f}% reduction)")
print(f"Pruned + Quantized Model: {quantized_size / 1024:.2f} KB ({(1 - quantized_size/original_size)*100:.1f}% reduction)")
print(f"\nTotal compression: {original_size / quantized_size:.2f}x smaller")

print("\n=== Final Model Comparison ===")
print(f"{'Model':<25} {'MSE':<10} {'MAE':<10} {'R²':<10}")
print(f"{'-'*55}")
print(f"{'Original':<25} {mse:<10.4f} {mae:<10.4f} {r2:<10.4f}")
print(f"{'Pruned':<25} {mse_pruned:<10.4f} {mae_pruned:<10.4f} {r2_pruned:<10.4f}")
print(f"{'Pruned + Quantized':<25} {mse_quantized:<10.4f} {mae_quantized:<10.4f} {r2_quantized:<10.4f}")

# Cleanup temporary files (skip any that are already gone)
for stale_path in (original_model_path, temp_model_path):
    if os.path.exists(stale_path):
        os.remove(stale_path)

In [None]:
# Visualize predictions comparison
# All values here are in scaled (standardised) space. `y_true_flat` and
# `y_pred_flat` were created in the unscaling cell from the original model's
# results.
plt.figure(figsize=(16, 5))

# Draw at most 150 randomly chosen test points in the line plot.
# NOTE: no random seed is set, so the selection changes on every run.
num_samples = min(150, len(y_true_flat))
if num_samples > 0:
    indices = np.random.choice(len(y_true_flat), num_samples, replace=False)
    indices = np.sort(indices)

    # Plot 1: Time series comparison
    plt.subplot(1, 3, 1)
    plt.plot(indices, y_true_flat[indices], 'b-', label='Actual', alpha=0.7, linewidth=2)
    plt.plot(indices, y_pred_flat[indices], 'g--', label='Original', alpha=0.7)
    plt.plot(indices, y_pred_pruned.flatten()[indices], 'r--', label='Pruned', alpha=0.7)
    plt.plot(indices, y_pred_flat_quant[indices], 'm--', label='Quantized', alpha=0.7)
    plt.xlabel('Sample Index')
    plt.ylabel('Sensor Value (Scaled)')
    plt.title('Predictions Comparison')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Plot 2: Original vs Quantized scatter (all test points, with y = x line)
    plt.subplot(1, 3, 2)
    plt.scatter(y_true_flat, y_pred_flat, alpha=0.3, label='Original')
    plt.scatter(y_true_flat, y_pred_flat_quant, alpha=0.3, label='Quantized')
    mn = min(y_true_flat.min(), y_pred_flat.min(), y_pred_flat_quant.min())
    mx = max(y_true_flat.max(), y_pred_flat.max(), y_pred_flat_quant.max())
    plt.plot([mn, mx], [mn, mx], 'r--', lw=2)
    plt.xlabel('Actual Values')
    plt.ylabel('Predicted Values')
    plt.title('Scatter: Original vs Quantized')
    plt.legend()
    plt.grid(True, alpha=0.3)

    # Plot 3: Error distribution (absolute errors over all test points)
    plt.subplot(1, 3, 3)
    error_original = np.abs(y_true_flat - y_pred_flat)
    error_pruned = np.abs(y_true_flat - y_pred_pruned.flatten())
    error_quantized = np.abs(y_true_flat - y_pred_flat_quant)
    
    plt.hist(error_original, bins=30, alpha=0.5, label='Original', color='green')
    plt.hist(error_pruned, bins=30, alpha=0.5, label='Pruned', color='red')
    plt.hist(error_quantized, bins=30, alpha=0.5, label='Quantized', color='magenta')
    plt.xlabel('Absolute Error')
    plt.ylabel('Frequency')
    plt.title('Error Distribution')
    plt.legend()
    plt.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()
else:
    print("No test samples available to plot.")