### Mixture Density Network

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import matplotlib.pyplot as plt

# --- 1. Parameters & Configuration ---

In [2]:
# Load the training data
# !!! UPDATE THIS FILENAME !!!
TRAIN_DATA_PATH = '../data1/noisy_data.npz'

# Seed every RNG used by the notebook (dummy-data noise, weight init,
# DataLoader shuffling) so that "Restart Kernel -> Run All" is reproducible.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

try:
    # np.load on an .npz archive keeps a file handle open; the context manager
    # closes it once the arrays have been read into memory.
    with np.load(TRAIN_DATA_PATH) as train_data:
        x_train = train_data['x']
        y_train = train_data['y']
except FileNotFoundError:
    # Report the path that was actually attempted.
    print(f"Error: '{TRAIN_DATA_PATH}' not found.")
    print("Please create a dummy file or update the name to your training file.")
    # Create dummy data to allow the script to run for demonstration
    n_dummy, n_outliers = 800, 100
    x_train = np.linspace(-np.pi, np.pi, n_dummy)
    # True function (green line)
    y_true_func = 0.5 * np.sin(x_train * 1.5) + 0.1 * x_train
    # Add small noise
    y_train = y_true_func + np.random.normal(0, 0.1, x_train.shape)
    # Add outlier clusters: shift a random subset of points by a uniform offset
    outlier_indices = np.random.choice(n_dummy, n_outliers, replace=False)
    y_train[outlier_indices] = y_train[outlier_indices] + np.random.uniform(-1.5, 1.5, n_outliers)
    print("Using dummy data for demonstration.")

# Get the total number of training samples
N_SAMPLES = x_train.shape[0]

# Set parameters based on the array size
# We choose a power of 2 for batch size. 64 is efficient for 800 samples.
BATCH_SIZE = 64
LEARNING_RATE = 1e-3  # 0.001 is a good default for Adam
NUM_EPOCHS = 1000     # Number of passes through the entire dataset

print(f"Loaded {N_SAMPLES} training samples.")


Loaded 800 training samples.


## --- 2. Data Preparation ---

In [3]:
# The network consumes 2-D inputs laid out as (batch_size, num_features).
# There is a single feature (the x-coordinate), so flat 1-D arrays are
# promoted to column vectors; arrays that are already 2-D pass through as-is.
x_train = x_train.reshape(-1, 1) if x_train.ndim == 1 else x_train
y_train = y_train.reshape(-1, 1) if y_train.ndim == 1 else y_train

# NumPy -> PyTorch, cast to float32 to match the default dtype of nn.Linear
x_train_tensor = torch.tensor(x_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)

# Pair inputs with targets, then let the DataLoader deal with mini-batching
# and per-epoch shuffling.
train_dataset = TensorDataset(x_train_tensor, y_train_tensor)
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)


# --- 3. Model Definition ---

In [4]:
# We'll use a simple Multi-Layer Perceptron (MLP)
class RegressionModel(nn.Module):
    """Two-hidden-layer MLP with tanh activations.

    With the default arguments this is the 1 -> 32 -> 32 -> 1 network used
    throughout the notebook.

    Args:
        in_features: Number of input features per sample (default 1).
        hidden_size: Width of each of the two hidden layers (default 32).
        out_features: Number of outputs per sample (default 1).
    """

    def __init__(self, in_features=1, hidden_size=32, out_features=1):
        super().__init__()
        self.network = nn.Sequential(
            nn.Linear(in_features, hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size, hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size, out_features),
        )

    def forward(self, x):
        """Return the network output for a batch `x` of shape (batch, in_features)."""
        return self.network(x)

# Initialize the model
model = RegressionModel()
print(model)

RegressionModel(
  (network): Sequential(
    (0): Linear(in_features=1, out_features=32, bias=True)
    (1): Tanh()
    (2): Linear(in_features=32, out_features=32, bias=True)
    (3): Tanh()
    (4): Linear(in_features=32, out_features=1, bias=True)
  )
)


# --- 4. Loss Function and Optimizer ---

In [5]:
# Loss function -- the key modelling decision here.
# Huber loss is quadratic for small residuals and linear for large ones, so
# the outlier points pull on the fit far less than they would under
# nn.MSELoss(), whose penalty grows quadratically with the error.
# nn.L1Loss() (mean absolute error) would be another outlier-tolerant option.
# The arguments below are simply the defaults spelled out; with delta=1.0 the
# Huber loss coincides with SmoothL1Loss.
criterion = nn.HuberLoss(reduction='mean', delta=1.0)

# Optimizer: Adam, a solid general-purpose choice.
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

# --- 5. Training Loop ---

In [None]:
print("Starting training...")

# Put the model in training mode explicitly: a later cell switches it to
# eval(), so re-running this cell would otherwise train in evaluation mode.
model.train()

# Total number of samples, used to turn the accumulated loss into a true
# per-sample average.
n_train_samples = len(train_loader.dataset)

for epoch in range(NUM_EPOCHS):
    running_loss = 0.0
    for x_batch, y_batch in train_loader:
        # Forward pass: Compute predicted y by passing x to the model
        y_pred = model(x_batch)

        # Compute the loss
        loss = criterion(y_pred, y_batch)

        # Zero gradients, perform a backward pass, and update the weights.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        # `loss` is assumed to be a per-batch mean (the default reduction of
        # the criterion). Weight it by the batch size so that a smaller final
        # batch does not count as much as a full one in the epoch average.
        running_loss += loss.item() * x_batch.size(0)

    epoch_loss = running_loss / n_train_samples

    # Print progress every 100 epochs
    if (epoch + 1) % 100 == 0:
        print(f'Epoch [{epoch+1}/{NUM_EPOCHS}], Loss: {epoch_loss:.4f}')

print("Training finished.")

Starting training...
Epoch [100/1000], Loss: 0.1428


# --- 6. Visualization with Validation Data ---

In [None]:
print("Loading validation data and plotting results...")

# Load the validation data
# !!! UPDATE THIS FILENAME !!!
VAL_DATA_PATH = '../data1/clean_data.npz'
try:
    # The context manager closes the .npz archive's file handle once both
    # arrays have been read into memory.
    with np.load(VAL_DATA_PATH) as val_data:
        x_val = val_data['x']
        y_val = val_data['y']
except FileNotFoundError:
    # Report the path that was actually attempted.
    print(f"Error: '{VAL_DATA_PATH}' not found. Using dummy validation data.")
    x_val = np.linspace(-np.pi, np.pi, 100)
    y_val = 0.5 * np.sin(x_val * 1.5) + 0.1 * x_val # The "clean" green line

# Prepare validation data: promote flat 1-D arrays to column vectors (N, 1)
if x_val.ndim == 1:
    x_val = x_val.reshape(-1, 1)
if y_val.ndim == 1:
    y_val = y_val.reshape(-1, 1)

# Switch to evaluation mode first (this is what disables layers such as
# dropout, if the model has any).
model.eval()

# Validation inputs as a float32 tensor
x_val_tensor = torch.tensor(x_val, dtype=torch.float32)

# Forward pass only: no gradients are needed, so skip building the autograd graph
with torch.no_grad():
    y_val_pred_tensor = model(x_val_tensor)

# Back to NumPy for matplotlib; y_val never left NumPy, so it is used directly
x_val_np = x_val_tensor.numpy()
y_val_np = y_val
y_val_pred = y_val_pred_tensor.numpy()

# Scatter plot comparing the validation targets with the model's predictions
fig, ax = plt.subplots(figsize=(8, 5))

# Marker settings shared by both point clouds
marker_style = dict(s=10, alpha=0.7)

# Blue: the actual validation data (the "ground truth")
ax.scatter(x_val_np, y_val_np, color='blue', label='Actual Validation Data', **marker_style)

# Red: what the trained model predicts at the same x positions
ax.scatter(x_val_np, y_val_pred, color='red', label='Model Predictions', **marker_style)

# Optional: draw the predictions as a dashed line instead of points by
# sorting on x first, e.g.
#   order = np.argsort(x_val_np.squeeze())
#   ax.plot(x_val_np[order], y_val_pred[order], color='red', linestyle='--', linewidth=2, label='Model Prediction Line')

ax.set_title('Model Predictions vs. Actual Validation Data')
ax.set_xlabel('X-axis')
ax.set_ylabel('Y-axis')
ax.legend()
ax.grid(True, linestyle='--', alpha=0.5)
plt.show()