In [None]:
# Cell 1
import torch
import torch.nn as nn
from torch.quasirandom import SobolEngine
import numpy as np
from abc import ABC, abstractmethod
from typing import List, Dict, Tuple, Optional, Union, Callable

class PDEProblem(ABC):
    """Abstract description of a PDE problem to be solved with a PINN.

    Subclasses supply the domain bounds, the PDE residual, the boundary (and,
    for time-dependent problems, initial) condition losses, an optional ground
    truth and the list of derivatives the residual needs. The base class
    provides point samplers for axis-aligned (hyper-rectangular) domains.
    """

    def __init__(
            self,
            name: str,
            input_vars: Optional[List[str]] = None,
            output_vars: Optional[List[str]] = None,
            time_var: Optional[str] = None,
            kappa_name: str = "kappa",
            default_kappa_value: float = 1.0
        ):
        """
        Args:
            name: Human-readable problem name.
            input_vars: Names of the independent variables (default ``['x']``).
                Duplicates are removed and the names are stored sorted
                alphabetically.
            output_vars: Names of the dependent variables (default ``['u']``);
                order is preserved.
            time_var: Name of the time variable, or None for a steady problem.
            kappa_name: Name of the hardness parameter.
            default_kappa_value: Default value of the hardness parameter.
        """
        # None sentinels instead of list literals: a list default is created
        # once and would be shared (and mutable) across every instance.
        input_vars = ['x'] if input_vars is None else input_vars
        output_vars = ['u'] if output_vars is None else output_vars

        self.name: str = name
        self.input_vars: List[str] = sorted(set(input_vars))
        # Copy so later mutation of the caller's list cannot change this problem.
        self.output_vars: List[str] = list(output_vars)
        self.time_var: Optional[str] = time_var

        self.output_dim: int = len(self.output_vars)
        self.spatial_domain_dim: int = len(self.input_vars) - (1 if time_var else 0)
        self.time_dependent: bool = bool(time_var)

        self.kappa_name: str = kappa_name
        self.default_kappa_value: float = default_kappa_value

    @abstractmethod
    def get_domain_bounds(self) -> Dict[str, Tuple[float, float]]:
        """
        Returns a dictionary mapping input variable names to their (min, max) bounds.
        Example: {'x': (0.0, 1.0), 't': (0.0, 2.0)}
        """
        pass

    @abstractmethod
    def pde_residual(
            self,
            inputs: Dict[str, torch.Tensor],
            model_outputs: torch.Tensor, # Shape: (batch, output_dim)
            derivatives: Dict[str, torch.Tensor],
            kappa_value: float
        ) -> torch.Tensor: # Expected shape: (batch, num_pde_equations)
        """
        Calculates the PDE residual(s).
        - model_outputs: Tensor of shape (batch_size, self.output_dim)
        - derivatives: Dictionary of derivative tensors keyed by name. The
                       Trainer in this file produces keys of the form
                       'd(u)_dx(1)' (du/dx), 'd2(u)_dx(2)' (d2u/dx2) and, for
                       a mixed sequence ('x', 'y'), 'd2(u)_dx(1)y(1)'.
        Should return a tensor where each column is the residual of one PDE equation.
        For scalar PDEs, this will be (batch_size, 1).
        """
        pass

    @abstractmethod
    def boundary_conditions(
        self,
        inputs_bc: Dict[str, torch.Tensor],
        model_outputs_bc: torch.Tensor, # Shape: (batch_bc, output_dim)
        model: nn.Module,
        kappa_value: float
        ) -> torch.Tensor: # Scalar loss term
        """Returns the scalar boundary-condition loss for the given boundary points."""
        pass

    def initial_conditions(
            self,
            inputs_ic: Dict[str, torch.Tensor],
            model_outputs_ic: torch.Tensor, # Shape: (batch_ic, output_dim)
            model: nn.Module,
            kappa_value: float
        ) -> torch.Tensor: # Scalar loss term
        """
        Returns the scalar initial-condition loss.

        For problems without a time variable this is a zero tensor placed on
        the model's device (falling back to the device of model_outputs_ic,
        then to CPU). Time-dependent subclasses must override this method.

        Raises:
            NotImplementedError: if the problem is time dependent and the
                method has not been overridden.
        """
        if self.time_dependent:
            raise NotImplementedError("Initial conditions must be implemented for time-dependent PDEs.")

        device: Union[str, torch.device] = 'cpu'
        # Explicit None test: nn.Sequential defines __len__, so plain
        # truthiness would treat an empty container as "no model".
        first_param = next(model.parameters(), None) if model is not None else None
        if first_param is not None:
            device = first_param.device
        elif isinstance(model_outputs_ic, torch.Tensor):
            device = model_outputs_ic.device
        return torch.tensor(0.0, device=device)

    @abstractmethod
    def get_ground_truth(self,
                         inputs: Dict[str, torch.Tensor],
                         kappa_value: float) -> Optional[torch.Tensor]: # Shape: (batch, output_dim)
        """Returns the reference solution at the given inputs, or None if unavailable."""
        pass

    def get_collocation_points(self,
                               num_points: int,
                               kappa_value: float,
                               device: Union[str, torch.device] = 'cpu',
                               strategy: str = 'uniform') -> Dict[str, torch.Tensor]:
        """
        Samples interior (collocation) points in the bounding box of the domain.

        Args:
            num_points: Number of points to draw.
            kappa_value: Unused by this base implementation.
            device: Device on which the returned tensors live.
            strategy: 'uniform' (independent uniform samples per variable) or
                'sobol' (one scrambled Sobol sequence across all variables).

        Returns:
            Dict mapping each input variable to a (num_points, 1) tensor with
            requires_grad=True.

        Raises:
            ValueError: if a variable has no entry in the domain bounds.
            NotImplementedError: for an unknown strategy.
        """
        domain_bounds = self.get_domain_bounds()
        inputs: Dict[str, torch.Tensor] = {}

        samples_0_1 = None
        if strategy == 'sobol':
            if not self.input_vars:  # Should not happen for collocation
                return {}
            sobol = SobolEngine(dimension=len(self.input_vars), scramble=True)
            # SobolEngine draws on CPU; move to the target device afterwards.
            samples_0_1 = sobol.draw(num_points).to(device)

        for i, var_name in enumerate(self.input_vars):
            if var_name not in domain_bounds:
                raise ValueError(f"Domain bounds not defined for variable: {var_name}")
            var_min, var_max = domain_bounds[var_name]

            if strategy == 'uniform':
                unit_samples = torch.rand(num_points, 1, device=device)
            elif strategy == 'sobol':
                unit_samples = samples_0_1[:, i:i+1]
            else:
                raise NotImplementedError(f"Collocation sampling strategy '{strategy}' not implemented for variable '{var_name}'.")

            # Affine map from [0, 1) to [var_min, var_max); the result is a
            # fresh leaf-like tensor that autograd can differentiate against.
            samples_var = unit_samples * (var_max - var_min) + var_min
            inputs[var_name] = samples_var.requires_grad_(True)
        return inputs

    def get_boundary_points_hyperrect(self,
                            num_points_per_face: int,
                            kappa_value: float,
                            device: Union[str, torch.device] = 'cpu',
                            strategy: str = 'uniform') -> Dict[str, torch.Tensor]:
        """
        Samples points on every face of the hyper-rectangular spatial domain.

        For each spatial variable and each of its two bounds, that variable is
        fixed at the bound while all remaining variables (the other spatial
        variables plus time, if any) are sampled.

        Args:
            num_points_per_face: Number of points drawn on each face.
            kappa_value: Unused by this base implementation.
            device: Device on which the returned tensors live.
            strategy: 'uniform' or 'sobol'. With 'sobol' a single draw is made
                and reused for every face.

        Returns:
            Dict mapping each input variable to a detached tensor of shape
            (2 * n_spatial_vars * num_points_per_face, 1); empty (0, 1)
            tensors when there are no spatial variables.

        Raises:
            NotImplementedError: for an unknown strategy.
        """
        domain_bounds = self.get_domain_bounds()
        # Spatial vars are input_vars excluding the time_var
        spatial_vars = [v for v in self.input_vars if v != self.time_var]

        if not spatial_vars:  # No spatial dimensions, so no spatial boundaries
            return {v: torch.empty(0, 1, device=device).detach() for v in self.input_vars}

        samples_bc_other_dims = None
        num_dims_to_sample_on_face = len(spatial_vars) - 1 + (1 if self.time_dependent else 0)

        if strategy == 'sobol' and num_dims_to_sample_on_face > 0:
            sobol_bc = SobolEngine(dimension=num_dims_to_sample_on_face, scramble=True)
            samples_bc_other_dims = sobol_bc.draw(num_points_per_face).to(device)
        elif strategy not in ('uniform', 'sobol'):
            raise NotImplementedError(f"Boundary sampling strategy '{strategy}' not supported.")

        all_bc_inputs: Dict[str, List[torch.Tensor]] = {v: [] for v in self.input_vars}

        for fixed_var_name in spatial_vars:
            other_sampling_vars = [v for v in spatial_vars if v != fixed_var_name]
            if self.time_dependent:
                other_sampling_vars.append(self.time_var)

            for boundary_value in domain_bounds[fixed_var_name]:  # min face, then max face
                current_face_inputs = {
                    fixed_var_name: torch.full((num_points_per_face, 1), boundary_value,
                                               dtype=torch.float32, device=device)
                }

                for sample_idx, other_var_name in enumerate(other_sampling_vars):
                    ov_min, ov_max = domain_bounds[other_var_name]
                    if samples_bc_other_dims is not None:
                        unit_samples = samples_bc_other_dims[:, sample_idx:sample_idx+1]
                    else:  # uniform, or Sobol with nothing to sample
                        unit_samples = torch.rand(num_points_per_face, 1, device=device)
                    current_face_inputs[other_var_name] = unit_samples * (ov_max - ov_min) + ov_min

                # Append points for this face to the per-variable lists
                for var_n in self.input_vars:
                    all_bc_inputs[var_n].append(current_face_inputs[var_n])

        # Concatenate points from all faces; BC points usually don't need grad
        final_bc_inputs = {}
        for var_n in self.input_vars:
            if all_bc_inputs[var_n]:
                final_bc_inputs[var_n] = torch.cat(all_bc_inputs[var_n], dim=0).detach()
            else:
                final_bc_inputs[var_n] = torch.empty(0, 1, device=device).detach()
        return final_bc_inputs

    def get_boundary_points_general(self,
                                       num_total_points: int,
                                       kappa_value: float,
                                       device: Union[str, torch.device] = 'cpu',
                                       strategy: str = 'uniform' # Strategy for sampling on the general boundary
                                      ) -> Optional[Dict[str, torch.Tensor]]:
        """
        To be implemented by subclasses for non-rectangular/complex domains.
        Should return points lying *on* the boundary.
        Returns None to indicate this method is not implemented or not applicable,
        allowing fallback to get_boundary_points_hyperrect.
        """
        return None  # Indicating no general boundary points available

    def get_initial_points(self,
                           num_points: int,
                           kappa_value: float,
                           device: Union[str, torch.device] = 'cpu',
                           strategy: str = 'uniform') -> Dict[str, torch.Tensor]:
        """
        Samples points on the initial-time slice of the domain.

        The time variable is fixed at its lower bound and every spatial
        variable is sampled inside its bounds.

        Args:
            num_points: Number of points to draw.
            kappa_value: Unused by this base implementation.
            device: Device on which the returned tensors live.
            strategy: 'uniform' or 'sobol'.

        Returns:
            Dict mapping variable names to (num_points, 1) tensors without
            grad; empty (0, 1) tensors if the problem is not time dependent.

        Raises:
            NotImplementedError: for an unknown strategy.
        """
        if not self.time_dependent:
            return {v: torch.empty(0, 1, device=device).requires_grad_(False) for v in self.input_vars}

        if strategy not in ('uniform', 'sobol'):
            raise NotImplementedError(f"IC sampling strategy '{strategy}' not supported.")

        domain_bounds = self.get_domain_bounds()
        inputs: Dict[str, torch.Tensor] = {}

        t_initial_val = domain_bounds[self.time_var][0]
        inputs[self.time_var] = torch.full((num_points, 1), t_initial_val, dtype=torch.float32, device=device)

        spatial_vars = [v for v in self.input_vars if v != self.time_var]
        samples_0_1_ic = None
        if strategy == 'sobol' and spatial_vars:  # Sobol needs at least one dimension
            sobol_ic = SobolEngine(dimension=len(spatial_vars), scramble=True)
            samples_0_1_ic = sobol_ic.draw(num_points).to(device)

        for i, var_name in enumerate(spatial_vars):
            var_min, var_max = domain_bounds[var_name]
            if samples_0_1_ic is not None:
                unit_samples = samples_0_1_ic[:, i:i+1]
            else:
                unit_samples = torch.rand(num_points, 1, device=device)
            inputs[var_name] = unit_samples * (var_max - var_min) + var_min

        # Every input variable is either the time variable or a spatial one,
        # so all keys exist here. IC coordinates generally don't need grad.
        for var_name in self.input_vars:
            inputs[var_name].requires_grad_(False)
        return inputs

    @abstractmethod
    def get_required_derivative_orders(self) -> Dict[str, Dict[Tuple[str, ...], int]]:
        """
        Returns a dictionary specifying derivative requirements for each output variable.
        Each entry lists derivatives OF the output variable used as its key.
        Structure:
          {
            'output_var_name_1': { # For the first output variable (e.g., 'u')
                # Simple derivatives:
                ('input_var_for_deriv',): order,  # e.g., ('x',): 2 for d2(u)/dx2
                # Mixed derivatives (sequence of differentiation):
                ('input_var_1', 'input_var_2', ...): 1, # e.g., ('x', 'y'): 1 for d/dy(d(u)/dx)
                                                       # The value (e.g., 1) indicates one application
                                                       # of this sequence of differentiations.
            },
            'output_var_name_2': { ... } # For the second output variable (e.g., 'v')
          }
        Example for -u_xx - u_yy = f (output_vars=['u']):
          {'u': {('x',): 2, ('y',): 2}}
        Example for u_t + v_x = 0, v_t + u_x = 0 (output_vars=['u', 'v']):
          {
            'u': {('t',): 1, ('x',): 1},
            'v': {('x',): 1, ('t',): 1}
          }
        """
        pass

    def calculate_specific_observables(self,
                                       inputs: Dict[str, torch.Tensor],
                                       model_outputs: torch.Tensor,
                                       ground_truth_outputs: Optional[torch.Tensor],
                                       kappa_value: float) -> Dict[str, float]:
        """
        Calculates PDE-specific physical observables and their errors.
        To be implemented by subclasses if relevant.
        Args:
            inputs: Dictionary of input tensors for the test grid.
            model_outputs: Tensor of model predictions on the test grid.
            ground_truth_outputs: Tensor of ground truth solutions on the test grid (if available).
            kappa_value: Current hardness parameter.
        Returns:
            A dictionary of observable names to their scalar values (e.g., errors).
            Example: {'soliton_amplitude_error': 0.01, 'shock_speed_error': 0.05}
        """
        return {}  # Default implementation returns no specific observables

In [83]:
import torch
import torch.nn as nn
from typing import Union

def create_pinn_model(
    input_dim: int,
    output_dim: int,
    n_neurons_per_layer: int,
    n_hidden_layers: int = 1, # Default to SLN
    activation_str: str = "tanh",
    device: Union[str, torch.device] = 'cpu'
) -> nn.Module:
    """
    Creates a feedforward neural network (PINN model).

    Linear weights are re-initialised according to the activation: Xavier
    normal for tanh/sigmoid, Kaiming normal for relu/leakyrelu. All biases
    are set to zero.

    Args:
        input_dim (int): Dimension of the input (e.g., 1 for u(x), 2 for u(x,t)).
        output_dim (int): Dimension of the output (e.g., 1 for scalar u).
        n_neurons_per_layer (int): Number of neurons in each hidden layer.
        n_hidden_layers (int): Number of hidden layers. Default is 1; 0 gives
                               a purely linear model.
        activation_str (str): Activation function to use ('tanh', 'relu', 'sigmoid', 'leakyrelu'),
                              case-insensitive. Default is 'tanh'.
        device (Union[str, torch.device]): Device to send the model to ('cpu' or 'cuda').
                                           Default is 'cpu'.

    Returns:
        nn.Module: The PyTorch neural network model (nn.Sequential).

    Raises:
        ValueError: If n_hidden_layers is negative, or if hidden layers are
                    requested with an unsupported activation.
    """
    if n_hidden_layers < 0:
        # Previously a negative count silently produced one hidden layer.
        raise ValueError(f"n_hidden_layers must be non-negative, got {n_hidden_layers}")

    activation_key = activation_str.lower()
    layers: list[nn.Module] = []

    if n_hidden_layers == 0: # Special case: linear model (no hidden layers)
        layers.append(nn.Linear(input_dim, output_dim))
    else:
        activation_classes = {
            'tanh': nn.Tanh,
            'relu': nn.ReLU,
            'sigmoid': nn.Sigmoid,
            'leakyrelu': nn.LeakyReLU,
        }
        if activation_key not in activation_classes:
            raise ValueError(f"Unsupported activation: {activation_str}")
        activation_cls = activation_classes[activation_key]

        fan_in = input_dim
        for _ in range(n_hidden_layers):
            layers.append(nn.Linear(fan_in, n_neurons_per_layer))
            layers.append(activation_cls())
            fan_in = n_neurons_per_layer

        # Output layer (connects last hidden layer to output_dim)
        layers.append(nn.Linear(n_neurons_per_layer, output_dim))

    model = nn.Sequential(*layers).to(device)

    # Kaiming's gain for leaky ReLU depends on the negative slope; use the
    # slope nn.LeakyReLU actually applies rather than the init default of 0.
    leaky_slope = nn.LeakyReLU().negative_slope

    # Apply initializations
    for layer in model:
        if not isinstance(layer, nn.Linear):
            continue
        if activation_key in ('tanh', 'sigmoid'):
            nn.init.xavier_normal_(layer.weight) # Glorot normal
        elif activation_key == 'relu':
            nn.init.kaiming_normal_(layer.weight, nonlinearity='relu')
        elif activation_key == 'leakyrelu':
            nn.init.kaiming_normal_(layer.weight, a=leaky_slope, nonlinearity='leaky_relu')
        # Any other name (only reachable for the linear model) keeps
        # PyTorch's default weight initialisation.

        if layer.bias is not None:
            nn.init.zeros_(layer.bias)

    return model


In [None]:
# Cell 3
import torch
import torch.optim as optim
import time
import numpy as np

class Trainer:
    def __init__(self, model, pde_problem: 'PDEProblem', optimizer_str="adam", learning_rate=1e-3, device='cpu'):
        self.model = model.to(device)
        self.pde_problem = pde_problem # Type hint for clarity
        self.device = device
        self.lr = learning_rate

        if optimizer_str.lower() == "adam":
            self.optimizer = optim.Adam(self.model.parameters(), lr=self.lr)
        elif optimizer_str.lower() == "lbfgs":
            self.optimizer = optim.LBFGS(self.model.parameters(), lr=self.lr, max_iter=20, line_search_fn="strong_wolfe")
        else:
            raise ValueError(f"Unsupported optimizer: {optimizer_str}")

        self.optimizer_str = optimizer_str
        self.epoch_wise_log = []

    def _prepare_model_input(self, inputs_dict: dict) -> torch.Tensor | None:
        """
        Prepares a single tensor input for the model from the inputs_dict.
        The order of concatenation is defined by self.pde_problem.input_vars.
        """
        if not inputs_dict:
            return None # Or handle as appropriate if model expects input even for empty dict

        ordered_input_tensors = []
        for var_name in self.pde_problem.input_vars:
            if var_name in inputs_dict:
                ordered_input_tensors.append(inputs_dict[var_name])
            else:
                # This should ideally not happen if PDEProblem methods are consistent
                raise ValueError(f"Input variable '{var_name}' expected by PDEProblem.input_vars "
                                 f"but not found in provided inputs_dict keys: {list(inputs_dict.keys())}")

        if not ordered_input_tensors: # Should be caught by the first check if inputs_dict is empty
             return torch.empty(0, device=self.device)

        return torch.cat(ordered_input_tensors, dim=1)

    def _compute_derivatives(self, inputs_dict_with_grad: Dict[str, torch.Tensor],
                             model_outputs_tensor: torch.Tensor) -> Dict[str, torch.Tensor]:
        """
        Computes derivatives based on pde_problem.get_required_derivative_orders().
        model_outputs_tensor has shape (batch, pde_problem.output_dim)
        Derivatives are taken with respect to the individual tensors in inputs_dict_with_grad.

        Returns a dictionary of derivatives.
        Naming convention examples:
        - d(u)_dx(1)       for first derivative of 'u' wrt 'x'
        - d2(u)_dx(2)      for second derivative of 'u' wrt 'x'
        - d(v)_dt(1)       for first derivative of 'v' wrt 't'
        - d(u)_dx(1)dy(1)  for d/dy(du/dx)
        """
        derivatives: Dict[str, torch.Tensor] = {}
        required_specs = self.pde_problem.get_required_derivative_orders()
        output_var_names = self.pde_problem.output_vars # List of names like ['u', 'v']

        for out_idx, out_var_name in enumerate(output_var_names):
            if out_var_name not in required_specs: # If no derivatives are listed for this output var
                continue

            # current_output_component is (batch_size, 1)
            current_output_component = model_outputs_tensor[:, out_idx:out_idx+1]

            spec_for_this_output_var = required_specs[out_var_name]

            for input_var_sequence, order_val in spec_for_this_output_var.items():
                # input_var_sequence is a tuple, e.g., ('x',) or ('x', 't')
                # order_val for simple derivatives is the max order, e.g., 2 for d2u/dx2
                # order_val for mixed sequence is typically 1 (one application of the sequence)

                if not isinstance(input_var_sequence, tuple) or not input_var_sequence:
                    raise ValueError(f"Invalid input_var_sequence: {input_var_sequence} for {out_var_name}")

                # --- Handle Simple Derivatives (e.g., ('x',): 2 for d2u/dx2) ---
                if len(input_var_sequence) == 1:
                    input_var_name_for_deriv = input_var_sequence[0]
                    max_order = order_val

                    if input_var_name_for_deriv not in inputs_dict_with_grad:
                        raise RuntimeError(f"Input variable '{input_var_name_for_deriv}' needed for derivative of '{out_var_name}' "
                                           f"not found in inputs_dict_with_grad: {list(inputs_dict_with_grad.keys())}")
                    input_tensor_for_grad = inputs_dict_with_grad[input_var_name_for_deriv]
                    if not input_tensor_for_grad.requires_grad:
                        raise RuntimeError(f"Input tensor for '{input_var_name_for_deriv}' does not require grad.")

                    temp_deriv_target = current_output_component
                    for o in range(1, max_order + 1):
                        grads = torch.autograd.grad(
                            outputs=temp_deriv_target,
                            inputs=input_tensor_for_grad,
                            grad_outputs=torch.ones_like(temp_deriv_target),
                            create_graph=True,
                            retain_graph=True,
                            allow_unused=False # Be strict initially
                        )[0]
                        if grads is None:
                            raise RuntimeError(f"Gradient for d{o}({out_var_name})_d({input_var_name_for_deriv}){o} was None.")

                        deriv_name = f"d{'' if o == 1 else o}({out_var_name})_d{input_var_name_for_deriv}({o})"
                        derivatives[deriv_name] = grads
                        temp_deriv_target = grads

                # --- Handle Mixed Derivatives (e.g., ('x', 't'): 1 for d/dt(du/dx)) ---
                elif len(input_var_sequence) > 1:
                    if order_val != 1:
                        # For now, assume mixed derivative specs like ('x','y'):1 mean one application of d/dy(d/dx(...))
                        # Higher order_val for mixed could mean repeated application of the sequence, but that's rare.
                        print(f"Warning: Mixed derivative for {out_var_name} wrt {input_var_sequence} has order_val {order_val} != 1. Interpreting as 1 application.")

                    temp_deriv_target = current_output_component

                    # Build the name like "d(u)_dx(1)dy(1)"
                    # The number before (out_var_name) will be len(input_var_sequence)
                    name_prefix = f"d{len(input_var_sequence)}({out_var_name})_d"
                    name_suffix_parts = []

                    for i, invar_name in enumerate(input_var_sequence):
                        if invar_name not in inputs_dict_with_grad:
                            raise RuntimeError(f"Input variable '{invar_name}' for mixed derivative of '{out_var_name}' "
                                               f"not in inputs_dict_with_grad.")
                        input_tensor_for_grad = inputs_dict_with_grad[invar_name]
                        if not input_tensor_for_grad.requires_grad:
                             raise RuntimeError(f"Input tensor for mixed deriv '{invar_name}' does not require grad.")

                        grads = torch.autograd.grad(
                            outputs=temp_deriv_target,
                            inputs=input_tensor_for_grad,
                            grad_outputs=torch.ones_like(temp_deriv_target),
                            create_graph=True, # Must be true if any further grads in sequence
                            retain_graph=True, # Must be true
                            allow_unused=False
                        )[0]
                        if grads is None:
                            raise RuntimeError(f"Mixed derivative part d/d{invar_name} for {out_var_name} failed.")
                        temp_deriv_target = grads
                        name_suffix_parts.append(f"{invar_name}(1)")

                    deriv_name = name_prefix + "".join(name_suffix_parts)
                    derivatives[deriv_name] = temp_deriv_target
        return derivatives

    def _calculate_error_metrics_on_test_grid(self, kappa_value, num_test_pts=1001):
        self.model.eval()
        domain_bounds = self.pde_problem.get_domain_bounds()
        test_inputs_dict_for_gt = {} # Populate this as before based on input_vars

        # ... (grid generation logic as in your full code for 1D/2D inputs) ...
        if len(self.pde_problem.input_vars) == 1:
            var_name = self.pde_problem.input_vars[0]
            var_min, var_max = domain_bounds[var_name]
            test_values_np = np.linspace(var_min, var_max, num_test_pts)
            test_values_torch = torch.tensor(test_values_np, dtype=torch.float32, device=self.device).unsqueeze(1)
            test_inputs_dict_for_gt[var_name] = test_values_torch
        elif len(self.pde_problem.input_vars) == 2:
            var1_name, var2_name = self.pde_problem.input_vars[0], self.pde_problem.input_vars[1]
            var1_min, var1_max = domain_bounds[var1_name]
            var2_min, var2_max = domain_bounds[var2_name]
            pts_per_dim = int(np.sqrt(num_test_pts))
            # ... (meshgrid logic) ...
            # (ensure num_test_pts is updated based on actual grid size)
            var1_vals = torch.linspace(var1_min, var1_max, pts_per_dim, device=self.device)
            var2_vals = torch.linspace(var2_min, var2_max, pts_per_dim, device=self.device)
            grid_var1, grid_var2 = torch.meshgrid(var1_vals, var2_vals, indexing='ij')
            test_inputs_dict_for_gt[var1_name] = grid_var1.reshape(-1, 1)
            test_inputs_dict_for_gt[var2_name] = grid_var2.reshape(-1, 1)
            num_test_pts = test_inputs_dict_for_gt[var1_name].shape[0]
        else:
            # For >2D, you'll need to implement a more general grid creation or accept it as an argument
            # For now, let's assume we won't hit this for the workshop's core PDEs
            print("Warning: Test grid generation for >2 input_vars not fully implemented in error metrics.")
            # Fallback or raise error
            return {key: float('nan') for key in ['L1_err', 'L2_err', 'Linf_err', 'L1_err_rel',
                                                  'L2_err_rel', 'Linf_err_rel', 'PDE_residual_max',
                                                  'error_median_abs', 'error_p90_abs']}


        test_model_input_tensor = self._prepare_model_input(test_inputs_dict_for_gt)
        if test_model_input_tensor is None or test_model_input_tensor.numel() == 0:
             print("Warning: No test model input tensor generated for error metrics.")
             return {key: float('nan') for key in ['L1_err', 'L2_err', 'Linf_err', 'L1_err_rel',
                                                  'L2_err_rel', 'Linf_err_rel', 'PDE_residual_max',
                                                  'error_median_abs', 'error_p90_abs']}


        with torch.no_grad():
            u_pred_test = self.model(test_model_input_tensor)

        u_true_test_torch = self.pde_problem.get_ground_truth(test_inputs_dict_for_gt, kappa_value)

        if u_true_test_torch is not None and u_pred_test.shape != u_true_test_torch.shape:
            try: u_pred_test = u_pred_test.reshape_as(u_true_test_torch)
            except RuntimeError: print(f"Warning: Cannot reshape u_pred_test for error calc.")

        metrics = {
            'L1_err': float('nan'), 'L2_err': float('nan'), 'Linf_err': float('nan'),
            'L1_err_rel': float('nan'), 'L2_err_rel': float('nan'), 'Linf_err_rel': float('nan'),
            'PDE_residual_max': float('nan'),
            'error_median_abs': float('nan'), 'error_p90_abs': float('nan') # New
        }

        if u_true_test_torch is not None:
            error_vec = (u_pred_test - u_true_test_torch).flatten() # Flatten for norms and quantiles
            actual_num_test_pts = len(error_vec)
            if actual_num_test_pts == 0: actual_num_test_pts = 1

            metrics['L1_err'] = torch.linalg.norm(error_vec, ord=1).item() / actual_num_test_pts
            metrics['L2_err'] = torch.linalg.norm(error_vec, ord=2).item() / np.sqrt(actual_num_test_pts)
            metrics['Linf_err'] = torch.linalg.norm(error_vec, ord=float('inf')).item()

            abs_error_vec = torch.abs(error_vec)
            metrics['error_median_abs'] = torch.median(abs_error_vec).item()
            if actual_num_test_pts > 0 : # Quantile needs at least one element
                 metrics['error_p90_abs'] = torch.quantile(abs_error_vec, 0.9).item()

            u_true_flat = u_true_test_torch.flatten()
            norm_u_true_l1 = torch.linalg.norm(u_true_flat, ord=1)
            norm_u_true_l2 = torch.linalg.norm(u_true_flat, ord=2)
            norm_u_true_linf = torch.linalg.norm(u_true_flat, ord=float('inf'))

            if norm_u_true_l1 > 1e-9: metrics['L1_err_rel'] = torch.linalg.norm(error_vec, ord=1).item() / norm_u_true_l1.item()
            if norm_u_true_l2 > 1e-9: metrics['L2_err_rel'] = torch.linalg.norm(error_vec, ord=2).item() / norm_u_true_l2.item()
            if norm_u_true_linf > 1e-9: metrics['Linf_err_rel'] = metrics['Linf_err'] / norm_u_true_linf.item()

        # Max PDE residual
        test_inputs_dict_for_res = {}
        for k, v_test in test_inputs_dict_for_gt.items():
            if v_test.numel() > 0: # Only process if tensor is not empty
                test_inputs_dict_for_res[k] = v_test.clone().detach().requires_grad_(True)

        if test_inputs_dict_for_res: # Proceed only if there are inputs for residual calculation
            res_model_input_tensor = self._prepare_model_input(test_inputs_dict_for_res)
            if res_model_input_tensor is not None and res_model_input_tensor.numel() > 0:
                u_pred_for_res = self.model(res_model_input_tensor)
                derivatives_for_res = self._compute_derivatives(test_inputs_dict_for_res, u_pred_for_res)
                pde_res_vals_on_grid = self.pde_problem.pde_residual(test_inputs_dict_for_res, u_pred_for_res, derivatives_for_res, kappa_value)
                if pde_res_vals_on_grid is not None and pde_res_vals_on_grid.numel() > 0 :
                    metrics['PDE_residual_max'] = torch.max(torch.abs(pde_res_vals_on_grid.detach())).item()

        # Calculate specific observables if the PDEProblem has this method
        if hasattr(self.pde_problem, 'calculate_specific_observables'):
            try:
                specific_obs = self.pde_problem.calculate_specific_observables(
                    test_inputs_dict_for_gt, # The dict of input tensors for the test grid
                    u_pred_test,             # Model predictions on the test grid
                    u_true_test_torch,       # Ground truth on the test grid
                    kappa_value
                )
                if specific_obs and isinstance(specific_obs, dict):
                    metrics.update(specific_obs) # Add them to the metrics dict for this epoch
            except Exception as e:
                print(f"Warning: Error calculating specific observables for {self.pde_problem.name}: {e}")

        self.model.train()
        return metrics

    def _closure_lbfgs(self, collocation_points_dict, bc_points_dict, ic_points_dict, kappa_value, loss_weights):
        self.optimizer.zero_grad()

        # PDE Loss
        colloc_model_input = self._prepare_model_input(collocation_points_dict)
        model_outputs_colloc = self.model(colloc_model_input)
        derivatives_colloc = self._compute_derivatives(collocation_points_dict, model_outputs_colloc)
        pde_res = self.pde_problem.pde_residual(collocation_points_dict, model_outputs_colloc, derivatives_colloc, kappa_value)
        loss_pde = torch.mean(pde_res**2)

        # BC Loss
        loss_bc = torch.tensor(0.0, device=self.device)
        if bc_points_dict: # Check if not empty
            bc_model_input = self._prepare_model_input(bc_points_dict)
            model_outputs_bc = self.model(bc_model_input)
            loss_bc = self.pde_problem.boundary_conditions(bc_points_dict, model_outputs_bc, self.model, kappa_value)

        # IC Loss
        loss_ic = torch.tensor(0.0, device=self.device)
        if self.pde_problem.time_dependent and ic_points_dict:
            ic_model_input = self._prepare_model_input(ic_points_dict)
            model_outputs_ic = self.model(ic_model_input)
            loss_ic = self.pde_problem.initial_conditions(ic_points_dict, model_outputs_ic, self.model, kappa_value)

        total_loss = (loss_weights['pde'] * loss_pde +
                      loss_weights['bc'] * loss_bc +
                      loss_weights['ic'] * loss_ic)
        total_loss.backward()
        self._current_losses = {'pde': loss_pde.item(), 'bc': loss_bc.item(),
                                'ic': loss_ic.item(), 'total': total_loss.item()}
        return total_loss

    def train(self, num_epochs, kappa_value,
              num_collocation_pts, num_bc_pts_per_face, num_ic_pts,
              collocation_strategy='uniform',
              loss_weights=None,
              log_epochs=None,
              num_test_pts_error_grid=1001):
        """Train the model and collect a log entry at selected epochs.

        New collocation, boundary and (for time-dependent problems) initial
        points are requested from the PDE problem at every epoch. Epoch 0 only
        evaluates the losses; no optimizer step is taken.

        Args:
            num_epochs: Number of optimizer steps; the loop runs epochs 0..num_epochs.
            kappa_value: Problem parameter forwarded to every PDE-problem call.
            num_collocation_pts: Number of collocation points requested per epoch.
            num_bc_pts_per_face: Number of boundary points requested per face.
            num_ic_pts: Number of initial-condition points requested.
            collocation_strategy: Strategy name forwarded to all point samplers.
            loss_weights: Mapping with weights for 'pde', 'bc' and 'ic'.
                ``None`` or missing keys fall back to 1.0.
            log_epochs: Epochs at which a log entry is recorded. ``None`` means
                [0, 1000, 5000, 10000]. The final epoch is always logged.
            num_test_pts_error_grid: Forwarded to the test-grid error evaluation.

        Returns:
            The list of log-entry dicts, also stored as ``self.epoch_wise_log``.

        Raises:
            ValueError: If ``self.optimizer_str`` is neither "adam" nor "lbfgs".
        """
        # Defaults are created per call (no shared mutable default objects), and
        # partially specified weights are completed with 1.0.
        weights = {'pde': 1.0, 'bc': 1.0, 'ic': 1.0}
        if loss_weights:
            weights.update(loss_weights)
        log_epoch_set = set([0, 1000, 5000, 10000] if log_epochs is None else log_epochs)

        # Fail before training instead of hitting an unbound loss variable at the first log.
        if self.optimizer_str not in ("adam", "lbfgs"):
            raise ValueError(
                f"Unsupported optimizer_str '{self.optimizer_str}'; expected 'adam' or 'lbfgs'."
            )

        cumulative_time_s = 0.0
        self.epoch_wise_log = []

        for epoch in range(num_epochs + 1):
            epoch_start_time = time.time()

            # Point sampling is shared by both optimizers.
            # The samplers return dicts like {'x': tensor, 't': tensor}.
            collocation_points_dict = self.pde_problem.get_collocation_points(
                num_collocation_pts, kappa_value, self.device, collocation_strategy
            )

            bc_points_dict = self.pde_problem.get_boundary_points_general(
                num_bc_pts_per_face, kappa_value, self.device, strategy=collocation_strategy
            )
            if bc_points_dict is None:  # Fall back to hyper-rectangle sampling
                bc_points_dict = self.pde_problem.get_boundary_points_hyperrect(
                    num_bc_pts_per_face, kappa_value, self.device, strategy=collocation_strategy
                )

            ic_points_dict = {}
            if self.pde_problem.time_dependent:
                ic_points_dict = self.pde_problem.get_initial_points(
                    num_ic_pts, kappa_value, self.device, strategy=collocation_strategy
                )

            if self.optimizer_str == "adam":
                self.model.train()
                self.optimizer.zero_grad()

                # PDE loss
                colloc_model_input = self._prepare_model_input(collocation_points_dict)
                model_outputs_colloc = self.model(colloc_model_input)
                # The original point dict is passed on for derivative computation.
                derivatives_colloc = self._compute_derivatives(collocation_points_dict, model_outputs_colloc)
                pde_res = self.pde_problem.pde_residual(
                    collocation_points_dict, model_outputs_colloc, derivatives_colloc, kappa_value
                )
                loss_pde = torch.mean(pde_res**2)

                # BC loss
                loss_bc = torch.tensor(0.0, device=self.device)
                if bc_points_dict:
                    bc_model_input = self._prepare_model_input(bc_points_dict)
                    model_outputs_bc = self.model(bc_model_input)
                    loss_bc = self.pde_problem.boundary_conditions(
                        bc_points_dict, model_outputs_bc, self.model, kappa_value
                    )

                # IC loss
                loss_ic = torch.tensor(0.0, device=self.device)
                if self.pde_problem.time_dependent and ic_points_dict:
                    ic_model_input = self._prepare_model_input(ic_points_dict)
                    model_outputs_ic = self.model(ic_model_input)
                    loss_ic = self.pde_problem.initial_conditions(
                        ic_points_dict, model_outputs_ic, self.model, kappa_value
                    )

                total_loss = (weights['pde'] * loss_pde +
                              weights['bc'] * loss_bc +
                              weights['ic'] * loss_ic)

                # Epoch 0 is evaluation only.
                if epoch > 0:
                    total_loss.backward()
                    self.optimizer.step()

                current_total_loss = total_loss.item()
                current_pde_loss = loss_pde.item()
                current_bc_loss = loss_bc.item()
                current_ic_loss = loss_ic.item()

            else:  # "lbfgs"
                self.model.train()

                def closure():
                    return self._closure_lbfgs(
                        collocation_points_dict, bc_points_dict, ic_points_dict, kappa_value, weights
                    )

                if epoch > 0:
                    self.optimizer.step(closure)
                else:
                    # Evaluate once without stepping so the epoch-0 entry reports the
                    # initial losses rather than NaN or values left by an earlier run.
                    closure()

                # The closure stores the most recent loss values on the instance.
                recorded = getattr(self, '_current_losses', {})
                current_total_loss = recorded.get('total', float('nan'))
                current_pde_loss = recorded.get('pde', float('nan'))
                current_bc_loss = recorded.get('bc', float('nan'))
                current_ic_loss = recorded.get('ic', float('nan'))

            # Epoch 0 performs no update, so it is excluded from the training time.
            epoch_duration_s = time.time() - epoch_start_time
            if epoch > 0:
                cumulative_time_s += epoch_duration_s

            if epoch in log_epoch_set or epoch == num_epochs:
                # Global L2 norm of the parameter gradients (0.0 at epoch 0).
                grad_norm = 0.0
                if epoch > 0:
                    for p in self.model.parameters():
                        if p.grad is not None:
                            grad_norm += p.grad.detach().norm(2).item() ** 2
                    grad_norm = grad_norm ** 0.5 if grad_norm > 0 else 0.0

                error_metrics_on_grid = self._calculate_error_metrics_on_test_grid(
                    kappa_value, num_test_pts_error_grid
                )

                # Global L2 norm of the trainable weights, logged as a diagnostic.
                l2_norm_weights = 0.0
                for param in self.model.parameters():
                    if param.requires_grad:
                        l2_norm_weights += torch.linalg.norm(param.data).item()**2
                l2_norm_weights = np.sqrt(l2_norm_weights) if l2_norm_weights > 0 else 0.0

                gpu_mem_peak_mb = float('nan')
                if self.device.type == 'cuda':
                    # Peak allocation since the previous reset, converted to MB.
                    gpu_mem_peak_mb = torch.cuda.max_memory_allocated(self.device) / (1024**2)
                    torch.cuda.reset_peak_memory_stats(self.device)  # Start a new interval

                log_entry = {
                    'epoch': epoch, 'time_s': cumulative_time_s,
                    'loss_total': current_total_loss, 'loss_pde': current_pde_loss,
                    'loss_bc': current_bc_loss, 'loss_ic': current_ic_loss,
                    'grad_norm_l2': grad_norm, 'l2_norm_weights': l2_norm_weights,
                    'gpu_mem_peak_mb': gpu_mem_peak_mb,
                }
                log_entry.update(error_metrics_on_grid)
                self.epoch_wise_log.append(log_entry)

                print(f"Epoch {epoch}/{num_epochs}, Loss: {current_total_loss:.3e}, "
                      f"L2_err_rel: {log_entry.get('L2_err_rel', float('nan')):.3e}, GradNorm: {grad_norm:.3e}")

        print(f"Training finished. Total active time: {cumulative_time_s:.2f}s")
        return self.epoch_wise_log

In [85]:
# Cell 4
import itertools
import os
import json
import pandas as pd
from dataclasses import dataclass, asdict, field

@dataclass
class ExperimentConfig:
    """Settings describing one training experiment (problem, network, optimizer, sampling)."""

    # Identification
    pde_name: str
    kappa_val: float
    activation_str: str
    seed: int

    # Architecture
    depth: int  # Number of hidden layers
    width: int  # Neurons per hidden layer

    # Optimizer
    optimizer_type: str
    lr: float

    # Training
    epochs: int

    # Logging & Error Evaluation
    # A factory is used so every config instance owns its own list.
    log_epochs_list: List[int] = field(default_factory=lambda: list(range(0, 10001, 100)))
    num_test_pts_error_grid: int = 1001

    # Loss Weights
    loss_weight_pde: float = 1.0
    loss_weight_bc: float = 1.0
    loss_weight_ic: float = 1.0

    # Collocation (points takes precedence over factor)
    M_collocation_pts: Optional[int] = None  # None means "derive from M_collocation_factor"
    M_collocation_factor: int = 10

    # IC/BC Points
    num_total_bc_pts: Optional[int] = None  # Total BC points across all faces
    num_bc_pts_per_face: Optional[int] = None  # If not specified, heuristic will be used
    num_ic_pts: Optional[int] = None  # Only for time-dependent PDEs
    collocation_scheme: str = 'uniform'

class ExperimentRunner:
    """Runs individual training experiments and writes their artifacts to disk.

    Every run gets its own directory below ``base_results_dir`` containing
    ``config.json``, ``training_log.csv`` and ``summary.json``.
    """

    # Columns of the last training-log row that are copied into summary.json.
    _SUMMARY_METRIC_KEYS = ('time_s', 'loss_total', 'L1_err_rel', 'L2_err_rel',
                            'Linf_err_rel', 'PDE_residual_max', 'grad_norm_l2')

    def __init__(self, base_results_dir="data/", pde_map=None, device='cpu'):
        """Store the settings and create the results root directory if needed.

        Args:
            base_results_dir: Root directory for all run outputs.
            pde_map: Mapping from PDE name to a PDE problem instance.
            device: Device handed to the model factory and the trainer.
        """
        self.base_results_dir = base_results_dir
        self.pde_map = pde_map if pde_map is not None else {}
        self.device = device
        os.makedirs(self.base_results_dir, exist_ok=True)

    def _get_run_dir(self, config: ExperimentConfig):
        """Create (if needed) and return the output directory for ``config``."""
        # Make kappa filename-safe: '.' becomes 'p' and '+' is dropped, e.g. 1p0e-03.
        kappa_str = f"{config.kappa_val:.1e}".replace('.', 'p').replace('+', '')

        run_path = os.path.join(
            self.base_results_dir,
            config.pde_name,
            f"kappa_{kappa_str}",
            f"act_{config.activation_str}",
            f"N_{config.width}",
            f"D_{config.depth}",
            f"seed_{config.seed}"
        )
        os.makedirs(run_path, exist_ok=True)
        return run_path

    @staticmethod
    def _resolve_bc_pts_per_face(config, spatial_dim, num_collocation):
        """Return the boundary points per face: explicit total, explicit per-face, else heuristic."""
        num_faces = 2 * spatial_dim if spatial_dim > 0 else 0
        if config.num_total_bc_pts is not None:
            # Without spatial faces there is nothing to distribute the total over.
            return config.num_total_bc_pts // num_faces if num_faces > 0 else 0
        if config.num_bc_pts_per_face is not None:
            return config.num_bc_pts_per_face
        if num_faces == 0:
            return 0
        heuristic_bc_factor = 20
        return max(10, num_collocation // (heuristic_bc_factor * num_faces))  # At least 10 points

    @staticmethod
    def _resolve_ic_pts(config, time_dependent, num_collocation):
        """Return the initial-condition point count (0 for time-independent problems)."""
        if not time_dependent:
            return 0
        if config.num_ic_pts is not None:
            return config.num_ic_pts
        heuristic_ic_factor = 10
        return max(10, num_collocation // heuristic_ic_factor)  # At least 10 points

    @staticmethod
    def _resolve_log_epochs(log_epochs_list, num_epochs):
        """Return the sorted, de-duplicated log epochs within range, always including 0 and the last epoch."""
        in_range = {e for e in log_epochs_list if e <= num_epochs}
        return sorted(in_range | {0, num_epochs})

    def run_single_experiment(self, config: ExperimentConfig):
        """Train one model as described by ``config`` and save config, log and summary.

        Prints an error and returns without doing anything when
        ``config.pde_name`` is not present in ``pde_map``.
        """
        pde_instance = self.pde_map.get(config.pde_name)
        if pde_instance is None:
            print(f"Error: PDE problem '{config.pde_name}' not found in pde_map.")
            return

        run_dir = self._get_run_dir(config)
        print(f"\n--- Running Experiment: {run_dir} ---")
        print(f"Config: {config}")

        with open(os.path.join(run_dir, "config.json"), 'w') as f:
            json.dump(asdict(config), f, indent=2)

        torch.manual_seed(config.seed)
        np.random.seed(config.seed)

        # The network takes one input per PDE input variable.
        model_input_dim = len(pde_instance.input_vars)

        model = create_pinn_model(
            input_dim=model_input_dim,
            output_dim=pde_instance.output_dim,
            n_hidden_layers=config.depth,
            n_neurons_per_layer=config.width,
            activation_str=config.activation_str,
            device=self.device
        )

        trainer = Trainer(model, pde_instance,
                          optimizer_str=config.optimizer_type,
                          learning_rate=config.lr,
                          device=self.device)

        # An explicit point count wins over the width-scaled factor.
        if config.M_collocation_pts is not None:
            M_collocation = config.M_collocation_pts
        else:
            M_collocation = config.width * config.M_collocation_factor

        actual_num_bc_pts_per_face = self._resolve_bc_pts_per_face(
            config, pde_instance.spatial_domain_dim, M_collocation
        )
        actual_num_ic_pts = self._resolve_ic_pts(
            config, pde_instance.time_dependent, M_collocation
        )
        actual_log_epochs = self._resolve_log_epochs(config.log_epochs_list, config.epochs)

        epoch_wise_log_data = trainer.train(
            num_epochs=config.epochs,
            kappa_value=config.kappa_val,
            num_collocation_pts=M_collocation,
            num_bc_pts_per_face=actual_num_bc_pts_per_face,
            num_ic_pts=actual_num_ic_pts,
            collocation_strategy=config.collocation_scheme,
            # Forward the configured weights; otherwise the config fields have no effect.
            loss_weights={
                'pde': config.loss_weight_pde,
                'bc': config.loss_weight_bc,
                'ic': config.loss_weight_ic,
            },
            log_epochs=actual_log_epochs,
            num_test_pts_error_grid=config.num_test_pts_error_grid
        )

        df_epoch_log = pd.DataFrame(epoch_wise_log_data)
        df_epoch_log.to_csv(os.path.join(run_dir, "training_log.csv"), index=False)

        final_metrics = {}
        if not df_epoch_log.empty:
            last_epoch_data = df_epoch_log.iloc[-1]
            final_metrics = {
                key: last_epoch_data.get(key, float('nan'))
                for key in self._SUMMARY_METRIC_KEYS
            }

        summary_data = {"final_metrics": final_metrics, "fit_results": {}}  # Config saved separately
        with open(os.path.join(run_dir, "summary.json"), 'w') as f:
            json.dump(summary_data, f, indent=2, cls=NpEncoder)  # Handles numpy scalar types

        # Format only real numbers; applying ':.3e' to the 'N/A' placeholder would raise.
        final_l2 = final_metrics.get('L2_err_rel')
        final_l2_text = f"{final_l2:.3e}" if final_l2 is not None else "N/A"
        print(f"Finished experiment. Final L2_err_rel: {final_l2_text}")

# JSON helper used when summaries contain NumPy values
class NpEncoder(json.JSONEncoder):
    """JSON encoder that turns NumPy scalars and arrays into built-in Python types."""

    def default(self, obj):
        """Convert NumPy integers, floats and arrays; defer everything else to the base class."""
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        for numpy_kind, to_builtin in ((np.integer, int), (np.floating, float)):
            if isinstance(obj, numpy_kind):
                return to_builtin(obj)
        return super().default(obj)

In [None]:

# --- Main Execution Script (Example) ---
if __name__ == '__main__':
    # Cell 1 (PDEProblem) and Cell 2 (create_pinn_model) must already be executed or
    # imported. If they live in separate files, for example:
    # from pde_problem_cell1 import PDEProblem # (and any concrete PDE classes)
    # from model_creator_cell2 import create_pinn_model

    DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print(f"Using device: {DEVICE}")

    # --- Define Concrete PDEProblem Subclasses Here ---
    class TrivialLinearPDE(PDEProblem):
        """1D problem with residual u''(x) on [0, 1], u(0) = 0, u(1) = 1 and ground truth u(x) = x."""

        def __init__(self):
            super().__init__(
                name="TrivialLinear",
                input_vars=['x'],
                output_vars=['u'],
                kappa_name="kappa",
                default_kappa_value=1.0,
            )

        def get_domain_bounds(self):
            return {'x': (0.0, 1.0)}

        def pde_residual(self, inputs, model_outputs, derivatives, kappa_value):
            # The residual is the second derivative itself.
            return derivatives['d2(u)_dx(2)']

        def boundary_conditions(self, inputs_bc, model_outputs_bc, model, kappa_value):
            x_flat = inputs_bc['x'].squeeze()
            bc_loss = torch.tensor(0.0, device=model_outputs_bc.device)
            # (boundary location, prescribed value); faces without points contribute nothing.
            for boundary_x, target in ((0.0, 0.0), (1.0, 1.0)):
                mismatch = model_outputs_bc[x_flat == boundary_x] - target
                if mismatch.numel() > 0:
                    bc_loss += torch.mean(mismatch**2)
            return bc_loss

        def get_ground_truth(self, inputs, kappa_value):
            # u(x) = x
            return inputs['x'].clone()

        def get_required_derivative_orders(self) -> Dict[str, Dict[Tuple[str, ...], int]]:
            # For output 'u', derivatives with respect to x up to order 2 are needed.
            return {'u': {('x',): 2}}

    class PoissonPDE(PDEProblem):
        """1D problem with residual u''(x) + sin(pi x) on [0, 1] and zero boundary values."""

        def __init__(self):
            super().__init__(name="Poisson", input_vars=['x'])
            self.forcing_fn = lambda x: torch.sin(np.pi * x)
            self.analytical_sol_np = lambda x_np: (1.0 / (np.pi**2)) * np.sin(np.pi * x_np.squeeze())

        def get_domain_bounds(self):
            return {'x': (0.0, 1.0)}

        def pde_residual(self, inputs, model_outputs, derivatives, kappa_value):
            second_derivative = derivatives['d2(u)_dx(2)']
            return second_derivative + self.forcing_fn(inputs['x'])

        def boundary_conditions(self, inputs_bc, model_outputs_bc, model, kappa_value):
            x_flat = inputs_bc['x'].squeeze()
            bc_loss = torch.tensor(0.0, device=model_outputs_bc.device)
            # Both ends are pinned to zero; faces without points contribute nothing.
            for boundary_x in (0.0, 1.0):
                mismatch = model_outputs_bc[x_flat == boundary_x] - 0.0
                if mismatch.numel() > 0:
                    bc_loss += torch.mean(mismatch**2)
            return bc_loss

        def get_ground_truth(self, inputs, kappa_value):
            x = inputs['x']
            # The closed-form solution is evaluated in NumPy, then moved back to x's device and shape.
            exact_np = self.analytical_sol_np(x.detach().cpu().numpy())
            exact = torch.tensor(exact_np, dtype=torch.float32, device=x.device)
            return exact.reshape_as(x)

        def get_required_derivative_orders(self) -> Dict[str, Dict[Tuple[str, ...], int]]:
            # Assuming output_vars = ['u']
            return {'u': {('x',): 2}}

    # --- Setup for ExperimentRunner ---
    pde_instances_map = {
        "TrivialLinear": TrivialLinearPDE(),
        "Poisson": PoissonPDE(),
        # Add BurgersPDE(), KdVPDE() instances here once implemented
    }

    runner = ExperimentRunner(
        base_results_dir="experiment_data_final/",
        pde_map=pde_instances_map,
        device=DEVICE,
    )

    # --- Define Experiment Sweeps ---
    # Small sweep for a quick test:
    configs_to_run = []
    test_pdes = ["Poisson"]  # Or ["TrivialLinear", "Poisson"]
    test_widths = [20, 50]  # Reduced N for speed
    test_kappas_poisson = [1.0]  # Poisson kappa is fixed
    test_activations = ["tanh"]
    test_seeds = [1]
    test_epochs = 2000  # Reduced for quick test

    for pde_name in test_pdes:
        # In a real scenario, fetch from a KAPPA_VALS_MAP,
        # e.g. if pde_name == "Burgers": kappas_for_this_pde = [10.0, 100.0]
        kappas_for_this_pde = test_kappas_poisson

        # product() iterates kappa -> width -> activation -> seed, innermost last.
        for kappa_v, width_v, act_v, seed_v in itertools.product(
                kappas_for_this_pde, test_widths, test_activations, test_seeds):
            configs_to_run.append(ExperimentConfig(
                pde_name=pde_name,
                kappa_val=kappa_v,
                activation_str=act_v,
                seed=seed_v,
                depth=1,  # SLN
                width=width_v,
                M_collocation_factor=10,
                collocation_scheme="uniform",  # or "sobol"
                optimizer_type="adam",
                lr=1e-3,
                epochs=test_epochs,
            ))

    for cfg in configs_to_run:
        runner.run_single_experiment(cfg)

    print("\n--- Example: How to load and quickly check a result ---")
    if configs_to_run:
        first_config = configs_to_run[0]
        example_run_dir = runner._get_run_dir(first_config)
        try:
            df_log = pd.read_csv(os.path.join(example_run_dir, "training_log.csv"))
            print(f"Log for first experiment ({first_config.pde_name}, N={first_config.width}):")
            print(df_log[['epoch', 'loss_total', 'L2_err_rel']].tail())
            with open(os.path.join(example_run_dir, "summary.json"), 'r') as f:
                summary = json.load(f)
            print("\nSummary of final metrics:")
            for k, v in summary['final_metrics'].items():
                # Floats use scientific notation; anything else is printed as-is.
                if isinstance(v, float):
                    print(f"  {k}: {v:.3e}")
                else:
                    print(f"  {k}: {v}")
        except FileNotFoundError:
            print(f"Could not find results for example run at: {example_run_dir}")

Using device: cpu

--- Running Experiment: experiment_data_final/Poisson\kappa_1p0e00\act_tanh\N_20\D_1\seed_1 ---
Config: ExperimentConfig(pde_name='Poisson', kappa_val=1.0, activation_str='tanh', seed=1, depth=1, width=20, optimizer_type='adam', lr=0.001, epochs=2000, log_epochs_list=[0, 1000, 2000], num_test_pts_error_grid=1001, loss_weight_pde=1.0, loss_weight_bc=1.0, loss_weight_ic=1.0, M_collocation_pts=None, M_collocation_factor=10, num_total_bc_pts=None, num_bc_pts_per_face=None, num_ic_pts=None, collocation_scheme='uniform')
Epoch 0/2000, Loss: 4.951e-01, L2_err_rel: 9.635e-01, GradNorm: 0.000e+00
Epoch 1000/2000, Loss: 3.295e-02, L2_err_rel: 7.841e-02, GradNorm: 9.651e-02
Epoch 2000/2000, Loss: 1.830e-04, L2_err_rel: 2.337e-03, GradNorm: 6.572e-03
Training finished. Total active time: 3.33s
Finished experiment. Final L2_err_rel: 2.337e-03

--- Running Experiment: experiment_data_final/Poisson\kappa_1p0e00\act_tanh\N_50\D_1\seed_1 ---
Config: ExperimentConfig(pde_name='Poisson