In [None]:
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
import tensorflow_probability as tfp
import os
import time
import pickle
from matplotlib.gridspec import GridSpec
from mpl_toolkits.axes_grid1 import make_axes_locatable
import import_ipynb

import Mathematical_Framework_Setup as nb1

# Seed the global NumPy and TensorFlow random generators with the same fixed
# value so that code relying on those global generators is repeatable.
np.random.seed(1234)
tf.random.set_seed(1234)





TensorFlow version: 2.19.0
TensorFlow Probability version: 0.25.0
No GPUs available, using CPU

Physical parameters initialized:
Diffusion coefficients: D_SiH4 = 1e-05, D_Si = 5e-06, D_H2 = 4e-05, D_SiH2 = 1.5e-05
Thermal parameters: k = 0.1, Cp = 700.0, ρ = 1.0
Reaction parameters: A1 = 1000000.0, E1 = 150000.0, A2 = 200000.0, E2 = 120000.0, A3 = 300000.0, E3 = 100000.0
Gas constant: R = 8.314

Domain bounds:
x_min = 0.0
x_max = 0.1
y_min = 0.0
y_max = 0.05
t_min = 0.0
t_max = 10.0

Test input shape: (5, 3)
Test output shape: (5, 5)

Computed gradients:
dy_dx shape: (5, 5, 3)
y_x shape: (5, 5)
y_y shape: (5, 5)
y_t shape: (5, 5)
y_xx shape: (5, 5)
y_yy shape: (5, 5)

Computed residuals:
Residual 1 shape: (5, 1)
Residual 2 shape: (5, 1)
Residual 3 shape: (5, 1)
Residual 4 shape: (5, 1)
Residual 5 shape: (5, 1)

Generated collocation points shape: (10, 3)
Generated boundary points:
inlet shape: (5, 3)
substrate shape: (5, 3)
left_wall shape: (5, 3)
right_wall shape: (5, 3)
Generated 

In [2]:
#---------------------------------------------------------------------------
# Section 1: Traditional PINN Trainer
#---------------------------------------------------------------------------

class TraditionalPINNTrainer:
    """
    Trainer for traditional PINN with deterministic optimization
    Used as a baseline for comparison with Entropy-Langevin approach
    """
    
    def __init__(self, model, phys_params, domain_bounds):
        """
        Set up the baseline trainer around a single PINN

        Parameters:
        -----------
        model : PINN
            Network to optimise
        phys_params : CVDPhysicalParams
            Physical parameters, forwarded to the PDE residual calculator
        domain_bounds : dict
            Domain bounds, forwarded to the data generator
        """
        self.model, self.phys_params, self.domain_bounds = model, phys_params, domain_bounds

        # Helpers from the framework notebook: residual evaluation and point sampling
        self.pde_calculator = nb1.CVDPDE(phys_params)
        self.data_generator = nb1.CVDDataGenerator(domain_bounds)

        # Single Adam optimizer with a fixed learning rate of 1e-3
        self.optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)

        # One list per tracked loss term; train() appends one value per epoch
        self.loss_history = {term: [] for term in ('total', 'pde', 'bc', 'ic')}
    
    def compute_pde_loss(self, x_collocation):
        """
        Evaluate the PDE residual loss at the collocation points

        Parameters:
        -----------
        x_collocation : tf.Tensor
            Points at which the governing equations are enforced

        Returns:
        --------
        tf.Tensor
            Sum over the five equations of the mean squared residual
        """
        prediction = self.model(x_collocation)
        derivatives = self.model.get_gradients(x_collocation, prediction)
        residuals = self.pde_calculator.compute_residuals(
            x_collocation, prediction, derivatives
        )

        # residuals[0..4] correspond to SiH4, Si, H2, SiH2 and T respectively
        per_equation_mse = [tf.reduce_mean(tf.square(residuals[k])) for k in range(5)]

        # Accumulate left to right so the summation order matches a plain a+b+c+d+e
        pde_loss = per_equation_mse[0]
        for term in per_equation_mse[1:]:
            pde_loss = pde_loss + term

        return pde_loss
    
    def compute_bc_loss(self, boundary_points):
        """
        Evaluate the Dirichlet boundary-condition loss

        Only the 'inlet' and 'substrate' entries of `boundary_points` are read.
        No wall (no-flux) condition is enforced by this method.

        Parameters:
        -----------
        boundary_points : dict
            Boundary points per boundary; must contain 'inlet' and 'substrate'

        Returns:
        --------
        tf.Tensor
            Inlet loss (SiH4 -> 0.2, T -> 350) plus substrate loss (T -> 700)
        """
        def mean_sq_error(values, target):
            # MSE of one output column against a constant target of matching shape
            return tf.reduce_mean(tf.square(values - target * tf.ones_like(values)))

        # Inlet: output column 0 is SiH4, column 4 is T
        inlet_out = self.model(boundary_points['inlet'])
        inlet_term = mean_sq_error(inlet_out[:, 0:1], 0.2) + \
                     mean_sq_error(inlet_out[:, 4:5], 350.0)

        # Substrate: only the temperature column is constrained
        substrate_out = self.model(boundary_points['substrate'])
        substrate_term = mean_sq_error(substrate_out[:, 4:5], 700.0)

        return inlet_term + substrate_term
    
    def compute_ic_loss(self, initial_points):
        """
        Evaluate the initial-condition loss

        Parameters:
        -----------
        initial_points : tf.Tensor
            Points at which the initial state is imposed

        Returns:
        --------
        tf.Tensor
            Mean squared error between the prediction and the initial state
        """
        prediction = self.model(initial_points)

        # Uniform initial state, one constant per output column:
        # SiH4 = 0.1, Si = 0.0, H2 = 0.0, SiH2 = 0.0, T = 300.0
        column_targets = (0.1, 0.0, 0.0, 0.0, 300.0)
        target_columns = [
            value * tf.ones_like(prediction[:, k:k + 1])
            for k, value in enumerate(column_targets)
        ]
        targets = tf.concat(target_columns, axis=1)

        return tf.reduce_mean(tf.square(prediction - targets))
    
    @tf.function
    def train_step(self, x_collocation, boundary_points, initial_points):
        """
        Run one optimizer update on the weighted sum of all loss terms

        Parameters:
        -----------
        x_collocation : tf.Tensor
            Collocation points for the PDE residuals
        boundary_points : dict
            Boundary points per boundary
        initial_points : tf.Tensor
            Initial condition points

        Returns:
        --------
        tuple
            (total_loss, pde_loss, bc_loss, ic_loss); the last three are unweighted
        """
        with tf.GradientTape() as tape:
            pde_loss = self.compute_pde_loss(x_collocation)
            bc_loss = self.compute_bc_loss(boundary_points)
            ic_loss = self.compute_ic_loss(initial_points)

            # Fixed weights: PDE x1, boundary x10, initial x10
            total_loss = 1.0 * pde_loss + 10.0 * bc_loss + 10.0 * ic_loss

        variables = self.model.trainable_variables
        self.optimizer.apply_gradients(zip(tape.gradient(total_loss, variables), variables))

        return total_loss, pde_loss, bc_loss, ic_loss
    
    def train(self, n_epochs, n_collocation_points=5000, batch_size=None, print_frequency=100):
        """
        Train the PINN model

        Training points are sampled once and reused for every epoch. With
        `batch_size` set, only the collocation points are split into shuffled
        mini-batches; boundary and initial points are used in full on every
        step. Each value stored in the loss history is the mean over the
        steps of that epoch (a single step in full-batch mode).

        Parameters:
        -----------
        n_epochs : int
            Number of training epochs
        n_collocation_points : int
            Number of collocation points for PDE residuals; a tenth of this
            number is requested for boundary and for initial points
        batch_size : int or None
            Batch size for mini-batch training (None for full batch)
        print_frequency : int
            Print a progress line every `print_frequency` epochs

        Returns:
        --------
        dict
            Loss history

        Raises:
        -------
        ValueError
            If an epoch yields no collocation batch at all
        """
        print("Starting traditional PINN training...")
        start_time = time.time()

        # Generate training data once
        x_collocation = tf.convert_to_tensor(
            self.data_generator.generate_collocation_points(n_collocation_points),
            dtype=tf.float32,
        )
        n_side_points = n_collocation_points // 10
        boundary_points = {
            name: tf.convert_to_tensor(points, dtype=tf.float32)
            for name, points in self.data_generator.generate_boundary_points(n_side_points).items()
        }
        initial_points = tf.convert_to_tensor(
            self.data_generator.generate_initial_points(n_side_points),
            dtype=tf.float32,
        )

        if batch_size is None:
            # Full batch: a single "batch" holding every collocation point
            batches = [x_collocation]
        else:
            # Slice the tensor itself (not a 1-tuple around it) so that every
            # dataset element is a plain tensor the model can consume.
            batches = (
                tf.data.Dataset.from_tensor_slices(x_collocation)
                .shuffle(buffer_size=n_collocation_points)
                .batch(batch_size)
            )

        history_keys = ('total', 'pde', 'bc', 'ic')

        for epoch in range(n_epochs):
            # Running sums of (total, pde, bc, ic) over the steps of this epoch
            loss_sums = [0.0, 0.0, 0.0, 0.0]
            num_batches = 0

            for x_batch in batches:
                step_losses = self.train_step(x_batch, boundary_points, initial_points)
                loss_sums = [running + loss for running, loss in zip(loss_sums, step_losses)]
                num_batches += 1

            if num_batches == 0:
                raise ValueError(
                    "No collocation batches to train on; "
                    "check n_collocation_points and batch_size."
                )

            # Average every term the same way so the logged values are consistent
            total_loss, pde_loss, bc_loss, ic_loss = [
                (running / num_batches).numpy() for running in loss_sums
            ]

            # Update loss history
            for key, value in zip(history_keys, (total_loss, pde_loss, bc_loss, ic_loss)):
                self.loss_history[key].append(value)

            # Print progress
            if (epoch + 1) % print_frequency == 0:
                elapsed = time.time() - start_time
                print(f"Epoch {epoch+1}/{n_epochs}, "
                      f"Loss: {total_loss:.6e}, "
                      f"PDE: {pde_loss:.6e}, "
                      f"BC: {bc_loss:.6e}, "
                      f"IC: {ic_loss:.6e}, "
                      f"Time: {elapsed:.2f}s")

        total_time = time.time() - start_time
        print(f"Training completed in {total_time:.2f} seconds.")

        return self.loss_history
    
    def save_model(self, filename):
        """
        Save the weights of the wrapped model (weights only, via `save_weights`)
        
        Parameters:
        -----------
        filename : str
            Path passed unchanged to `self.model.save_weights`
        """
        # NOTE(review): if PINN is a Keras 3 model (the Keras bundled with
        # TF >= 2.16), `save_weights` is expected to require a path ending in
        # `.weights.h5` -- confirm against the filenames callers pass in.
        self.model.save_weights(filename)
    
    def load_model(self, filename):
        """
        Load previously saved weights into the wrapped model
        
        Parameters:
        -----------
        filename : str
            Path passed unchanged to `self.model.load_weights`
        """
        # The model must already exist with its variables created; this call
        # only fills in weights.
        self.model.load_weights(filename)
    
    def predict(self, x_test):
        """
        Evaluate the trained model at the given points

        Parameters:
        -----------
        x_test : np.ndarray or tf.Tensor
            Test points; NumPy arrays are converted to float32 tensors first,
            anything else is passed to the model unchanged

        Returns:
        --------
        np.ndarray
            Model predictions
        """
        inputs = (
            tf.convert_to_tensor(x_test, dtype=tf.float32)
            if isinstance(x_test, np.ndarray)
            else x_test
        )
        return self.model(inputs).numpy()
    
    def plot_loss_history(self, save_path='figures/traditional_pinn_loss.png'):
        """
        Plot the loss history during training on a log-scaled y axis

        Parameters:
        -----------
        save_path : str or None
            Where to write the figure (PNG, 300 dpi). The parent directory is
            created if needed. Pass None to skip saving. The default is a
            relative path rather than a machine-specific absolute one.
        """
        plt.figure(figsize=(12, 8))

        epochs = range(1, len(self.loss_history['total']) + 1)

        # (history key, line style, line width, legend label)
        curves = (
            ('total', 'k-', 2, 'Total Loss'),
            ('pde', 'r-', 1.5, 'PDE Loss'),
            ('bc', 'b-', 1.5, 'BC Loss'),
            ('ic', 'g-', 1.5, 'IC Loss'),
        )
        for key, style, width, label in curves:
            plt.semilogy(epochs, self.loss_history[key], style, linewidth=width, label=label)

        plt.xlabel('Epochs', fontsize=14)
        plt.ylabel('Loss', fontsize=14)
        plt.title('Loss History (Traditional PINN)', fontsize=16)
        plt.legend(fontsize=12)
        plt.grid(True, which='both', linestyle='--', alpha=0.6)

        plt.tight_layout()
        if save_path:
            # savefig does not create missing directories itself
            out_dir = os.path.dirname(save_path)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.show()



In [4]:
#---------------------------------------------------------------------------
# Section 2: Entropy-Langevin PINN Trainer
#---------------------------------------------------------------------------

class EntropyLangevinPINNTrainer:
    """
    Trainer implementing the Entropy-Langevin algorithm for ensemble of PINNs
    """
    
    def __init__(self, models, phys_params, domain_bounds, alpha=0.1, beta=10.0, learning_rate=1e-3):
        """
        Set up the ensemble trainer

        Parameters:
        -----------
        models : list
            PINN models forming the ensemble
        phys_params : CVDPhysicalParams
            Physical parameters, forwarded to the PDE residual calculator
        domain_bounds : dict
            Domain bounds, forwarded to the data generator
        alpha : float
            Initial entropy weight, forwarded to EntropyRegularizedLoss
        beta : float
            Initial inverse temperature, forwarded to EntropyRegularizedLoss
        learning_rate : float
            Learning rate shared by the per-model Adam optimizers
        """
        self.models = models
        self.num_models = len(models)
        self.phys_params = phys_params
        self.domain_bounds = domain_bounds

        # Helpers from the framework notebook
        self.pde_calculator = nb1.CVDPDE(phys_params)
        self.data_generator = nb1.CVDDataGenerator(domain_bounds)
        self.entropy_reg = nb1.EntropyRegularizedLoss(alpha, beta)

        # Every ensemble member gets its own Adam instance (separate optimizer state)
        self.optimizers = []
        for _ in range(self.num_models):
            self.optimizers.append(tf.keras.optimizers.Adam(learning_rate=learning_rate))

        def per_model_lists():
            # A fresh, independent list for each ensemble member
            return [[] for _ in range(self.num_models)]

        # 'total' holds ensemble averages; 'pde'/'bc'/'ic' hold one series per model
        self.loss_history = {
            'total': [],
            'pde': per_model_lists(),
            'bc': per_model_lists(),
            'ic': per_model_lists(),
            'entropy': []
        }
    
    def compute_pde_loss(self, model_idx, x_collocation):
        """
        Evaluate the PDE residual loss of one ensemble member

        Parameters:
        -----------
        model_idx : int
            Index of the model in the ensemble
        x_collocation : tf.Tensor
            Points at which the governing equations are enforced

        Returns:
        --------
        tf.Tensor
            Sum over the five equations of the mean squared residual
        """
        member = self.models[model_idx]

        prediction = member(x_collocation)
        derivatives = member.get_gradients(x_collocation, prediction)
        residuals = self.pde_calculator.compute_residuals(
            x_collocation, prediction, derivatives
        )

        # residuals[0..4] correspond to SiH4, Si, H2, SiH2 and T respectively
        squared_means = [tf.reduce_mean(tf.square(residuals[k])) for k in range(5)]

        # Start from the first term so the additions run strictly left to right
        return sum(squared_means[1:], squared_means[0])
    
    def compute_bc_loss(self, model_idx, boundary_points):
        """
        Evaluate the Dirichlet boundary-condition loss of one ensemble member

        Only the 'inlet' and 'substrate' entries of `boundary_points` are read.

        Parameters:
        -----------
        model_idx : int
            Index of the model in the ensemble
        boundary_points : dict
            Boundary points per boundary; must contain 'inlet' and 'substrate'

        Returns:
        --------
        tf.Tensor
            Inlet loss (SiH4 -> 0.2, T -> 350) plus substrate loss (T -> 700)
        """
        member = self.models[model_idx]

        # Inlet: output column 0 is SiH4, column 4 is T
        at_inlet = member(boundary_points['inlet'])
        sih4_inlet = at_inlet[:, 0:1]
        temp_inlet = at_inlet[:, 4:5]
        sih4_error = sih4_inlet - 0.2 * tf.ones_like(sih4_inlet)
        temp_error = temp_inlet - 350.0 * tf.ones_like(temp_inlet)
        inlet_loss = tf.reduce_mean(tf.square(sih4_error)) + \
                     tf.reduce_mean(tf.square(temp_error))

        # Substrate: only the temperature column is constrained
        at_substrate = member(boundary_points['substrate'])
        temp_substrate = at_substrate[:, 4:5]
        substrate_error = temp_substrate - 700.0 * tf.ones_like(temp_substrate)
        substrate_loss = tf.reduce_mean(tf.square(substrate_error))

        return inlet_loss + substrate_loss
    
    def compute_ic_loss(self, model_idx, initial_points):
        """
        Evaluate the initial-condition loss of one ensemble member

        Parameters:
        -----------
        model_idx : int
            Index of the model in the ensemble
        initial_points : tf.Tensor
            Points at which the initial state is imposed

        Returns:
        --------
        tf.Tensor
            Mean squared error between the prediction and the initial state
        """
        prediction = self.models[model_idx](initial_points)

        # (output column, constant initial value)
        initial_state = (
            (0, 0.1),    # SiH4
            (1, 0.0),    # Si
            (2, 0.0),    # H2
            (3, 0.0),    # SiH2
            (4, 300.0),  # T
        )
        targets = tf.concat(
            [value * tf.ones_like(prediction[:, col:col + 1]) for col, value in initial_state],
            axis=1,
        )

        return tf.reduce_mean(tf.square(prediction - targets))
    
    def train_step(self, epoch, x_collocation, boundary_points, initial_points):
        """
        Perform one training step for the ensemble
        
        Parameters:
        -----------
        epoch : int
            Current epoch number
        x_collocation : tf.Tensor
            Collocation points for PDE residuals
        boundary_points : dict
            Dictionary with boundary points
        initial_points : tf.Tensor
            Initial condition points
            
        Returns:
        --------
        tuple
            (total_losses, pde_losses, bc_losses, ic_losses)
        """
        # Update entropy-Langevin parameters
        self.entropy_reg.update_parameters(epoch, self.n_epochs)
        
        # Initialize lists to store losses and gradients
        total_losses = []
        pde_losses = []
        bc_losses = []
        ic_losses = []
        all_gradients = []
        
        # Step 1: Compute losses and gradients for all models
        for i in range(self.num_models):
            with tf.GradientTape() as tape:
                # Compute losses
                pde_loss = self.compute_pde_loss(i, x_collocation)
                bc_loss = self.compute_bc_loss(i, boundary_points)
                ic_loss = self.compute_ic_loss(i, initial_points)
                
                # Weight the losses
                weighted_pde_loss = 1.0 * pde_loss
                weighted_bc_loss = 10.0 * bc_loss
                weighted_ic_loss = 10.0 * ic_loss
                
                # Compute total loss (without entropy regularization for now)
                total_loss = weighted_pde_loss + weighted_bc_loss + weighted_ic_loss
            
            # Store losses
            total_losses.append(total_loss)
            pde_losses.append(pde_loss)
            bc_losses.append(bc_loss)
            ic_losses.append(ic_loss)
            
            # Compute gradients
            gradients = tape.gradient(total_loss, self.models[i].trainable_variables)
            all_gradients.append(gradients)
        
        # Step 2: Compute ensemble average gradient
        # This is a simplified approach; in practice, we need to handle variable shapes properly
        avg_gradients = []
        for var_idx in range(len(all_gradients[0])):
            avg_grad = tf.zeros_like(all_gradients[0][var_idx])
            for model_idx in range(self.num_models):
                avg_grad += all_gradients[model_idx][var_idx]
            avg_grad /= self.num_models
            avg_gradients.append(avg_grad)
        
        # Step 3: Apply modified gradients with Langevin dynamics
        for i in range(self.num_models):
            # Apply entropy regularization to gradients
            modified_gradients = []
            for var_idx, grad in enumerate(all_gradients[i]):
                # Entropy feedback term: -α*β*(∇L - E[∇L])
                entropy_feedback = -self.entropy_reg.alpha * self.entropy_reg.beta * (grad - avg_gradients[var_idx])
                
                # Modified gradient: ∇L + entropy_feedback
                modified_grad = grad + entropy_feedback
                
                # Add Langevin noise: √(2η/β) * N(0,1)
                noise_scale = tf.sqrt(2 * self.optimizers[i].learning_rate / self.entropy_reg.beta)
                noise = noise_scale * tf.random.normal(shape=grad.shape)
                
                # Final modified gradient with Langevin noise
                final_modified_grad = modified_grad + noise
                
                modified_gradients.append(final_modified_grad)
            
            # Apply gradients
            self.optimizers[i].apply_gradients(zip(modified_gradients, self.models[i].trainable_variables))
        
        # Convert lists to tensors for return
        total_losses = tf.stack(total_losses)
        pde_losses = tf.stack(pde_losses)
        bc_losses = tf.stack(bc_losses)
        ic_losses = tf.stack(ic_losses)
        
        return total_losses, pde_losses, bc_losses, ic_losses
    
    def train(self, n_epochs, n_collocation_points=5000, print_frequency=100):
        """
        Run full-batch training of the whole ensemble

        Training points are sampled once and reused for every epoch.

        Parameters:
        -----------
        n_epochs : int
            Number of training epochs; also stored on the trainer for the
            parameter schedule used in train_step
        n_collocation_points : int
            Number of collocation points; a tenth of this number is requested
            for boundary and for initial points
        print_frequency : int
            Print a progress line every `print_frequency` epochs

        Returns:
        --------
        dict
            Loss history
        """
        print("Starting Entropy-Langevin PINN training...")
        start_time = time.time()

        self.n_epochs = n_epochs

        def as_float_tensor(points):
            return tf.convert_to_tensor(points, dtype=tf.float32)

        n_side_points = n_collocation_points // 10

        x_collocation = as_float_tensor(
            self.data_generator.generate_collocation_points(n_collocation_points)
        )

        # Replace the values of the generated dict in place with tensors
        boundary_points = self.data_generator.generate_boundary_points(n_side_points)
        boundary_points.update(
            {name: as_float_tensor(points) for name, points in boundary_points.items()}
        )

        initial_points = as_float_tensor(
            self.data_generator.generate_initial_points(n_side_points)
        )

        for epoch in range(n_epochs):
            total_losses, pde_losses, bc_losses, ic_losses = self.train_step(
                epoch, x_collocation, boundary_points, initial_points
            )

            # Ensemble means of each loss term
            avg_total_loss = tf.reduce_mean(total_losses)
            avg_pde_loss = tf.reduce_mean(pde_losses)
            avg_bc_loss = tf.reduce_mean(bc_losses)
            avg_ic_loss = tf.reduce_mean(ic_losses)

            # 'total' stores the ensemble mean; the other terms store one value per model
            self.loss_history['total'].append(avg_total_loss.numpy())
            for term, per_model in (('pde', pde_losses), ('bc', bc_losses), ('ic', ic_losses)):
                for i in range(self.num_models):
                    self.loss_history[term][i].append(per_model[i].numpy())

            if (epoch + 1) % print_frequency == 0:
                elapsed = time.time() - start_time
                report = [
                    f"Epoch {epoch+1}/{n_epochs}",
                    f"Avg Loss: {avg_total_loss.numpy():.6e}",
                    f"Avg PDE: {avg_pde_loss.numpy():.6e}",
                    f"Avg BC: {avg_bc_loss.numpy():.6e}",
                    f"Avg IC: {avg_ic_loss.numpy():.6e}",
                    f"Alpha: {self.entropy_reg.alpha.numpy():.4f}",
                    f"Beta: {self.entropy_reg.beta.numpy():.2f}",
                    f"Time: {elapsed:.2f}s",
                ]
                print(", ".join(report))

        total_time = time.time() - start_time
        print(f"Training completed in {total_time:.2f} seconds.")

        return self.loss_history
    
    def save_models(self, base_filename):
        """
        Save all models in the ensemble
        
        Parameters:
        -----------
        base_filename : str
            Base filename to save the models
        """
        for i, model in enumerate(self.models):
            filename = f"{base_filename}_model_{i}.h5"
            model.save_weights(filename)
        
        # Save loss history
        with open(f"{base_filename}_loss_history.pkl", 'wb') as f:
            pickle.dump(self.loss_history, f)
    
    def load_models(self, base_filename):
        """
        Load all models in the ensemble
        
        Parameters:
        -----------
        base_filename : str
            Base filename to load the models from
        """
        for i, model in enumerate(self.models):
            filename = f"{base_filename}_model_{i}.h5"
            model.load_weights(filename)
        
        # Load loss history if available
        try:
            with open(f"{base_filename}_loss_history.pkl", 'rb') as f:
                self.loss_history = pickle.load(f)
        except:
            print("No loss history found.")
    
    def predict_ensemble(self, x_test):
        """
        Make predictions with all models in the ensemble
        
        Parameters:
        -----------
        x_test : np.ndarray or tf.Tensor
            Test points
            
        Returns:
        --------
        tuple
            (mean_prediction, std_prediction)
        """
        # Convert to tensor if numpy array
        if isinstance(x_test, np.ndarray):
            x_test = tf.convert_to_tensor(x_test, dtype=tf.float32)
        
        # Make predictions with all models
        predictions = []
        for model in self.models:
            y_pred = model(x_test)
            predictions.append(y_pred.numpy())
        
        # Stack predictions
        predictions = np.stack(predictions, axis=0)
        
        # Compute mean and standard deviation
        mean_prediction = np.mean(predictions, axis=0)
        std_prediction = np.std(predictions, axis=0)
        
        return mean_prediction, std_prediction
    
    def plot_loss_history(self, save_path='figures/entropy_langevin_pinn_loss.png'):
        """
        Plot the loss history during training on a log-scaled y axis

        Shows the ensemble-average total loss, each model's PDE loss as a
        faint line, and the ensemble averages of the PDE, BC and IC losses.

        Parameters:
        -----------
        save_path : str or None
            Where to write the figure (PNG, 300 dpi). The parent directory is
            created if needed. Pass None to skip saving.
        """
        plt.figure(figsize=(12, 8))

        epochs = range(1, len(self.loss_history['total']) + 1)

        # Plot total loss
        plt.semilogy(epochs, self.loss_history['total'], 'k-', linewidth=2, label='Avg Total Loss')

        # Plot individual model PDE losses with transparency
        for i in range(self.num_models):
            plt.semilogy(epochs, self.loss_history['pde'][i], 'r-', linewidth=0.5, alpha=0.3)

        # Ensemble averages of the per-model series: (history key, line style, label)
        averaged_curves = (
            ('pde', 'r-', 'Avg PDE Loss'),
            ('bc', 'b-', 'Avg BC Loss'),
            ('ic', 'g-', 'Avg IC Loss'),
        )
        for key, style, label in averaged_curves:
            per_model = [self.loss_history[key][i] for i in range(self.num_models)]
            plt.semilogy(epochs, np.mean(per_model, axis=0), style, linewidth=1.5, label=label)

        plt.xlabel('Epochs', fontsize=14)
        plt.ylabel('Loss', fontsize=14)
        plt.title('Loss History (Entropy-Langevin PINN)', fontsize=16)
        plt.legend(fontsize=12)
        plt.grid(True, which='both', linestyle='--', alpha=0.6)

        plt.tight_layout()
        if save_path:
            # savefig does not create missing directories itself
            out_dir = os.path.dirname(save_path)
            if out_dir:
                os.makedirs(out_dir, exist_ok=True)
            plt.savefig(save_path, dpi=300, bbox_inches='tight')
        plt.show()
    
    def visualize_predictions(self, t_idx=5, output_idx=0, nx=50, ny=50, nt=10,
                              save_dir='notebooks/figures'):
        """
        Visualize predictions with uncertainty quantification

        Draws three side-by-side contour plots for one output at one time
        index: ensemble mean, ensemble standard deviation, and coefficient of
        variation (std / |mean|, clipped to [0, 0.5]).

        Parameters:
        -----------
        t_idx : int
            Time index to visualize (must be a valid index for `nt` points)
        output_idx : int
            Output index to visualize (0: SiH4, 1: Si, 2: H2, 3: SiH2, 4: T)
        nx, ny, nt : int
            Number of points in each dimension for visualization grid
        save_dir : str or None
            Directory for the PNG; created if missing. None skips saving.

        Raises:
        -------
        IndexError
            If `t_idx` or `output_idx` is out of range
        """
        species_names = ["SiH4", "Si", "H2", "SiH2", "Temperature"]
        n_outputs = len(species_names)

        # Check the indices up front, before the costly ensemble prediction
        if not -nt <= t_idx < nt:
            raise IndexError(f"t_idx {t_idx} is out of range for nt = {nt}")
        if not -n_outputs <= output_idx < n_outputs:
            raise IndexError(f"output_idx {output_idx} is out of range for {n_outputs} outputs")
        species = species_names[output_idx]

        # Generate uniform grid for visualization
        grid_points, grid_shape = self.data_generator.generate_uniform_grid(nx, ny, nt)

        # Make predictions
        mean_pred, std_pred = self.predict_ensemble(grid_points)

        # NOTE(review): assumes grid_shape is ordered (nx, ny, nt) and that the
        # grid points are laid out accordingly -- confirm against
        # CVDDataGenerator.generate_uniform_grid.
        full_shape = (grid_shape[0], grid_shape[1], grid_shape[2], n_outputs)
        mean_pred = mean_pred.reshape(full_shape)
        std_pred = std_pred.reshape(full_shape)

        # Axis coordinates rebuilt from the domain bounds (presumably the same
        # spacing the generator uses)
        x = np.linspace(self.domain_bounds['x_min'], self.domain_bounds['x_max'], nx)
        y = np.linspace(self.domain_bounds['y_min'], self.domain_bounds['y_max'], ny)
        t = np.linspace(self.domain_bounds['t_min'], self.domain_bounds['t_max'], nt)
        time_val = t[t_idx]

        # Extract mean and std predictions for the specified time and output
        mean_slice = mean_pred[:, :, t_idx, output_idx]
        std_slice = std_pred[:, :, t_idx, output_idx]

        # Coefficient of variation; epsilon avoids division by zero and the
        # clip keeps extreme ratios from dominating the colour scale
        epsilon = 1e-10
        cv = np.clip(std_slice / (np.abs(mean_slice) + epsilon), 0, 0.5)

        X, Y = np.meshgrid(x, y, indexing='ij')

        # One row of three panels: (field, colormap, colorbar label, title)
        panels = (
            (mean_slice, 'viridis', f"{species}",
             f"Mean {species} at t = {time_val:.2f}s"),
            (std_slice, 'plasma', f"Std Dev of {species}",
             f"Uncertainty in {species} at t = {time_val:.2f}s"),
            (cv, 'hot', 'Coefficient of Variation',
             f"Relative Uncertainty at t = {time_val:.2f}s"),
        )

        fig = plt.figure(figsize=(18, 6))
        gs = GridSpec(1, 3, figure=fig)
        for col, (field, cmap, cbar_label, title) in enumerate(panels):
            ax = fig.add_subplot(gs[0, col])
            contours = ax.contourf(X, Y, field, 50, cmap=cmap)
            plt.colorbar(contours, ax=ax, label=cbar_label)
            ax.set_xlabel('x (m)')
            ax.set_ylabel('y (m)')
            ax.set_title(title)

        plt.tight_layout()
        if save_dir is not None:
            # savefig does not create missing directories itself
            if save_dir:
                os.makedirs(save_dir, exist_ok=True)
            plt.savefig(os.path.join(save_dir, f'prediction_{species}_t{t_idx}.png'),
                        dpi=300, bbox_inches='tight')
        plt.show()



In [6]:
#---------------------------------------------------------------------------
# Section 3: Demo and Testing
#---------------------------------------------------------------------------

def create_pinn_model(hidden_layers=None, activation='tanh'):
    """
    Create a PINN model with the specified architecture and build it

    Parameters:
    -----------
    hidden_layers : list of int, optional
        Hidden layer widths forwarded unchanged to nb1.PINN. When omitted,
        a fresh list of six 64-unit layers is created for this call, so
        models built with the default never share one list object.
    activation : str
        Activation name forwarded to nb1.PINN

    Returns:
    --------
    model
        The nb1.PINN instance after one forward pass on a dummy input
    """
    if hidden_layers is None:
        # Build the default per call: a list literal in the signature would be
        # a single object shared by every model created with the defaults.
        hidden_layers = [64] * 6

    model = nb1.PINN(hidden_layers, activation)

    # Run one forward pass on a (1, 3) zero input so the model is built
    # before it is handed to a trainer.
    dummy_input = tf.zeros((1, 3))
    _ = model(dummy_input)
    return model

def create_model_ensemble(num_models=10, hidden_layers=None, activation='tanh'):
    """
    Create an ensemble of PINN models

    Parameters:
    -----------
    num_models : int
        Number of models to create; range(num_models) drives the loop, so
        zero or a negative value yields an empty list
    hidden_layers : list of int, optional
        Hidden layer widths passed to create_pinn_model for every member.
        When omitted, each member receives its own fresh list of six
        64-unit layers instead of one list shared by the whole ensemble.
    activation : str
        Activation name passed to create_pinn_model

    Returns:
    --------
    list
        The created models, in creation order
    """
    def _layers_for_member():
        # Fresh default per member; caller-supplied layers are passed through as-is.
        return [64] * 6 if hidden_layers is None else hidden_layers

    return [
        create_pinn_model(_layers_for_member(), activation)
        for _ in range(num_models)
    ]

# Demo of Entropy-Langevin training
def demo_entropy_langevin(train_models=True, compare_with_traditional=True, n_epochs=1000, ensemble_size=10):
    """
    Demonstrate Entropy-Langevin PINN training compared to traditional training

    Optionally builds a traditional baseline PINN, then builds an ensemble for
    the Entropy-Langevin trainer. Models are either trained and saved or
    loaded from disk; loss histories are plotted and ensemble predictions are
    visualized for three outputs.

    Parameters:
    -----------
    train_models : bool
        Whether to train new models (and save them) or load existing ones
    compare_with_traditional : bool
        Whether to compare with traditional PINN training
    n_epochs : int
        Number of training epochs passed to each trainer's train()
    ensemble_size : int
        Number of models in the Entropy-Langevin ensemble

    Returns:
    --------
    tuple
        (traditional_trainer, el_trainer); traditional_trainer is None when
        compare_with_traditional is False
    """
    # Output locations, defined once so the save and load branches cannot drift
    # apart (the load path previously carried a stray trailing space).
    models_dir = "notebooks/models"
    figures_dir = "notebooks/figures"
    traditional_model_path = "notebooks/models/traditional_pinn.h5"
    el_model_path = "notebooks/models/entropy_langevin_pinn.h5"

    # Make sure the output directories exist up front so a long training run
    # is not lost to a missing directory when saving. The prediction plots are
    # expected to be written under notebooks/figures.
    os.makedirs(figures_dir, exist_ok=True)
    if train_models:
        os.makedirs(models_dir, exist_ok=True)

    # Define domain bounds
    domain_bounds = {
        'x_min': 0.0,
        'x_max': 0.1,
        'y_min': 0.0,
        'y_max': 0.05,
        't_min': 0.0,
        't_max': 10.0
    }

    # Create physical parameters
    phys_params = nb1.CVDPhysicalParams()

    traditional_trainer = None
    if compare_with_traditional:
        # Create traditional PINN
        traditional_model = create_pinn_model()
        traditional_trainer = TraditionalPINNTrainer(traditional_model, phys_params, domain_bounds)

        if train_models:
            # Train traditional PINN
            print("\n====== Training Traditional PINN ======")
            traditional_trainer.train(n_epochs=n_epochs)

            # Save traditional model
            traditional_trainer.save_model(traditional_model_path)
        else:
            # Load traditional model
            print("\n====== Loading Traditional PINN ======")
            traditional_trainer.load_model(traditional_model_path)

        # Plot traditional loss history
        traditional_trainer.plot_loss_history()

    # Create ensemble of PINNs for Entropy-Langevin
    ensemble_models = create_model_ensemble(num_models=ensemble_size)

    # Create Entropy-Langevin trainer
    el_trainer = EntropyLangevinPINNTrainer(ensemble_models, phys_params, domain_bounds)

    if train_models:
        # Train Entropy-Langevin PINN
        print("\n====== Training Entropy-Langevin PINN ======")
        el_trainer.train(n_epochs=n_epochs)

        # Save Entropy-Langevin models
        el_trainer.save_models(el_model_path)
    else:
        # Load Entropy-Langevin models
        print("\n====== Loading Entropy-Langevin PINN ======")
        el_trainer.load_models(el_model_path)

    # Plot Entropy-Langevin loss history
    el_trainer.plot_loss_history()

    # Visualize predictions with uncertainty
    print("\n====== Visualizing Predictions ======")
    # Output indices: 0 = SiH4 concentration, 1 = Si concentration, 4 = temperature
    for output_idx in (0, 1, 4):
        el_trainer.visualize_predictions(t_idx=5, output_idx=output_idx)

    return (traditional_trainer, el_trainer)


# Run the demo with fresh training; the remaining arguments keep their defaults,
# so the traditional baseline is trained too and n_epochs is 1000.
# NOTE(review): the recorded output of this cell shows the traditional PINN loss
# already NaN at epoch 100 (about 297 s in) before the run was interrupted —
# investigate the divergence in the trainer before relying on these results.
demo_entropy_langevin(train_models=True)


Starting traditional PINN training...
Epoch 100/1000, Loss: nan, PDE: nan, BC: nan, IC: nan, Time: 296.70s


KeyboardInterrupt: 