In [1]:
!pip install --upgrade "protobuf<5.0.0"

import google.protobuf
print("protobuf version:", google.protobuf.__version__)


Collecting protobuf<5.0.0
  Downloading protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl.metadata (541 bytes)
Downloading protobuf-4.25.8-cp37-abi3-manylinux2014_x86_64.whl (294 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m294.9/294.9 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: protobuf
  Attempting uninstall: protobuf
    Found existing installation: protobuf 6.33.0
    Uninstalling protobuf-6.33.0:
      Successfully uninstalled protobuf-6.33.0
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
bigframes 2.12.0 requires google-cloud-bigquery-storage<3.0.0,>=2.30.0, which is not installed.
opentelemetry-proto 1.37.0 requires protobuf<7.0,>=5.0, but you have protobuf 4.25.8 which is incompatible.
a2a-sdk 0.3.10 requires protobuf>=5.29.5, but you have protobuf 4.25.8 which is incom

In [2]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

2025-11-26 14:46:57.178212: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1764168417.433629      47 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1764168417.508553      47 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [3]:
### Defines the forward pass of NN. (Step 1: Define NN)

# create_model(): defines and returns a dictionary of Keras Dense layers
def create_model():
    """Build the network as an ordered dictionary of Keras Dense layers.

    Returns:
        dict: 'hidden_1', 'hidden_2' and 'hidden_3' each map to a
        Dense(50, activation='tanh') layer; 'output_layer' maps to a Dense(1)
        layer without activation. Keys are inserted in that order.
    """
    layers = {}
    for idx in (1, 2, 3):
        layers[f'hidden_{idx}'] = tf.keras.layers.Dense(50, activation='tanh')
    layers['output_layer'] = tf.keras.layers.Dense(1)
    return layers

def call_model(model, x):
    """Run the forward pass: feed x through the three hidden layers, then the output layer.

    Args:
        model: dictionary holding callables under the keys 'hidden_1',
            'hidden_2', 'hidden_3' and 'output_layer'.
        x: input passed to the first hidden layer.

    Returns:
        The value produced by model['output_layer'].
    """
    out = x
    for key in ('hidden_1', 'hidden_2', 'hidden_3', 'output_layer'):
        out = model[key](out)
    return out

### Define the PDE using tf.GradientTape (Step 2: Compute Derivatives)
'''pde() evaluates the residual of the 2nd-order differential equation for a model's prediction.
   'tf.GradientTape' is used to compute the 1st (y') and 2nd (y") derivatives
   of the model's output with respect to x. pde() returns y" + pi^2*sin(pi*x).
'''

def pde(x, model):
    """
    Evaluate the residual of the governing equation
        y''(x) + pi^2 * sin(pi * x) = 0
    for the network approximation y_hat(x) = N_theta(x).

    Args:
        x: collocation points; watched by the gradient tapes, so it is expected
           to be a TensorFlow tensor.
        model: layer dictionary consumed by call_model().

    Returns:
        r(x; theta) = d^2 y_hat / dx^2 + pi^2 * sin(pi * x),
        which training drives toward zero at the collocation points.
    """
    # Two nested tapes: the inner one yields dy_hat/dx, and because that
    # gradient is taken while the outer tape is still recording, the outer
    # tape can differentiate it once more.
    with tf.GradientTape() as outer_tape:
        outer_tape.watch(x)

        with tf.GradientTape() as inner_tape:
            inner_tape.watch(x)
            # Forward pass: y_hat(x) = N_theta(x)
            y_hat = call_model(model, x)

        # First derivative dy_hat/dx (recorded by the outer tape)
        dy_dx = inner_tape.gradient(y_hat, x)

    # Second derivative d^2 y_hat/dx^2
    d2y_dx2 = outer_tape.gradient(dy_dx, x)

    # Source term of the equation moved to the left-hand side
    forcing = np.pi**2 * tf.sin(np.pi * x)
    return d2y_dx2 + forcing


# Define the loss function (Step 3~4: Define residual and loss function)
def loss(model, x, x_bc, y_bc):
    """Total PINN loss: mean squared PDE residual plus mean squared boundary error.

    Args:
        model: layer dictionary consumed by pde() and call_model().
        x: interior collocation points at which the PDE residual is evaluated.
        x_bc: boundary points.
        y_bc: target values at the boundary points.

    Returns:
        Scalar tensor mean(residual^2) + mean((y_bc - prediction(x_bc))^2).
    """
    # Residual of the governing equation at the interior points
    residual = pde(x, model)

    # Mismatch between the prescribed boundary values and the network output
    bc_error = y_bc - call_model(model, x_bc)

    mse_pde = tf.reduce_mean(tf.square(residual))
    mse_bc = tf.reduce_mean(tf.square(bc_error))

    # Both terms are weighted equally
    return mse_pde + mse_bc


# Training step (Step 5: Training)
def train_step(model, x, x_bc, y_bc, optimizer):
    """
    Run a single optimization step.

    The total loss (PDE residual + boundary condition) is evaluated under a
    gradient tape, differentiated with respect to every trainable variable of
    every layer in `model`, and the optimizer applies the resulting update.

    Args:
        model: layer dictionary as returned by create_model().
        x: interior collocation points.
        x_bc, y_bc: boundary points and their target values.
        optimizer: object exposing apply_gradients().

    Returns:
        The scalar loss computed before the update, for logging.
    """
    # Forward pass recorded for automatic differentiation
    with tf.GradientTape() as tape:
        loss_val = loss(model, x, x_bc, y_bc)

    # Flatten the per-layer variable lists into [W1, b1, W2, b2, ..., W_out, b_out].
    # Collected after the forward pass, matching the original statement order.
    params = []
    for layer in model.values():
        params.extend(layer.trainable_variables)

    # Backward pass: [dL/dW1, dL/db1, ..., dL/dW_out, dL/db_out]
    gradients = tape.gradient(loss_val, params)

    # Let the optimizer update each variable from its (gradient, variable) pair
    optimizer.apply_gradients(zip(gradients, params))

    return loss_val


In [4]:
#### Train/Test process

# Generate interior training data (collocation points) for the PDE domain x ∈ [-1, 1]
x_train = tf.convert_to_tensor(
    np.linspace(-1, 1, 100)[:, np.newaxis],   # shape: (100, 1)
    dtype=tf.float32,
)

# Boundary data as tensors: x = -1 and x = 1, with y(-1) = 0 and y(1) = 0
x_bc = tf.convert_to_tensor([[-1.0], [1.0]], dtype=tf.float32)
y_bc = tf.convert_to_tensor([[0.0], [0.0]], dtype=tf.float32)

### PINN model (dictionary of layers: hidden_1, hidden_2, hidden_3, output_layer)
model = create_model()


### Optimizer

## Active choice: Adam with an exponentially decaying learning rate.
## The schedule is configured with initial rate 1e-3, decay_steps 1000, decay_rate 0.9.
lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate=1e-3, decay_steps=1000, decay_rate=0.9
)
optimizer = tf.keras.optimizers.Adam(learning_rate=lr_schedule)

## Alternatives (uncomment exactly one to replace the optimizer above):
##   constant-rate Adam
# optimizer = tf.keras.optimizers.Adam(learning_rate=1e-3)
##   SGD with momentum
# optimizer = tf.keras.optimizers.SGD(learning_rate=1e-2, momentum=0.9)
##   RMSprop
# optimizer = tf.keras.optimizers.RMSprop(learning_rate=1e-3)
##   Nadam (Adam + Nesterov momentum)
# optimizer = tf.keras.optimizers.Nadam(learning_rate=1e-3)


### Training loop

# Number of optimizer steps; every step uses all collocation points
epochs = 2000

for epoch in range(epochs):
    # Forward pass, PDE + boundary loss, backward pass and parameter update
    loss_value = train_step(model, x_train, x_bc, y_bc, optimizer)

    # Only report on every 1000th epoch
    if epoch % 1000:
        continue
    print(f"Epoch {epoch}: Loss = {loss_value.numpy()}")

2025-11-26 14:47:45.749253: E external/local_xla/xla/stream_executor/cuda/cuda_driver.cc:152] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Epoch 0: Loss = 47.71510314941406
Epoch 1000: Loss = 0.0009805683512240648


In [None]:
# Predict the solution.
# The evaluation grid is kept as a NumPy array (used by np.sin and matplotlib),
# with a separate tensor copy for the network, as in the later evaluation cells.
x_test = np.linspace(-1, 1, 1000).reshape(-1, 1).astype(np.float32)
x_test_tf = tf.convert_to_tensor(x_test)
y_pred = call_model(model, x_test_tf).numpy()

# Analytical solution y(x) = sin(pi x)
y_true = np.sin(np.pi * x_test)


# Plot the results
plt.figure(figsize=(5, 3))
plt.plot(x_test, y_true, 'b-', label='Analytical Solution')
plt.plot(x_test, y_pred, 'r--', label='PINN Solution')
plt.xlabel('x')
plt.ylabel('y')
plt.legend()
plt.title('Comparison of Analytical Solution and PINN Solution')
plt.show()

In [None]:
import numpy as np
import tensorflow as tf
import scipy.optimize  # for L-BFGS-B


#### Two-stage parameter tuning
### Stage 1: Adam pre-training

def train_with_adam(model, x_train, x_bc, y_bc,
                    adam_epochs=2000, print_every=200):
    """
    Stage 1: coarse optimization of the PINN parameters with Adam, intended to
    be followed by L-BFGS fine-tuning.

    Args:
        model: layer dictionary; its variables are updated in place.
        x_train: interior collocation points.
        x_bc, y_bc: boundary points and their target values.
        adam_epochs: number of train_step() calls to perform.
        print_every: the loss is printed whenever epoch % print_every == 0.

    Returns:
        None.
    """
    schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=1e-3, decay_steps=1000, decay_rate=0.9
    )
    adam = tf.keras.optimizers.Adam(learning_rate=schedule)

    for epoch in range(adam_epochs):
        # One full-batch Adam update
        loss_value = train_step(model, x_train, x_bc, y_bc, adam)

        # Skip logging except on the reporting epochs
        if epoch % print_every != 0:
            continue
        print(f"[Adam] Epoch {epoch}: loss = {loss_value.numpy():.6e}")



### 2) Functions for Stage 2 (L-BFGS) 
## If a very large number of points (hundreds of thousands to millions) is sampled,
## mini-batch training can be used for the 2nd stage instead of L-BFGS.

def train_with_lbfgs(model, x_train, x_bc, y_bc, maxiter=5000):
    """
    Stage 2: fine-tune all PINN parameters with SciPy's L-BFGS-B.

    The trainable variables of every layer in `model` are flattened into one
    float64 vector for SciPy. Each objective evaluation writes the candidate
    vector back into the TensorFlow variables (as float32) and returns the
    loss together with the flattened gradient.

    Args:
        model: layer dictionary as returned by create_model(); updated in place.
        x_train: interior collocation points passed to loss().
        x_bc, y_bc: boundary points and their target values passed to loss().
        maxiter: value of the 'maxiter' option handed to L-BFGS-B.

    Returns:
        The result object returned by scipy.optimize.minimize. On return the
        model variables hold `result.x`.
    """
    # Run one forward pass so that layers which have never been called create
    # their weights; otherwise the variable list below would be empty and the
    # concatenation would fail.
    call_model(model, x_bc)

    # 1) Collect all trainable variables (W, b, etc.) from the model
    variables = [var
                 for layer in model.values()
                 for var in layer.trainable_variables]

    # 2) Shapes and element counts, shared by the pack and unpack logic
    shapes = [v.shape for v in variables]
    sizes = [int(np.prod(s)) for s in shapes]

    # 3) Initial parameter vector theta0 (all variables flattened and concatenated)
    flat_vars = [tf.reshape(v, [-1]) for v in variables]
    theta0 = tf.concat(flat_vars, axis=0).numpy().astype(np.float64)

    def assign_parameters(theta_np):
        """Write the flat parameter vector theta_np back into the TF variables."""
        theta_tf = tf.convert_to_tensor(theta_np, dtype=tf.float32)
        idx = 0
        for v, size, shape in zip(variables, sizes, shapes):
            # Slice out this variable's portion and restore its shape
            v.assign(tf.reshape(theta_tf[idx:idx + size], shape))
            idx += size

    # 4) Objective for SciPy: returns (loss, grad)
    def objective(theta_np):
        """
        theta_np: 1D numpy array containing all model parameters.
        Returns:
            loss_value (float)
            grad_np (1D float64 numpy array of gradients)
        """
        assign_parameters(theta_np)

        with tf.GradientTape() as tape:
            loss_val = loss(model, x_train, x_bc, y_bc)
        grads = tape.gradient(loss_val, variables)

        # Pack the gradients in the same order as theta
        grad_flat = tf.concat([tf.reshape(g, [-1]) for g in grads], axis=0)

        # SciPy expects a Python float and a float64 array
        return float(loss_val.numpy()), grad_flat.numpy().astype(np.float64)

    # 5) Call SciPy's L-BFGS-B optimizer
    print("Starting L-BFGS-B (no helper functions)...")

    result = scipy.optimize.minimize(
        fun=objective,
        x0=theta0,
        jac=True,           # objective returns (loss, grad)
        method='L-BFGS-B',
        options={
            'maxiter': maxiter,
            'disp': True,
            'ftol': 1e-12,
        }
    )

    # The last point evaluated by the optimizer is not guaranteed to be the
    # returned solution, so load result.x explicitly.
    assign_parameters(result.x)

    print("L-BFGS-B finished.")
    print("  success :", result.success)
    print("  message :", result.message)
    print("  final loss:", result.fun)

    return result

In [None]:
### Full training pipeline

### 1) Prepare data (same as before)
x_train = tf.convert_to_tensor(
    np.linspace(-1, 1, 100).astype(np.float32)[:, np.newaxis]
)

x_bc = tf.convert_to_tensor(np.array([[-1.0], [1.0]], dtype=np.float32))
y_bc = tf.convert_to_tensor(np.array([[0.0], [0.0]], dtype=np.float32))

### 2) Create PINN model
model = create_model()

### 3) Stage 1: Adam pre-training
train_with_adam(
    model, x_train, x_bc, y_bc,
    adam_epochs=2000,
    print_every=200,
)

### 4) Stage 2: L-BFGS-B fine-tuning
lbfgs_result = train_with_lbfgs(
    model, x_train, x_bc, y_bc,
    maxiter=5000,
)

### 5) Evaluate and visualize

## Fine grid for plotting (NumPy copy for plotting, tensor copy for the network)
x_test = np.linspace(-1.0, 1.0, 200).astype(np.float32)[:, np.newaxis]
x_test_tf = tf.convert_to_tensor(x_test)

## PINN prediction after two-stage training
y_pred = call_model(model, x_test_tf).numpy()

## Analytical solution for this PDE:
# y''(x) + pi^2 sin(pi x) = 0, y(-1)=0, y(1)=0  →  y(x) = sin(pi x)
y_true = np.sin(np.pi * x_test)

## Mean squared error on the test grid
mse_test = np.mean(np.square(y_true - y_pred))
print(f"\nTest MSE on grid: {mse_test:.6e}")

## Figure 1: true solution vs PINN prediction
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(x_test, y_true, label="Analytical solution: sin(pi x)")
ax.plot(x_test, y_pred, "--", label="PINN prediction")
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_title("PDE solution: true vs PINN (Adam + L-BFGS)")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

## Figure 2: prediction error
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(x_test, y_pred - y_true)
ax.set_xlabel("x")
ax.set_ylabel("Error")
ax.set_title("PINN error: y_pred(x) - y_true(x)")
ax.grid(True)
fig.tight_layout()
plt.show()


In [None]:
#### Two-stage parameter tuning
### Stage 1: Adam pre-training

## Stage 1 function: Adam pre-training (full-batch)
def train_with_adam_fullbatch(model, x_train, x_bc, y_bc,
                              adam_epochs=2000, print_every=200):
    """
    Stage 1: full-batch Adam pre-training.

    Every epoch is a single train_step() call on all of x_train, using Adam
    with an exponentially decaying learning-rate schedule.

    Args:
        model: layer dictionary; its variables are updated in place.
        x_train: interior collocation points.
        x_bc, y_bc: boundary points and their target values.
        adam_epochs: number of epochs (optimizer steps).
        print_every: the loss is printed whenever epoch % print_every == 0.

    Returns:
        None.
    """
    decay = tf.keras.optimizers.schedules.ExponentialDecay(
        initial_learning_rate=1e-3,
        decay_steps=1000,
        decay_rate=0.9,
    )
    adam = tf.keras.optimizers.Adam(learning_rate=decay)

    epoch = 0
    while epoch < adam_epochs:
        loss_value = train_step(model, x_train, x_bc, y_bc, adam)

        is_report_epoch = (epoch % print_every == 0)
        if is_report_epoch:
            print(f"[Stage 1 - Adam full] Epoch {epoch}: loss = {loss_value.numpy():.6e}")

        epoch += 1


## Stage 2 function: mini-batch fine-tuning
def train_with_minibatch(model, x_train, x_bc, y_bc,
                         epochs=20, batch_size=256, lr=1e-4):
    """
    Stage 2: fine-tune the parameters with mini-batch Adam at a constant
    learning rate.

    The interior points are reshuffled and split into batches; every batch
    triggers one train_step() call that also uses the full boundary data.

    Args:
        model: layer dictionary; its variables are updated in place.
        x_train: interior collocation points, sliced along the first axis.
        x_bc, y_bc: boundary points and their target values (used in every step).
        epochs: number of passes over the batched dataset.
        batch_size: number of interior points per batch.
        lr: learning rate passed to Adam.

    Returns:
        None. After each epoch the loss of the last processed batch is printed.
    """
    dataset = tf.data.Dataset.from_tensor_slices(x_train)
    dataset = dataset.shuffle(buffer_size=len(x_train)).batch(batch_size)

    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

    for epoch in range(epochs):
        for x_batch in dataset:
            # train_step() already takes the interior points as an argument, so
            # it is called directly with the current batch (the previously
            # referenced `train_step_batch` is not defined in this notebook).
            loss_value = train_step(model, x_batch, x_bc, y_bc, optimizer)
        print(f"[Stage 2 - Mini-batch] Epoch {epoch}: loss = {loss_value.numpy():.6e}")



## Interior collocation points x ∈ [-1, 1]
x_train = tf.convert_to_tensor(
    np.linspace(-1.0, 1.0, 100).astype(np.float32)[:, np.newaxis]
)

## Boundary points and values: y(-1) = 0, y(1) = 0
x_bc = tf.convert_to_tensor(np.array([[-1.0], [1.0]], dtype=np.float32))
y_bc = tf.convert_to_tensor(np.array([[0.0], [0.0]], dtype=np.float32))


### Train the PINN

# Fresh model for this experiment
model = create_model()

# Stage 1: full-batch Adam
train_with_adam_fullbatch(
    model, x_train, x_bc, y_bc,
    adam_epochs=2000,
    print_every=200,
)

# Stage 2: mini-batch fine-tuning
# (the same small x_train is reused here; in a real large-scale case
# x_train would contain many more points)
train_with_minibatch(
    model, x_train, x_bc, y_bc,
    epochs=50,
    batch_size=32,
    lr=1e-4,
)


# Evaluate and visualize results

# Fine grid for visualization (NumPy copy for plotting, tensor copy for the network)
x_test = np.linspace(-1.0, 1.0, 200).astype(np.float32)[:, np.newaxis]
x_test_tf = tf.convert_to_tensor(x_test)

# PINN prediction
y_pred = call_model(model, x_test_tf).numpy()

# Analytical solution: y(x) = sin(pi * x)
y_true = np.sin(np.pi * x_test)

# Mean squared error on the test grid
mse_test = np.mean(np.square(y_true - y_pred))
print(f"\nTest MSE on grid: {mse_test:.6e}")

# Plot: true solution vs PINN prediction
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(x_test, y_true, label="Analytical solution: sin(pi x)")
ax.plot(x_test, y_pred, "--", label="PINN prediction")
ax.set_xlabel("x")
ax.set_ylabel("y")
ax.set_title("PDE solution: true vs PINN (two-stage training)")
ax.legend()
ax.grid(True)
fig.tight_layout()
plt.show()

# Optional: plot the error
fig, ax = plt.subplots(figsize=(6, 4))
ax.plot(x_test, y_pred - y_true)
ax.set_xlabel("x")
ax.set_ylabel("Prediction error")
ax.set_title("PINN error: y_pred(x) - y_true(x)")
ax.grid(True)
fig.tight_layout()
plt.show()


