# Reduced-dimension Emulator: Reaction-Diffusion example

### Overview
In this example, we aim to create an _emulator_ to generate solutions to a 2D parameterized reaction-diffusion problem governed by the following partial differential equations:

$$
\dot{u} = (1 - (u^2 + v^2)) u + \beta (u^2 + v^2) v + d (u_{xx} + u_{yy}),
$$
$$
\dot{v} = -\beta (u^2 + v^2) u + (1 - (u^2 + v^2)) v + d (v_{xx} + v_{yy}),
$$

where:
- $u$ and $v$ are the concentrations of two species,
- $\beta$ and $d$ control the reaction and diffusion terms.

This system exhibits complex spatio-temporal dynamics such as spiral waves.

In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt 
import autoemulate as ae
from tqdm import tqdm

from autoemulate_design import LatinHypercube
from autoemulate.simulations.reaction_diffusion import simulate_reaction_diffusion
from autoemulate.compare import AutoEmulate
from autoemulate.plotting import _predict_with_optional_std

### 1) Data generation
Data are computed with a numerical [_simulator_](https://github.com/dynamicslab/pysindy/blob/master/examples/10_PDEFIND_examples.ipynb) based on a Fourier spectral method.
The simulator takes two inputs: the reaction parameter $\beta$ and the diffusion parameter $d$.

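We sample 30 sets of parameters using Latin Hypercube sampling and run the simulator for each of them over a sequence of time steps to get the solutions `Y`. Each row of the input matrix `X` combines one parameter set with one time value.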


In [None]:
# When True (and `train` is also True), the generated X/Y arrays are written to CSV.
save = False
# When True, the simulator is run to generate the data; when False, the data are loaded from CSV instead.
train = False

In [None]:
# Spatial resolution: every simulated field is flattened to n * n values below.
n = 50

# Reaction-diffusion parameters
beta = (1., 2.)  # lower and upper bounds for the reaction coefficient
d = (0.05, 0.3)  # lower and upper bounds for the diffusion coefficient
n_param_samples = 30
lhd = LatinHypercube([beta, d])
X_param = lhd.sample(n_param_samples)

# Time configuration
dt, T = 0.5, 12.  # time step and total time
# NOTE(review): int(T/dt) points with endpoint=True gives a spacing of
# T / (int(T/dt) - 1) rather than dt -- confirm this matches the times at
# which the simulator returns its snapshots.
X_time = np.linspace(0, T, int(T/dt), endpoint=True)
n_time_samples = len(X_time)

# Create the input array (combining parameters and time)
param_expanded = np.repeat(X_param, n_time_samples, axis=0)  # (n_samples * n_time_step, n_param)
time_expanded = np.tile(X_time, n_param_samples)  # (n_samples * n_time_step,)
X = np.column_stack((param_expanded, time_expanded))  # (n_samples * n_time_step, n_param + 1)

# Location of the cached dataset (shared by the save and load paths)
data_folder = "autoemulate/data/reactiondiffusion"
X_file = os.path.join(data_folder, "X_time.csv")
Y_file = os.path.join(data_folder, "Y_time.csv")

if train:
    # Run the simulator once per parameter set, keeping the whole time series
    U, V = zip(*[simulate_reaction_diffusion(x, n=n, dt=dt, T=T, return_timeseries=True) for x in tqdm(X_param)])
    U = np.stack(U)
    V = np.stack(V)
    # One row per (parameter set, time) pair, one column per grid point
    U = U.reshape(n_param_samples*n_time_samples, n*n)
    V = V.reshape(n_param_samples*n_time_samples, n*n)

    # Let's consider as output the concentration of the species U
    Y = U

    if save:
        # Create the data folder if it doesn't exist, then save the data
        os.makedirs(data_folder, exist_ok=True)
        pd.DataFrame(X).to_csv(X_file, index=False)
        pd.DataFrame(Y).to_csv(Y_file, index=False)
else:
    # Load the previously saved data (the folder is not created here:
    # there is nothing to read from a folder that does not exist yet)
    X = pd.read_csv(X_file).values
    Y = pd.read_csv(Y_file).values
    # The Latin Hypercube draw above is random, so it does not match the
    # loaded data. Recover the parameter sets from the loaded inputs instead:
    # rows are grouped per parameter set and the last column is time.
    X_param = X[::n_time_samples, :-1]


print(f"shapes: input X: {X.shape}, output Y: {Y.shape}\n")

`X` and `Y` are matrices where each row corresponds to one snapshot of a simulation, i.e. one parameter set at one time. In the input matrix `X` the columns are the input parameters $\beta$ and $d$ followed by the time $t$.
In the output matrix `Y` each column indicates a spatial location where the solution (i.e. the concentration of $U$ at that time) is computed. <br>
We consider a 2D spatial grid of $50\times 50$ points, therefore each row of `Y` corresponds to a 2500-dimensional vector!

Let’s now plot the simulated data to see what the reaction-diffusion pattern looks like.

In [None]:
# Show the last time snapshot of the first three parameter sets.
plt.figure(figsize=(15, 4.5))
time = n_time_samples - 1

# View the flat arrays as (parameter set, time, ...) so both can be indexed alike.
Y_fields = Y.reshape(n_param_samples, n_time_samples, n, n)
X_rows = X.reshape(n_param_samples, n_time_samples, -1)

for param in range(3):
    # Read beta, d and t from the input row that belongs to the plotted field.
    # X_param is a fresh random draw and does not describe Y when the data
    # were loaded from CSV, so it must not be used for the labels.
    beta_value, d_value, t_value = X_rows[param][time]

    plt.subplot(1, 3, 1 + param)
    plt.imshow(Y_fields[param][time], interpolation='bilinear')
    plt.axis('off')
    plt.xlabel('x', fontsize=12)
    plt.ylabel('y')
    plt.title(r'$\beta = {:.2f}, d = {:.2f}, t = {:.1f}$'.format(beta_value, d_value, t_value), fontsize=12)
    plt.colorbar(fraction=0.046)
plt.suptitle('2D solutions to the reaction-diffusion system for different parameters', fontsize=15)
plt.show()


## 2) Reduced-dimension Emulator
The numerical simulator is computationally expensive to run, thus we aim to replace it with a fast _emulator_.
As the output we aim to emulate is the full spatial field of the concentration of $U$, which is high-dimensional, we employ dimensionality reduction techniques to create a faster and more efficient emulator.

You can do so by selecting `reduce_dim_output=True` and indicate which dimensionality reducer `dim_reducer_output` you want to use among:
- `"PCA"`: Principal Component Analysis (default),
- `"AE"`: Autoencoder,
- `"VAE"`: Variational Autoencoder,

which will be trained together with the emulator.

In [None]:
from sklearn.decomposition import PCA
from sklearn.model_selection import KFold

em = AutoEmulate()

# Two-fold shuffled cross-validation. The seed is drawn at random, so the
# folds differ from one run of this cell to the next.
cross_validator = KFold(n_splits=2, shuffle=True, random_state=np.random.randint(1e5))

# Candidate output reducers: PCA with reduced_dim set to 16, 32 and 64.
preprocessing_methods = [
    {"name": "PCA", "params": {"reduced_dim": reduced_dim}}
    for reduced_dim in (16, 32, 64)
]

em.setup(
    X,
    Y,
    models=["gp"],
    scale_output=False,
    cross_validator=cross_validator,
    reduce_dim_output=True,
    preprocessing_methods=preprocessing_methods,
)

# Run the comparison and keep the model it returns.
best_model = em.compare()

In [None]:
# Display the model returned by em.compare()
best_model

## 3) Summarising cross-validation results
We can look at the cross-validation results to see which model provides the best emulator. 

In [None]:
# Show the cross-validation summary (presumably one entry per model/preprocessing combination -- confirm against AutoEmulate)
em.summarise_cv()

We can select the best performing emulator model (in this case GaussianProcess) and see how it performs on the test-set, which AutoEmulate automatically sets aside.

We can plot the test-set performance for the chosen emulator.

In [None]:
# Evaluate the selected emulator (on the test set)
# Look the model up by name, then pass it to em.evaluate.
gp = em.get_model('GaussianProcess')
em.evaluate(gp)

In [None]:
# Column 2 of X is the time input (columns are beta, d, time); the output indices
# pick three columns of Y, i.e. three positions of the flattened n * n grid.
# NOTE(review): exact meaning of input_index/output_index is defined by AutoEmulate -- confirm.
em.plot_eval(gp, input_index=[2], output_index=[0,130, 1000])

## 4) Refitting the model on the full dataset
AutoEmulate splits the dataset into a training and holdout set. All cross-validation, parameter optimisation and model selection are done on the training set. After we have selected the best emulator model, we can refit it on the full training dataset.

In [None]:
# Refit the emulator and keep the refitted model for prediction.
# NOTE(review): get_model() is called without a name here, unlike the earlier
# get_model('GaussianProcess') -- presumably it returns the best model; confirm.
gp_final = em.refit(em.get_model())

## 5) Predict on the test set
Now we run the emulator for unseen combinations of the reaction and diffusion parameters and compare its performance against the reference (simulator).

In [None]:
# Ranges from which the unseen test parameters are drawn
beta_bounds = (1.0, 2.0)
d_bounds = (0.05, 0.3)

# Draw the test parameter sets (beta first, then d, one set after the other)
np.random.seed(14)  # fixed seed so the same test cases are drawn every time
n_test_samples = 2
X_param_test = np.array([
    [np.random.uniform(*beta_bounds), np.random.uniform(*d_bounds)]
    for _ in range(n_test_samples)
])

# Time grid for the test cases: same horizon as before, smaller step
dt_test = 0.25
T_test = T
n_time_samples_test = int(T_test / dt_test)
X_time_test = np.linspace(0, T_test, num=n_time_samples_test, endpoint=True)

# Pair every test parameter set with every test time
param_expanded_test = np.repeat(X_param_test, n_time_samples_test, axis=0)
time_expanded_test = np.tile(X_time_test, n_test_samples)
X_test = np.column_stack((param_expanded_test, time_expanded_test))

# Reference solutions: run the simulator for each test parameter set
test_runs = [
    simulate_reaction_diffusion(x, n=n, dt=dt_test, T=T_test, return_timeseries=True)
    for x in tqdm(X_param_test)
]
flat_shape = (n_test_samples * n_time_samples_test, n * n)
U_test = np.stack([u_run for u_run, _ in test_runs]).reshape(flat_shape)
V_test = np.stack([v_run for _, v_run in test_runs]).reshape(flat_shape)

In [None]:
# Prediction on the held-out split is disabled for now; kept for reference.
#y_pred = gp_final.predict(X[em.test_idxs]) #TODO: fix this
# Emulator prediction and its standard deviation for the freshly simulated test inputs
y_pred, y_std_pred = _predict_with_optional_std(gp_final, X_test)
# Reference: simulated concentration of U for the same inputs
y_true = U_test

In [None]:
# Display the number of training parameter samples (inspection only)
n_param_samples

In [None]:
# Plot the results for some unseen (test) parameter instances
params_test = [0,1]
times_test = [15, 25]

for param_test in params_test:
  for time_test in times_test:
    plt.figure(figsize=(20,4.5))
    plt.subplot(1,4,1)
    plt.imshow(y_true.reshape(n_test_samples, n_time_samples_test, n,n)[param_test][time_test], interpolation='bilinear', vmin=-1, vmax=1)
    plt.axis('off')
    plt.xlabel('x', fontsize=12)
    plt.ylabel('y')
    plt.title('True solution (simulator)', fontsize=12)
    plt.colorbar(fraction=0.046)

    plt.subplot(1,4,2)
    plt.imshow(y_pred.reshape(n_test_samples, n_time_samples_test, n,n)[param_test][time_test], interpolation='bilinear', vmin=-1, vmax=1)
    plt.axis('off')
    plt.xlabel('x', fontsize=12)
    plt.ylabel('y')
    plt.title('Prediction (emulator)', fontsize=12)
    plt.colorbar(fraction=0.046)

    plt.subplot(1,4,3)
    plt.imshow(y_std_pred.reshape(n_test_samples, n_time_samples_test, n,n)[param_test][time_test], cmap = 'bwr', interpolation='bilinear', vmax = np.max(y_std_pred[params_test]))
    plt.axis('off')
    plt.xlabel('x', fontsize=12)
    plt.ylabel('y')
    plt.title('Standard Deviation (emulator)', fontsize=12)
    plt.colorbar(fraction=0.046)

    plt.subplot(1,4,4)
    plt.imshow(np.abs(y_pred - y_true).reshape(n_test_samples, n_time_samples_test, n,n)[param_test][time_test], cmap = 'bwr', interpolation='bilinear')
    plt.axis('off')
    plt.xlabel('x', fontsize=12)
    plt.ylabel('y')
    plt.title('Absolute error', fontsize=12)
    plt.colorbar(fraction=0.046)

    plt.suptitle(r'Results for test parameters: $\beta = {:.2f}, d = {:.2f}, t = {:.1f}$'.format(X_param_test[param_test][0], X_param_test[param_test][0], X_time_test[time_test]), fontsize=12)
    plt.show()

In [None]:
import matplotlib.animation as animation
from IPython.display import Image, display

# Define test parameters and view the flat test arrays as (sample, time, n, n) fields
params_test = [0, 1]
y_true_reshaped = y_true.reshape(n_test_samples, n_time_samples_test, n, n)
y_pred_reshaped = y_pred.reshape(n_test_samples, n_time_samples_test, n, n)
y_std_pred_reshaped = y_std_pred.reshape(n_test_samples, n_time_samples_test, n, n)
abs_error = np.abs(y_pred - y_true).reshape(n_test_samples, n_time_samples_test, n, n)

# Function to update the animation frame
def update(frame, im_true, im_pred, im_std, im_err, title, param_idx):
    """Show time step `frame` of test sample `param_idx` in all four panels.

    Sets the data of the four images from the reshaped test arrays, updates
    the figure title with the sample's parameters and the test time of the
    frame, and returns the list of modified artists.
    """
    im_true.set_array(y_true_reshaped[param_idx][frame])
    im_pred.set_array(y_pred_reshaped[param_idx][frame])
    im_std.set_array(y_std_pred_reshaped[param_idx][frame])
    im_err.set_array(abs_error[param_idx][frame])

    # Frames index the test time series, so the label comes from X_time_test
    # (the training grid X_time has a different step and length).
    title.set_text(r'Results for test parameters: $\beta = {:.2f}, d = {:.2f}, t = {:.1f}$'.format(
        X_param_test[param_idx][0], X_param_test[param_idx][1], X_time_test[frame])
    )

    return [im_true, im_pred, im_std, im_err, title]

# Create animations for each test parameter instance
for param_test in params_test:
    fig, axes = plt.subplots(1, 4, figsize=(20, 4.5))

    # Titles for each subplot
    titles = ["True solution (simulator)", "Prediction (emulator)", "Standard Deviation (emulator)", "Absolute Error"]

    # Initialize images with desired colormaps
    ims = []
    ims.append(axes[0].imshow(y_true_reshaped[param_test][0], interpolation='bilinear', cmap="viridis", vmin=-1, vmax=1))
    ims.append(axes[1].imshow(y_pred_reshaped[param_test][0], interpolation='bilinear', cmap="viridis", vmin=-1, vmax=1))
    ims.append(axes[2].imshow(y_std_pred_reshaped[param_test][0], interpolation='bilinear', cmap="bwr"))  # bwr colormap for std
    ims.append(axes[3].imshow(abs_error[param_test][0], interpolation='bilinear', cmap="bwr", vmin = 0, vmax = 0.2))  # bwr colormap for error

    # Remove ticks and set titles
    for i, ax in enumerate(axes):
        ax.set_xticks([])
        ax.set_yticks([])
        ax.set_title(titles[i], fontsize=12)
        plt.colorbar(ims[i], ax=ax, fraction=0.046)

    # Global title
    suptitle = fig.suptitle("", fontsize=12)

    # Create the animation: one frame per test time step, so the whole test
    # series is covered (the training count n_time_samples would stop early)
    ani = animation.FuncAnimation(
        fig, update, frames=n_time_samples_test, fargs=(ims[0], ims[1], ims[2], ims[3], suptitle, param_test), interval=300
    )

    # Save animation as GIF
    gif_filename = f"reaction_diffusion_test_param{param_test}.gif"
    ani.save(gif_filename, writer="pillow", dpi=200)

    # Display GIF inline in the Jupyter notebook
    display(Image(filename=gif_filename))

    # Close the figure to prevent displaying it
    plt.close(fig)
