### **Figure 2. Results from iML1515 model**

A. Show a sample BayesOpt run with Growth/Cost on the y-axis and the BayesOpt iteration on the x-axis. There are some example plots in my [ACS paper](https://pubs.acs.org/doi/full/10.1021/acssynbio.3c00120) that will help you. I would also draw a line for the best sample seen so far over time.

B. Show a Pareto front of Growth vs. Cost. Show with original medium.

C. Show surface with two most interesting conditions and BO run over that surface. You have this in toy model but I would like it in iML1515 if possible.

goal: 2025-04-07_BayesOpt_textbook_growth-cost_qPAREGO_30it_round_1_pareto_batch_colour.png

In [None]:
# imports
import torch
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from matplotlib.cm import ScalarMappable
import matplotlib.colors as mcolors
import matplotlib.cm as cm
import colormaps as cmaps # for scientific colormaps
from scipy.interpolate import griddata

from botorch.utils.multi_objective.pareto import is_non_dominated

In [None]:
# for Figure 2A
# TODO: Write description
def plot_growth_per_cost(
        results, figname = "figure.png"):
    """
    Plots growth rate per cost (y-axis) against the sample number (x-axis) for each
    candidate medium and overlays a step line of the best value seen so far.
    Each dot is colour-coded according to the iteration (batch) it resulted from;
    the initial points (the first `n_start` samples) share colour 0.
    Saves the figure (as png file) and then shows it.

    PARAMETERS
    * results - dictionary - output of media_BayesOpt; the keys read here are
        'growth rate tensors', 'cost tensors', 'n_iter', 'n_start', 'n_candidates'
    * figname - string - name under which to save the figure

    RETURNS
    -

    RAISES
    * KeyError - if 'growth rate tensors' or 'cost tensors' is missing in results
    """

    # Throw error when results doesn't have a cost tensor or growth rate tensor entry
    if 'growth rate tensors' not in results or 'cost tensors' not in results:
        raise KeyError("Missing 'growth rate tensors' or 'cost tensors' in results")

    # Extract data from results; flatten so (n,) and (n, 1) tensors behave alike
    growth_rates = results['growth rate tensors'].cpu().numpy().reshape(-1)
    medium_costs = results['cost tensors'].cpu().numpy().reshape(-1)
    # Growth per cost; samples with zero cost are reported as 0 instead of inf/nan
    growth_costs = np.divide(
        growth_rates, medium_costs,
        out = np.zeros_like(growth_rates), where = medium_costs != 0)

    # One x position per evaluated sample (1-based)
    n_samples = len(growth_rates)
    iterations = np.arange(1, n_samples + 1)

    # Create the plot with given size
    fig, axes = plt.subplots(1, 1, figsize = (9, 7))

    # Colour mapping: colour 0 for the initial points, one colour per batch (n_iter)
    n_batch = results["n_iter"]
    n_start = results["n_start"]
    n_candidates = results["n_candidates"]

    colours = np.asarray(cmaps.batlow(np.linspace(0, 1, n_batch + 1)))
    cmap = mcolors.ListedColormap(colours)

    # Batch index of every sample: 0 for the first n_start samples, then 1..n_batch
    # in blocks of n_candidates. Clipping keeps the lookup valid even when the
    # number of samples does not match n_start + n_batch * n_candidates exactly.
    sample_idx = np.arange(n_samples)
    batch_idx = np.where(
        sample_idx < n_start,
        0,
        (sample_idx - n_start) // max(n_candidates, 1) + 1)
    point_colours = colours[np.clip(batch_idx, 0, n_batch)]  # (n_samples, 4) RGBA rows

    # Colour boundaries between batches, from -0.5 to n_batch + 0.5
    boundaries = np.arange(n_batch + 2) - 0.5
    norm = mcolors.BoundaryNorm(boundaries, cmap.N)

    # Scatter plot with custom colours, applying transparency (alpha = 0.8)
    axes.scatter(x = iterations, y = growth_costs, c = point_colours, alpha = 0.8)

    # Running maximum of growth/cost; fmax ignores NaN entries instead of
    # propagating them along the rest of the line
    best_values = np.fmax.accumulate(growth_costs)
    axes.step(
        iterations, best_values, where = 'post', color = 'black', linestyle = '-',
        linewidth = 2, label = 'Best Growth per Cost So Far')

    # Add labels and titles
    axes.set_xlabel("Sample Number", fontsize = 14)
    axes.set_ylabel("Growth per Cost [gDW/$10^{-3}£$]", fontsize = 14)
    axes.xaxis.set_tick_params(width = 2, labelsize = 10)
    axes.yaxis.set_tick_params(width = 2, labelsize = 10)
    axes.set_title(
        "Growth per Cost For Each Tested Medium Composition",
        fontsize = 16)

    # Add the colour bar & define ticks (every 5th iteration)
    tick_positions = np.arange(0, n_batch + 1, 5)
    sm = cm.ScalarMappable(cmap = cmap, norm = norm)
    cbar = fig.colorbar(sm, ax = axes,
                        ticks = tick_positions, pad = 0.12)
    cbar.ax.set_title("Iteration", fontsize = 10)
    cbar.ax.tick_params(which = 'minor', size = 0) # turn off minor ticks at colour boundaries

    # Display the legend
    axes.legend()

    # Save before showing, so the file is written no matter how the active
    # backend treats the figure once plt.show() returns
    fig.set_size_inches(9, 7)  # Consistent physical size in inches
    fig.savefig(figname, dpi = 300, bbox_inches = None)

    # Show the plot
    plt.show()

In [None]:
# for Figure 2B
# TODO: Write description
def plot_pareto_batch_colour(
    results, 
    xax = "growth rate tensors", 
    yax = "cost tensors", 
    figname = "figure.png",
    MetModel = None,
    initial_medium = None,
    initial_costs = None,
    model_objective = None,
    ):
    """
    Plots the performance category indicated by "xax" (default: growth rate) on x-axis against 
    the performance category indicated by "yax" (default: medium cost) on the y-axis
    for each candidate medium.
    Each dot is colour-coded according to the batch it resulted from.
    Plots the Pareto front deduced from the data; an axis holding "cost tensors" is
    treated as to-be-minimised, every other axis as to-be-maximised.
    When the metabolic model (MetModel), the baseline medium (initial_medium) and 
    the corresponding baseline costs (initial_costs) are given, the performance of the baseline
    medium is plotted onto the graph to allow for visual performance comparison.
    Saves the figure (as png file) and then shows it.

    PARAMETERS
    * results - dictionary - output of media_BayesOpt
    * xax - string - variable to plot on x-axis
    * yax - string - variable to plot on y-axis
        (both one of "growth rate tensors", "cost tensors", "production tensors")
    * figname - string - name under which to save the figure
    * MetModel - cobra model - Metabolic model for simulation & used for optimisation
    * initial_medium - dictionary - Medium for initial simulation
    * initial_costs - dictionary - Costs associated with the initial medium
    * model_objective - objective to set on MetModel for the baseline simulation;
        when None, results["model objective"] is used

    RETURNS
    - 

    RAISES
    * ValueError - if xax or yax is not one of the supported result keys
    """
    valid_values = {"growth rate tensors", "cost tensors", "production tensors"}
    if xax not in valid_values or yax not in valid_values:
        raise ValueError(f"xax and yax must be one of {valid_values}, but got xax='{xax}' and yax='{yax}'")

    # Extract data from results; flatten so (n,) and (n, 1) tensors behave alike
    x_np = results[xax].cpu().numpy().reshape(-1)
    y_np = results[yax].cpu().numpy().reshape(-1)

    # Stack the two objectives into a single 2D array
    # rows: candidates
    # columns: xax values, yax values
    points = np.column_stack([x_np, y_np])

    # Create the plot with given size
    fig, axes = plt.subplots(1, 1, figsize = (9, 7))

    # Colour mapping: colour 0 for the initial points, one colour per batch (n_iter)
    n_batch = results["n_iter"]
    n_start = results["n_start"]
    n_candidates = results["n_candidates"]

    colours = np.asarray(cmaps.batlow(np.linspace(0, 1, n_batch + 1)))
    cmap = mcolors.ListedColormap(colours)

    # Batch index of every sample: 0 for the first n_start samples, then 1..n_batch
    # in blocks of n_candidates. Clipping keeps the lookup valid even when the
    # number of samples does not match n_start + n_batch * n_candidates exactly.
    sample_idx = np.arange(len(points))
    batch_idx = np.where(
        sample_idx < n_start,
        0,
        (sample_idx - n_start) // max(n_candidates, 1) + 1)
    point_colours = colours[np.clip(batch_idx, 0, n_batch)]  # (n_samples, 4) RGBA rows

    # Colour boundaries between batches, from -0.5 to n_batch + 0.5
    boundaries = np.arange(n_batch + 2) - 0.5
    norm = mcolors.BoundaryNorm(boundaries, cmap.N)

    # Scatter plot with custom colours, applying transparency (alpha = 0.8)
    axes.scatter(points[:, 0], points[:, 1], c = point_colours, alpha = 0.8)

    # Set y-axis to log scale if it contains costs
    if yax == "cost tensors":
        axes.set_yscale("log")

    # --- Pareto front ---
    # is_non_dominated assumes maximisation, so every cost axis (x or y) is negated
    # for the dominance check only; the plotted coordinates stay untouched.
    signs = np.array([-1.0 if name == "cost tensors" else 1.0 for name in (xax, yax)])

    # Sort by the x-axis value so the front is drawn in order of increasing x
    points_sorted = points[np.argsort(points[:, 0])]

    # Non-dominated points, i.e. the optimal trade-offs. A plain CPU tensor is
    # sufficient for this check.
    is_pareto = is_non_dominated(torch.tensor(points_sorted * signs)).cpu().numpy()
    front = points_sorted[is_pareto]

    # Plot the Pareto front
    axes.plot(
        front[:, 0],
        front[:, 1],
        label="Pareto Front",
        color="r",
        linewidth=2,
    )

    # --- Baseline (original) medium ---
    # Only evaluated when model, initial medium and costs are all given.
    # NOTE(review): this overwrites MetModel.medium and MetModel.objective and does
    # not restore them afterwards - confirm callers expect that.
    if MetModel is not None and initial_medium and initial_costs:
        # Set the initial medium as medium in the model
        MetModel.medium = initial_medium
        # Set model objective to desired one
        if model_objective is None:
            MetModel.objective = results["model objective"]
        else:
            MetModel.objective = model_objective
        # Run optimisation
        solution = MetModel.optimize()

        # No production rate exists when no production objective was recorded;
        # use None rather than a fake numeric value that would be plotted.
        production_objective = results.get("production objective")
        initial_production = None
        if production_objective is not None:
            initial_production = solution.fluxes[production_objective]

        # Map baseline data to the corresponding axes depending on call
        baseline = {
            "growth rate tensors": solution.fluxes[results["biomass objective"]],
            "cost tensors": calc_cost_tot(initial_costs, initial_medium).cpu().numpy(),
            "production tensors": initial_production,
        }
        ini_x = baseline.get(xax)
        ini_y = baseline.get(yax)

        # Plot the baseline medium as a red cross, unless a requested axis has
        # no baseline value
        if ini_x is not None and ini_y is not None:
            axes.scatter(
                ini_x, # x-axis
                ini_y, # y-axis
                color = "red", marker = "x", label = "Original Medium", s = 100, zorder = 5
            )

    # --- Labels and titles ---
    axis_labels = {
        "growth rate tensors": "Growth Rate [1/h]",
        "cost tensors": "Medium Cost [$10^{-3}$ £/gDW·h]",
        "production tensors": "Production Rate [mmol/gDW·h]"
    }
    # axes
    axes.set_xlabel(axis_labels.get(xax), fontsize = 14)
    axes.set_ylabel(axis_labels.get(yax), fontsize = 14)
    axes.xaxis.set_tick_params(width = 2, labelsize = 10)
    axes.yaxis.set_tick_params(width = 2, labelsize = 10)
    # title
    title_names = {
        "growth rate tensors": "Growth Rate",
        "cost tensors": "Medium Cost",
        "production tensors": "Production Rate"
    }
    axes.set_title(
        f"{title_names.get(xax)} vs. {title_names.get(yax)} With Pareto Front",
        fontsize = 16)

    # Add the colour bar & define ticks (every 5th iteration)
    tick_positions = np.arange(0, n_batch + 1, 5)
    sm = cm.ScalarMappable(cmap = cmap, norm = norm)
    cbar = fig.colorbar(sm, ax = axes, 
                        ticks = tick_positions, pad = 0.12)
    cbar.ax.set_title("Iteration", fontsize = 10)
    cbar.ax.tick_params(which = 'minor', size = 0) # turn off minor ticks at colour boundaries

    # Display the legend
    axes.legend()

    # Save before showing, so the file is written no matter how the active
    # backend treats the figure once plt.show() returns
    fig.set_size_inches(9, 7)  # Consistent physical size in inches
    fig.savefig(figname, dpi = 300, bbox_inches = None)

    # Show the plot
    plt.show()

In [None]:
# for Figure 2C
# TODO: change description

def plot_contour(
        results, 
        results_BO = None,
        var_1 = "EX_glc__D_e", 
        var_2 = "EX_h_e", 
        var_Z = "Growth per Cost",
        levels = 40, 
        figname = "figname"
        ):
    """
    Creates a filled contour plot from the GridSearch results for a model and medium.
    The x-axis is given by var_1, the y-axis by var_2, the contour is given by var_Z
    (interpolated onto a regular 100 x 100 grid with cubic interpolation).
    If passed, the samples of a BayesianOpt run are plotted on top of the contour,
    colour-coded by the iteration (batch) they resulted from.
    Saves the figure as "<figname>_<var_1>_<var_2>_<var_Z>.png" and then shows it.

    PARAMETERS
    - results - dictionary - as resulting from GridSearch_textbook-model.ipynb
    - results_BO - dictionary - as resulting from BayesOpt-[..].ipynb
    - var_1 - string - 1st medium component to create contour plot
    - var_2 - string - 2nd medium component to create contour plot
    - var_Z - string - what is the contour value ("Growth per Cost", "Medium Cost", "Growth Rate")
    - levels - integer - levels to pass to contourf
    - figname - string - prefix of the file name under which to save the figure

    RETURNS
    -

    RAISES
    - ValueError - if var_Z is not a supported contour value, or var_1 / var_2 is
        not a component of the first medium in results["medium list"]
    """

    # --- Test validity of arguments ---
    valid_var_Z_types = {"Growth per Cost", "Medium Cost", "Growth Rate"}
    if var_Z not in valid_var_Z_types:
        raise ValueError(f"var_Z must be one of {valid_var_Z_types}, but got '{var_Z}'")
    valid_var_12 = set(results["medium list"][0].keys())
    if var_1 not in valid_var_12 or var_2 not in valid_var_12:
        raise ValueError(f"var_1 and var_2 must be one of {valid_var_12}, but got var_1 = '{var_1}' and var_2 = '{var_2}'.")

    # --- Grid-search data ---
    # Convert result dictionary to a DataFrame (one row per medium)
    results_df = pd.DataFrame.from_dict(results["medium list"])

    # Flatten so (n,) and (n, 1) tensors both become plain columns
    growth_rates = results['growth rate tensors'].cpu().numpy().reshape(-1)
    medium_costs = results['cost tensors'].cpu().numpy().reshape(-1)

    results_df["Growth Rate"] = growth_rates
    results_df["Medium Cost"] = medium_costs
    # Growth per cost; samples with zero cost are reported as 0 instead of inf/nan
    results_df["Growth per Cost"] = np.divide(
        growth_rates, medium_costs,
        out = np.zeros_like(growth_rates), where = medium_costs != 0)

    # Chosen medium components as X and Y; Z is the contour value
    X = results_df[var_1]
    Y = results_df[var_2]
    Z = results_df[var_Z]

    # Regular grid spanning the sampled X and Y ranges
    xi = np.linspace(X.min(), X.max(), 100)
    yi = np.linspace(Y.min(), Y.max(), 100)

    # Interpolate Z values onto the grid. Cubic is used on purpose: linear left
    # many white areas in the plots and nearest did not work.
    zi = griddata((X, Y), Z, (xi[None, :], yi[:, None]), method = "cubic")

    # --- Contour plot ---
    fig, axes = plt.subplots(figsize=(11, 6))
    contour = axes.contourf(xi, yi, zi, levels = levels, cmap = cmaps.buda)
    cbar = fig.colorbar(contour, ax = axes, pad = 0.05)
    cbar.ax.set_title(var_Z, fontsize = 10)
    cbar.ax.tick_params(which = "minor", size = 0) # turn off minor ticks

    # --- BO samples as dots, if available ---
    if results_BO is not None:
        # Convert results_BO dictionary to a DataFrame
        results_BO_df = pd.DataFrame.from_dict(results_BO["medium list"])

        growth_rates_BO = results_BO['growth rate tensors'].cpu().numpy().reshape(-1)
        medium_costs_BO = results_BO['cost tensors'].cpu().numpy().reshape(-1)

        results_BO_df["Growth Rate"] = growth_rates_BO
        results_BO_df["Medium Cost"] = medium_costs_BO
        results_BO_df["Growth per Cost"] = np.divide(
            growth_rates_BO, medium_costs_BO,
            out = np.zeros_like(growth_rates_BO), where = medium_costs_BO != 0)

        # Colour scheme according to iterations: colour 0 for the initial points,
        # one colour per batch (n_iter)
        n_batch = results_BO["n_iter"]
        n_start = results_BO["n_start"]
        n_candidates = results_BO["n_candidates"]

        colours = np.asarray(cmaps.batlow(np.linspace(0, 1, n_batch + 1)))
        cmap = mcolors.ListedColormap(colours)

        # Batch index of every sample: 0 for the first n_start samples, then
        # 1..n_batch in blocks of n_candidates. Clipping keeps the lookup valid
        # even when the number of samples does not match
        # n_start + n_batch * n_candidates exactly.
        sample_idx = np.arange(len(results_BO_df[var_1]))
        batch_idx = np.where(
            sample_idx < n_start,
            0,
            (sample_idx - n_start) // max(n_candidates, 1) + 1)
        point_colours = colours[np.clip(batch_idx, 0, n_batch)]  # (n_samples, 4) RGBA rows

        # Colour boundaries between batches, from -0.5 to n_batch + 0.5
        boundaries = np.arange(n_batch + 2) - 0.5
        norm = mcolors.BoundaryNorm(boundaries, cmap.N)

        axes.scatter(
            x = results_BO_df[var_1], y = results_BO_df[var_2], 
            c = point_colours, alpha = 0.8, label = "BO Samples", edgecolor = 'white'
        )

        # Add the colour bar & define ticks (every 5th iteration)
        tick_positions_BO = np.arange(0, n_batch + 1, 5)
        sm = cm.ScalarMappable(cmap = cmap, norm = norm)
        cbar_BO = fig.colorbar(sm, ax = axes,
                               ticks = tick_positions_BO, pad = 0.12)
        cbar_BO.ax.set_title("Iteration", fontsize = 10)
        cbar_BO.ax.tick_params(which = 'minor', size = 0) # turn off minor ticks at colour boundaries

        # The BO scatter is the only labelled artist, so a legend is only
        # meaningful (and warning-free) in this branch
        axes.legend()

    # --- Labels and title ---
    # Set on the axes object directly rather than through pyplot's current-axes state
    axes.set_xlabel(var_1, fontsize = 14)
    axes.set_ylabel(var_2, fontsize = 14)
    axes.xaxis.set_tick_params(width = 2, labelsize = 10)
    axes.yaxis.set_tick_params(width = 2, labelsize = 10)
    axes.set_title(f"Contour Plot of {var_1} vs {var_2} ({var_Z})", fontsize = 16)

    # Save before showing, so the file is written no matter how the active
    # backend treats the figure once plt.show() returns
    outfile = figname + "_" + var_1 + "_" + var_2 + "_" + var_Z + ".png"
    fig.set_size_inches(9, 7)  # Consistent physical size in inches
    fig.savefig(outfile, dpi=300, bbox_inches=None)

    # Show plot
    plt.show()