# Comparison - Exercise

For this exercise, we will expand our previous fit to include the measurements of a conductivity sensor. We will also include the replicate that our colleagues in the lab made to control for system variance. You can find all required data in the labbook.

## Exercise 1: Comparator

The experiment was repeated to account for system variability. The data was converted to concentrations in $mM$ and can be found in `./experimental_data`. Inspect the conductivity measurements and see what kind of pre-processing might make them useful and which unit operation you can compare them to.

**Task:**
- Import and plot the experimental data using the `ReferenceIO` class.
- Add the references to the `Comparator`.
- Add the `SSE` difference metric and compare with simulation results.
- Add the `Shape` difference metric and compare with simulation results.


## Exercise 2: Optimization:

Optimize these parameters:

`flow_sheet.column.axial_dispersion` from `1e-8` to `1e-5`
and

`flow_sheet.column.total_porosity` from `0.2` to `0.9`

Use the `U_NSGA3` optimizer with a population size of 64 and a maximum number of generations of 12 on 4 cores.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from CADETProcess.processModel import ComponentSystem
from CADETProcess.processModel import FlowSheet
from CADETProcess.processModel import Inlet, TubularReactor, LumpedRateModelWithoutPores, Outlet
from CADETProcess.processModel import Process
from CADETProcess.processModel import StericMassAction
from CADETProcess.simulator import Cadet

# Single-component system for the tracer experiment.
component_system = ComponentSystem(["Acetone"])

# Column model. `axial_dispersion` and `total_porosity` are only starting
# values here; they are the parameters estimated by the optimization below.
column = LumpedRateModelWithoutPores(component_system, name='column')
column.length = 0.014
column.total_porosity = 0.5
column.diameter = 0.01
column.axial_dispersion = 5.75e-7


def _build_pipe(system, name, length):
    """Return a TubularReactor called *name* with the given *length*.

    All pipes in this setup share the same diameter, axial dispersion and
    number of axial cells; only the name and the length differ.
    """
    pipe = TubularReactor(system, name=name)
    pipe.length = length
    pipe.diameter = 0.001
    pipe.axial_dispersion = 6e-6
    pipe.discretization.ncol = 50
    return pipe


pipe5 = _build_pipe(component_system, "pipe5", 0.1)
pipe6 = _build_pipe(component_system, "pipe6", 0.02)
pipe7 = _build_pipe(component_system, "pipe7", 0.1)

volumetric_flow_rate = 1.67e-8

# Boundary units of the flow sheet.
inlet = Inlet(component_system, name='inlet')
inlet.flow_rate = volumetric_flow_rate
outlet = Outlet(component_system, name='outlet')

# Flow Sheet
flow_sheet = FlowSheet(component_system)

# Register the units in flow order; only the first and last carry a role flag.
flow_sheet.add_unit(inlet, feed_inlet=True)
for inner_unit in (pipe5, column, pipe6, pipe7):
    flow_sheet.add_unit(inner_unit)
flow_sheet.add_unit(outlet, product_outlet=True)

# Connect every unit to its successor: inlet -> pipe5 -> column -> pipe6 -> pipe7 -> outlet.
unit_chain = (inlet, pipe5, column, pipe6, pipe7, outlet)
for upstream, downstream in zip(unit_chain, unit_chain[1:]):
    flow_sheet.add_connection(upstream, downstream)

process = Process(flow_sheet, 'batch elution')
process.cycle_time = 3 * 60

# Inlet concentration steps as (event name, concentration, event time):
# the tracer is switched on at t=0 and off again at t=1.
inlet_steps = (
    ('load', 1, 0),
    ('wash', 0, 1),
)
for step_name, step_concentration, step_time in inlet_steps:
    process.add_event(step_name, 'flow_sheet.inlet.c', step_concentration, step_time)

simulator = Cadet()
simulator.time_resolution = 0.5

# Run once with the initial parameter guess and report the time taken.
simulation_results = simulator.simulate(process)
print(simulation_results.time_elapsed)

In [None]:
import pandas as pd
# Load both tracer runs; `index_col=0` makes the first spreadsheet column the
# DataFrame index, which is passed to ReferenceIO as its second argument.
data1 = pd.read_excel("experimental_data/tracer_1.xlsx", index_col=0)
data2 = pd.read_excel("experimental_data/tracer_2.xlsx", index_col=0)

from CADETProcess.reference import ReferenceIO

# UV references: column 0 of each run.
reference_uv1 = ReferenceIO('tracer experiment uv1', data1.index, data1.iloc[:, 0])
reference_uv2 = ReferenceIO('tracer experiment uv2', data2.index, data2.iloc[:, 0])

# Conductivity reference: column 1, pre-processed as `0.1 - signal`.
# Fix: this reference is named and labelled as run 1 but was built from
# `data2`; it now reads run 1 (`data1`) so name, label and data agree.
# NOTE(review): assumes tracer_1.xlsx carries the conductivity trace in its
# second column like tracer_2.xlsx does - confirm against the data files.
reference_conductivity1 = ReferenceIO(
    'tracer experiment conductivity1',
    data1.index,
    0.1 - data1.iloc[:, 1],
)

reference_uv2.plot()
reference_conductivity1.plot()

In [None]:
from CADETProcess.comparison import Comparator

# One comparator holds every experimental reference used for the fit.
comparator = Comparator()

for experimental_reference in (reference_uv1, reference_uv2, reference_conductivity1):
    comparator.add_reference(experimental_reference)

In [None]:
# Pair each reference with the simulated solution it is compared against:
# both UV traces use the outlet of pipe6, the conductivity trace the outlet
# of pipe7.
shape_targets = (
    (reference_uv1, 'pipe6.outlet'),
    (reference_uv2, 'pipe6.outlet'),
    (reference_conductivity1, 'pipe7.outlet'),
)
for shape_reference, solution_path in shape_targets:
    comparator.add_difference_metric('Shape', shape_reference, solution_path)

In [None]:
# Evaluate the registered difference metrics for the initial simulation.
# Left as a bare expression so that a notebook echoes the result.
comparator.evaluate(simulation_results)

In [None]:
# Plot simulation vs. references; binding the return value to `_` keeps it
# from being echoed as cell output.
_ = comparator.plot_comparison(simulation_results)

In [None]:
from CADETProcess.optimization import OptimizationProblem
optimization_problem = OptimizationProblem('Column transport')
optimization_problem.add_evaluation_object(process)

# Optimization variables as (name, parameter path, lower bound, upper bound).
# Both use the same automatic transform.
column_variables = (
    ('axial dispersion', 'flow_sheet.column.axial_dispersion', 1e-8, 1e-5),
    ('porosity', 'flow_sheet.column.total_porosity', 0.2, 0.9),
)
for variable_name, variable_path, lower_bound, upper_bound in column_variables:
    optimization_problem.add_variable(
        name=variable_name,
        parameter_path=variable_path,
        lb=lower_bound,
        ub=upper_bound,
        transform='auto',
    )

# The simulator is registered as an evaluator so that the objective below can
# list it under `requires`.
optimization_problem.add_evaluator(simulator)

# The comparator itself is the objective; it contributes one objective per
# registered difference metric.
optimization_problem.add_objective(
    comparator,
    n_objectives=comparator.n_metrics,
    requires=[simulator],
)

def callback(simulation_results, individual, evaluation_object, callbacks_dir='./'):
    """Save a comparison plot for one evaluated individual.

    Parameters
    ----------
    simulation_results
        Simulation output that is plotted against the comparator's references.
    individual
        Evaluated individual; its ``id`` becomes part of the file name.
    evaluation_object
        Object the individual was evaluated on; its string form becomes part
        of the file name.
    callbacks_dir : str, optional
        Directory the PNG file is written to. Defaults to ``'./'``.
    """
    figure_path = f'{callbacks_dir}/{individual.id}_{evaluation_object}_comparison.png'
    # show=False: only write the file, do not open a figure window.
    comparator.plot_comparison(simulation_results, file_name=figure_path, show=False)


optimization_problem.add_callback(callback, requires=[simulator])

In [None]:
from CADETProcess.optimization import U_NSGA3
# Optimizer settings as specified in the exercise text: population size 64,
# at most 12 generations, 4 cores. (Previously 8 cores / 16 generations,
# which did not match the task description.)
optimizer = U_NSGA3()
optimizer.n_cores = 4
optimizer.pop_size = 64
optimizer.n_max_gen = 12

# optimization_results = optimizer.optimize(
#     optimization_problem,
#     use_checkpoint=False
# )

**Task:**
- Import and plot the experimental data using the `ReferenceIO` class.
- Add the references to the `Comparator`.
- Add the `SSE` difference metric and compare with simulation results.
- Compare with other metrics.
- *Consider the accuracy of the model fit and the model complexity. Are there details in the data that your model is not yet capturing?*

## Exercise 2: SMA parameter estimation

Set up an optimization problem to identify the binding parameters of your three proteins. The data is in `./experimental_data`.

**Task:**
- Import and plot the experimental data using the `ReferenceIO` class.
- Add the references to the `Comparator`.
- Add the `SSE` difference metric and compare with simulation results.
- Add the `Shape` difference metric and compare with simulation results.


## Exercise 2: Optimization:

Optimize these parameters:

`flow_sheet.column.axial_dispersion` from `1e-8` to `1e-5`
and

`flow_sheet.column.total_porosity` from `0.2` to `0.9`

Use the `U_NSGA3` optimizer with a population size of 64 and a maximum number of generations of 12 on 4 cores.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from CADETProcess.processModel import ComponentSystem
from CADETProcess.processModel import FlowSheet
from CADETProcess.processModel import Inlet, TubularReactor, LumpedRateModelWithoutPores, Outlet
from CADETProcess.processModel import Process
from CADETProcess.processModel import StericMassAction
from CADETProcess.simulator import Cadet

def create_column(component_system):
    """Build the column unit operation.

    Parameters
    ----------
    component_system
        Component system the unit is created for.

    Returns
    -------
    LumpedRateModelWithoutPores
        Unit named ``'column'`` with length, total porosity, diameter and
        axial dispersion set. No binding model is attached here.
    """
    unit = LumpedRateModelWithoutPores(component_system, name='column')

    # Same values, assigned in the same order as before.
    parameters = (
        ('length', 0.014),
        ('total_porosity', 0.5),
        ('diameter', 0.01),
        ('axial_dispersion', 5.75e-7),
    )
    for attribute, value in parameters:
        setattr(unit, attribute, value)

    return unit

def create_pipes(component_system):
    """Build the three tubing segments of the system.

    Parameters
    ----------
    component_system
        Component system the units are created for.

    Returns
    -------
    tuple
        The TubularReactor units named ``"pipe5"``, ``"pipe6"`` and
        ``"pipe7"``, in that order.
    """
    def build(name, length):
        # All pipes share diameter, dispersion and axial cell count; only
        # the name and the length differ.
        tube = TubularReactor(component_system, name=name)
        tube.length = length
        tube.diameter = 0.001
        tube.axial_dispersion = 6e-6
        tube.discretization.ncol = 50
        return tube

    return build("pipe5", 0.1), build("pipe6", 0.02), build("pipe7", 0.1)


def create_flow_sheet(component_system):
    """Build the flow sheet inlet -> pipe5 -> column -> pipe6 -> pipe7 -> outlet.

    The column carries a kinetic steric mass action (SMA) binding model whose
    parameter vectors have four entries, i.e. a four-component system with
    salt as the first component is expected.

    Parameters
    ----------
    component_system
        Component system shared by all unit operations.

    Returns
    -------
    FlowSheet
        Flow sheet with all units registered and connected. The inlet is
        flagged as feed inlet and the outlet as product outlet.
    """
    # Salt concentration of the mobile phase the system starts with.
    c_salt_initial = 50

    # Binding Model
    binding_model = StericMassAction(component_system)
    binding_model.is_kinetic = True
    binding_model.adsorption_rate = [0, 2.3e-5, 5.59e-1, 3.5e-3]
    binding_model.desorption_rate = [0, 1, 1, 1]
    binding_model.characteristic_charge = [0, 6.9, 2.3, 5.8]
    binding_model.steric_factor = [0, 10, 10.6, 11.83]
    binding_model.capacity = 1.2e3

    column = create_column(component_system)
    pipe5, pipe6, pipe7 = create_pipes(component_system)

    column.binding_model = binding_model

    # Initial state: no protein anywhere.
    # Fix: the bound salt previously started at the mobile-phase value (50).
    # For SMA the bound salt of an unloaded resin equals the ionic capacity,
    # which is how the CADET-Process SMA examples initialise `q`.
    column.q = [binding_model.capacity, 0, 0, 0]
    column.c = [c_salt_initial, 0, 0, 0]
    for pipe in (pipe5, pipe6, pipe7):
        pipe.c = [c_salt_initial, 0, 0, 0]

    volumetric_flow_rate = 1.67e-8

    inlet = Inlet(component_system, name='inlet')
    inlet.flow_rate = volumetric_flow_rate

    outlet = Outlet(component_system, name='outlet')

    # Flow Sheet
    flow_sheet = FlowSheet(component_system)

    flow_sheet.add_unit(inlet, feed_inlet=True)
    for unit in (pipe5, column, pipe6, pipe7):
        flow_sheet.add_unit(unit)
    flow_sheet.add_unit(outlet, product_outlet=True)

    # Connect every unit to its successor in flow order.
    unit_chain = (inlet, pipe5, column, pipe6, pipe7, outlet)
    for upstream, downstream in zip(unit_chain, unit_chain[1:]):
        flow_sheet.add_connection(upstream, downstream)

    return flow_sheet


def create_breakthrough_process(component_system):
    """Build the breakthrough process.

    A single ``'load'`` event at time 0 sets the inlet concentration to
    ``[50, 1, 1, 1]``; it is the only event, so this feed applies for the
    whole cycle.

    Parameters
    ----------
    component_system
        Component system passed on to ``create_flow_sheet``.

    Returns
    -------
    Process
        Process named ``'breakthrough'`` with a cycle time of ``60 * 60``.
    """
    breakthrough = Process(create_flow_sheet(component_system), 'breakthrough')
    breakthrough.cycle_time = 60 * 60

    salt_concentration = 50
    feed = [salt_concentration, 1, 1, 1]

    breakthrough.add_event('load', 'flow_sheet.inlet.c', feed, 0)
    return breakthrough

def create_gradient_process(gradient_length_in_cv, component_system):
    """Build a load / wash / linear salt gradient process.

    Parameters
    ----------
    gradient_length_in_cv
        Gradient length; multiplied by 60 to obtain the gradient duration.
        Must be non-zero because the gradient slope divides by the duration.
    component_system
        Component system passed on to ``create_flow_sheet``.

    Returns
    -------
    Process
        Process named ``f"gradient {gradient_length_in_cv}"`` with four inlet
        events (``load``, ``wash``, ``grad1_start``, ``grad1_end``) and a
        duration ``grad1_duration``.
    """
    # Salt levels.
    salt_load = 50
    salt_gradient_start = 80
    salt_gradient_end = 500

    # Timeline.
    wash_time = 10
    gradient_start_time = 90
    gradient_duration = gradient_length_in_cv * 60
    gradient_end_time = gradient_start_time + gradient_duration
    salt_slope = (salt_gradient_end - salt_gradient_start) / gradient_duration

    gradient_process = Process(
        create_flow_sheet(component_system),
        f"gradient {gradient_length_in_cv}",
    )
    gradient_process.cycle_time = gradient_duration + 600

    # One coefficient row per component: salt gets a constant and a linear
    # term, the remaining components are held at zero during the gradient.
    gradient_coefficients = [
        [salt_gradient_start, salt_slope, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
    ]

    # Registered on the process but not referenced by the events below.
    gradient_process.add_duration("grad1_duration", gradient_duration)

    # Inlet steps as (event name, inlet state, event time).
    inlet_steps = (
        ('load', [salt_load, 1, 1, 1], 0),
        ('wash', [salt_load, 0, 0, 0], wash_time),
        ('grad1_start', gradient_coefficients, gradient_start_time),
        ('grad1_end', [salt_gradient_end, 0, 0, 0], gradient_end_time),
    )
    for event_name, inlet_state, event_time in inlet_steps:
        gradient_process.add_event(
            event_name, 'flow_sheet.inlet.c', inlet_state, time=event_time
        )

    return gradient_process


# Salt first, followed by the three proteins.
component_system = ComponentSystem(["Salt", "A", "B", "C"])

# One breakthrough experiment and two gradient experiments (15 and 120).
process_bt = create_breakthrough_process(component_system)
process_15cv, process_120cv = (
    create_gradient_process(gradient_length, component_system)
    for gradient_length in (15, 120)
)

simulator = Cadet()
simulator.time_resolution = 0.5

# for process in [process_bt, process_15cv, process_120cv]:
#     simulation_results = simulator.simulate(process)
#     simulation_results.solution.outlet.outlet.plot()
#     print(simulation_results.time_elapsed)

In [None]:
import pandas as pd
# Measurement tables; `index_col=0` makes the first spreadsheet column the index.
data_bt = pd.read_excel("experimental_data/breakthrough.xlsx", index_col=0)
data_15cv = pd.read_excel("experimental_data/15cv_gradient.xlsx", index_col=0)
data_120cv = pd.read_excel("experimental_data/120cv_gradient.xlsx", index_col=0)

# data_bt.describe()
from CADETProcess.reference import ReferenceIO


def _as_reference(label, frame):
    """Wrap *frame* in a ReferenceIO called *label*.

    The frame's index is passed as the second argument and the whole frame
    as the third, together with the module-level component system.
    """
    return ReferenceIO(label, frame.index, frame, component_system=component_system)


reference_bt = _as_reference('experiment bt', data_bt)
reference_15cv = _as_reference('experiment 15cv', data_15cv)
reference_120cv = _as_reference('experiment 120cv', data_120cv)

# Only the breakthrough and the 15 CV references are plotted here.
reference_bt.plot()
reference_15cv.plot()

In [None]:
from CADETProcess.comparison import Comparator
from CADETProcess.optimization import OptimizationProblem
optimization_problem = OptimizationProblem('SMA binding')
optimization_problem.add_evaluator(simulator)

# Each experiment gets its own comparator and its own objective, restricted
# to the matching process via `evaluation_objects`.
experiments = (
    (process_bt, reference_bt),
    (process_15cv, reference_15cv),
    (process_120cv, reference_120cv),
)
for process, reference in experiments:
    comparator = Comparator(name=reference.name)
    comparator.add_reference(reference)

    # One Shape metric per protein, each compared at the outlet of pipe6.
    for protein in ("A", "B", "C"):
        comparator.add_difference_metric(
            'Shape', reference, 'pipe6.outlet', components=[protein]
        )

    optimization_problem.add_evaluation_object(process)
    optimization_problem.add_objective(
        comparator,
        name=f"Objective {reference.name} for {process.name}",
        n_objectives=comparator.n_metrics,
        evaluation_objects=[process],
        requires=[simulator],
    )

    # Simulate with the initial parameters and plot against the reference.
    simulation_results = simulator.simulate(process)
    comparator.plot_comparison(simulation_results)

In [None]:
# Three variables per protein (component indices 1-3; index 0 is salt).
for component_idx in [1, 2, 3]:
    variable_specs = (
        # As kD = 1, kEq = kA / kD = kA
        dict(
            name=f'equilibrium_constant_{component_idx}',
            parameter_path='flow_sheet.column.binding_model.adsorption_rate',
            lb=1e-7, ub=1,
            transform='auto',
        ),
        dict(
            name=f'characteristic_charge_{component_idx}',
            parameter_path='flow_sheet.column.binding_model.characteristic_charge',
            lb=1, ub=10,
            transform='linear',
        ),
        dict(
            name=f'steric_factor_{component_idx}',
            parameter_path='flow_sheet.column.binding_model.steric_factor',
            lb=1, ub=30,
            transform='auto',
        ),
    )
    # `indices` selects this component's entry of the parameter vector.
    for spec in variable_specs:
        optimization_problem.add_variable(indices=[component_idx], **spec)

In [None]:
# comparator.evaluate(simulation_results)

In [None]:
# _ = comparator.plot_comparison(simulation_results)

```{note}
It's also possible to add multiple references, e.g. for triplicate experiments or for different sensors.
```

In [None]:
from CADETProcess.optimization import U_NSGA3
# Optimizer for the SMA estimation: population of 16, at most 6 generations,
# evaluated on 8 cores.
optimizer = U_NSGA3()
optimizer.pop_size = 16
optimizer.n_max_gen = 6
optimizer.n_cores = 8

# optimization_results = optimizer.optimize(
#     optimization_problem,
#     use_checkpoint=False
# )