# Script to validate MCAC after major modifications

## Table of Contents
1. [Introduction](#Introduction)
2. [Parameters](#Parameters)
3. [Read and compute](#Read_and_compute)
4. [Results](#Results)
    1. [Residence time](#Residence_time)
    2. [Volume fraction](#Volume_fraction)
    3. [Diffusion coefficient](#Diffusion_coefficient)
    4. [Particle number concentration](#Particle_number_concentration)
    5. [Gas Knudsen number](#Knudsen)
    6. [Radius of gyration](#gyration)
    7. [Aggregate polydispersity](#polydispersity)
    8. [Total energy](#energy)

## Introduction <a class="anchor" id="Introduction"></a>

This Python script aims at validating MCAC by comparing it with a "well known" case already simulated with a previous version of the code (published results). That case is referred to as "ref_case" and is compared with the new simulation, "new_case".

In this particular code only coagulation takes place, without other mechanisms of particle growth or nucleation. The aspects of the code to be validated are: the mobility (diffusion coefficient D), the kinetics of aggregation (number of clusters: Nc, the size of the box: L_box, and the particle number concentration: N), the flow regime (gas Knudsen number), the morphology (Rg vs. Np, fractal dimension and prefactor), and the aggregate polydispersity (GSD of Dv). Finally, the conservation of energy (only kinetic, since interaction potentials are not simulated) is tested.

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import xarray as xr
import dask
import dask.array as da
from sklearn.linear_model import LinearRegression
from IPython.display import display

from pymcac import dask_distribute
from pymcac import JupyterDaskDistribute
from pymcac import MCAC
from pymcac import groupby_agg
from pymcac import progress_compute
from pymcac import mobility_diameter
from pymcac.tools.core.groupby import groupby_aggregate

In [None]:
def add_missing_quantities(xAggregates, xSpheres):
    """Attach derived per-aggregate quantities to ``xAggregates``.

    The variables ``log_Rv``, ``Ec``, ``Dm``, ``Kn`` and ``DgOverDp`` are
    written into ``xAggregates`` itself (the input object is modified).

    The notebook-level constants ``rhop_0``, ``A1``, ``A2``, ``A3``,
    ``lambda_g`` and ``mu_g`` are read from the enclosing scope.

    Parameters
    ----------
    xAggregates
        Aggregate data providing ``Volume``, ``lpm``, ``Deltat``, ``f_agg``,
        ``Rg``, the index arrays ``kTime`` / ``kLabel`` and a ``k`` chunking.
    xSpheres
        Sphere data providing ``Radius``; grouped by ``Time`` and ``Label``.

    Returns
    -------
    tuple
        ``(xAggregates, xSpheres)``; ``xSpheres`` is returned untouched.
    """
    volume = xAggregates["Volume"]

    # One third of log(6 V / pi), i.e. the log of (6 V / pi)^(1/3).
    # NOTE(review): (6 V / pi)^(1/3) is the volume-equivalent *diameter*,
    # although the variable is named "Rv" -- confirm the intended quantity.
    xAggregates["log_Rv"] = np.log((6 * volume / np.pi)) / 3

    # Kinetic energy: 1/2 * (Volume * rhop_0) * (lpm / Deltat)^2
    speed = xAggregates["lpm"] / xAggregates["Deltat"]
    xAggregates["Ec"] = 0.5 * volume * rhop_0 * speed ** 2

    # Mobility diameter from the friction coefficient, then the Knudsen number
    xAggregates["Dm"] = mobility_diameter(
        xAggregates["f_agg"],
        A1=A1, A2=A2, A3=A3,
        lambda_g=lambda_g, mu_g=mu_g,
    )
    xAggregates["Kn"] = 2 * lambda_g / xAggregates["Dm"]

    # Mean sphere radius for each (Time, Label) group.
    # groupby_aggregate(xSpheres, xAggregates, np.mean, ["Radius"],
    #                   {"MeanRp": np.float64}) does the same job, only slower.
    mean_radius = groupby_agg(
        xSpheres,
        by=["Time", "Label"],
        agg=[("Radius", "mean", "Radius")],
        index_arrays=[xAggregates.kTime, xAggregates.kLabel],
    )
    # Re-chunk so that the result lines up with the chunks of xAggregates
    mean_radius = mean_radius.chunk({"k": xAggregates.chunks["k"]})

    # Dividing by the raw .data avoids triggering a computation caused by
    # possibly misaligned coordinates (groupby_agg sorts by default, so the
    # coordinate alignment can be skipped).
    xAggregates["DgOverDp"] = xAggregates["Rg"] / mean_radius["Radius"].data

    return xAggregates, xSpheres

In [None]:
def time_averaged_quantities(xAggregates, xSpheres):
    """Reduce the per-aggregate data to one value per time step.

    Aggregates are grouped by ``Time``; the reduced dataset is then completed
    with quantities derived from those reductions and from the sphere data.
    The notebook-level constants ``k_B``, ``T_g``, ``A1``, ``A2``, ``A3``,
    ``lambda_g`` and ``mu_g`` are read from the enclosing scope.

    Parameters
    ----------
    xAggregates
        Aggregate data, including the variables added by
        ``add_missing_quantities`` (``DgOverDp``, ``Dm``, ``Kn``, ``Ec``,
        ``log_Rv``) and ``BoxVolume``.
    xSpheres
        Sphere data providing ``Radius`` and ``Time``.

    Returns
    -------
    The per-time-step dataset, with ``nLabel`` demoted from coordinate to
    regular variable.
    """
    # Each entry is (output name, reduction, source column)
    reductions = [
        ("f_agg", "mean", "f_agg"),
        ("Np", "mean", "Np"),
        ("Rg", "mean", "Rg"),
        ("DgOverDp", "mean", "DgOverDp"),
        ("Dm", "mean", "Dm"),
        ("Kn", "mean", "Kn"),
        ("Nc", "size", "Np"),
        ("Ec", "sum", "Ec"),
        ("fv", "sum", "Volume"),
        ("Rv_geo", "mean", "log_Rv"),
        ("sv_geo", "std", "log_Rv"),
    ]
    averaged = groupby_agg(
        xAggregates,
        by="Time",
        index_arrays=xAggregates.Time,
        agg=reductions,
    )

    # Single chunk along Time for the box volume
    box_volume = xAggregates.BoxVolume.chunk({"Time": -1})

    # Summed aggregate volume -> volume fraction
    averaged["fv"] /= box_volume
    # Mean / std of the logarithm -> geometric mean / geometric std
    averaged["Rv_geo"] = np.exp(averaged["Rv_geo"])
    averaged["sv_geo"] = np.exp(averaged["sv_geo"])

    averaged["Nc density"] = averaged["Nc"] / box_volume
    averaged["Diffusion"] = k_B * T_g / averaged["f_agg"]

    # Mobility diameter and Knudsen number of the *mean* friction coefficient
    # (as opposed to "Dm" / "Kn", which are means of per-aggregate values)
    averaged["Dm_avg"] = mobility_diameter(
        averaged["f_agg"],
        A1=A1, A2=A2, A3=A3,
        lambda_g=lambda_g, mu_g=mu_g,
    )
    averaged["Kn_avg"] = 2 * lambda_g / averaged["Dm_avg"]

    # Mean sphere radius per time step
    mean_radius = groupby_agg(
        xSpheres,
        by="Time",
        agg=[("Radius", "mean", "Radius")],
        index_arrays=xSpheres.Time,
    )
    # Re-chunk so that the result lines up with the chunks of xAggregates
    mean_radius = mean_radius.chunk({"Time": xAggregates.chunks["Time"]})

    # Dividing by the raw .data avoids triggering a computation caused by
    # possibly misaligned coordinates (groupby_agg sorts by default, so the
    # coordinate alignment can be skipped).
    # NOTE(review): add_missing_quantities selects ["Radius"] before taking
    # .data, whereas .data is taken directly here -- confirm what groupby_agg
    # returns when `by` is a single string.
    averaged["DgOverDp_avg"] = averaged["Rg"] / mean_radius.data

    averaged = averaged.reset_coords("nLabel")

    return averaged

In [None]:
def read(path, tmax=None, nt=None):
    """Open a MCAC result folder and build its per-time-step dataset.

    Parameters
    ----------
    path
        Folder holding all the .h5 and .xmf files of one simulation.
    tmax, nt
        Forwarded unchanged to ``get_xspheres`` and ``get_xaggregates``.

    Returns
    -------
    tuple
        ``(simulation, time_averaged)``: the ``MCAC`` object and the result
        of ``time_averaged_quantities``.
    """
    simulation = MCAC(path)

    # Same time selection for spheres and aggregates
    selection = {"tmax": tmax, "nt": nt}
    spheres = simulation.get_xspheres(spheres_cols, **selection)
    aggregates = simulation.get_xaggregates(aggregates_cols, **selection)

    # Per-aggregate quantities first, then the per-time-step reduction
    aggregates, spheres = add_missing_quantities(aggregates, spheres)
    return simulation, time_averaged_quantities(aggregates, spheres)

In [None]:
def plot_compare(ds1, ds2, x, y, xlabel=None, ylabel=None, **kwargs):
    """Plot the same variable(s) of two datasets on common axes.

    ``ds1`` is drawn with solid lines and labelled "New", ``ds2`` with dashed
    lines and labelled "Ref".

    Parameters
    ----------
    ds1, ds2 : xr.Dataset or xr.DataArray
        Data to compare; DataArrays are converted with ``to_dataset()``.
    x : str or None
        Name of the abscissa. ``None`` selects the first coordinate of ``ds1``.
    y : str or iterable of str
        Name(s) of the variable(s) to plot.
    xlabel, ylabel : str, optional
        Axis labels; left as set by pandas when ``None``.
    **kwargs
        Forwarded to ``DataFrame.plot`` (e.g. ``loglog=True``). When ``ax`` is
        given, the curves, labels and legend are drawn on it and no new figure
        is created.
    """
    # Open a figure only when the caller did not supply axes; otherwise an
    # empty figure would be created and shown alongside the caller's one.
    if kwargs.get("ax") is None:
        fig = plt.figure(figsize=(10, 6), dpi=80, facecolor='w', edgecolor='k')
        fig.set_tight_layout(False)
        kwargs["ax"] = plt.gca()
    ax = kwargs["ax"]

    if x is None:
        x = next(iter(ds1.coords))

    # Accept a single name as well as any iterable of names
    y = [y] if isinstance(y, str) else list(y)

    if isinstance(ds1, xr.DataArray):
        ds1 = ds1.to_dataset()
    if isinstance(ds2, xr.DataArray):
        ds2 = ds2.to_dataset()

    df1 = ds1[[x] + y].to_dataframe()
    df2 = ds2[[x] + y].to_dataframe()

    # x ends up as a column when it is a data variable rather than the index;
    # each frame is checked on its own so that one cannot break the other.
    if x in df1.columns:
        df1 = df1.set_index(x)
    if x in df2.columns:
        df2 = df2.set_index(x)
    df1 = df1.sort_index()
    df2 = df2.sort_index()

    df1.plot(**kwargs, style="-")
    df2.plot(**kwargs, style="--")

    # Label the axes that were actually drawn on, not whatever pyplot
    # currently considers the "current" axes.
    if xlabel is not None:
        ax.set_xlabel(xlabel)
    if ylabel is not None:
        ax.set_ylabel(ylabel)

    ax.legend([f"New {varname}" for varname in y] + [f"Ref {varname}" for varname in y])
    plt.show()

## Parameters <a class="anchor" id="Parameters"></a>

Check these parameters before running the script!

In [None]:
# --- Gas and particle properties ---
T_g = 1_700  # gas temperature (K)
P_g = 101_300  # gas pressure (Pa)
rhop_0 = 1_800  # bulk density of the particle material (kg/m^3)
k_B = 1.38066E-23  # Boltzmann constant (J/K)

# Gas mean free path (m): 66.5 nm at 101 300 Pa and 293.15 K, rescaled to
# (P_g, T_g) with a Sutherland-type temperature correction.
lambda_g = (
    66.5E-9
    * (101_300 / P_g)
    * (T_g / 293.15)
    * (1 + 110 / 293.15)
    / (1 + 110 / T_g)
)
# Gas dynamic viscosity (Pa*s): 18.203e-6 at 293.15 K, rescaled to T_g.
mu_g = 18.203E-6 * (293.15 + 110) / (T_g + 110) * (T_g / 293.15) ** 1.5

# --- Constants of the Cunningham slip correction factor (Cc) ---
A1, A2, A3 = 1.142, 0.558, 0.999

# --- Columns to load from the result files ---
spheres_cols = ["Radius", "Label"]
aggregates_cols = ["Volume", "lpm", "Deltat", "f_agg", "Rg", "Np", "BoxVolume"]

# --- Cases to compare: reference (published) vs. new run ---
polydisperse = False
if not polydisperse:
    # Alternative reference on the shared storage:
    # Path("/stockage/samba/Partages/public/MCAC_validation/02_VARYING_DP/02p3_Dp10nm_np0_3200/run1")
    reference_path = Path("monodisperse_data_ref")
    result_path = Path("monodisperse_data")
else:
    reference_path = Path("/stockage/samba/Partages/public/MCAC_validation/03_VARYING_PP_polyd/03p1_SIGMAp_1p25/run1")
    result_path = Path("polydisperse_data")

In [None]:
# distribute = JupyterDaskDistribute()
# distribute.start()

## Read and compute  <a class="anchor" id="Read_and_compute"></a>

You can look at the dashboard to follow the progress in more detail.

In [None]:
# Both cases are read and computed inside the dask_distribute context;
# `c` is the object yielded by that context manager and is displayed first.
with dask_distribute(report=True) as c:
    display(c)
    print("Reading new")
    # nt=200 is forwarded by read() to the MCAC readers
    newSimulation, new_time_averaged = read(result_path, nt=200)

    print("Reading reference")
    refSimulation, ref_time_averaged = read(reference_path, nt=200)

    print("Start compute")
    # Both datasets are handed over in a single call and the names are
    # rebound to what progress_compute returns.
    new_time_averaged, ref_time_averaged = progress_compute(new_time_averaged, ref_time_averaged)
print("Done")

## Results <a class="anchor" id="Results"></a>

In [None]:
# Notebook display of the per-time-step dataset of the new case
new_time_averaged

In [None]:
# Notebook display of the per-time-step dataset of the reference case
ref_time_averaged

### Residence time <a class="anchor" id="Residence_time"></a>

In [None]:
# Span between the first and the last stored time, scaled by 1e3 for the
# "(ms)" read-out, followed by the number of stored time steps.
ref_span_ms = float(ref_time_averaged.Time[-1] - ref_time_averaged.Time[0]) * 1e3
print(f" Ref total residence time: {ref_span_ms} (ms) ({ref_time_averaged.Time.size} it)")

new_span_ms = float(new_time_averaged.Time[-1] - new_time_averaged.Time[0]) * 1e3
print(f" New total residence time: {new_span_ms} (ms) ({new_time_averaged.Time.size} it)")

### Volume fraction <a class="anchor" id="Volume_fraction"></a>

In [None]:
# Volume fraction at the first stored time step, reported in ppm
ref_fv_first = float(ref_time_averaged.fv[0])
new_fv_first = float(new_time_averaged.fv[0])
print(f" ref volume fraction = {ref_fv_first * 1e+06} ppm")
print(f" new volume fraction = {new_fv_first * 1e+06} ppm")

# Time evolution of the volume fraction (ppm); x=None selects the first
# coordinate of the new-case data as abscissa.
new_fv_ppm = new_time_averaged.fv * 1e+06
ref_fv_ppm = ref_time_averaged.fv * 1e+06
plot_compare(
    new_fv_ppm,
    ref_fv_ppm,
    x=None,
    xlabel="Time (s)",
    y="fv",
    ylabel="volume fraction (ppm)",
    loglog=False,
)

### Diffusion coefficient <a class="anchor" id="Diffusion_coefficient"></a>

In [None]:
# Comparing the diffusion coefficients allows checking the temperature, the
# friction coefficient and the fluid viscosity.
# Be careful: f_agg depends on the fractal dimension!
plot_compare(new_time_averaged, ref_time_averaged,
             x="Np", xlabel="Number of monomers per aggregate (mean)",
             y="Diffusion", ylabel="Diffusion coefficient (m^2/s)",
             loglog=True)

### Particle number concentration <a class="anchor" id="Particle_number_concentration"></a>

In [None]:
# Optional extra plots, kept for reference:
# plot_compare(new_time_averaged, ref_time_averaged,
#              x=None, xlabel="Time (s)",
#              y="Nc", ylabel="Number of aggregate (-)",
#              loglog=True)
# NOTE(review): "BoxSize" does not appear among the variables built by
# time_averaged_quantities -- add it there before enabling this plot.
# plot_compare(new_time_averaged, ref_time_averaged,
#              x=None, xlabel="Time (s)",
#              y="BoxSize", ylabel="Box size (m)",
#              loglog=True)

# "Nc density" is Nc / BoxVolume, i.e. a number per unit volume and not a
# dimensionless quantity, hence the 1/m^3 label (BoxVolume shares the units
# of Volume, which is combined with SI constants elsewhere in this notebook).
plot_compare(new_time_averaged, ref_time_averaged,
             x=None, xlabel="Time (s)",
             y="Nc density", ylabel="Particle number concentration (1/m^3)",
             loglog=True)

# Number of aggregates present at the first stored time step of each case
print(f"Ref case initialized with {int(ref_time_averaged.Nc[0])} monomers")
print(f"New case initialized with {int(new_time_averaged.Nc[0])} monomers")

### Gas Knudsen number <a class="anchor" id="Knudsen"></a>

In [None]:
# "Kn" is the mean of the per-aggregate Knudsen numbers, whereas "Kn_avg" is
# the Knudsen number built from the mobility diameter of the mean friction
# coefficient (see time_averaged_quantities).
plot_compare(new_time_averaged, ref_time_averaged,
             x=None, xlabel="Time (s)",
             y=["Kn", "Kn_avg"], ylabel="Gas Knudsen number",
             loglog=True)

### Radius of gyration <a class="anchor" id="gyration"></a>

In [None]:
# Allows checking that the role played by the monomer mass is correctly
# considered for each agglomerate. Also, according to Np and the fractal
# dimension, both cases should have comparable values.
plot_compare(new_time_averaged, ref_time_averaged,
             x="Np", xlabel="Number of monomers per aggregate (mean)",
             y="Rg", ylabel="Radius of gyration (m)",
             loglog=True)

In [None]:
# "DgOverDp" is the per-time-step mean of the per-aggregate ratio
# Rg / (mean sphere radius of the aggregate); "DgOverDp_avg" is the ratio of
# the mean Rg to the mean sphere radius of the whole time step.
plot_compare(new_time_averaged, ref_time_averaged,
             x="Np", xlabel="Number of monomers per aggregate (mean)",
             y=["DgOverDp", "DgOverDp_avg"], ylabel="DgOverDp",
             loglog=True)

In [None]:
# Population-based fractal dimension and prefactor, obtained from a linear
# fit of log(Np) against log(DgOverDp_avg): the slope is the exponent and
# exp(intercept) the prefactor.
# Be careful: these parameters vary between individual simulations.

model = LinearRegression()

# Reference case
ref_log_size = np.log(ref_time_averaged.DgOverDp_avg).values[:, np.newaxis]
ref_log_np = np.log(ref_time_averaged.Np)
model.fit(ref_log_size, ref_log_np)
ref_prefactor = np.exp(model.intercept_)
ref_exponent = model.coef_[0]
print(f"Ref fractal Law: {ref_prefactor} x^{ref_exponent}")

# New case (the same estimator object is simply fitted again)
new_log_size = np.log(new_time_averaged.DgOverDp_avg).values[:, np.newaxis]
new_log_np = np.log(new_time_averaged.Np)
model.fit(new_log_size, new_log_np)
new_prefactor = np.exp(model.intercept_)
new_exponent = model.coef_[0]
print(f"New fractal Law: {new_prefactor} x^{new_exponent}")

### Aggregate polydispersity  <a class="anchor" id="polydispersity"></a>

In [None]:
# "Rv_geo" is exp(mean(log((6 V / pi)) / 3)), i.e. the geometric mean of
# (6 V / pi)^(1/3), which is the volume-equivalent *diameter*. It is not
# rescaled before plotting, so it carries the length unit of the simulation
# data (metres, like the radius of gyration), not nanometres.
plot_compare(new_time_averaged, ref_time_averaged,
             x="Np", xlabel="Number of monomers per aggregate (mean)",
             y="Rv_geo", ylabel="Geometric mean vol-eq. diameter, D_v (m)",
             loglog=False)

# Raw string: "\s" is not a valid escape sequence in a normal string literal
# and triggers a warning on recent Python versions. The label text itself is
# unchanged.
plot_compare(new_time_averaged, ref_time_averaged,
             x="Np", xlabel="Number of monomers per aggregate (mean)",
             y="sv_geo", ylabel=r"Geometric standard deviation, $\sigma_{g,rv}$ (-)",
             loglog=False)

### Total energy  <a class="anchor" id="energy"></a>

The total kinetic energy of the system of $N$ particles is, $$K=\sum_i^{N} \frac{1}{2}m_ic_i^2$$
where $m_i$ and $c_i=\frac{lpm_i}{\Delta t_i}$ are the mass and averaged velocity of the $i^{th}$ particle, respectively.

From the kinetic theory of gases we have, $$\frac{1}{2}m_ic_i^2=\frac{3}{2}k_BT$$

Therefore, the total energy is related to the temperature as, $$E_{tot}=N\frac{3}{2}k_BT=K$$

In [None]:
# Mean kinetic energy per aggregate: summed kinetic energy of a time step
# divided by the number of aggregates at that time step.
energy_key = "Kinetic energy per aggregate"
new_time_averaged[energy_key] = new_time_averaged.Ec / new_time_averaged.Nc
ref_time_averaged[energy_key] = ref_time_averaged.Ec / ref_time_averaged.Nc

ref_energy = ref_time_averaged[energy_key]
new_energy = new_time_averaged[energy_key]

print(f"    kb * T = {k_B * T_g}")
# First stored time step of each case ...
print(f" ref Ec/Nc = {float(ref_energy[0])}")
print(f" new Ec/Nc = {float(new_energy[0])}")
# ... then the last stored time step of each case
print(f" ref Ec/Nc = {float(ref_energy[-1])}")
print(f" new Ec/Nc = {float(new_energy[-1])}")

plot_compare(
    new_time_averaged,
    ref_time_averaged,
    x="Np",
    xlabel="Number of monomers per aggregate (mean)",
    y=energy_key,
    ylabel=energy_key,
    loglog=False,
)