# Dusty box analysis

This notebook contains the analysis of the dusty box test for multigrain dust.

## Imports

In [None]:
import importlib
import pathlib
from pathlib import Path
from typing import Any, Dict, List, Tuple

In [None]:
import numpy as np
import pandas as pd
import plonk
from bokeh.io import output_notebook, show
from bokeh.layouts import gridplot
from bokeh.models import ColumnDataSource
from bokeh.palettes import Category10
from bokeh.plotting import figure
# Configure Bokeh so that show() renders figures inline in the notebook.
output_notebook()

In [None]:
import multigrain

## Path to data

In [None]:
# Every entry of the root directory whose name starts with a lowercase
# letter is one experiment; everything inside it is collected, sorted, as
# that experiment's list of paths.
root_directory = pathlib.Path('~/runs/multigrain/dustybox').expanduser()
paths_dict = {
    d.name: sorted(d.glob('*'))
    for d in root_directory.glob('[a-z]*')
}

## Perform analysis

Define a function that runs the analysis on a set of simulations.

In [None]:
def analysis(paths: List) -> Dict[str, Any]:
    """Run the dusty box analysis on each simulation directory.

    Parameters
    ----------
    paths
        Directories to load; each is passed to ``plonk.load_sim`` with
        the 'dustybox' prefix.

    Returns
    -------
    dict
        Maps each simulation's directory name to whatever
        ``multigrain.dustybox.generate_results`` returns for it.
        Directories for which loading or analysis raises ``ValueError``
        are reported and left out.
    """
    dataframes = {}

    for p in paths:
        try:
            sim = plonk.load_sim(prefix='dustybox', directory=p)
            name = sim.paths['directory'].name
            print(f'Running analysis on {name}')
            dataframes[name] = multigrain.dustybox.generate_results(sim)
            # Drop the reference before the next simulation is loaded.
            del sim
        except ValueError as err:
            # Still best-effort, but say what was skipped so that a missing
            # entry in the results can be traced back to its directory.
            print(f'Skipping {p}: {err}')

    return dataframes

Run analysis on simulations.

In [None]:
# Analyse one experiment at a time, announcing each before its simulations
# are processed; results are keyed by experiment name.
experiments = {}
for name in paths_dict:
    print(f'--- Experiment: {name} ---')
    paths = paths_dict[name]
    experiments[name] = analysis(paths)

## Plot results

Define a function to plot the results from one simulation.

In [None]:
def plot(name, df):
    """Plot one simulation.

    Parameters
    ----------
    name
        Used as the figure title.
    df
        Table with a 'time' column plus columns whose names start with
        'data', 'exact1' and 'exact2'.

    Returns
    -------
    The figure: 'exact1' columns as solid lines, 'exact2' columns as
    dashed lines and 'data' columns as open markers, all against 'time'.
    """
    y_data = [df[col] for col in df.columns if col.startswith('data')]
    y_exact1 = [df[col] for col in df.columns if col.startswith('exact1')]
    y_exact2 = [df[col] for col in df.columns if col.startswith('exact2')]
    # Every series shares the same time axis.
    x = [df['time'] for _ in y_data]

    # One colour per series rather than a fixed five: a fixed-length palette
    # does not match the number of lines when the series count differs, and
    # zip() below would drop any series past the end of it. Colours repeat
    # if there are more series than the palette holds.
    base_colors = Category10[10]
    palette = [base_colors[i % len(base_colors)] for i in range(len(y_data))]

    fig = figure(title=name)
    fig.multi_line(x, y_exact1, line_dash='solid', line_color=palette, line_width=2)
    fig.multi_line(x, y_exact2, line_dash=[10, 10], line_color=palette, line_width=2)
    for xx, yy, color in zip(x, y_data, palette):
        fig.scatter(xx, yy, line_color=color, fill_color=None, size=6)

    return fig

## Time evolution

In [None]:
# One figure per 'time_evolution' simulation, laid out on a 3-column grid.
ncols = 3
figs = [plot(name, df) for name, df in experiments['time_evolution'].items()]

p = gridplot(figs, ncols=ncols, sizing_mode='stretch_width', plot_height=300)
show(p)

## Accuracy

In [None]:
# One figure per 'accuracy' simulation, laid out on a 2-column grid.
ncols = 2
figs = [plot(name, df) for name, df in experiments['accuracy'].items()]

p = gridplot(figs, ncols=ncols, sizing_mode='stretch_width', plot_height=300)
show(p)

In [None]:
def calculate_errors(experiments):
    """Tabulate the error of each simulation in the 'accuracy' experiment.

    Parameters
    ----------
    experiments
        Mapping with an 'accuracy' entry, itself a mapping from simulation
        name to a table holding 'data.<id>' and 'exact1.<id>' columns.
        The characters of each name from the ninth onwards must parse as
        a float; that number becomes the row's 'C_force' value.

    Returns
    -------
    pandas.DataFrame
        One row per simulation, with float columns 'C_force' and 'error'.
    """

    def calculate_error(df):
        """Add the per-species mean deviations from 'exact1' in quadrature."""
        total = 0.0
        # Walk the 'data.<id>' columns that are actually present instead of
        # deriving ids from the column count: range(1, n) visits only n - 1
        # ids, so one species was always left out of the sum.
        for col in df.columns:
            if not col.startswith('data.'):
                continue
            species = col[len('data.'):]
            # NOTE(review): this squares the mean of the signed differences,
            # so deviations of opposite sign cancel within a species.
            # Confirm that is intended rather than a mean of squares.
            total += (df[col] - df[f'exact1.{species}']).mean() ** 2

        return np.sqrt(total)

    C_force = []
    errors = []
    for name, df in experiments['accuracy'].items():
        C_force.append(float(name[8:]))
        errors.append(calculate_error(df))

    return pd.DataFrame({'C_force': C_force, 'error': errors}, dtype=float)

In [None]:
# Table of 'C_force' against 'error', one row per 'accuracy' simulation.
error = calculate_errors(experiments)

In [None]:
# Error against C_force on log-log axes, drawn as a line with a marker at
# each point.
source = ColumnDataSource(error)
p = figure(
    x_axis_type='log',
    y_axis_type='log',
    x_axis_label='C_force',
    y_axis_label='Error',
)
p.line(x='C_force', y='error', source=source)
p.scatter(x='C_force', y='error', source=source)
show(p)