# Figure 4
Particle-size distributions. These are created using the *faamasd* library, which was developed for this paper. Currently, it can be installed with pip from GitHub by following the instructions in the README.md file.

In [1]:
# Standard Library
import os
import re  # regex (not sure why)
from glob import glob
import datetime as dt
#import cmath

# Others
import pandas as pd
import numpy as np
import xarray as xr

import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import matplotlib.dates as mdates
import matplotlib.patches as mpatches
import matplotlib.colors as mcolors
from matplotlib.collections import LineCollection
import matplotlib as mpl

import warnings
warnings.simplefilter("ignore", category=FutureWarning) # for FutureWarning only

import faamasd as asd

In [2]:
# Root directory of the probe-calibration inputs. Keep the trailing slash:
# the input paths defined below are built from this prefix.
base = '/home/users/erinraif/mphase_data/probe_calibration/'

In [3]:
# Scattering inputs for each probe, taken from the faamasd module
pcasp_scattering_inputs, cdp_scattering_inputs = (
    asd.pcasp_scattering_inputs, asd.cdp_scattering_inputs)

# Locations of the calibration inputs underneath `base`
mie_scattering_folder = os.path.join(base, 'scattering_files')
cdp_channel_data_folder = os.path.join(base, 'channel_data_CDP')
pcasp_channel_data_folder = os.path.join(base, 'channel_data_PCASP')
# Set attribute_file to None if the file is not present
attribute_file = os.path.join(base, 'attributes_for_calibration_arrays.txt')

In [4]:
# Build one calibration dataset per probe from its channel-data folder,
# its scattering inputs and the shared attribute file.
pcasp_calibration = asd.produce_calibration_dataset(
    pcasp_channel_data_folder, pcasp_scattering_inputs, attribute_file)
cdp_calibration = asd.produce_calibration_dataset(
    cdp_channel_data_folder, cdp_scattering_inputs, attribute_file)

In [5]:
# Both probes are evaluated at the same (complex) refractive index
REFRACTIVE_INDEX = complex(1.56, 0)
pcasp_cal_at_ri = pcasp_calibration.sel(refractive_index=REFRACTIVE_INDEX)
cdp_cal_at_ri = cdp_calibration.sel(refractive_index=REFRACTIVE_INDEX)


In [6]:
# Filter-run metadata, indexed by each run's unique ID
timings_csv = '/home/users/erinraif/mphase_data/metadata/M-Phase_filter_start_stops.csv'
timings_data = pd.read_csv(timings_csv, index_col='unique_ID')
# Optional filters, currently disabled:
#timings_data = timings_data[timings_data['psd_available'] == True]
#timings_data = timings_data.drop(['c276r3t','c278r3t'])

In [7]:
# Derive a short run ID from fixed character positions of the index,
# lower-case the flight labels, and store the pause count as an integer
# (a missing count is treated as zero pauses).
run_ids = timings_data.index
timings_data = timings_data.assign(
    short_ID=run_ids.str[7:11].str.lower() + run_ids.str[12:14].str.lower(),
    flight=timings_data['flight'].str.lower(),
    no_pauses=timings_data['no_pauses'].fillna(0).astype(int),
)

In [11]:
def get_timings(metadata_row):
    """Retrieve the start and end times of every sampling leg of a filter run.

    A run without pauses is a single leg from ``start_time`` to ``end_time``.
    Each pause splits the run further: the start of pause *k* closes one leg
    and the end of pause *k* opens the next one.

    Parameters
    ----------
    metadata_row: pandas Series or namedtuple
        row of the metadata pandas dataframe. It must expose ``start_time``,
        ``end_time`` and ``no_pauses`` as attributes and, for each pause
        ``k = 1 .. no_pauses``, ``pause<k>_start`` and ``pause<k>_end``.

    Returns
    -------
    leg_start_times: list
        times (in SPM) at which each leg starts:
        [start, pause1 end, pause2 end, ...]
    leg_end_times: list
        times (in SPM) at which each leg ends:
        [pause1 start, pause2 start, ..., end]
    """
    leg_start_times = [metadata_row.start_time]
    leg_end_times = [metadata_row.end_time]
    for pause_number in range(1, metadata_row.no_pauses + 1):
        # Look the pause fields up by name with getattr rather than eval-ing
        # a constructed expression string.
        pause_start = getattr(metadata_row, 'pause{}_start'.format(pause_number))
        pause_end = getattr(metadata_row, 'pause{}_end'.format(pause_number))
        # Sampling resumes when the pause ends ...
        leg_start_times.append(pause_end)
        # ... and the preceding leg stopped when the pause began. Insert just
        # before the final run end time so the list stays chronological.
        leg_end_times.insert(-1, pause_start)
    return leg_start_times, leg_end_times

In [12]:
def get_linestyle(count, colors, lines):
    """Choose the line style and colour for the ``count``-th plotted series.

    Colours are cycled first; the line style advances by one each time the
    whole colour list has been used.

    Parameters
    ----------
    count: int
        index of the series being plotted
    colors: sequence
        available colours
    lines: sequence
        available line styles

    Returns
    -------
    tuple
        (line style, colour)
    """
    style_index, colour_index = divmod(count, len(colors))
    return lines[style_index], colors[colour_index]
# Colour names from matplotlib's Tableau palette, plus the line styles
# that get_linestyle steps through once the colours run out.
default_colors = mcolors.TABLEAU_COLORS
colors = list(default_colors)
lines = ['solid', 'dotted', (0, (5, 1))]

In [13]:
def custom_formatter(x, pos):
    """Format a tick value as a label string.

    Values below 1 are shown with one decimal place; values of 1 or more are
    truncated to a whole number. A string is returned in both cases, so the
    function can be used as a matplotlib tick-formatter callback.

    Parameters
    ----------
    x: float
        tick value
    pos: int or None
        tick position (unused)

    Returns
    -------
    str
        the tick label
    """
    if x < 1:
        return "{:.1f}".format(x)
    return str(int(x))


In [23]:
# Compute the mean PCASP and CDP dN/dlogD for every filter run and write one
# CSV file per probe.
#
# The per-run arrays are collected in dicts keyed by run ID and converted to
# DataFrames once the loop has finished, so this cell does not depend on
# `pcasp_df` / `cdp_df` already existing in the kernel.
# NOTE(review): the resulting columns are labelled 0..N-1 (bin position).
# Rename them here if the CSV headers should carry bin labels or diameters.
pcasp_rows = {}
cdp_rows = {}
for run in timings_data.itertuples():
    # glob() returns matches in arbitrary order; sort so that the choice of
    # the last match is deterministic when several files are present.
    core_cloud_data_fn = sorted(glob(os.path.join(
        '/badc/faam/data/2022', run.flight + '*/core_processed',
        'core-cloud*' + run.flight + '.nc'
    )))[-1]
    core_data_fn = sorted(glob(os.path.join(
        '/badc/faam/data/2022', run.flight + '*/core_processed',
        'core_faam*' + run.flight + '.nc'
    )))[-1]
    nev_data_fn = sorted(glob(os.path.join(
        '/home/users/erinraif/mphase_data/nevzorov_data', run.flight + '*')))[-1]
    flight_data = asd.get_data(core_cloud_data_fn, core_data_fn)

    # Make Nevzorov cloud-flag data compatible with other datasets
    nev_ds = xr.open_dataset(nev_data_fn, engine='netcdf4', decode_times=False)
    nev_ds = nev_ds.rename_dims({'TIME': 'time'})
    nev_ds = nev_ds.rename({'TIME': 'time'})
    nev_flag = nev_ds.CLRFLG_COMBINED
    leg_start_times, leg_end_times = get_timings(run)
    nev_flag = asd.time_slice_data(leg_start_times, leg_end_times, nev_flag)

    # PCASP: scale the leg data by 1000 before computing the mean PSDs
    uncorrected_pcasp, corrected_pcasp, pcasp_flow = asd.get_pcasp_data_for_leg(
        flight_data, leg_start_times, leg_end_times)
    uncorrected_pcasp = uncorrected_pcasp*1000
    corrected_pcasp = corrected_pcasp*1000
    pcasp_psds = asd.get_mean_log_psds(
        pcasp_cal_at_ri, uncorrected_pcasp, corrected_pcasp, pcasp_flow)

    # CDP: same scaling as for the PCASP
    uncorrected_cdp, corrected_cdp, cdp_flow = asd.get_cdp_data_for_leg(
        flight_data, leg_start_times, leg_end_times)
    uncorrected_cdp = uncorrected_cdp*1000
    corrected_cdp = corrected_cdp*1000

    # Keep CDP samples where the Nevzorov flag exceeds 0.5 and, when RH_LIQ
    # is available, where it is also below 80.
    nev_mask = nev_flag > 0.5
    try:
        rh_liq = asd.time_slice_data(leg_start_times, leg_end_times, flight_data['RH_LIQ'])
        rh_mask = rh_liq < 80
        comb_mask = rh_mask & nev_mask
    except Exception:
        # if RH_LIQ doesn't work (C330, no idea why, haven't looked).
        # Catch Exception rather than everything so that an interrupt from
        # the keyboard still stops the loop.
        comb_mask = nev_mask
        print('rh_liq unavailable run ',run.Index)
    comb_uncorrected_cdp = uncorrected_cdp.where(comb_mask, drop=True)
    comb_corrected_cdp = corrected_cdp.where(comb_mask, drop=True)
    comb_cdp_psds = asd.get_mean_log_psds(
        cdp_cal_at_ri, comb_uncorrected_cdp, comb_corrected_cdp, cdp_flow)

    # NOTE(review): the integrated quantities and df_row are not used again
    # in this cell; they are kept in case another cell reads them.
    dN, dS, dV, dN_err, dS_err, dV_err = asd.integrate_distribution_with_errors(
        pcasp_psds, comb_cdp_psds)
    df_row = dict(
        run_ID = str(run.Index),
        cdp_assump = 'both',
        dN = dN,
        dS = dS,
        dV = dV,
        dN_err = dN_err,
        dS_err = dS_err,
        dV_err = dV_err
    )
    # MARK CHANGE THE FOLLOWING dNdlogD to other variable names
    # dNdlogD, dSdlogD, dVdlogD
    # dNdlogD_err, dSdlogD_err, dVdlogD_err
    # asymmetric size of x-errors (same for all flights):
    # lin_log_diam_upper_error, lin_log_diam_lower_error
    pcasp_dNdlogD = pcasp_psds.sel(bin=slice(2,29)).dNdlogD.values
    cdp_dNdlogD = comb_cdp_psds.dNdlogD.values
    pcasp_rows[run.Index] = pcasp_dNdlogD
    cdp_rows[run.Index] = cdp_dNdlogD

# One row per run, one column per size bin
pcasp_df = pd.DataFrame.from_dict(pcasp_rows, orient='index')
cdp_df = pd.DataFrame.from_dict(cdp_rows, orient='index')
pcasp_df.to_csv('pcasp_dNdlogD_mphase.csv')
cdp_df.to_csv('cdp_dNdlogD_mphase.csv')

rh_liq unavailable run  221103_C330_T1_1,000ft
rh_liq unavailable run  221103_C330_T2_7,300ft
