# ABOVE AC9 Data Processing  

## Scatter correction and calculating particulate scatter

In [27]:
import glob
import math
import os

import pandas as pd

from fopt_toolkit import fopt_toolkit as fp

### Scatter correction

Get path to directory containing particulate (vicariously calibrated) absorption, pre-scatter correction

In [28]:
# Folder holding the vicariously calibrated particulate absorption CSVs,
# before scatter correction.
# NOTE(review): fp.make_dir presumably creates the folder if it is missing and
# returns its path as a string — confirm in fopt_toolkit.
particulate_absorption_filepath = fp.make_dir(
    'Data/4_vic_averaged_absorption_PRE_SCATTER_CORRECTION/'
)

Using path, get list of filepaths to csvs of pre-scatter-correction absorption

In [29]:
# Every CSV found in the pre-scatter-correction folder, in sorted order.
particulate_absorption_file_list = glob.glob(particulate_absorption_filepath + '/*.csv')
particulate_absorption_file_list.sort()

Create dictionary where keys are site names (lakes) and entries are dataframes.

In [30]:
# Map each site (lake) name to its pre-scatter-correction absorption dataframe.
# The site name is the second '_'-separated token of the file's base name
# (presumably files are named like 'Vical_<site>_a.csv' — confirm against the
# step that wrote them).
# os.path.basename is used instead of splitting on '/' so the site name is
# still parsed correctly when glob returns paths with the platform's own
# separator.
dict_of_absorption_dfs = {
    os.path.basename(csv_path).split('_')[1]: pd.read_csv(
        csv_path, skiprows=0, delimiter='\t', index_col=0
    )
    for csv_path in particulate_absorption_file_list
}

**Now you have a dictionary of dataframes of particulate absorption, with the keys as lake names and the entries as dataframes. Do with it what you will.**

### Saving corrected particulate absorption files

For now, this step just passes the uncorrected dataframes along unchanged, since scatter correction has not been implemented yet.

In [31]:
# Output folder for the final particulate files.
# NOTE(review): fp.make_dir presumably creates the folder if needed and returns
# its path — confirm in fopt_toolkit.
final_particulate_path = fp.make_dir(
    'Data/5_final_vical/'
)

In [32]:
# Write one tab-separated absorption file per site.
# reset_index() turns the index into an ordinary column, so it is still written
# even though to_csv is called with index=False.
for site_name, absorption_df in dict_of_absorption_dfs.items():
    out_path = final_particulate_path + 'Vical_' + site_name + '_a.csv'
    absorption_df.reset_index().to_csv(out_path, sep='\t', index=False)

### Calculating scatter

Path to the final particulate files (from which we pull the final particulate absorption and attenuation, and to which we save the final particulate scatter)

In [33]:
# Folder that holds the final particulate absorption/attenuation files and
# receives the scatter files. This repeats the assignment made before the
# absorption files were saved (same path, same value).
final_particulate_path = fp.make_dir(
    'Data/5_final_vical/'
)

Get list of all filepaths to final particulate absorption and attenuation csvs

In [34]:
# Every CSV currently in the final particulate folder, in sorted order.
particulate_final_file_list = glob.glob(final_particulate_path + '/*.csv')
particulate_final_file_list.sort()

Get a list of all sites

In [35]:
# Unique site names, taken from the second '_'-separated token of each file's
# base name. os.path.basename handles whichever path separator glob returned,
# and sorting makes the order reproducible between runs (set iteration order
# is arbitrary).
site_list = sorted({
    os.path.basename(csv_path).split('_')[1]
    for csv_path in particulate_final_file_list
})

Create empty nested dictionary to store dataframes, so that each site is the key to another dictionary, where the keys are 'a' and 'c' and the entries are the corresponding dataframes

In [36]:
# One entry per site. Each site gets its own inner dict whose 'a' and 'c'
# slots start as None and are filled once the CSVs are read.
site_to_df_dict = {site_name: dict.fromkeys(('a', 'c')) for site_name in site_list}

Fill dictionary (now we can call each site and get both a and c dataframes at once)

In [37]:
# Load every final particulate CSV into the nested dictionary, keyed first by
# site and then by analysis type (the token between the second '_' and the
# file extension, e.g. 'a' or 'c').
for file_path in particulate_final_file_list:
    # basename rather than split('/') so the file name is isolated correctly
    # whichever path separator glob returned.
    file_name_elements = os.path.basename(file_path).split('_')
    site = file_name_elements[1]
    analysis_type = file_name_elements[2].split('.')[0]
    df = pd.read_csv(file_path, skiprows=0, delimiter='\t', index_col=0)
    site_to_df_dict[site][analysis_type] = df
    

Function to combine standard deviations in quadrature (square root of the sum of squares)

In [38]:
def combine_std(series):
    """Combine standard deviations in quadrature.

    Parameters
    ----------
    series : iterable of numbers
        The individual standard deviations to combine (for example a pandas
        Series passed in by ``groupby(...).agg``).

    Returns
    -------
    float
        The square root of the sum of the squared values; ``0.0`` when
        ``series`` is empty.
    """
    return math.sqrt(sum(value ** 2 for value in series))

Calculate scatter (b = c − a) and save it to the directory containing the final particulate dataframes

In [39]:
# For every site, derive scatter (b) from attenuation (c) and absorption (a)
# and write it next to the final particulate files as 'Vical_<site>_b.csv'.
for site in site_to_df_dict:
    a = site_to_df_dict[site]['a']
    c = site_to_df_dict[site]['c']
    # Both frames are required; fail with a clear message instead of an opaque
    # error on None when one of the two files was not found for this site.
    if a is None or c is None:
        raise ValueError(
            "Cannot calculate scatter for site '" + site + "': "
            "missing final particulate 'a' and/or 'c' dataframe"
        )
    # Start from a copy of c so any other columns carry over unchanged.
    b = c.copy(deep=True)
    b['mean'] = c['mean'] - a['mean']
    # Stack the two std columns, then combine the values that share an index
    # label in quadrature.
    b['std'] = pd.concat([a['std'], c['std']]).groupby(level=0).agg(combine_std)
    # rename_axis returns a new object, so the result must be assigned for the
    # column-axis name to actually be cleared.
    b = b.rename_axis(None, axis=1)
    filename = 'Vical_' + site + '_b.csv'
    b.to_csv(final_particulate_path + filename, sep='\t', index=True)