### Concatenate vicarious calibration information into the format used by the master datasheet

**Predefined wavelengths:** these weren't included in the averaged files; may go back and add them later.

In [1]:
# Predefined wavelengths, one float per entry; later assigned as the 'wl'
# column of each vicarious calibration dataframe (presumably nm — confirm).
wavelengths = [float(wl) for wl in (412, 440, 488, 510, 532, 555, 650, 676, 715)]

** Import packages **

In [20]:
from fopt_toolkit import fopt_toolkit as fp
import glob
import pandas as pd

** Filepath for vicarious calibration files **

In [125]:
# Input directories: vicarious calibration summaries first, averaged summaries second.
# NOTE(review): fp.make_dir is a project helper; presumably it ensures the directory
# exists and returns its path — confirm.
vic_cal_filepath, averaged_filepath = (
    fp.make_dir(directory)
    for directory in (
        'Data/3_2_vic_averaged_summary_stats',
        'Data/3_1_averaged_summary_stats/',
    )
)

** Filepath to store summary data **

In [126]:
# Output directory that receives the combined summary table.
summary_directory = 'Data/4_summary_info'
new_filepath = fp.make_dir(summary_directory)

**Get the sorted lists of vicarious calibration and averaged csv files from their filepaths**

In [128]:
# Collect every csv file in each input directory, then sort the paths in place
# so the processing order is deterministic.
vic_file_list = glob.glob(vic_cal_filepath+'/*.csv')
vic_file_list.sort()
averaged_file_list = glob.glob(averaged_filepath+'/*.csv')
averaged_file_list.sort()

In [131]:
# Split the averaged files into 'fil' files and everything else, based on the
# last '_'-separated token of the path (e.g. '..._fil.csv' -> 'fil').
averaged_fil_file_list = []
averaged_raw_file_list = []

csv_suffix = '.csv'
for averaged_path in averaged_file_list:
    sample_type = averaged_path.split('_')[-1]
    # Remove the extension as a suffix. str.strip('.csv') would instead strip any
    # of the characters '.', 'c', 's', 'v' from both ends (e.g. 'cfil.csv' -> 'fil').
    if sample_type.endswith(csv_suffix):
        sample_type = sample_type[:-len(csv_suffix)]
    if sample_type == 'fil':
        averaged_fil_file_list.append(averaged_path)
    else:
        averaged_raw_file_list.append(averaged_path)

**Empty dictionaries for storing dataframes according to sample type ('a': absorbance, 'b': backscatter, 'c': attenuation)**

In [113]:
# One independent, initially empty list per sample type key.
vic_cal_dfs = {sample_key: [] for sample_key in ('a', 'b', 'c')}

In [133]:
# One independent, initially empty list per sample type key.
fil_cal_dfs = {sample_key: [] for sample_key in ('a', 'b', 'c')}

In [134]:
# One independent, initially empty list per sample type key.
raw_cal_dfs = {sample_key: [] for sample_key in ('a', 'b', 'c')}

** Fill dictionary 'vic_cal_dfs' with dataframes (grouped by sample type) **

In [114]:
# Read every vicarious calibration file and store its dataframe under its sample type.
# Iterates over vic_file_list: the previous name, `file_list`, is never defined in
# this notebook, so the cell failed with NameError on a fresh kernel.
for file_path in vic_file_list:
    # The file name (without directory and extension) is split on '_';
    # token 1 is used as the lake name and token 2 as the sample type.
    file_name_cols = file_path.split('/')[-1].split('.')[0].split('_')
    lake_name = file_name_cols[1]
    sample_type = file_name_cols[2]
    # The files are read as tab-delimited despite the .csv extension.
    df = pd.read_csv(file_path, skiprows=0, delimiter= '\t')
    # Requires exactly one row per predefined wavelength, otherwise pandas
    # raises a length-mismatch ValueError.
    df['wl'] = wavelengths
    df['lake'] = lake_name
    # sample_type must be one of the keys of vic_cal_dfs ('a', 'b' or 'c').
    vic_cal_dfs[sample_type].append(df)

** Concatenate dataframes by sample type, rename columns **

In [117]:
# Build one wide table per sample type: one row per lake, with one mean and one
# std column per wavelength (e.g. 'a_412_mean', 'a_412_std').
all_data_frames = []
for sample_type, dfs in vic_cal_dfs.items():
    ## Nothing was loaded for this sample type; pd.concat raises on an empty list
    if not dfs:
        continue
    ## Concatenate dfs (stacked)
    concatenated_dfs = pd.concat(dfs)
    ## Pivot dataframe so that each lake is its own row, columns are wavelengths
    ## (pivot raises ValueError if a (lake, wl) pair occurs more than once)
    df_means = concatenated_dfs.pivot(index='lake',columns='wl',values='mean').rename_axis(None, axis=0).rename_axis(None, axis=1)
    df_stds = concatenated_dfs.pivot(index='lake',columns='wl',values='std').rename_axis(None, axis=0).rename_axis(None, axis=1)
    ## Rename the wavelength columns to '<sample type>_<integer wavelength>_<statistic>'
    df_means.columns = [sample_type+'_'+str(int(wl))+'_mean' for wl in df_means.columns]
    df_stds.columns = [sample_type+'_'+str(int(wl))+'_std' for wl in df_stds.columns]
    ## Combine means and standard deviations
    df_all = pd.concat([df_means,df_stds], axis = 1)
    ## Sort columns by name (DataFrame.reindex_axis was removed in pandas 1.0)
    df_all = df_all.reindex(columns=sorted(df_all.columns))
    all_data_frames.append(df_all)

** Combine all data into one dataframe **

In [118]:
## Join the per-sample-type tables side by side, aligned on their (lake) index
all_dfs_all_sample_types = pd.concat(all_data_frames, axis = 1)
## Sort columns by name (DataFrame.reindex_axis was removed in pandas 1.0)
all_dfs_all_sample_types = all_dfs_all_sample_types.reindex(columns=sorted(all_dfs_all_sample_types.columns))

** Save to csv **

In [123]:
# Write the combined table, index included, as a tab-separated file
# (the file keeps a .csv extension).
summary_csv_path = new_filepath+'/Vical_summary.csv'
all_dfs_all_sample_types.to_csv(summary_csv_path, sep='\t', index=True)