### Concatenate averaged raw & fil and vicarious calibration data into the format of the master datasheet

**Import packages**

In [21]:
import glob
import os

import pandas as pd

from fopt_toolkit import fopt_toolkit as fp

**Filepaths for averaged raw/fil and vicarious calibration files**

In [22]:
## Input directories: final vicarious calibration files and averaged summary statistics.
## NOTE(review): fp.make_dir presumably creates the directory when it is missing and
## returns its path -- confirm in fopt_toolkit. The file list displayed further down
## shows absolute paths, so the returned path appears to be absolute.
vic_cal_filepath = fp.make_dir('Data/5_final_vical')
averaged_filepath = fp.make_dir('Data/4_averaged_summary_stats')

**Filepath to store summary data**

In [23]:
## Output directory that receives the three summary tables written at the end of the notebook.
new_filepath = fp.make_dir('Data/7_overall_summary')

**Get the list of averaged fil/raw and vicarious calibration csv files from filepath**

In [24]:
## Collect every CSV path in each input directory, then sort in place so the
## order does not depend on how the filesystem lists the files.
vic_file_list = glob.glob(vic_cal_filepath + '/*.csv')
vic_file_list.sort()

average_file_list = glob.glob(averaged_filepath + '/*.csv')
average_file_list.sort()

In [25]:
## Split the averaged files into raw and fil lists: any path containing
## 'raw.csv' is treated as raw, everything else as fil.
raw_file_list, fil_file_list = [], []
for csv_path in average_file_list:
    destination = raw_file_list if 'raw.csv' in csv_path else fil_file_list
    destination.append(csv_path)

**Get dictionaries with dataframes (grouped by sample type)**

In [26]:
def fill_dict(file_list, scatter=True):
    """Read summary files into lists of dataframes grouped by sample type.

    The lake name and sample type are taken from the file name, which is
    expected to look like ``<prefix>_<lake>_<sample type>[_...].<ext>``
    (for example ``Averaged_bea_a_raw.csv``). Each file is read as
    tab-separated text and gains a ``lake`` column holding the lake name.

    Parameters
    ----------
    file_list : iterable of str
        Paths of the files to read.
    scatter : bool, default True
        When truthy the result has the keys ``'a'``, ``'b'`` and ``'c'``;
        otherwise only ``'a'`` and ``'c'``.

    Returns
    -------
    dict
        Maps each sample type to the list of dataframes read for it, in the
        order the files were given. Sample types without files map to ``[]``.

    Raises
    ------
    KeyError
        If a file name carries a sample type that is not one of the keys.
    IndexError
        If a file name has fewer than three ``_``-separated parts.
    """
    sample_types = ('a', 'b', 'c') if scatter else ('a', 'c')
    my_dictionary = {sample_type: [] for sample_type in sample_types}
    for file_path in file_list:
        # basename honours the platform's path separator; splitting on '/'
        # leaves directory names in the result for Windows-style paths.
        file_name = os.path.basename(file_path)
        file_name_cols = file_name.split('.')[0].split('_')
        lake_name = file_name_cols[1]
        sample_type = file_name_cols[2]
        if sample_type not in my_dictionary:
            raise KeyError(
                'Unexpected sample type %r in file name %r (expected one of %s)'
                % (sample_type, file_name, ', '.join(sample_types))
            )
        df = pd.read_csv(file_path, skiprows=0, delimiter='\t')
        df['lake'] = lake_name
        my_dictionary[sample_type].append(df)
    return my_dictionary

In [27]:
## Load each group of files into a dictionary keyed by sample type.
## scatter=True (the default) keeps the 'a', 'b' and 'c' keys for all three groups.
vic_cal_dfs = fill_dict(vic_file_list, scatter=True)
raw_dfs = fill_dict(raw_file_list, scatter=True)
fil_dfs = fill_dict(fil_file_list, scatter=True)

In [43]:
## Debug check: display the raw file paths collected above.
raw_file_list

['/Users/scintillation/Documents/GitHub/FreshwatersOpticsToolkit/Data/4_averaged_summary_stats/Averaged_bea_a_raw.csv',
 '/Users/scintillation/Documents/GitHub/FreshwatersOpticsToolkit/Data/4_averaged_summary_stats/Averaged_bea_b_raw.csv',
 '/Users/scintillation/Documents/GitHub/FreshwatersOpticsToolkit/Data/4_averaged_summary_stats/Averaged_bea_c_raw.csv',
 '/Users/scintillation/Documents/GitHub/FreshwatersOpticsToolkit/Data/4_averaged_summary_stats/Averaged_ch6_a_raw.csv',
 '/Users/scintillation/Documents/GitHub/FreshwatersOpticsToolkit/Data/4_averaged_summary_stats/Averaged_ch6_b_raw.csv',
 '/Users/scintillation/Documents/GitHub/FreshwatersOpticsToolkit/Data/4_averaged_summary_stats/Averaged_ch6_c_raw.csv',
 '/Users/scintillation/Documents/GitHub/FreshwatersOpticsToolkit/Data/4_averaged_summary_stats/Averaged_dor_a_raw.csv',
 '/Users/scintillation/Documents/GitHub/FreshwatersOpticsToolkit/Data/4_averaged_summary_stats/Averaged_dor_b_raw.csv',
 '/Users/scintillation/Documents/GitHub/

**Concatenate and combine dataframes by sample type, rename columns**

In [29]:
def _pivot_stat(stacked, sample_type, stat):
    """Pivot one statistic to one row per lake and one column per wavelength.

    Columns are renamed to ``<sample_type>_<wavelength as int>_<stat>``.
    """
    wide = (
        stacked
        .pivot(index='lake', columns='wl', values=stat)
        .rename_axis(None, axis=0)
        .rename_axis(None, axis=1)
    )
    wide.columns = [sample_type + '_' + str(int(wl)) + '_' + stat for wl in wide.columns]
    return wide


def get_concat(my_dictionary):
    """Build one wide summary table from per-lake dataframes grouped by sample type.

    Parameters
    ----------
    my_dictionary : dict
        Maps a sample type (str) to a list of dataframes. Every dataframe
        must have the columns ``lake``, ``wl``, ``mean`` and ``std``.

    Returns
    -------
    pandas.DataFrame
        One row per lake; columns named ``<sample type>_<wavelength>_mean``
        and ``<sample type>_<wavelength>_std``, sorted alphabetically.
        Sample types with an empty list are skipped; if every list is empty
        an empty dataframe is returned.

    Raises
    ------
    KeyError
        If a dataframe lacks one of the required columns. The message names
        the sample type and the columns that were actually found.
    ValueError
        Raised by ``DataFrame.pivot`` when a lake/wavelength pair occurs
        more than once within a sample type.
    """
    required_columns = ('lake', 'wl', 'mean', 'std')
    all_data_frames = []
    for sample_type, dfs in my_dictionary.items():
        # pd.concat rejects an empty list, so sample types without files are skipped.
        if not dfs:
            continue
        # Fail early with a descriptive message instead of an opaque KeyError
        # from inside pivot (typical cause: wrong delimiter or different headers).
        for df in dfs:
            missing = [col for col in required_columns if col not in df.columns]
            if missing:
                raise KeyError(
                    'Sample type %r: missing column(s) %s; columns found: %s'
                    % (sample_type, ', '.join(missing), list(df.columns))
                )
        ## Concatenate dfs (stacked)
        concatenated_dfs = pd.concat(dfs)
        ## Pivot so that each lake is its own row, then combine means and standard deviations
        df_all = pd.concat(
            [
                _pivot_stat(concatenated_dfs, sample_type, 'mean'),
                _pivot_stat(concatenated_dfs, sample_type, 'std'),
            ],
            axis=1,
        )
        ## Sort columns
        df_all = df_all.reindex(sorted(df_all.columns), axis=1)
        all_data_frames.append(df_all)
    if not all_data_frames:
        return pd.DataFrame()
    all_dfs_all_sample_types = pd.concat(all_data_frames, axis=1)
    all_dfs_all_sample_types = all_dfs_all_sample_types.reindex(
        sorted(all_dfs_all_sample_types.columns), axis=1
    )
    return all_dfs_all_sample_types

In [36]:
## Debug check: repeat the first steps of get_concat by hand for the 'a' sample type of the fil data
dfs = fil_dfs['a']
## Concatenate dfs (stacked)
concatenated_dfs = pd.concat(dfs)

In [38]:
## Reshape the stacked frame to one row per lake and one column per wavelength,
## then clear the axis names that pivot leaves on the index and the columns.
df_means = (
    concatenated_dfs
    .pivot(index='lake', columns='wl', values='mean')
    .rename_axis(None, axis=0)
    .rename_axis(None, axis=1)
)


In [39]:
## Display the pivoted means (one row per lake, one column per wavelength)
df_means

Unnamed: 0,412.0,440.0,488.0,510.0,532.0,555.0,650.0,676.0,715.0
bea,4.096789,2.864019,1.922016,1.683106,1.496809,1.360035,1.116762,1.097409,1.031823
ch6,3.754134,2.421812,1.437218,1.153294,1.008096,0.961315,0.78582,0.689139,0.712909
dor,2.792985,1.919414,1.276419,1.133394,1.026247,0.942682,0.798766,0.786012,0.749659
egg,16.779353,10.348803,5.34982,4.15199,3.236153,2.492249,1.091991,0.929152,0.676786
flt,5.334881,3.628133,2.333191,1.989251,1.766941,1.667248,1.350828,1.298575,1.252814
gst,2.05116,1.572973,1.166641,1.062914,0.979948,0.916295,0.804942,0.804784,0.734155
lim,7.814719,5.187099,3.081641,2.535268,2.109746,1.78315,1.096455,1.033741,0.851563
lit,6.96429,5.1776,3.635649,3.200915,2.857893,2.586936,2.194684,2.350307,1.94322
rat,16.523725,10.373091,5.80694,4.715655,3.906761,3.30242,2.088881,1.93816,1.711911
rob,12.019942,8.039374,4.946395,4.14149,3.516272,3.020026,2.261977,2.248483,1.927516


In [41]:
## Build the wide summary table for the vicarious calibration data.
## NOTE(review): the same call is repeated in the combined cell below; the execution
## counts show these cells were run out of order.
vic_cal_summary = get_concat(vic_cal_dfs)

In [42]:
## Build the wide summary table for the raw data.
## NOTE(review): the recorded run of this call failed with KeyError: 'wl', so at least one
## raw dataframe has no 'wl' column -- check the delimiter and header of the raw files.
raw_summary = get_concat(raw_dfs)

KeyError: 'wl'

In [40]:
## Build the three wide summary tables (one row per lake) that are saved below.
## NOTE(review): the recorded run of this cell stopped with KeyError: 'wl'; the separate
## raw-only cell above fails the same way, so the raw files look like the source.
vic_cal_summary = get_concat(vic_cal_dfs)
fil_summary = get_concat(fil_dfs)
raw_summary = get_concat(raw_dfs)

KeyError: 'wl'

**Save to csv**

In [None]:
## Write each summary table to the output directory. The files are tab-separated
## (sep='\t') despite the .csv extension, and index=True keeps the row labels
## (the lake names) as the first column.
vic_cal_summary.to_csv(new_filepath+'/Vical_summary.csv', sep='\t',index=True)
fil_summary.to_csv(new_filepath+'/Fil_summary.csv', sep='\t',index=True)
raw_summary.to_csv(new_filepath+'/Raw_summary.csv', sep='\t',index=True)