### Concatenate the averaged raw & fil and vicarious calibration summaries into the format used by the master datasheet

**Predefined wavelengths:** these were not included in the averaged files, so they are hard-coded here. **TODO:** go back and add them to the averaged files.

In [23]:
# Wavelength of each row in the averaged files (the files themselves do not carry a wavelength column).
wavelengths_num = [412.0, 440.0, 488.0, 510.0, 532.0, 555.0, 650.0, 676.0, 715.0]
# Integer form of the same values; used for the 'wl' column and, through it, the output column names.
wavelengths = list(map(int, wavelengths_num))

**Import packages**

In [24]:
from fopt_toolkit import fopt_toolkit as fp
import glob
import pandas as pd

**Filepaths for averaged raw/fil and vicarious calibration files**

In [25]:
# Input directories for this notebook. The values returned by fp.make_dir are
# used further down as path prefixes when globbing for '*.csv' files.
# NOTE(review): presumably make_dir creates the directory if it is missing and
# returns its path as a string -- confirm against fopt_toolkit.
vic_cal_filepath = fp.make_dir('Data/3_2_vic_averaged_summary_stats')
averaged_filepath = fp.make_dir('Data/3_1_averaged_summary_stats')

**Filepath to store summary data**

In [26]:
# Output directory: the three summary files are written under this path in the last cell.
new_filepath = fp.make_dir('Data/4_overall_summary')

**Get the lists of averaged fil/raw and vicarious calibration csv files from the filepaths**

In [27]:
# Collect every '*.csv' file in each input directory, ordered by path string.
vic_file_list = glob.glob(vic_cal_filepath + '/*.csv')
vic_file_list.sort()
average_file_list = glob.glob(averaged_filepath + '/*.csv')
average_file_list.sort()

In [28]:
# Partition the averaged files: any path containing 'raw.csv' is a raw file,
# every other path is treated as a fil file.
raw_file_list = [csv_path for csv_path in average_file_list if 'raw.csv' in csv_path]
fil_file_list = [csv_path for csv_path in average_file_list if 'raw.csv' not in csv_path]

**Get dictionaries with dataframes (grouped by sample type)**

In [29]:
def fill_dict(file_list, wavelength_values=None):
    """Read summary-stat files and group the resulting dataframes by sample type.

    The file name (directory and everything after the first '.' removed) is
    split on '_': the second field is used as the lake name and the third as
    the sample type. Each file is read as tab-delimited text and receives two
    extra columns, 'wl' (one wavelength per data row) and 'lake'.

    Parameters
    ----------
    file_list : iterable of str
        Paths of the files to read.
    wavelength_values : sequence, optional
        Values for the 'wl' column, one per data row of each file. When
        omitted, the notebook-level ``wavelengths`` list is used.

    Returns
    -------
    dict
        ``{'a': [...], 'b': [...], 'c': [...]}`` where each list holds the
        dataframes of that sample type, in the order of ``file_list``.

    Raises
    ------
    KeyError
        If the sample-type field of a file name is not 'a', 'b' or 'c'.
    ValueError
        Raised by pandas when the number of wavelength values does not match
        the number of rows in a file.
    """
    # Local import so the function does not depend on the notebook's import cell.
    import os

    if wavelength_values is None:
        # Fall back to the notebook-level list, as the function always did.
        wavelength_values = wavelengths

    my_dictionary = {'a': [], 'b': [], 'c': []}
    for file_path in file_list:
        # os.path.basename understands the platform's separators, so paths
        # returned by glob on Windows (backslashes) are parsed correctly too.
        file_name_cols = os.path.basename(file_path).split('.')[0].split('_')
        lake_name = file_name_cols[1]
        sample_type = file_name_cols[2]
        df = pd.read_csv(file_path, skiprows=0, delimiter='\t')
        df['wl'] = wavelength_values
        df['lake'] = lake_name
        my_dictionary[sample_type].append(df)
    return my_dictionary

In [30]:
# Load each group of files into its own {'a': [...], 'b': [...], 'c': [...]} dict of dataframes.
vic_cal_dfs = fill_dict(vic_file_list)
raw_dfs = fill_dict(raw_file_list)
fil_dfs = fill_dict(fil_file_list)

**Concatenate and combine dataframes by sample type, rename columns**

In [31]:
def get_concat(my_dictionary):
    """Build one wide summary table from dataframes grouped by sample type.

    For every sample type the dataframes are stacked and pivoted so that each
    lake becomes one row and each wavelength one column, once for the 'mean'
    values and once for the 'std' values. Columns are named
    ``<sample_type>_<wavelength>_mean`` / ``<sample_type>_<wavelength>_std``
    and the final table has its columns sorted by name.

    Parameters
    ----------
    my_dictionary : dict
        Maps a sample-type string to a list of dataframes, each with the
        columns 'lake', 'wl', 'mean' and 'std'.

    Returns
    -------
    pandas.DataFrame
        One row per lake, columns sorted alphabetically. An empty dataframe
        is returned when no sample type has any data.

    Raises
    ------
    ValueError
        Raised by ``DataFrame.pivot`` when a (lake, wavelength) pair occurs
        more than once within a sample type.
    """
    all_data_frames = []
    for sample_type, dfs in my_dictionary.items():
        # pd.concat raises on an empty list; a sample type without files
        # simply contributes no columns.
        if not dfs:
            continue
        ## Concatenate dfs (stacked)
        concatenated_dfs = pd.concat(dfs)
        stat_frames = []
        for stat in ('mean', 'std'):
            ## Pivot dataframe so that each lake is its own row, columns are wavelengths
            wide = concatenated_dfs.pivot(index='lake', columns='wl', values=stat)
            wide = wide.rename_axis(None, axis=0).rename_axis(None, axis=1)
            ## Assign new column names, e.g. 'a_412_mean'
            wide.columns = [sample_type + '_' + str(int(wl)) + '_' + stat
                            for wl in wide.columns]
            stat_frames.append(wide)
        ## Combine means and standard deviations
        df_all = pd.concat(stat_frames, axis=1)
        ## Sort columns (reindex_axis no longer exists in pandas >= 1.0)
        df_all = df_all.reindex(columns=sorted(df_all.columns))
        all_data_frames.append(df_all)

    if not all_data_frames:
        return pd.DataFrame()

    all_dfs_all_sample_types = pd.concat(all_data_frames, axis=1)
    return all_dfs_all_sample_types.reindex(
        columns=sorted(all_dfs_all_sample_types.columns))

In [32]:
# One wide table per file group: one row per lake, columns named
# <sample type>_<wavelength>_mean and <sample type>_<wavelength>_std.
vic_cal_summary = get_concat(vic_cal_dfs)
fil_summary = get_concat(fil_dfs)
raw_summary = get_concat(raw_dfs)

In [34]:
# Display the raw summary table as a visual check of the result.
raw_summary

Unnamed: 0,a_412_mean,a_412_std,a_440_mean,a_440_std,a_488_mean,a_488_std,a_510_mean,a_510_std,a_532_mean,a_532_std,...,c_532_mean,c_532_std,c_555_mean,c_555_std,c_650_mean,c_650_std,c_676_mean,c_676_std,c_715_mean,c_715_std
9mi,6.079409,0.055966,3.952569,0.055771,2.185625,0.048877,1.755109,0.051379,1.428504,0.042189,...,4.221323,0.120528,3.870104,0.122884,3.063242,0.12049,2.921051,0.115386,2.725044,0.110235
bai,44.515696,4.538274,31.91253,4.756697,20.452499,5.221019,17.555776,5.183311,15.261038,4.899393,...,33.34385,27.562481,31.493777,27.823853,27.455943,27.576024,25.195045,26.163311,25.416481,28.051185
boo,5.161771,0.115469,3.208003,0.111161,1.612685,0.111592,1.242498,0.102735,0.984085,0.106678,...,1.590649,0.198359,1.360649,0.193888,0.918385,0.18857,0.868077,0.20063,0.790738,0.197578
cbr,10.020215,0.26323,6.032584,0.174528,2.811861,0.108578,2.117284,0.104221,1.617375,0.09141,...,2.225751,0.179185,1.816195,0.179154,1.013675,0.168163,0.92478,0.176926,0.803771,0.170852
gre,2.588448,0.140001,1.721027,0.137017,0.966645,0.134136,0.772083,0.129123,0.638295,0.129914,...,2.128346,0.252265,1.935193,0.25119,1.499913,0.253395,1.456038,0.258624,1.326777,0.258718
sco,4.289649,0.127722,3.093894,0.115397,1.873059,0.101241,1.549112,0.097525,1.290416,0.096991,...,4.146949,0.214999,3.795844,0.218704,2.922919,0.210904,2.760781,0.211249,2.491906,0.211527
y17,11.717587,0.341889,9.158723,0.349471,6.252699,0.313065,5.436517,0.2959,4.708248,0.270243,...,15.219374,0.776718,14.367033,0.761566,12.105852,0.722707,11.727665,0.707185,10.750656,0.670646
y20,5.439816,0.146895,3.326468,0.144228,1.703035,0.135121,1.327465,0.137067,1.059344,0.129655,...,1.662729,0.302874,1.416102,0.295672,0.954035,0.29445,0.898879,0.303164,0.822909,0.286831


**Save to csv**

In [33]:
# Write each summary table to the output directory. The files are tab-separated
# (despite the .csv extension) and keep the index as the first column.
for summary_frame, summary_name in (
        (vic_cal_summary, '/Vical_summary.csv'),
        (fil_summary, '/Fil_summary.csv'),
        (raw_summary, '/Raw_summary.csv')):
    summary_frame.to_csv(new_filepath + summary_name, sep='\t', index=True)