### Concatenate averaged raw & fil and vicarious calibration data into the format of the master datasheet

**Import packages**

In [51]:
from fopt_toolkit import fopt_toolkit as fp
import glob
import pandas as pd

**Filepaths for averaged raw/fil and vicarious calibration files**

In [52]:
# Input directories, obtained through fp.make_dir (presumably creates the
# directory if missing and returns its path -- TODO confirm in fopt_toolkit).
averaged_filepath = fp.make_dir('Data/4_averaged_summary_stats')  # averaged raw/fil files
vic_cal_filepath = fp.make_dir('Data/5_final_vical')  # vicarious calibration files

**Filepath to store summary data**

In [53]:
# Output directory; every to_csv call in this notebook writes beneath it.
new_filepath = fp.make_dir('Data/7_overall_summary')


**Get the lists of averaged fil/raw and vicarious calibration CSV files from the filepaths**

In [54]:
# Collect the CSV paths found in each input directory, then order them
# alphabetically so that later processing is deterministic.
vic_file_list = glob.glob(vic_cal_filepath+'/*.csv')
vic_file_list.sort()
average_file_list = glob.glob(averaged_filepath+'/*.csv')
average_file_list.sort()

In [55]:
# Split the averaged files in two: any path containing 'raw.csv' is a raw
# file, and every other path is treated as a fil file.
raw_file_list = [path for path in average_file_list if 'raw.csv' in path]
fil_file_list = [path for path in average_file_list if 'raw.csv' not in path]

**Get dictionaries with dataframes (grouped by sample type)**

In [56]:
def fill_dict(file_list):
    """Read tab-delimited files and group the dataframes by sample type.

    The file name (directory removed, text after the first '.' removed) is
    split on '_'. The second field is used as the lake name and the third
    field as the sample type. The lake name is stored in a new 'lake' column
    of the dataframe read from that file.

    Parameters
    ----------
    file_list : iterable of str
        Paths of tab-delimited files whose names have at least three
        '_'-separated fields.

    Returns
    -------
    dict
        Keys 'a', 'b' and 'c', each mapping to the list of dataframes read
        for that sample type (an empty list when no file matched).

    Raises
    ------
    IndexError
        If a file name has fewer than three '_'-separated fields.
    KeyError
        If the sample type field is not 'a', 'b' or 'c'.
    """
    import os  # local import so this cell does not depend on the import cell

    my_dictionary = {'a': [], 'b': [], 'c': []}
    for file_path in file_list:
        # os.path.basename honours the platform's path separator; splitting
        # on '/' fails on Windows, where glob returns backslash-separated paths.
        file_name_cols = os.path.basename(file_path).split('.')[0].split('_')
        lake_name = file_name_cols[1]
        sample_type = file_name_cols[2]
        df = pd.read_csv(file_path, sep='\t')
        df['lake'] = lake_name
        my_dictionary[sample_type].append(df)
    return my_dictionary

In [57]:
# Load each group of files into a dict of dataframe lists keyed by sample type
vic_cal_dfs = fill_dict(vic_file_list)
raw_dfs = fill_dict(raw_file_list)
fil_dfs = fill_dict(fil_file_list)

**Concatenate and combine dataframes by sample type, and rename columns**

In [84]:
def get_concat(my_dictionary):
    """Build one wide table, one row per lake, from dataframes grouped by sample type.

    For every sample type the dataframes are stacked and pivoted so that each
    lake becomes a row and each wavelength a column, once for the 'mean'
    values and once for the 'std' values. Columns are renamed to
    '<sample_type>_<wavelength as int>_mean' / '..._std', the tables of all
    sample types are joined side by side, and the columns are sorted by name.

    Parameters
    ----------
    my_dictionary : dict
        Maps a sample type (str) to a list of dataframes that contain the
        columns 'lake', 'wl', 'mean' and 'std'.

    Returns
    -------
    pandas.DataFrame
        Table indexed by lake with name-sorted columns. An empty DataFrame is
        returned when no sample type has any dataframe.

    Raises
    ------
    ValueError
        Raised by pandas if a (lake, wl) pair occurs more than once within a
        sample type, because the pivot would be ambiguous.
    """
    all_data_frames = []
    for sample_type, dfs in my_dictionary.items():
        # pd.concat raises ValueError on an empty list, so a sample type
        # without any file is skipped instead of aborting the whole summary.
        if not dfs:
            continue
        ## Concatenate dfs (stacked)
        concatenated_dfs = pd.concat(dfs)
        stat_frames = []
        for stat in ('mean', 'std'):
            ## Pivot so that each lake is its own row, columns are wavelengths
            wide = concatenated_dfs.pivot(index='lake', columns='wl', values=stat)
            wide = wide.rename_axis(None, axis=0).rename_axis(None, axis=1)
            ## Assign new column names, e.g. 'a_412_mean'
            wide.columns = [sample_type + '_' + str(int(wl)) + '_' + stat
                            for wl in wide.columns]
            stat_frames.append(wide)
        ## Combine means and standard deviations
        all_data_frames.append(pd.concat(stat_frames, axis=1))
    if not all_data_frames:
        return pd.DataFrame()
    all_dfs_all_sample_types = pd.concat(all_data_frames, axis=1)
    ## Sort columns once, after everything has been combined
    return all_dfs_all_sample_types.reindex(
        sorted(all_dfs_all_sample_types.columns), axis=1)

In [95]:
def get_concat_2(my_dictionary):
    """Build one long table from dataframes grouped by sample type.

    For every sample type the dataframes are stacked and melted so that each
    row holds a single value identified by lake, wavelength and statistic
    ('mean' or 'std'). Two columns are added: 'Side', holding the sample
    type, and 'Variable', holding '<sample_type>_<wavelength as int>_<statistic>'.
    The tables of all sample types are then stacked.

    Parameters
    ----------
    my_dictionary : dict
        Maps a sample type (str) to a list of dataframes that contain the
        columns 'lake', 'wl', 'mean' and 'std'.

    Returns
    -------
    pandas.DataFrame
        Columns 'lake', 'wl', 'Statistic', 'Value', 'Side', 'Variable'. A
        frame with these columns and no rows is returned when no sample type
        has any dataframe.
    """
    all_data_frames = []
    for sample_type, dfs in my_dictionary.items():
        # pd.concat raises ValueError on an empty list, so a sample type
        # without any file is skipped instead of aborting the whole table.
        if not dfs:
            continue
        concatenated_dfs = pd.concat(dfs)
        x = pd.melt(concatenated_dfs, id_vars=['lake', 'wl'],
                    value_vars=['mean', 'std'],
                    var_name='Statistic', value_name='Value')
        x['Side'] = sample_type
        # Wavelengths are truncated to int so 412.0 becomes '412' in the label
        x['Variable'] = (x['Side'] + '_' + x['wl'].apply(int).apply(str)
                         + '_' + x['Statistic'])
        all_data_frames.append(x)
    if not all_data_frames:
        return pd.DataFrame(
            columns=['lake', 'wl', 'Statistic', 'Value', 'Side', 'Variable'])
    return pd.concat(all_data_frames, axis=0)

In [97]:
# Long-format tables (one row per lake / wavelength / statistic) for each data set;
# the vicarious-calibration data is the one saved as "particulate" below.
particulate = get_concat_2(vic_cal_dfs)
raw = get_concat_2(raw_dfs)
fil = get_concat_2(fil_dfs)

In [85]:
# Wide-format tables (one row per lake, one column per sample type / wavelength / statistic)
vic_cal_summary = get_concat(vic_cal_dfs)
fil_summary = get_concat(fil_dfs)
raw_summary = get_concat(raw_dfs)

**Save to csv**

In [10]:
# Write the wide summaries as tab-delimited files; index=True keeps the
# lake names (the row index) as the first column.
vic_cal_summary.to_csv(new_filepath+'/Vical_summary.csv', sep='\t',index=True)
fil_summary.to_csv(new_filepath+'/Fil_summary.csv', sep='\t',index=True)
raw_summary.to_csv(new_filepath+'/Raw_summary.csv', sep='\t',index=True)

In [99]:
# Write the long-format tables as comma-separated files; the row index is
# omitted because the lake is already stored in its own column.
particulate.to_csv(new_filepath+'/AC9_particulate_2019.csv', sep=',',index=False)
fil.to_csv(new_filepath+'/AC9_fil_2019.csv', sep=',',index=False)
raw.to_csv(new_filepath+'/AC9_raw_2019.csv', sep=',',index=False)

In [42]:
# Display the wide vicarious-calibration summary (one row per lake)
vic_cal_summary

Unnamed: 0,a_412_mean,a_412_std,a_440_mean,a_440_std,a_488_mean,a_488_std,a_510_mean,a_510_std,a_532_mean,a_532_std,...,c_532_mean,c_532_std,c_555_mean,c_555_std,c_650_mean,c_650_std,c_676_mean,c_676_std,c_715_mean,c_715_std
bea,0.399195,0.435969,0.273086,0.100059,0.157636,0.052441,0.116836,0.04717,0.083273,0.045732,...,0.250713,0.019892,0.212924,0.018553,0.105454,0.015675,0.096915,0.016906,0.073696,0.014857
ch6,1.390796,0.555127,0.992825,0.378413,0.62127,0.258133,0.475683,0.214731,0.34329,0.197074,...,4.089864,0.526573,3.802393,0.487732,3.425342,0.440928,3.145965,0.402782,3.045093,0.391366
dor,0.430769,0.049355,0.343763,0.045167,0.209711,0.041878,0.161069,0.040888,0.122321,0.038998,...,4.682213,0.197615,4.577741,0.194598,4.584928,0.199724,4.490566,0.196933,4.480813,0.202814
egg,3.264439,0.329994,1.914731,0.069112,0.88297,0.035123,0.63578,0.032549,0.443916,0.031952,...,1.125431,0.096452,1.014856,0.095417,0.681986,0.098058,0.784681,0.097402,0.692407,0.097906
flt,1.457741,0.360034,1.000612,0.166564,0.598002,0.148503,0.432479,0.142911,0.32109,0.139239,...,3.696387,0.305628,3.48771,0.300913,3.185815,0.327414,3.036848,0.306696,2.950927,0.318126
gst,0.401159,0.063227,0.311192,0.058308,0.200255,0.054248,0.167615,0.053026,0.13186,0.051793,...,2.645833,0.349591,2.613588,0.34814,2.566528,0.351872,2.543261,0.358918,2.520538,0.360664
lim,2.069345,0.033856,1.547445,0.027685,1.034709,0.023423,0.832306,0.02308,0.650062,0.021011,...,5.514857,0.08337,5.229221,0.084757,4.489671,0.088064,4.316256,0.088532,4.088486,0.092613
lit,1.673662,0.045533,1.353587,0.039583,0.771685,0.034376,0.596899,0.032447,0.433122,0.029927,...,5.149466,0.213338,4.722927,0.213386,3.344234,0.211581,3.333823,0.217039,2.823834,0.220116
rat,7.25624,0.094034,4.785509,0.028148,2.828173,0.023656,2.147627,0.02214,1.588864,0.021108,...,4.54494,0.155197,4.0533,0.155192,2.814583,0.155592,2.698712,0.155864,2.422888,0.160895
rob,0.49716,0.116551,0.416073,0.123075,0.274816,0.125068,0.192542,0.115592,0.122851,0.108837,...,3.632282,0.283284,3.549092,0.281488,3.54497,0.293735,3.39803,0.287109,3.393715,0.296515


In [24]:
# Melt the wide summary to long form. With no id_vars the lake index is not
# carried over, so the result only has 'variable' and 'value' columns.
pd.melt(vic_cal_summary)

Unnamed: 0,variable,value
0,a_412_mean,0.399195
1,a_412_mean,1.390796
2,a_412_mean,0.430769
3,a_412_mean,3.264439
4,a_412_mean,1.457741
5,a_412_mean,0.401159
6,a_412_mean,2.069345
7,a_412_mean,1.673662
8,a_412_mean,7.256240
9,a_412_mean,0.497160


***File for just particulate absorption***

In [11]:
# Stack the vicarious-calibration dataframes of sample type 'a'
concatenated_dfs = pd.concat(vic_cal_dfs['a'])
# Drop the first column by position (presumably a leftover index column from
# the input files -- TODO confirm against the files in Data/5_final_vical)
concatenated_dfs_2 = concatenated_dfs.drop(concatenated_dfs.columns[[0]], axis=1)
# Save as a comma-separated file without the row index
concatenated_dfs_2.to_csv(new_filepath+'/particulate_abs_melted_summary.csv', sep=',',index=False)

In [25]:
# Display the stacked particulate-absorption table that was just saved
concatenated_dfs_2

Unnamed: 0,wl,mean,std,lake
0,412.0,0.399195,0.435969,bea
1,440.0,0.273086,0.100059,bea
2,488.0,0.157636,0.052441,bea
3,510.0,0.116836,0.047170,bea
4,532.0,0.083273,0.045732,bea
5,555.0,0.054841,0.067716,bea
6,650.0,0.013671,0.087536,bea
7,676.0,0.014514,0.146428,bea
8,715.0,0.000000,0.132478,bea
0,412.0,1.390796,0.555127,ch6


In [17]:
# Stack the fil dataframes of sample type 'a' and save them as a comma-separated file.
# NOTE(review): unlike the particulate file, the first column is not dropped here -- confirm this is intended.
concatenated_dfs_fil = pd.concat(fil_dfs['a'])
concatenated_dfs_fil.to_csv(new_filepath+'/fil_abs_melted_summary.csv', sep=',',index=False)

In [18]:
# Stack the raw dataframes of sample type 'a' and save them as a comma-separated file.
# NOTE(review): unlike the particulate file, the first column is not dropped here -- confirm this is intended.
concatenated_dfs_raw = pd.concat(raw_dfs['a'])
concatenated_dfs_raw.to_csv(new_filepath+'/raw_abs_melted_summary.csv', sep=',',index=False)