### Concatenate averaged raw & fil data and vicarious calibration information into the format used by the master datasheet

**Import packages**

In [1]:
import glob
import os

import pandas as pd

from fopt_toolkit import fopt_toolkit as fp

**Filepaths for averaged raw/fil and vicarious calibration files**

In [2]:
# Input directories: final vicarious calibration files and averaged raw/fil summary statistics.
# NOTE(review): fp.make_dir presumably creates the directory if it is missing and
# returns its path as a string (it is concatenated with '/*.csv' later) — confirm in fopt_toolkit.
vic_cal_filepath = fp.make_dir('Data/5_final_vical')
averaged_filepath = fp.make_dir('Data/4_averaged_summary_stats')

**Filepath to store summary data**

In [3]:
# Output directory for the overall summary tables written at the end of the notebook.
# NOTE(review): assumes fp.make_dir returns the directory path as a string — confirm in fopt_toolkit.
new_filepath = fp.make_dir('Data/7_overall_summary')

**Get the list of averaged fil/raw and vicarious calibration csv files from filepath**

In [4]:
# Collect every CSV path in each input directory, then sort the paths in place
# so the processing order is deterministic.
vic_file_list = glob.glob(vic_cal_filepath + '/*.csv')
vic_file_list.sort()
average_file_list = glob.glob(averaged_filepath + '/*.csv')
average_file_list.sort()

In [5]:
# Partition the averaged files: paths containing 'raw.csv' are raw samples,
# every other path is treated as a fil sample.
raw_file_list = [path for path in average_file_list if 'raw.csv' in path]
fil_file_list = [path for path in average_file_list if 'raw.csv' not in path]

**Get dictionaries with dataframes (grouped by sample type)**

In [6]:
def fill_dict(file_list, scatter=True):
    """Read tab-delimited CSV files and group the dataframes by sample type.

    The file name (directory and extension removed) is split on '_'; the
    second token is used as the lake name and the third as the sample type.
    Each dataframe gets a 'lake' column holding the lake name.

    Parameters
    ----------
    file_list : iterable of str
        Paths of the files to read.
    scatter : bool, default True
        When truthy the result has the sample-type keys 'a', 'b' and 'c';
        otherwise only 'a' and 'c'.

    Returns
    -------
    dict
        Maps each sample type to a list of dataframes, in ``file_list`` order.

    Raises
    ------
    KeyError
        If a file's sample type is not one of the expected keys.
    IndexError
        If a file name has fewer than three '_'-separated tokens.
    """
    sample_types = ('a', 'b', 'c') if scatter else ('a', 'c')
    my_dictionary = {sample_type: [] for sample_type in sample_types}
    for file_path in file_list:
        # os.path.basename understands the platform's separators, unlike
        # splitting on '/', which fails for Windows-style paths from glob.
        file_name_cols = os.path.basename(file_path).split('.')[0].split('_')
        lake_name = file_name_cols[1]
        sample_type = file_name_cols[2]
        if sample_type not in my_dictionary:
            raise KeyError(
                "unexpected sample type '{}' in file '{}'; expected one of {}".format(
                    sample_type, file_path, list(sample_types)
                )
            )
        df = pd.read_csv(file_path, skiprows=0, delimiter='\t')
        df['lake'] = lake_name
        my_dictionary[sample_type].append(df)
    return my_dictionary

In [7]:
# Load each group of files into a dict of dataframes keyed by sample type.
# The vicarious calibration files use the default keys ('a', 'b', 'c');
# the raw and fil files are loaded with scatter=False, i.e. only 'a' and 'c'.
vic_cal_dfs = fill_dict(vic_file_list)
raw_dfs = fill_dict(raw_file_list, scatter=False)
fil_dfs = fill_dict(fil_file_list, scatter=False)

In [15]:
# Inspect the loaded fil dataframes to check which columns were read.
# get_concat pivots on a 'wl' column, so confirm it is present in these frames.
fil_dfs

{'a': [       mean       std lake
  0  4.239384  0.242641  9mi
  1  2.436978  0.048707  9mi
  2  1.099925  0.044550  9mi
  3  0.819111  0.114747  9mi
  4  0.629009  0.274199  9mi
  5  0.474481  0.230942  9mi
  6  0.179823  0.128404  9mi
  7  0.169928  0.039547  9mi
  8  0.097893  0.032967  9mi,         mean       std lake
  0  37.960713  4.469452  bai
  1  25.904423  4.823540  bai
  2  15.100286  4.848345  bai
  3  12.538910  4.921509  bai
  4  10.520779  4.973956  bai
  5   9.073974  5.078033  bai
  6   5.317102  4.819553  bai
  7   4.732312  4.923445  bai
  8   4.051169  4.827392  bai,        mean       std lake
  0  4.410525  0.010037  boo
  1  2.617915  0.005565  boo
  2  1.221546  0.004136  boo
  3  0.912698  0.003336  boo
  4  0.706348  0.003337  boo
  5  0.530661  0.003545  boo
  6  0.181173  0.002718  boo
  7  0.156726  0.002622  boo
  8  0.101234  0.002490  boo,        mean       std lake
  0  9.147788  0.251112  cbr
  1  5.382436  0.053377  cbr
  2  2.404677  0.016669  cbr
  

**Concatenate and combine dataframes by sample type, rename columns**

In [12]:
def get_concat(my_dictionary):
    """Combine per-lake dataframes into one wide table with one row per lake.

    For every sample type the dataframes are stacked and pivoted so that each
    lake becomes a row and each 'wl' value becomes a column, once for 'mean'
    and once for 'std'. Columns are named '<sample_type>_<wl>_mean' and
    '<sample_type>_<wl>_std' (with 'wl' cast to int), and the final table has
    its columns sorted by name.

    Parameters
    ----------
    my_dictionary : dict
        Maps a sample type to a list of dataframes, each with the columns
        'lake', 'wl', 'mean' and 'std'. Sample types with an empty list are
        skipped.

    Returns
    -------
    pandas.DataFrame
        Indexed by lake, with the mean/std columns of all sample types.

    Raises
    ------
    KeyError
        If a dataframe lacks one of the required columns.
    ValueError
        If no dataframes are supplied at all, or if a (lake, wl) pair occurs
        more than once within a sample type (raised by ``DataFrame.pivot``).
    """
    required_columns = ('lake', 'wl', 'mean', 'std')
    all_data_frames = []
    for sample_type, dfs in my_dictionary.items():
        # A sample type without files would make pd.concat raise; skip it.
        if not dfs:
            continue
        # Fail early and descriptively instead of an opaque KeyError from pivot.
        for df in dfs:
            missing = [col for col in required_columns if col not in df.columns]
            if missing:
                raise KeyError(
                    "sample type '{}': dataframe is missing required column(s) {}; "
                    "found columns {}".format(sample_type, missing, list(df.columns))
                )
        ## Concatenate dfs (stacked)
        concatenated_dfs = pd.concat(dfs)
        ## Pivot so that each lake is its own row and columns are wavelengths,
        ## once per statistic, renaming the columns as we go
        stat_frames = []
        for stat in ('mean', 'std'):
            df_stat = (
                concatenated_dfs
                .pivot(index='lake', columns='wl', values=stat)
                .rename_axis(None, axis=0)
                .rename_axis(None, axis=1)
            )
            df_stat.columns = [
                '{}_{}_{}'.format(sample_type, int(wl), stat) for wl in df_stat.columns
            ]
            stat_frames.append(df_stat)
        ## Combine means and standard deviations; columns are sorted once at the end
        all_data_frames.append(pd.concat(stat_frames, axis=1))
    if not all_data_frames:
        raise ValueError('get_concat received no dataframes to concatenate')
    all_dfs_all_sample_types = pd.concat(all_data_frames, axis=1)
    ## Sort columns
    return all_dfs_all_sample_types.reindex(sorted(all_dfs_all_sample_types.columns), axis=1)

In [13]:
# Build one wide summary table (one row per lake) for each group of files.
# NOTE(review): the recorded run of this cell ended with KeyError: 'wl'. The fil
# dataframes displayed earlier have only 'mean', 'std' and 'lake' columns, so the
# averaged files presumably need a 'wl' column added upstream — confirm.
vic_cal_summary = get_concat(vic_cal_dfs)
fil_summary = get_concat(fil_dfs)
raw_summary = get_concat(raw_dfs)

KeyError: 'wl'

**Save to csv**

In [None]:
# Write each summary table to the output directory as a tab-separated file.
# index=True keeps the row index (the lake names set by get_concat's pivot) in the file.
vic_cal_summary.to_csv(new_filepath+'/Vical_summary.csv', sep='\t',index=True)
fil_summary.to_csv(new_filepath+'/Fil_summary.csv', sep='\t',index=True)
raw_summary.to_csv(new_filepath+'/Raw_summary.csv', sep='\t',index=True)