### Importing libraries

In [6]:
### Import the required python libraries
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import glob
import os
import sys
import csv
from scipy import interpolate
from os import listdir
from os.path import isfile, join
import matplotlib.gridspec as gridspec
import inspect
from scipy import interpolate

In [7]:
from fopt_toolkit import fopt_toolkit as fp

# List the name of every function defined in the project toolkit, one per line.
for func_name, _func in inspect.getmembers(fp, inspect.isfunction):
    print(func_name)

get_id
make_dir
plotting_multiple_files


### Get dataframe with metadata information, including sample temperatures and summary data filepaths

In [8]:
# Load the tab-separated project metadata table. 'Date' and 'Rep' are read
# with object dtype so pandas does not convert them to numbers.
metadata_updated = pd.read_csv(
    '../Metadata/project_metadata_updated.csv',
    sep='\t',
    dtype={'Date': object, 'Rep': object},
)

## For each wavelength for each file

**Blank-corrected a or c = (mean_a_or_c_of_sample - mean_a_or_c_of_blank)**

**Temp-corrected a or c = Blank-corrected a or c - (Phi_temp * (Sample_temp - Blank_temp))**

**Combined: Corrected = (sample_mean - blank_mean) - (phi_temp * (sample_temp - blank_temp))**

## Blank correction

### Get blank values to use for blank correction

**Which samples should be used for blank correction? The cleanest of them all! Based on the graphs, these are the ones:**

In [9]:
# IDs of the samples chosen as blanks for the 'a' and 'c' measurements.
# They are matched against the ID column of the metadata table below.
cleanest_a = 'AC9_072619_sea_cal_a_2_T24_6'
cleanest_c = 'AC9_072619_sea_cal_c_2_T24_6'

**Get the dataframes for these samples**

In [10]:
# Look up the summary-file path recorded in the metadata for each blank;
# the first matching row is used.
cleanest_a_filepath = metadata_updated.loc[
    metadata_updated['ID'] == cleanest_a, 'Summary_file_path'
].iloc[0]
cleanest_c_filepath = metadata_updated.loc[
    metadata_updated['ID'] == cleanest_c, 'Summary_file_path'
].iloc[0]


In [11]:
# Read the tab-separated summary files of the two blanks.
cleanest_a_df, cleanest_c_df = (
    pd.read_csv(blank_path, sep='\t')
    for blank_path in (cleanest_a_filepath, cleanest_c_filepath)
)

In [12]:
# Mean a / c values of the blanks as plain Python lists, in file row order.
cleanmeans_a = cleanest_a_df['a_mean'].tolist()
cleanmeans_c = cleanest_c_df['c_mean'].tolist()

**Get the temperatures for these samples**

In [13]:
# Temperature recorded in the metadata for each blank. `.item()` requires
# exactly one matching row and raises otherwise.
cleana_temp = metadata_updated.loc[
    metadata_updated['ID'].eq(cleanest_a), 'Temp'
].item()
cleanc_temp = metadata_updated.loc[
    metadata_updated['ID'].eq(cleanest_c), 'Temp'
].item()

  """Entry point for launching an IPython kernel.
  


## Temperature correction values

In [14]:
# Load the tab-separated table holding the temperature-correction
# coefficients (read from its 'phi_t' column in the next cell).
sullivan_chart = pd.read_csv(
    '../Metadata/sullivan_chart_ac9.csv',
    sep='\t',
)

In [15]:
# Temperature-correction coefficients as a plain list, in table row order.
phi_t = sullivan_chart['phi_t'].tolist()

In [16]:
# Display the loaded phi_t values for a visual check.
phi_t

[0.0, 0.0, 0.0, 0.0001, 0.0, 0.0, 0.0, -0.0001, 0.00415]

## Get dictionary of sample IDs to file dataframes

**Corrected = (sample_mean - blank_mean) - (phi_temp * (sample_temp - blank_temp))**

In [17]:
## Maps each non-calibration sample ID to a list of its blank- and
## temperature-corrected means (one value per row of its summary file).
## Note: despite the name, the values are lists, not dataframes.
ID_to_df = {}
## Iterate through rows of metadata file
for _row_idx, row in metadata_updated.iterrows():
    ## Pull information from individual samples as temporary variables
    ID, filepath, analysis_type, sample_type, temp = (
        row['ID'],
        row['Summary_file_path'],
        row['Analysis_Type'],
        row['Sample_Type'],
        row['Temp'],
    )
    ## Skip the calibration samples
    if sample_type == 'cal':
        continue
    ## Read in the original summary data and give the a/c columns common names
    df = pd.read_csv(filepath, sep='\t')
    df = df.rename(columns={'c_mean': 'sample_mean', 'a_mean': 'sample_mean',
                            'c_std': 'std', 'a_std': 'std'})
    ## Subset the data to just include the wl and means, made visualizing easier.
    ## .copy() gives an independent frame, so the column assignments below do
    ## not operate on a slice of the original (avoids SettingWithCopyWarning).
    df = df[['wl', 'sample_mean']].copy()
    ## Add all necessary variables for blank and temp correction to the temp df
    ## (It makes it very easy to do mathematical operations)
    df['sample_temp'] = temp
    ## List assignments are positional.
    ## NOTE(review): assumes the sample file, the blank file and the phi_t
    ## table list the wavelengths in the same order -- confirm.
    df['phi_t'] = phi_t
    if analysis_type == 'a':
        df['blank_mean'] = cleanmeans_a
        df['blank_temp'] = cleana_temp
    elif analysis_type == 'c':
        df['blank_mean'] = cleanmeans_c
        df['blank_temp'] = cleanc_temp
    else:
        ## Without a blank there is nothing to correct against; fail with a
        ## clear message instead of a KeyError on 'blank_mean' further down.
        raise ValueError(
            "Unknown Analysis_Type {!r} for sample {!r}; expected 'a' or 'c'".format(
                analysis_type, ID))
    ## Perform blank and temp correction
    df['corrected_mean'] = (
        (df['sample_mean'] - df['blank_mean'])
        - (df['phi_t'] * (df['sample_temp'] - df['blank_temp']))
    )
    ## Add corrected means to dictionary (in the form of a list) with IDs as keys
    ID_to_df[ID] = list(df['corrected_mean'])


**For each filtered or raw sample ID in the dictionary, there is a list of blank- and temperature-corrected a or c means.**

**Next, take the summary dataframes, replace the means with the corrected means, and save them as new csv files in a new folder. This could all have been done in the same loop...oh well.**

In [18]:
# Output directory for the corrected summary files; the returned value is
# later used as the prefix of each output file path.
# NOTE(review): fp.make_dir presumably creates the directory and returns its
# path -- confirm in fopt_toolkit.
# NOTE(review): this path is relative to the working directory, whereas the
# metadata files are addressed with '../' -- confirm this is intended.
cor_filepath = fp.make_dir('Data/2_summary_stats_tempcor/')

In [19]:
## One entry per metadata row: the path of the corrected summary file that was
## written, or None for calibration rows (which are not corrected).
new_file_paths = []
for _row_idx, row in metadata_updated.iterrows():
    ## Pull information from individual samples as temporary variables
    ID, filepath, analysis_type, sample_type = (
        row['ID'],
        row['Summary_file_path'],
        row['Analysis_Type'],
        row['Sample_Type'],
    )
    ## Skip the calibration samples, but keep the list aligned with the rows
    if sample_type == 'cal':
        new_file_paths.append(None)
        continue
    ## Read in the original summary data, do a little cleanup
    df = pd.read_csv(filepath, sep='\t').reset_index(drop=True)
    ## Replace the original means with the corrected means for this sample
    if analysis_type == 'a':
        df['a_mean'] = ID_to_df[ID]
    elif analysis_type == 'c':
        df['c_mean'] = ID_to_df[ID]
    ## Same file name as the original, placed in the corrected-data folder.
    ## os.path.join gives a valid path whether or not cor_filepath ends with
    ## a separator.
    full_path = os.path.join(cor_filepath, os.path.basename(filepath))
    df.to_csv(full_path, sep='\t', index=False)
    new_file_paths.append(full_path)

In [20]:
# Record the corrected-file path for every metadata row (None for calibration
# rows), then write the table back as tab-separated text.
# Note: this overwrites the same metadata file that was read at the top of
# the notebook.
metadata_updated['Cor_summary_file_path'] = new_file_paths
metadata_updated.to_csv('../Metadata/project_metadata_updated.csv', sep='\t',index=False)