# ABOVE AC9 Data Processing  

## Part 2: Temperature and Salinity Corrections for Calibration Files

**Authors:** Catherine Kuhn and Elena Terzić   
**Last Updated:** August 15th, 2018

This code reads in AC9 files of summary statistics and outputs a temperature and salinity corrected version of the file. Correction values are from Sullivan et al 2006 (Applied Optics). 

*Note: Focus on getting good pure water calibrations. Note the magnitude and shape of the pure water spectra; they should be repeatable between calibrations to within the instrument resolution (0.005 m⁻¹ for absorption; 0.01 m⁻¹ for attenuation).*

### Load required packages

In [1]:
### Import the required python libraries
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import glob
import os
import sys
import csv
from scipy import interpolate
from os import listdir
from os.path import isfile, join
import matplotlib.gridspec as gridspec

### Load corrections 

In [2]:
%cd /Users/ckuhn/Documents/ABOVE/Resources/AC9_resources/Elena/ac9_originals/Code

# Load the Sullivan et al. (2006) correction table from the current directory
# and keep only the wavelength column plus the three coefficient columns.
phi_columns = ['wl[nm]', 'phi_t', 'phi_s_a', 'phi_s_c']
df1 = pd.read_csv('Sullivan_T_chart.txt', skiprows=0, delimiter=' ')
df2 = df1[phi_columns]

/Users/ckuhn/Documents/ABOVE/Resources/AC9_resources/Elena/ac9_originals/Code


In [3]:
# Pull each column of the correction table out into its own variable.
wl_phi, phi_t, phi_s_a, phi_s_c = (
    df2[column] for column in ('wl[nm]', 'phi_t', 'phi_s_a', 'phi_s_c')
)

#### Choose index manually from 0 to 8 for the 9 sites

During this step, you will need to manually change the index and re-run the code below once for each site. 


In [4]:
idx = 0 # position of the site to process; nine sites are listed in this notebook, so valid values are 0 to 8

Read in files

In [5]:
%cd /Users/ckuhn/Documents/ABOVE/Data/AC9/ac9_data/

def _sorted_matches(pattern):
    """Return the paths matching the glob *pattern*, in sorted order."""
    return sorted(glob.glob(pattern))


# One sorted list of calibration summary files per site visit, matched on the
# date_site tag embedded in the file name.
site01 = _sorted_matches('2_summary_stats/cal/*070718_sea*.csv')
site02 = _sorted_matches('2_summary_stats/cal/*071318_fai*.csv')
site03 = _sorted_matches('2_summary_stats/cal/*071618_can*.csv')
site04 = _sorted_matches('2_summary_stats/cal/*071718_can*.csv')
site05 = _sorted_matches('2_summary_stats/cal/*071818_can*.csv')
site06 = _sorted_matches('2_summary_stats/cal/*071918_can*.csv')
site07 = _sorted_matches('2_summary_stats/cal/*072118_fai*.csv')
site08 = _sorted_matches('2_summary_stats/cal/*072118_fav*.csv')
site09 = _sorted_matches('2_summary_stats/cal/*081618_sea*.csv')

/Users/ckuhn/Documents/ABOVE/Data/AC9/ac9_data


In [6]:
# Gather the per-site file lists and, in the same order, the date_site tag
# that identifies each site in the file names.
sites = [
    site01, site02, site03,
    site04, site05, site06,
    site07, site08, site09,
]
sitenames = [
    '070718_sea',
    '071318_fai',
    '071618_can',
    '071718_can',
    '071818_can',
    '071918_can',
    '072118_fai',
    '072118_fav',
    '081618_sea',
]

In [7]:
# Folder holding the processed calibration files, listed once per site.
# All nine sites currently share the same folder.
mypaths = ['2_summary_stats/cal/'] * 9

In [8]:
# Pick the site name and file path for the chosen index
sitename = sitenames[idx]
mypath = mypaths[idx]

# Each site has several files associated with it.
# List the regular files in the site's folder and keep those whose name
# contains the site tag. listdir() returns entries in arbitrary order, so the
# list is sorted to make every positional lookup below reproducible.
onlyfiles_aux = sorted(f for f in listdir(mypath) if isfile(join(mypath, f)))
onlyfiles = [f for f in onlyfiles_aux if sitename in f]

# Parse information out of all the AC9 files for the selected site.
# The fields are read by fixed character position, which matches names such as
# 'AC9_070718_sea_cal_a_1_T20_6.csv'.
ac9_files = [f for f in onlyfiles if 'AC9' in f]
instrument = [f[0:3] for f in ac9_files]        # e.g. 'AC9'
site_date = [f[4:14] for f in ac9_files]        # e.g. '070718_sea'
site = [f[11:14] for f in ac9_files]            # e.g. 'sea'
date = [f[4:10] for f in ac9_files]             # e.g. '070718'
a_or_c = [f[19:20] for f in ac9_files]          # 'a' or 'c'
sample_type = [f[15:18] for f in ac9_files]     # e.g. 'cal'
# Temperature is encoded with '_' as the decimal separator: '20_6' -> 20.6
temp = [float(f[24:28].replace('_', '.')) for f in ac9_files]
rep = [f[21:22] for f in ac9_files]             # replicate number, e.g. '1'

In [9]:
# Keep the sample-type tags that contain 'cal' (one entry per calibration file)
cal = [tag for tag in sample_type if 'cal' in tag]

In [10]:
# Assign a salinity of 0 to every file of the selected site whose name
# contains 'cal'. This matters little for freshwater samples.
n_cal_files = sum(1 for name in onlyfiles if 'cal' in name)
S_cal = [0.] * n_cal_files

In [11]:
# Build the shared '<folder><instrument>_<date_site>_<type>' prefix once, then
# collect the a-side and c-side calibration files that start with it.
cal_prefix = mypath + instrument[0] + '_' + site_date[0] + '_' + str(cal[0])
a_side_cal = sorted(glob.glob(cal_prefix + '_a_' + '*'))
c_side_cal = sorted(glob.glob(cal_prefix + '_c_' + '*'))

In [14]:
# Show the a-side calibration files matched for the selected site
a_side_cal

['2_summary_stats/cal/AC9_070718_sea_cal_a_1_T20_6.csv',
 '2_summary_stats/cal/AC9_070718_sea_cal_a_2_T20_9.csv',
 '2_summary_stats/cal/AC9_070718_sea_cal_a_3_T20_9.csv']

#### Check if you have more than one cal/sample files to try both (all) of them --> if you have more than 4 files within a group

In [13]:
# First method:
# when a site has replicate calibration files, use a single replicate,
# selected by its position in the sorted a-side and c-side file lists.
cal_idx = 1
a_cal, c_cal = a_side_cal[cal_idx], c_side_cal[cal_idx]

In [None]:
## Here is a good place to average replicates instead of just taking one file 

In [None]:
# Read in the selected a-side and c-side calibration files (tab-delimited).
# a_cal and c_cal each hold one file path, so the whole string is passed to
# read_csv; indexing it (a_cal[0]) would pass only the path's first character.
dfcal_a = pd.read_csv(a_cal, skiprows=0, delimiter='\t')
dfcal_c = pd.read_csv(c_cal, skiprows=0, delimiter='\t')

#### Statistics (which will later be used for error propagation)

In [19]:
# Mean and standard-deviation columns of the a-side and c-side calibration files
cal_a_mean = dfcal_a['a_mean']
cal_c_mean = dfcal_c['c_mean']
cal_a_std = dfcal_a['a_std']
cal_c_std = dfcal_c['c_std']

In [20]:
def actempsalcorr(wl_a, wl_c, a, c, delta_t, delta_s,
                  phi_table_path=None, output_path=None):
    """Correct AC9 a and c spectra for temperature and salinity.

    The correction coefficients are read from the Sullivan et al. (2006)
    table, linearly interpolated (extrapolated outside the table range) onto
    the instrument wavelengths, and applied as::

        a_corrected = a - phi_t * delta_t + phi_s_a * delta_s
        c_corrected = c - phi_t * delta_t + phi_s_c * delta_s

    The uncorrected and corrected spectra are written to a tab-delimited
    file and also returned.

    Parameters
    ----------
    wl_a : array-like
        Wavelengths of the a values.
    wl_c : array-like
        Wavelengths of the c values.
    a : array-like
        Uncorrected a values, one per entry of ``wl_a``.
    c : array-like
        Uncorrected c values, one per entry of ``wl_c``.
    delta_t : float
        Temperature difference between the sample and the calibration.
    delta_s : float
        Salinity difference between the sample and the calibration.
    phi_table_path : str, optional
        Space-delimited coefficient table with the columns ``wl[nm]``,
        ``phi_t``, ``phi_s_a`` and ``phi_s_c``. Defaults to the copy of
        ``Sullivan_T_chart.txt`` in the project resources folder.
    output_path : str, optional
        Where to write the result. Defaults to a file in
        ``3_t_s_corrected/`` named from the first entries of the
        notebook-level lists ``instrument``, ``site_date``, ``sample_type``
        and ``rep``.

    Returns
    -------
    pandas.DataFrame
        Columns ``wl``, ``a_uncorr``, ``c_uncorr``, ``a_corrected`` and
        ``c_corrected``. The single ``wl`` column is taken from ``wl_a``.

    Raises
    ------
    ValueError
        If the four input arrays do not all have the same shape.
    """
    if phi_table_path is None:
        phi_table_path = ('/Users/ckuhn/Documents/ABOVE/Resources/AC9_resources/'
                          'Elena/ac9_originals/Code/Sullivan_T_chart.txt')

    wl_a = np.asarray(wl_a, dtype=float)
    wl_c = np.asarray(wl_c, dtype=float)
    a = np.asarray(a, dtype=float)
    c = np.asarray(c, dtype=float)
    if not (wl_a.shape == wl_c.shape == a.shape == c.shape):
        raise ValueError('wl_a, wl_c, a and c must all have the same length')

    table = pd.read_csv(phi_table_path, skiprows=0, delimiter=' ')
    wl_phi = table['wl[nm]'].astype(float)

    def _coefficients_at(column, wavelengths):
        # Interpolate one coefficient column of the table to the given wavelengths
        curve = interpolate.interp1d(wl_phi, table[column].astype(float),
                                     kind='linear', fill_value='extrapolate')
        return curve(wavelengths)

    a_phi_t = _coefficients_at('phi_t', wl_a)
    c_phi_t = _coefficients_at('phi_t', wl_c)
    a_phi_s = _coefficients_at('phi_s_a', wl_a)
    c_phi_s = _coefficients_at('phi_s_c', wl_c)

    # NOTE(review): the salinity term is added here, whereas the published
    # correction appears to subtract both terms: a - (phi_t*dT + phi_s*S).
    # The sign is left as it was; it has no effect while delta_s is 0. Confirm.
    a_ts = a - a_phi_t * delta_t + a_phi_s * delta_s
    c_ts = c - c_phi_t * delta_t + c_phi_s * delta_s

    if output_path is None:
        # NOTE(review): the default name always uses the first parsed file's
        # tags, not those of the replicate actually selected — confirm.
        outputname = (str(instrument[0]) + '_' + str(site_date[0]) + '_'
                      + str(sample_type[0]) + '_' + str(rep[0]) + '.csv')
        output_path = '3_t_s_corrected/' + outputname

    # Create the output folder if needed so that to_csv does not fail on it
    output_folder = os.path.dirname(output_path)
    if output_folder:
        os.makedirs(output_folder, exist_ok=True)

    output_df = pd.DataFrame({
        'wl': wl_a,
        'a_uncorr': a,
        'c_uncorr': c,
        'a_corrected': a_ts,
        'c_corrected': c_ts,
    })
    output_df.to_csv(output_path, sep='\t')

    # TODO: error propagation is not implemented yet

    return output_df


#### Insert the function inputs

In [21]:
# Wavelengths of the measurements; the a and c sides share the same wavelengths
wl_a = dfcal_a['wl']
wl_c = wl_a

# For just correcting the cal files
a = np.asarray(dfcal_a['a_mean'])
c = np.asarray(dfcal_c['c_mean'])

##### UNDER DEVELOPMENT ######
# subtracts the dissolved sample from the particulate sample
# atot = sample_raw
# afil = sample_filtered
# apart = tot - fil

# ap = np.asarray(df_a_tot['a_mean'] - df_a_diss['a_mean']) 
# cp = np.asarray(df_c_tot['a_mean'] - df_c_diss['a_mean']) 
##################################

# Parse the water temperature from the file name ('..._T20_9.csv' -> 20.9).
# a_cal includes the folder prefix, so the folder is stripped first: positions
# 24:28 are counted within the bare file name. Slicing the full path instead
# picks up digits of the date field.
T_cal = float(os.path.basename(a_cal)[24:28].replace('_', '.'))

# Temperature offset passed to the correction function.
# NOTE(review): this is the calibration temperature itself, not a difference
# from a reference temperature — confirm that this is intended.
delta_t = T_cal

# Take the list of salinities associated with each sample and calculate the
# difference in salinity between the samples and the clean water (cal) blanks.
# Both values currently come from S_cal, so the difference is always 0.
S_sample_first = S_cal[0]
S_cal_first = S_cal[0]
delta_s = S_sample_first - S_cal_first
print(delta_s)

# this change in temperature goes in the filename
dT = ("%.2f" % abs(0))


0.0


In [23]:
# Run the temperature and salinity correction on the selected calibration data
actempsalcorr(
    wl_a=wl_a,
    wl_c=wl_c,
    a=a,
    c=c,
    delta_t=delta_t,
    delta_s=delta_s,
)