### Importing libraries

In [1]:
### Import the required python libraries
import numpy as np
import pandas as pd
import matplotlib.pylab as plt
import glob
import os
import sys
import csv
from scipy import interpolate
from os import listdir
from os.path import isfile, join
import matplotlib.gridspec as gridspec
import inspect
from scipy import interpolate

In [2]:
from fopt_toolkit import fopt_toolkit as fp
# List the names of the functions exposed by the fopt_toolkit module.
# inspect.getmembers yields (name, object) pairs; only the name is shown.
# print() with a single argument behaves the same on Python 2 and Python 3,
# unlike the Python 2-only `print name` statement.
for func_name, _func in inspect.getmembers(fp, inspect.isfunction):
    print(func_name)

get_id
make_dir
plotting_multiple_files


### Get dataframe with metadata information, including sample temperatures and summary data filepaths

In [3]:
# Load the tab-delimited project metadata table.
# 'Date' and 'Rep' are read as plain objects rather than being type-inferred.
metadata_updated = pd.read_csv(
    '../Metadata/project_metadata_updated.csv',
    delimiter='\t',
    skiprows=0,
    dtype={'Date': object, 'Rep': object},
)

## For each wavelength for each file

**Blank-corrected a or c** = `mean_a_or_c_of_sample - mean_a_or_c_of_blank`

**Temp-corrected a or c** = `Blank-corrected a or c - (Phi_temp * (Sample_temp - Blank_temp))`

**Corrected** = `(sample_mean - blank_mean) - (phi_temp * (sample_temp - blank_temp))`

## Blank correction

### Get blank values to use for blank correction

** Which samples to use for blank correction? The cleanest of them all! Based on graphs, these are they: **

In [4]:
# Sample IDs of the blanks chosen as the reference for blank correction:
# one for the 'a' measurements and one for the 'c' measurements.
cleanest_a, cleanest_c = (
    'AC9_070718_sea_cal_a_3_T20_9',
    'AC9_070718_sea_cal_c_3_T20_6',
)

** Get dataframes for these samples **

In [5]:
# Look up the summary-file path recorded in the metadata for each chosen blank.
# If several rows share the ID, the first match is used.
_a_paths = metadata_updated.loc[metadata_updated.ID == cleanest_a, 'Summary_file_path']
_c_paths = metadata_updated.loc[metadata_updated.ID == cleanest_c, 'Summary_file_path']
cleanest_a_filepath = list(_a_paths)[0]
cleanest_c_filepath = list(_c_paths)[0]


In [6]:
# Read the tab-delimited summary tables of the two reference blanks.
cleanest_a_df = pd.read_csv(
    cleanest_a_filepath,
    delimiter='\t',
    skiprows=0,
)
cleanest_c_df = pd.read_csv(
    cleanest_c_filepath,
    delimiter='\t',
    skiprows=0,
)

In [7]:
# Pull the mean column of each reference blank out as a plain list
# (one value per row of the blank's summary table).
cleanmeans_a = list(cleanest_a_df.loc[:, 'a_mean'])
cleanmeans_c = list(cleanest_c_df.loc[:, 'c_mean'])

** Get temps for these samples **

In [8]:
# Temperature recorded in the metadata for each reference blank.
# .item() is used, so each ID is expected to select exactly one metadata row.
_a_temp_rows = metadata_updated.loc[metadata_updated.ID == cleanest_a, 'Temp']
_c_temp_rows = metadata_updated.loc[metadata_updated.ID == cleanest_c, 'Temp']
cleana_temp = _a_temp_rows.item()
cleanc_temp = _c_temp_rows.item()

## Temperature correction values

In [9]:
# Load the tab-delimited table that holds the temperature-correction
# coefficients (read below from its 'phi_t' column).
sullivan_chart = pd.read_csv(
    '../Metadata/sullivan_chart_ac9.csv',
    delimiter='\t',
    skiprows=0,
)

In [10]:
# Temperature-correction coefficients as a plain list, in chart row order.
phi_t = list(sullivan_chart.loc[:, 'phi_t'])

In [11]:
# Display the phi_t coefficients taken from the chart as a sanity check.
phi_t

[0.0, 0.0, 0.0, 0.0001, 0.0, 0.0, 0.0, -0.0001, 0.00415]

## Get dictionary of sample IDs to file dataframes

** Corrected = (sample_mean-blank_mean) - (phi_temp * (sample_temp-blank_temp)) **

In [16]:
# Build ID_to_df: sample ID -> blank- and temperature-corrected means.
# NOTE: despite the name, each entry is a plain list of corrected values
# (one per row of the sample's summary file), not a DataFrame.
#
#   corrected = (sample_mean - blank_mean) - phi_t * (sample_temp - blank_temp)

# Blank means and blank temperature to use for each analysis type.
blank_by_analysis = {
    'a': (cleanmeans_a, cleana_temp),
    'c': (cleanmeans_c, cleanc_temp),
}

ID_to_df = {}
for index, row in metadata_updated.iterrows():
    ID, filepath, analysis_type, sample_type, temp = (
        row['ID'], row['Summary_file_path'], row['Analysis_Type'],
        row['Sample_Type'], row['Temp'],
    )

    # Only non-blank samples are corrected; 'cal' rows are skipped.
    if sample_type == 'cal':
        continue

    # Fail early with a clear message; otherwise an unexpected analysis type
    # would surface later as an opaque KeyError on the 'blank_mean' column.
    if analysis_type not in blank_by_analysis:
        raise ValueError(
            "Unknown Analysis_Type {!r} for sample {!r}; expected 'a' or 'c'".format(
                analysis_type, ID))
    blank_mean, blank_temp = blank_by_analysis[analysis_type]

    summary = pd.read_csv(filepath, skiprows=0, delimiter='\t')
    # The mean column is named after the analysis type; give it a common name.
    summary = summary.rename(columns={'c_mean': 'sample_mean', 'a_mean': 'sample_mean'})

    # .copy() makes an independent frame, so the column assignments below do
    # not write to a slice of `summary` (avoids SettingWithCopyWarning).
    df = summary[['wl', 'sample_mean']].copy()
    df['sample_temp'] = temp
    df['phi_t'] = phi_t            # must have one coefficient per row of the file
    df['blank_mean'] = blank_mean  # must have one blank value per row of the file
    df['blank_temp'] = blank_temp
    df['corrected_mean'] = (
        (df['sample_mean'] - df['blank_mean'])
        - (df['phi_t'] * (df['sample_temp'] - df['blank_temp']))
    )
    ID_to_df[ID] = list(df['corrected_mean'])


In [17]:
# Spot-check: show the corrected means stored for one sample.
ID_to_df['AC9_071718_y20_raw_a_1_T16_8']

[4.7648957513059997,
 2.9155578158509998,
 1.459975015068,
 1.128899108703,
 0.8707707825369998,
 0.68501706154700004,
 0.2947921268034,
 0.26159699683639998,
 0.19726364713720002]

In [None]:
# Spot-check: show the corrected means stored for one sample.
# NOTE(review): duplicates an earlier cell — confirm whether both are needed.
ID_to_df['AC9_071718_y20_raw_a_1_T16_8']

In [None]:
# Print every sample ID that has corrected values.
# The original iterated over `ID_to_df_absorption_a`, which is never defined in
# this notebook (NameError on a fresh kernel); the dictionary built above is
# `ID_to_df`. NOTE(review): confirm this is the intended dictionary.
# print() is used so the cell runs on both Python 2 and Python 3.
for sample_id in ID_to_df:
    print(sample_id)

### Make directory for temperature-corrected summary stats

In [None]:
# Output location for the temperature-corrected summary statistics.
# presumably fp.make_dir creates the directory and returns its path — TODO confirm
tempcor_dir_name = 'Data/2_1_summary_stats_tempcor'
temperature_cor_2_filepath = fp.make_dir(tempcor_dir_name)
# Disabled: directory for replicate-comparison figures.
#figures_cleaned_2_filepath = fp.make_dir('Figures/Check_compared_reps')

In [None]:
# Show which columns the metadata table provides.
metadata_updated.columns

### Subset metadata for just temperatures, file paths, and sample IDs for simpler handling

In [None]:
# Keep only the columns needed below: temperature, summary-file path, sample ID.
temp_columns = ['Temp', 'Summary_file_path', 'ID']
temp_data = metadata_updated[temp_columns]

In [None]:
# Re-read the metadata file and display it (the result is not stored).
pd.read_csv(
    '../Metadata/project_metadata_updated.csv',
    delimiter='\t',
    skiprows=0,
    dtype={'Date': object, 'Rep': object},
)

### Create a dictionary where keys are sample IDs and entries are pandas dataframes containing columns for wavelengths, mean absorption or attenuation, and the temperature at which the sample was taken.

In [None]:
# Read the space-separated, header-less file and display it.
# The original called a bare `read_csv`, which is not imported anywhere in this
# notebook (only `pandas as pd` is), so it raised NameError.
pd.read_csv('output_list.txt', sep=" ", header=None)

In [None]:
# Build file_to_df: sample ID -> DataFrame with columns wl, mean, Temp, phi_t.

# The chart was loaded as `sullivan_chart`; the original referenced the
# undefined name `sullivan_chart_ac9` (NameError). The list is the same for
# every sample, so it is built once outside the loop.
phi_t_values = list(sullivan_chart['phi_t'])

file_to_df = {}
for index, row in temp_data.iterrows():
    Temp, File, ID = row['Temp'], row['Summary_file_path'], row['ID']

    summary = pd.read_csv(File, skiprows=0, delimiter='\t')
    # The mean column is named after the analysis type; give it a common name.
    summary = summary.rename(columns={'c_mean': 'mean', 'a_mean': 'mean'})

    # .copy() makes an independent frame, so the column assignments below do
    # not write to a slice of `summary` (avoids SettingWithCopyWarning).
    df = summary[['wl', 'mean']].copy()
    df['Temp'] = Temp
    df['phi_t'] = phi_t_values  # must have one coefficient per row of the file
    file_to_df[ID] = df

In [None]:
# Display the phi_t column of the correction chart.
# The chart is loaded as `sullivan_chart`; `sullivan_chart_ac9` is never defined
# in this notebook and raised NameError.
sullivan_chart['phi_t']

### Get lists of sample ids for blanks ('cal') and non-blanks ('fil' or 'raw')

In [None]:
# Split the sample IDs by sample type: blanks ('cal') versus everything else.
is_cal = metadata_updated.Sample_Type == 'cal'
cal_ids = list(metadata_updated.loc[is_cal, 'ID'])
non_cal_ids = list(metadata_updated.loc[~is_cal, 'ID'])

In [None]:
# Print the DataFrame stored for every sample, in dictionary order.
# print() with a single argument behaves the same on Python 2 and Python 3,
# unlike the Python 2-only `print x` statement.
for sample_id in file_to_df:
    print(file_to_df[sample_id])