## Script summary

This script (1) bins CTD data by depth for each cast and (2) computes an average CTD profile for each station. Bins are spaced every 2 m, and the mean for each bin is calculated from the data within 0.5 m of the bin's nominal depth (e.g., the 6 m bin is the average of all data from 5.5 to 6.5 m).

In [1]:
# This script was written using Python 3.7

import pandas as pd # written using v. 0.24.2
import numpy as np # written using v. 1.16.2
import math # written using v. 1.1.0
import matplotlib.pyplot as plt # written using v. 3.1.1
import scipy.stats as ss # written using v. 1.3.2
import cmocean # written using v. 2.0

In [11]:
# Read in the compiled CTD data (downcasts only, going by the file name).
# Forward slashes are used in the path so that it resolves on every OS and so
# that no backslash is misread as the start of a string escape sequence.

infile = '../Data/Sharp16_compiledCTDdata_downcasts.csv'
df = pd.read_csv(infile)

# drop "test" cast (only casts numbered above 1 are kept)
df = df[df.Cast > 1]

# shown as the cell output to confirm which columns were loaded
df.columns

Index(['Cast', 'Cond_Sm', 'Cond2_Sm', 'Dens_kgm3', 'SigmaTheta_kgm3',
       'Sigmat_kgm3', 'Dens2_kgm3', 'SigmaTheta2_kgm3', 'Sigmat2_kgm3',
       'Depth_m', 'Fluor_mgm3', 'Lat', 'Lon', 'DO_umolkg', 'DO_uM',
       'PotTemp_C', 'PotTemp2_C', 'Press_db', 'Sal', 'Sal2', 'SeafloorDepth_m',
       'Temp_C', 'Temp2_C'],
      dtype='object')

In [12]:
# Read in the cast list, which links each cast number to its station and
# position (columns: Station, Cast, Bottle Number, Lat, Lon).
# Forward slashes are used in the path so that it resolves on every OS and so
# that no backslash is misread as the start of a string escape sequence.

infile = '../Data/Sharp16_StaLatLon.csv'
meta = pd.read_csv(infile)

# shown as the cell output to confirm which columns were loaded
meta.columns

Index(['Station', 'Cast', 'Bottle Number', 'Lat', 'Lon'], dtype='object')

In [13]:
# for seafloor depth
# model-derived seafloor depth used because CTD seafloor depth appears highly inconsistent
# (the table has columns 'lat', 'lon' and 'value'; going by the file name, 'value' is
# presumably ETOPO1 elevation in metres -- TODO confirm)

infile = "../Calculations/ETOPO1_elevation_m.csv"
sf = pd.read_csv(infile)
# shown as the cell output to confirm which columns were loaded
sf.columns

Index(['lat', 'lon', 'value'], dtype='object')

## 1. Bin data by depth for each CTD cast

In [14]:
# Extract mean values every `inc` metres for each cast and write them to a CSV.
#
# For every cast, and for every depth step j = 0, inc, 2*inc, ..., the samples
# whose depth rounds to j (to the nearest metre) are averaged. Paired sensors
# (Sal/Sal2, Temp_C/Temp2_C, Sigmat_kgm3/Sigmat2_kgm3) are averaged together
# sample by sample before the bin mean is taken. A depth step with no samples
# still produces a row; its means are NaN.

castlist = df.Cast.unique()

inc = 2  # depth increment to bin by (m)

OutFileName = "../Calculations/Sharp16_BinnedCTDData.csv"

# The rounded grid coordinates do not depend on the cast, so compute them once.
sf_lat = np.round(sf.lat, decimals=1)
sf_lon = np.round(sf.lon, decimals=1)

# `with` guarantees the file is flushed and closed even if a cast fails midway.
with open(OutFileName, 'w') as OutFile:

    # Write header line to new file
    OutFile.write("Sta,Cast,Depth_m,Lat,Lon,Sal,Temp_C,Chl_mgm3,DO_umolkg,Sigmat_kgm3,SeafloorDepth_m\n")

    for i in castlist:
        # Station number and position for this cast. `.item()` extracts the
        # scalar and raises if the cast does not match exactly one metadata row.
        is_cast = meta.Cast == i
        sta = meta.Station[is_cast].item()
        stalat = np.round(meta.Lat[is_cast], decimals=1).item()
        stalon = np.round(meta.Lon[is_cast], decimals=1).item()

        # Seafloor value: mean of all grid points that share the station's
        # position once both are rounded to 0.1 degree.
        sfz = np.mean(sf.value[(sf_lat == stalat) & (sf_lon == stalon)])

        # Subset the cast once instead of re-filtering the full table per depth.
        cast_data = df[df.Cast == i]
        cast_depth = np.round(cast_data.Depth_m, decimals=0)

        depthlist = np.arange(0, np.round(np.max(cast_data.Depth_m), decimals=0) + inc, inc)

        for j in depthlist:
            loc = cast_data[cast_depth == j]
            sal = np.mean((loc.Sal + loc.Sal2) / 2)
            temp = np.mean((loc.Temp_C + loc.Temp2_C) / 2)
            fluor = np.mean(loc.Fluor_mgm3)
            lat = np.mean(loc.Lat)
            lon = np.mean(loc.Lon)
            do = np.mean(loc.DO_umolkg)
            sigma = np.mean((loc.Sigmat_kgm3 + loc.Sigmat2_kgm3) / 2)

            OutputString = "%i,%i,%i,%f,%f,%f,%f,%f,%f,%f,%f" % (sta,i,j,lat,lon,sal,temp,fluor,do,sigma,sfz)
            OutFile.write(OutputString + "\n")

# shown as the cell output to spot-check the file that was just written
pd.read_csv(OutFileName).head()

Unnamed: 0,Sta,Cast,Depth_m,Lat,Lon,Sal,Temp_C,Chl_mgm3,DO_umolkg,Sigmat_kgm3,SeafloorDepth_m
0,1,2,0,37.668159,-73.998949,33.78165,26.681107,0.54083,209.27337,21.901372,-1260.857143
1,1,2,2,37.668076,-73.999029,33.426223,26.675904,0.570048,201.899284,21.635594,-1260.857143
2,1,2,4,37.66818,-73.998935,33.78523,26.681329,0.543282,209.82975,21.903988,-1260.857143
3,1,2,6,37.668195,-73.99892,33.790812,26.692741,0.553111,209.346341,21.904593,-1260.857143
4,1,2,8,37.668213,-73.998911,33.794196,26.698927,0.55206,209.540562,21.905189,-1260.857143


## 2. Take average CTD profile for each station

In [8]:
# Average profiles per station.
#
# Reads the per-cast binned file and, for every station and every depth bin
# present at that station, writes the mean of each variable over the matching
# rows (i.e. over the casts taken at that station).

infile = "../Calculations/Sharp16_BinnedCTDData.csv"
binned = pd.read_csv(infile)

OutFileName = "../Calculations/Sharp16_BinnedCTDData_bySta.csv"

# `with` guarantees the file is flushed and closed even if the loop fails midway.
with open(OutFileName, 'w') as OutFile:

    # Write header line to new file
    OutFile.write("Sta,Depth_m,Lat,Lon,Sal,Temp_C,Chl_mgm3,DO_umolkg,Sigmat_kgm3,SeafloorDepth_m\n")

    for i in binned.Sta.unique():
        # Subset the station once instead of rebuilding the mask per variable.
        sta_data = binned[binned.Sta == i]

        for j in sta_data.Depth_m.unique():
            loc = sta_data[sta_data.Depth_m == j]

            lat = np.mean(loc.Lat)
            lon = np.mean(loc.Lon)
            sal = np.mean(loc.Sal)
            temp = np.mean(loc.Temp_C)
            fluor = np.mean(loc.Chl_mgm3)
            do = np.mean(loc.DO_umolkg)
            sigma = np.mean(loc.Sigmat_kgm3)
            sfz = np.mean(loc.SeafloorDepth_m)

            OutputString = "%i,%i,%f,%f,%f,%f,%f,%f,%f,%f" % (i,j,lat,lon,sal,temp,fluor,do,sigma,sfz)
            OutFile.write(OutputString + "\n")

# shown as the cell output to spot-check the file that was just written
pd.read_csv(OutFileName).head()

Unnamed: 0,Sta,Depth_m,Lat,Lon,Sal,Temp_C,Chl_mgm3,DO_umolkg,Sigmat_kgm3,SeafloorDepth_m
0,1,0,37.668159,-73.998949,33.78165,26.681107,0.54083,209.27337,21.901372,-1260.857143
1,1,2,37.667418,-73.995269,33.312039,26.708754,0.5683,200.930443,21.53939,-1260.857143
2,1,4,37.66746,-73.995188,33.802502,26.715632,0.537279,208.972659,21.906166,-1260.857143
3,1,6,37.667468,-73.99517,33.806943,26.726576,0.564704,209.137942,21.906058,-1260.857143
4,1,8,37.667477,-73.995165,33.813523,26.737001,0.552924,209.213988,21.907716,-1260.857143
