In [1]:
# import your standard packages
%run ../../../global_tools/start.py

# import your local functions
sys.path.insert(1, '../')
from local_functions import *

# make sure the figures plot inline rather than at the end
%matplotlib inline

# concatenates all profiles into one file with no_profiles x depth (up to 2000m)
# interpolates to standard depth levels as defined on Noaa: https://www.ncei.noaa.gov/access/world-ocean-database-select/depth_definition.html
# linear interp chosen based on ease and https://aslopubs.onlinelibrary.wiley.com/doi/abs/10.4319/lom.2013.11.213
# import xarray as xr
# import numpy as np
# import pandas as pd
# from scipy.interpolate import interp1d
# import os
# import glob
# from tqdm import tqdm

Default libraries loaded.


In [2]:
varlist = ['OSD','CTD','DRB','MRB','PFL','XBT','MBT','UOR','APB','GLD']
DEPTH = np.array([0,5,10,15,20,25,30,35,40,45,50,55,60,65,70,75,80,85,90,95,100,
                 125,150,175,200,225,250,275,300,325,350,375,400,425,450,475,500,
                 550,600,650,700,750,800,850,900,1000,1050,1100,1150,1200,1250,1300,
                 1350,1400,1450,1500,1550,1600,1650,1700,1750,1800,1850,1900,1950,2000])

In [3]:
inpath = '/tigress/GEOCLIM/LRGROUP/shared_data/WOD/test/'

lat = []
lon = []
t = []
cast_id = []

for var in varlist:
    # get all the datafiles
    fns = glob.glob(inpath + var + '/ocldb*.nc')
    for fn in fns:
        ds_in = xr.open_dataset(fn)
        lat.extend(ds_in.lat)
        lon.extend(ds_in.lon)
        t.extend(np.array(ds_in.time))
        cast_id.extend([var + str(i) for i in np.array(ds_in.cast) ])

no_profs = len(cast_id)

In [5]:
# initialize vars
lat = []
lon = []
t = []
cast_id = []
TEMP = np.full([no_profs,len(DEPTH)],np.nan)
DOXY = np.full([no_profs,len(DEPTH)],np.nan)
SAL = np.full([no_profs,len(DEPTH)],np.nan)
NITRATE = np.full([no_profs,len(DEPTH)],np.nan)
PHOSPHATE = np.full([no_profs,len(DEPTH)],np.nan)

# loop through all datatypes APB, CTD, DRB, MRB, OSD, PFL, XBT
pr = 0 # set profile counter to 0 and add 1 for each loop
for vv, var in enumerate(varlist):

    print('Processing ',var, flush=True)
    

    fns = sorted(glob.glob(inpath + '*' + var + '/wod*.nc'))
        
    # loop through all files of the same dataset
    for fn in tqdm(fns):
#     for ff in tqdm(range(10)):
#         fn = fns[ff]

        # read in file
        ds_in = xr.open_dataset(fn)
        
        lat.extend([float(ds_in.lat)])
        lon.extend([float(ds_in.lon)])
        t.extend([np.array(ds_in.time)])
        cast_id.append(var + '_' + str(int(ds_in.wod_unique_cast)))

        if "Temperature" in ds_in:

            #remove 'bad data'
            temp = ds_in.Temperature[ds_in.Temperature_WODflag == 0]
            z = ds_in.z[ds_in.Temperature_WODflag == 0]

            if temp.shape[0] > 3:
                
                z_int = DEPTH[(DEPTH <= float(z.max())) & (DEPTH >= float(z.min()))]
                
                # linearly interpolate to standard levels
                f = interp1d(z, temp)
                temp_int = f(z_int)

                # find indicies of DEPTH to fill
                z_ind = [i for i, val in enumerate(DEPTH) if val in set(np.array(z_int))]

                TEMP[pr,z_ind] = temp_int
                
        if "Salinity" in ds_in:

            #remove 'bad data'
            sal = ds_in.Salinity[ds_in.Salinity_WODflag == 0]
            z = ds_in.z[ds_in.Salinity_WODflag == 0]
            
            if sal.shape[0] > 3:
                
                z_int = DEPTH[(DEPTH <= float(z.max())) & (DEPTH >= float(z.min()))]
                
                # linearly interpolate to standard levels
                f = interp1d(z, sal)
                sal_int = f(z_int)

                # find indicies of DEPTH to fill
                z_ind = [i for i, val in enumerate(DEPTH) if val in set(np.array(z_int))]

                SAL[pr,z_ind] = sal_int

        if "Oxygen" in ds_in:

            #remove 'bad data'
            doxy = ds_in.Oxygen[ds_in.Oxygen_WODflag == 0]
            z = ds_in.z[ds_in.Oxygen_WODflag == 0]

            if doxy.shape[0] > 3:
                
                z_int = DEPTH[(DEPTH <= float(z.max())) & (DEPTH >= float(z.min()))]
                
                # linearly interpolate to standard levels
                f = interp1d(z, doxy)
                doxy_int = f(z_int)

                # find indicies of DEPTH to fill
                z_ind = [i for i, val in enumerate(DEPTH) if val in set(np.array(z_int))]

                DOXY[pr,z_ind] = doxy_int
                
        if "Nitrate" in ds_in:

            #remove 'bad data'
            nitrate = ds_in.Nitrate[ds_in.Nitrate_WODflag == 0]
            z = ds_in.z[ds_in.Nitrate_WODflag == 0]

            if nitrate.shape[0] > 3:
                
                z_int = DEPTH[(DEPTH <= float(z.max())) & (DEPTH >= float(z.min()))]
                
                # linearly interpolate to standard levels
                f = interp1d(z, nitrate)
                nitrate_int = f(z_int)

                # find indicies of DEPTH to fill
                z_ind = [i for i, val in enumerate(DEPTH) if val in set(np.array(z_int))]

                NITRATE[pr,z_ind] = nitrate_int
                
        if "Phosphate" in ds_in:

            #remove 'bad data'
            phosphate = ds_in.Phosphate[ds_in.Phosphate_WODflag == 0]
            z = ds_in.z[ds_in.Phosphate_WODflag == 0]

            if phosphate.shape[0] > 3:
                
                z_int = DEPTH[(DEPTH <= float(z.max())) & (DEPTH >= float(z.min()))]
                
                # linearly interpolate to standard levels
                f = interp1d(z, phosphate)
                phosphate_int = f(z_int)

                # find indicies of DEPTH to fill
                z_ind = [i for i, val in enumerate(DEPTH) if val in set(np.array(z_int))]

                PHOSPHATE[pr,z_ind] = phosphate_int
 
        pr = pr +1
        

Processing  OSD


 13%|█▎        | 75/556 [00:01<00:12, 40.04it/s]


ValueError: numpy.nanmax raises on a.size==0 and axis=None; So Bottleneck too.

# Add to Dataset

In [None]:
# add to dataset

#------------------------------ profiles -------------------------------------#
coords = [no_profs,DEPTH]
dims = ['no. profs', 'depth']

ds_out['temp'] = xr.DataArray(TEMP,dims = dims,coords = coords)
ds_out['sal'] = xr.DataArray(SAL,dims = dims,coords = coords)
ds_out['doxy'] = xr.DataArray(DOXY,dims = dims,coords = coords)
ds_out['nitrate'] = xr.DataArray(NITRATE,dims = dims,coords = coords)
ds_out['phosphate'] = xr.DataArray(PHOSPHATE,dims = dims,coords = coords)

# Sort in Time

# Subset for Indian Ocean

In [None]:
# def rr_interp(prof, obs_z, target_z):
#     '''See FAQ # 6 https://www.nodc.noaa.gov/OC5/WOD/wod-woa-faqs.html
#      For less than 3 points = linear interpolation
#      For 3 points = lagrangian interpolation
#      for 4 points or larger = Reiniger-Ross interpolation
     
#      https://waves-vagues.dfo-mpo.gc.ca/Library/53761.pdf
#      https://github.com/dankelley/oce/blob/develop/src/oce_approx.cpp
#      https://rda.ucar.edu/datasets/ds285.0/docs/WOD18-UsersManual_final.pdf
     
#     '''
    
#     # remove nans
#     y = prof(~np.isnan(prof))
#     x = obs_z(~np.isnan(prof))
    
#     if y.shape < 3: # linear
        
#     elif y.shape < 4: # lagrangian
        
#     else: # RR 
        