In [7]:
import os                                            # open folders
import numpy as np
from matplotlib import pyplot as plt
import xarray as xr
import pandas as pd
import cmocean
import seawater as sw
import oceans
import csv
import gsw
import glob
import cartopy.crs as ccrs
import cartopy.feature
%matplotlib inline 

In [10]:
directory_with_data = '/Users/hannah/Documents/UW-PMEL/Research/SO_BGC_clusters_repo/data/01_raw/SOCCOM_bgc_argo_float_data/SOCCOM_LoResQC_LIAR_30Aug2020_netcdf/'

In [17]:
#path lists for files
list_of_paths = sorted(glob.glob(os.path.join(directory_with_data, '*.nc')))
len(list_of_paths)

11

In [18]:
# FUNCTIONS (don't change any of this)

# define a function that smooths using a boxcar filter (running mean)
def smooth(y, box_pts):
    box = np.ones(box_pts)/box_pts
    y_smooth = np.convolve(y, box, mode='same')
    return y_smooth


# interpolate the data onto the standard depth grid given by x_int
def interpolate(x_int, xvals, yvals):
    yvals_int = []
    for n in range(0, len(yvals)):
        yvals_int.append(np.interp(x_int, xvals[n, :], yvals[n, :]))
    # convert the interpolated data from a list to numpy array
    return np.asarray(yvals_int)


# calculate the vertically integrated data column inventory using the composite trapezoidal rule
def integrate(zi, data, depth_range):
    n_profs   = len(data)
    zi_start  = abs(zi - depth_range[0]).argmin()
    zi_end    = abs(zi - depth_range[1]).argmin()
    zi_struct = np.ones((n_profs, 1)) * zi[zi_start : zi_end]
    data      = data[:, zi_start : zi_end]
    col_inv   = []
    
    for n in range(0, len(data)):
        col_inv.append(np.trapz(data[n,:][~np.isnan(data[n,:])], zi_struct[n,:][~np.isnan(data[n,:])]))
    return col_inv


# define a function that gets rid of repeated values 
def delete_rep(data):
        vals, inverse, count = np.unique(data, return_inverse=True,
                              return_counts=True)

        idx_vals_repeated = np.where(count > 1)[0]
        vals_repeated = vals[idx_vals_repeated]

        rows, cols = np.where(inverse == idx_vals_repeated[:, np.newaxis])
        _, inverse_rows = np.unique(rows, return_index=True)
        res = np.split(cols, inverse_rows[1:]) #res gives the indices of the repeated values
    
        for n in range(len(res)): 
            data[res[n-1]]=np.nan #set the repeated values to nans
        return data

In [47]:
# function to read in QC flags and interpolate fields onto standard depth grid (don't change this unless you want to modify which variables get read in)
def interpolate_data(dataset, zi):
        n_profs = dataset.N_PROF.size
        sal     = dataset.Salinity[:, ::-1] 
        temp    = dataset.Temperature[:, ::-1] 
        pres    = dataset.Pressure[:, ::-1] 
        pco2    = dataset.pCO2_LIAR[:, ::-1] 
        dic     = dataset.DIC_LIAR[:, ::-1] 
        chl     = dataset.Chl_a[:, ::-1] 
        talk    = dataset.TALK_LIAR[:, ::-1] 
        pH      = dataset.pHinsitu[:, ::-1] 
        nitr    = dataset.Nitrate[:, ::-1] 
        oxy     = dataset.Oxygen[:, ::-1] 
        
        #set to NaN if QC flag is 4 or 8 (bad data)
        chl.values[ dataset.Chl_a_QFA[:, ::-1].values==4]=np.nan
        chl.values[ dataset.Chl_a_QFA[:, ::-1].values==8]=np.nan
        sal.values[ dataset.Salinity_QFA[:, ::-1].values==4]=np.nan
        sal.values[ dataset.Salinity_QFA[:, ::-1].values==8]=np.nan
        temp.values[dataset.Temperature_QFA[:, ::-1].values==4]=np.nan
        temp.values[dataset.Temperature_QFA[:, ::-1].values==8]=np.nan
        dic.values[ dataset.DIC_LIAR_QFA[:, ::-1].values==4]=np.nan
        dic.values[ dataset.DIC_LIAR_QFA[:, ::-1].values==8]=np.nan
        pco2.values[dataset.pCO2_LIAR_QFA[:, ::-1].values==4]=np.nan
        pco2.values[dataset.pCO2_LIAR_QFA[:, ::-1].values==8]=np.nan
        talk.values[dataset.TALK_LIAR_QFA[:, ::-1].values==4]=np.nan
        talk.values[dataset.TALK_LIAR_QFA[:, ::-1].values==8]=np.nan
        pH.values[  dataset.pHinsitu_QFA[:, ::-1].values==4]=np.nan
        pH.values[  dataset.pHinsitu_QFA[:, ::-1].values==8]=np.nan
        nitr.values[dataset.Nitrate_QFA[:, ::-1].values==4]=np.nan
        nitr.values[dataset.Nitrate_QFA[:, ::-1].values==8]=np.nan
        oxy.values[ dataset.Oxygen_QFA[:, ::-1].values==4]=np.nan
        oxy.values[ dataset.Oxygen_QFA[:, ::-1].values==8]=np.nan
        
        #interpolate onto uniform pressure grid from 0 to 1600 m with 5 m spacing 
        temp_int = interpolate(zi, pres, temp)
        sal_int  = interpolate(zi, pres, sal)
        pres_int = interpolate(zi, pres, pres)
        dic_int  = interpolate(zi, pres, dic)
        talk_int = interpolate(zi, pres, talk)
        pH_int   = interpolate(zi, pres, pH)
        nitr_int = interpolate(zi, pres, nitr)
        oxy_int  = interpolate(zi, pres, oxy)
        chl_int  = interpolate(zi, pres, chl)
        pco2_int = interpolate(zi, pres, pco2)
        
        return temp_int, sal_int, pres_int, pco2_int, dic_int, talk_int, pH_int, oxy_int, nitr_int, chl_int 

In [None]:
datasets = []
for example in examples:
    ds = create_an_xarray_dataset(example)
    datasets.append(ds)
combined = xarray.concat(datasets, dim='example')

In [48]:
lat_all  = []
lon_all  = []
juld_all = []
temp_all = []
sal_all  = []
pres_all = []
pco2_all = []
dic_all  = []
talk_all = []
pH_all   = []
oxy_all  = []
nitr_all = []
chl_all  = []

#read in the data from all floats using a loop 
for n in range(len(list_of_paths)): 
    fd = xr.open_dataset(list_of_paths[n])
    lat_all.append(fd.Lat.values)
    lon_all.append(fd.Lon.values)
    juld_all.append(fd.JULD.values)
    
    #Interpolate temperature onto uniform depth grid using the function from above
    zi = np.arange(0, 1605, 5)
    int_fd = interpolate_data(fd, zi)
    
    #save the interpolated fields from all the floats
    temp_all.append(int_fd[0][:])
    sal_all.append( int_fd[1][:])
    pres_all.append(int_fd[2][:])
    pco2_all.append(int_fd[3][:])
    dic_all.append( int_fd[4][:])
    talk_all.append(int_fd[5][:])
    pH_all.append(  int_fd[6][:])
    oxy_all.append( int_fd[7][:])
    nitr_all.append(int_fd[8][:])
    chl_all.append( int_fd[9][:])
    
    
# Corrected semi-Pseudocode for modified cell 48:
#read in the data from all floats using a loop
for n in range(len(list_of_paths)):
    fn_zint = list_of_paths[n].replace('.nc', '_zint.nc')
    if not os.path.exists(fn_zint):
        fd = xr.open_dataset(list_of_paths[n])
        intp_fd = interpolate_data(fd, zi)
        write_zint_nc(fn_zint, intp_fd) # this function needs to be written
   
    int_fd = xr.open_dataset(fn_zint)
    append int_fd to overall xarray # translate to actual Python     
    

#save as arrays
lat  = np.asarray(lat_all,  dtype=object)
lon  = np.asarray(lon_all,  dtype=object)
juld = np.asarray(juld_all, dtype=object)
temp = np.asarray(temp_all, dtype=object)
sal  = np.asarray(sal_all,  dtype=object)
pres = np.asarray(pres_all, dtype=object)
pco2 = np.asarray(pco2_all, dtype=object)
dic  = np.asarray(dic_all,  dtype=object)
talk = np.asarray(talk_all, dtype=object)
pH   = np.asarray(pH_all,   dtype=object)
oxy  = np.asarray(oxy_all,  dtype=object)
nitr = np.asarray(nitr_all, dtype=object)
chl  = np.asarray(chl_all,  dtype=object)

# xarray dataset:
combined = xr.concat(datasets, dim='pressure')

In [None]:
# Semi-Pseudocode for modified cell 48:

#read in the data from all floats using a loop
for n in range(len(list_of_paths)):
    fn_zint = list_of_paths[n].replace('.nc', '_zint.nc')
    if os.path.exists(fn_zint):
        int_fd = xr.open_dataset(fn_zint)
    else:
        fd = xr.open_dataset(list_of_paths[n])
        int_fd = interpolate_data(fd)
        write_zint_nc(fn_zint, int_fd) # this function needs to be written
    append int_fd to overall xarray # translate to actual Python   
    
# Corrected semi-Pseudocode for modified cell 48:

#read in the data from all floats using a loop
for n in range(len(list_of_paths)):
    fn_zint = list_of_paths[n].replace('.nc', '_zint.nc')
    if not os.path.exists(fn_zint):
        fd = xr.open_dataset(list_of_paths[n])
        intp_fd = interpolate_data(fd)
        write_zint_nc(fn_zint, intp_fd) # this function needs to be written
   
    int_fd = xr.open_dataset(fn_zint)
    append int_fd to overall xarray # translate to actual Python 
    

# after this, do .concat by profile -- should be easy!!
# ds=ds.contact()

In [49]:
#so lat[n] is a 1-D array containing the latitude values for the nth float from list_of_paths
lat[0].shape

(41,)

In [50]:
#so temp[n] is a 2-D array containing the temperature values for the nth float from list_of_paths interpolated onto a constant depth grid with 5 m spacing from the surface to 1600 m
temp[0].shape

(41, 320)

In [96]:
zi # this should actually only be within the function

array([   0,    5,   10,   15,   20,   25,   30,   35,   40,   45,   50,
         55,   60,   65,   70,   75,   80,   85,   90,   95,  100,  105,
        110,  115,  120,  125,  130,  135,  140,  145,  150,  155,  160,
        165,  170,  175,  180,  185,  190,  195,  200,  205,  210,  215,
        220,  225,  230,  235,  240,  245,  250,  255,  260,  265,  270,
        275,  280,  285,  290,  295,  300,  305,  310,  315,  320,  325,
        330,  335,  340,  345,  350,  355,  360,  365,  370,  375,  380,
        385,  390,  395,  400,  405,  410,  415,  420,  425,  430,  435,
        440,  445,  450,  455,  460,  465,  470,  475,  480,  485,  490,
        495,  500,  505,  510,  515,  520,  525,  530,  535,  540,  545,
        550,  555,  560,  565,  570,  575,  580,  585,  590,  595,  600,
        605,  610,  615,  620,  625,  630,  635,  640,  645,  650,  655,
        660,  665,  670,  675,  680,  685,  690,  695,  700,  705,  710,
        715,  720,  725,  730,  735,  740,  745,  7

In [94]:
list_of_paths[3]

'/Users/hannah/Documents/UW-PMEL/Research/SO_BGC_clusters_repo/data/01_raw/SOCCOM_bgc_argo_float_data/SOCCOM_LoResQC_LIAR_30Aug2020_netcdf/0509SOOCNQC.nc'

In [69]:
xr.open_dataset(list_of_paths[3])

In [146]:
fd = xr.open_dataset(list_of_paths[3])
lat_all.append(fd.Lat.values)
lon_all.append(fd.Lon.values)
juld_all.append(fd.JULD.values)

#Interpolate temperature onto uniform depth grid using the function from above
zi = np.arange(0, 1600, 5)
int_fd = interpolate_data(fd)

#save the interpolated fields from all the floats
temp_all.append(int_fd[0][:])
sal_all.append( int_fd[1][:])
pres_all.append(int_fd[2][:])
pco2_all.append(int_fd[3][:])
dic_all.append( int_fd[4][:])
talk_all.append(int_fd[5][:])
pH_all.append(  int_fd[6][:])
oxy_all.append( int_fd[7][:])
nitr_all.append(int_fd[8][:])
chl_all.append( int_fd[9][:])

In [141]:
os.path.basename(list_of_paths[4]).replace('.nc', '')

'0510SOOCNQC'

In [132]:
fd.Lat.values

(155,)

In [147]:
temp_test = int_fd[0][:]
sal_test = int_fd[1][:]
pres_test = int_fd[2][:]
pco2_test = int_fd[3][:]
dic_test = int_fd[4][:]
talk_test = int_fd[5][:]
pH_test = int_fd[6][:]
oxy_test = int_fd[7][:]
nitr_test = int_fd[8][:]
chl_test = int_fd[9][:]

In [124]:
temp_test.shape

(155, 320)

In [54]:
int_fd[2]

array([[  nan,    5.,   10., ..., 1585., 1590., 1595.],
       [  nan,   nan,   10., ..., 1585., 1590., 1595.],
       [  nan,    5.,   10., ..., 1585., 1590., 1595.],
       ...,
       [  nan,    5.,   10., ..., 1585., 1590., 1595.],
       [  nan,    5.,   10., ..., 1585., 1590., 1595.],
       [  nan,    5.,   10., ..., 1585., 1590., 1595.]])

In [62]:
int_fd[2][0]

array([  nan,    5.,   10.,   15.,   20.,   25.,   30.,   35.,   40.,
         45.,   50.,   55.,   60.,   65.,   70.,   75.,   80.,   85.,
         90.,   95.,  100.,  105.,  110.,  115.,  120.,  125.,  130.,
        135.,  140.,  145.,  150.,  155.,  160.,  165.,  170.,  175.,
        180.,  185.,  190.,  195.,  200.,  205.,  210.,  215.,  220.,
        225.,  230.,  235.,  240.,  245.,  250.,  255.,  260.,  265.,
        270.,  275.,  280.,  285.,  290.,  295.,  300.,  305.,  310.,
        315.,  320.,  325.,  330.,  335.,  340.,  345.,  350.,  355.,
        360.,  365.,  370.,  375.,  380.,  385.,  390.,  395.,  400.,
        405.,  410.,  415.,  420.,  425.,  430.,  435.,  440.,  445.,
        450.,  455.,  460.,  465.,  470.,  475.,  480.,  485.,  490.,
        495.,  500.,  505.,  510.,  515.,  520.,  525.,  530.,  535.,
        540.,  545.,  550.,  555.,  560.,  565.,  570.,  575.,  580.,
        585.,  590.,  595.,  600.,  605.,  610.,  615.,  620.,  625.,
        630.,  635.,

In [63]:
int_fd[9].shape

(145, 320)

In [64]:
int_fd[9][0]

array([       nan,        nan,        nan,        nan,        nan,
              nan,        nan,        nan,        nan,        nan,
       0.3913    , 0.414     , 0.457     , 0.4408    , 0.42232381,
       0.38675   , 0.3721    , 0.4236    , 0.3832    , 0.40085   ,
       0.4671    , 0.3195    , 0.095     , 0.04195   , 0.0455    ,
       0.02377619, 0.0202    , 0.02025   , 0.0172    , 0.01315   ,
       0.0132    , 0.0162    , 0.0121    , 0.0177    , 0.0152    ,
       0.0137    , 0.01000476, 0.0096    , 0.0091    , 0.0106    ,
       0.0081    , 0.0066    , 0.0081    , 0.0086    , 0.0081    ,
       0.0096    , 0.0081    , 0.0086    , 0.0081    , 0.0091    ,
       0.0111    , 0.0091    , 0.0091    , 0.0076    , 0.0091    ,
       0.0071    , 0.0071    , 0.0081    , 0.0071    , 0.0066    ,
       0.0061    , 0.0076    , 0.0051    , 0.0076    , 0.0081    ,
       0.0086    , 0.0081    , 0.0066    , 0.0061    , 0.0051    ,
       0.0132    , 0.0076    , 0.0061    , 0.0046    , 0.0071 

In [101]:
temp_da = xr.DataArray(
    data=temp_test,
    dims=['profile','pressure'],
    coords=dict(
        pressure=zi,
    ),
    attrs=dict(
        description='Temperature',
        units="degC"))
temp_da

In [102]:
sal_da = xr.DataArray(
    data=sal_test,
    dims=['profile','pressure'],
    coords=dict(
        pressure=zi,
    ),
    attrs=dict(
        description='Salinity',
        units="PSU?"))
sal_da

In [88]:
pres_da = xr.DataArray(
    data=pres_test,
    dims=['n_prof','n_levels'],
    attrs=dict(
        description='Pressure',
        units="temp"))
pres_da

In [95]:
pres_test[-4]

array([  nan,    5.,   10.,   15.,   20.,   25.,   30.,   35.,   40.,
         45.,   50.,   55.,   60.,   65.,   70.,   75.,   80.,   85.,
         90.,   95.,  100.,  105.,  110.,  115.,  120.,  125.,  130.,
        135.,  140.,  145.,  150.,  155.,  160.,  165.,  170.,  175.,
        180.,  185.,  190.,  195.,  200.,  205.,  210.,  215.,  220.,
        225.,  230.,  235.,  240.,  245.,  250.,  255.,  260.,  265.,
        270.,  275.,  280.,  285.,  290.,  295.,  300.,  305.,  310.,
        315.,  320.,  325.,  330.,  335.,  340.,  345.,  350.,  355.,
        360.,  365.,  370.,  375.,  380.,  385.,  390.,  395.,  400.,
        405.,  410.,  415.,  420.,  425.,  430.,  435.,  440.,  445.,
        450.,  455.,  460.,  465.,  470.,  475.,  480.,  485.,  490.,
        495.,  500.,  505.,  510.,  515.,  520.,  525.,  530.,  535.,
        540.,  545.,  550.,  555.,  560.,  565.,  570.,  575.,  580.,
        585.,  590.,  595.,  600.,  605.,  610.,  615.,  620.,  625.,
        630.,  635.,

In [84]:
np.arange(0, 1600, 5).shape

(320,)

In [None]:
datasets = []
for example in examples:
    ds = create_an_xarray_dataset(example)
    datasets.append(ds)
combined = xarray.concat(datasets, dim='example')

In [134]:
xr.DataArray(data = fd.Lat.values, dims=['profile'])

In [151]:
temp_da = xr.DataArray(data = temp_test, dims=['profile','pressure'], coords=dict(pressure=zi))
sal_da  = xr.DataArray(data =  sal_test, dims=['profile','pressure'], coords=dict(pressure=zi))
pco2_da = xr.DataArray(data = pco2_test, dims=['profile','pressure'], coords=dict(pressure=zi))
dic_da  = xr.DataArray(data =  dic_test, dims=['profile','pressure'], coords=dict(pressure=zi))
talk_da = xr.DataArray(data = talk_test, dims=['profile','pressure'], coords=dict(pressure=zi))
pH_da   = xr.DataArray(data =   pH_test, dims=['profile','pressure'], coords=dict(pressure=zi))
oxy_da  = xr.DataArray(data =  oxy_test, dims=['profile','pressure'], coords=dict(pressure=zi))
nitr_da = xr.DataArray(data = nitr_test, dims=['profile','pressure'], coords=dict(pressure=zi))
chl_da  = xr.DataArray(data =  chl_test, dims=['profile','pressure'], coords=dict(pressure=zi))

latitude_da  = xr.DataArray(data = fd.Lat.values,  dims=['profile'])
longitude_da = xr.DataArray(data = fd.Lon.values,  dims=['profile'])
juld_da      = xr.DataArray(data = fd.JULD.values, dims=['profile'])

floatnum = os.path.basename(list_of_paths[3]).replace('.nc', '')
floatnum_da = xr.DataArray(data = np.repeat(floatnum, len(fd.JULD.values)),  dims=['profile']) # this could be done better

datasets = temp_da.to_dataset(name  = 'temperature')
datasets = datasets.assign(latitude  = latitude_da)
datasets = datasets.assign(longitude = longitude_da)
datasets = datasets.assign(juld = juld_da)
datasets = datasets.assign(floatnum = floatnum_da)
datasets = datasets.assign(salinity = sal_da)
datasets = datasets.assign(pco2     = pco2_da)
datasets = datasets.assign(dic      = dic_da)
datasets = datasets.assign(talk     = talk_da)
datasets = datasets.assign(pH       = pH_da)
datasets = datasets.assign(oxygen   = oxy_da)
datasets = datasets.assign(nitrate  = nitr_da)
datasets = datasets.assign(chl      = chl_da)

datasets

In [145]:
temp_da2 = xr.DataArray(data = temp_test, dims=['profile','pressure'], coords=dict(pressure=zi))
sal_da2  = xr.DataArray(data =  sal_test, dims=['profile','pressure'], coords=dict(pressure=zi))
pco2_da2 = xr.DataArray(data = pco2_test, dims=['profile','pressure'], coords=dict(pressure=zi))
dic_da2  = xr.DataArray(data =  dic_test, dims=['profile','pressure'], coords=dict(pressure=zi))
talk_da2 = xr.DataArray(data = talk_test, dims=['profile','pressure'], coords=dict(pressure=zi))
pH_da2   = xr.DataArray(data =   pH_test, dims=['profile','pressure'], coords=dict(pressure=zi))
oxy_da2  = xr.DataArray(data =  oxy_test, dims=['profile','pressure'], coords=dict(pressure=zi))
nitr_da2 = xr.DataArray(data = nitr_test, dims=['profile','pressure'], coords=dict(pressure=zi))
chl_da2  = xr.DataArray(data =  chl_test, dims=['profile','pressure'], coords=dict(pressure=zi))

latitude_da2  = xr.DataArray(data = fd.Lat.values,  dims=['profile'])
longitude_da2 = xr.DataArray(data = fd.Lon.values,  dims=['profile'])
juld_da2      = xr.DataArray(data = fd.JULD.values, dims=['profile'])

floatnum2 = os.path.basename(list_of_paths[4]).replace('.nc', '')
floatnum_da2 = xr.DataArray(data = np.repeat(floatnum2, len(fd.JULD.values)),  dims=['profile']) # this could be done better

### STOPPED HERE
# figure out how to get floatnum into xrray
# next, figure out how to combine datasets
# then build this into the function


datasets2 = temp_da2.to_dataset(name  = 'temperature')
datasets2 = datasets2.assign(latitude  = latitude_da2)
datasets2 = datasets2.assign(longitude = longitude_da2)
datasets2 = datasets2.assign(juld = juld_da2)
datasets2 = datasets2.assign(floatnum = floatnum_da2)
datasets2 = datasets2.assign(salinity = sal_da2)
datasets2 = datasets2.assign(pco2     = pco2_da2)
datasets2 = datasets2.assign(dic      = dic_da2)
datasets2 = datasets2.assign(talk     = talk_da2)
datasets2 = datasets2.assign(pH       = pH_da2)
datasets2 = datasets2.assign(oxygen   = oxy_da2)
datasets2 = datasets2.assign(nitrate  = nitr_da2)
datasets2 = datasets2.assign(chl      = chl_da2)

datasets2

In [138]:
fd.Lat.values.shape

(155,)

In [142]:
len(fd.JULD.values)

155

In [144]:
np.repeat('xyz', 5)

array(['xyz', 'xyz', 'xyz', 'xyz', 'xyz'], dtype='<U3')

In [152]:
ds=xr.concat([datasets, datasets2], dim='profile')
ds

In [118]:
sal_da

In [119]:
temp_da

In [89]:
xr.concat([pres_da.isel(n_levels=1), temp_da.isel(n_levels=1)], 'n_levels')

In [None]:
test_pddataframe

In [None]:
test_xrdataset = test_pddataframe.to_xarray()

In [66]:
temp_test

array([[        nan, -1.383     , -1.385     , ...,  0.7059072 ,
         0.7038272 ,  0.7017472 ],
       [        nan,         nan, -0.86104762, ...,  0.71660845,
         0.71421408,  0.71181972],
       [        nan, -1.76447059, -1.744     , ...,  0.74196761,
         0.74034789,  0.73872817],
       ...,
       [        nan, -1.055     , -1.056     , ...,  1.01112225,
         1.00904324,  1.00696424],
       [        nan, -1.49094737, -1.481     , ...,  0.99385211,
         0.99145775,  0.98906338],
       [        nan, -1.773     , -1.77216667, ...,  1.01465494,
         1.01242174,  1.01018854]])

In [75]:
temp_test.shape

(145, 320)

# Notes

1. Figure out how to get all arrays together into xarray
3. Pick out which parameters I want included. Options:
'Cruise',
 'Station',
 'Lon',
 'Lat',
 'Lat_QF',
 'Lat_QFA',
 'Pressure',
 'Pressure_QF',
 'Pressure_QFA',
 'Temperature',
 'Temperature_QF',
 'Temperature_QFA',
 'Salinity',
 'Salinity_QF',
 'Salinity_QFA',
 'Sigma_theta',
 'Sigma_theta_QF',
 'Sigma_theta_QFA',
 'Depth',
 'Depth_QF',
 'Depth_QFA',
 'Oxygen',
 'Oxygen_QF',
 'Oxygen_QFA',
 'OxygenSat',
 'OxygenSat_QF',
 'OxygenSat_QFA',
 'Nitrate',
 'Nitrate_QF',
 'Nitrate_QFA',
 'Chl_a',
 'Chl_a_QF',
 'Chl_a_QFA',
 'Chl_a_corr',
 'Chl_a_corr_QF',
 'Chl_a_corr_QFA',
 'b_bp700',
 'b_bp700_QF',
 'b_bp700_QFA',
 'b_bp_corr',
 'b_bp_corr_QF',
 'b_bp_corr_QFA',
 'POC',
 'POC_QF',
 'POC_QFA',
 'pHinsitu',
 'pHinsitu_QF',
 'pHinsitu_QFA',
 'pH25C',
 'pH25C_QF',
 'pH25C_QFA',
 'TALK_LIAR',
 'TALK_LIAR_QF',
 'TALK_LIAR_QFA',
 'DIC_LIAR',
 'DIC_LIAR_QF',
 'DIC_LIAR_QFA',
 'pCO2_LIAR',
 'pCO2_LIAR_QF',
 'pCO2_LIAR_QFA',
 'b_bp532',
 'b_bp532_QF',
 'b_bp532_QFA',
 'CDOM',
 'CDOM_QF',
 'CDOM_QFA',
 'Type',
 'mon_day_yr',
 'hh_mm',
 'Parameters',
 'JULD',
 'REFERENCE_DATE_TIME'
2. See if I can get code to work for new data
3. Change bottom depth to 1600


# Questions

1. Why is zi to 1600 m?


# Notes from Harmut

The pedestrian way would be to have function interpolate_data write out a new netcdf file with all variables on the same zi grid.
Then you could read in those netcdf files instead with xr.open_dataset and get everything into xarray form.
There should be an easy way to append one xr dataset to another, right?

Do you know how to write nc files or should I do it?

The side benefit of this approach is that you have to do the depth interpolation only once.
Then you can simply check if the zint.nc version is there instead of having to interpolate again.



In [53]:
temp_int

NameError: name 'temp_int' is not defined

In [35]:
list(fd.keys())

['Cruise',
 'Station',
 'Lon',
 'Lat',
 'Lat_QF',
 'Lat_QFA',
 'Pressure',
 'Pressure_QF',
 'Pressure_QFA',
 'Temperature',
 'Temperature_QF',
 'Temperature_QFA',
 'Salinity',
 'Salinity_QF',
 'Salinity_QFA',
 'Sigma_theta',
 'Sigma_theta_QF',
 'Sigma_theta_QFA',
 'Depth',
 'Depth_QF',
 'Depth_QFA',
 'Oxygen',
 'Oxygen_QF',
 'Oxygen_QFA',
 'OxygenSat',
 'OxygenSat_QF',
 'OxygenSat_QFA',
 'Nitrate',
 'Nitrate_QF',
 'Nitrate_QFA',
 'Chl_a',
 'Chl_a_QF',
 'Chl_a_QFA',
 'Chl_a_corr',
 'Chl_a_corr_QF',
 'Chl_a_corr_QFA',
 'b_bp700',
 'b_bp700_QF',
 'b_bp700_QFA',
 'b_bp_corr',
 'b_bp_corr_QF',
 'b_bp_corr_QFA',
 'POC',
 'POC_QF',
 'POC_QFA',
 'pHinsitu',
 'pHinsitu_QF',
 'pHinsitu_QFA',
 'pH25C',
 'pH25C_QF',
 'pH25C_QFA',
 'TALK_LIAR',
 'TALK_LIAR_QF',
 'TALK_LIAR_QFA',
 'DIC_LIAR',
 'DIC_LIAR_QF',
 'DIC_LIAR_QFA',
 'pCO2_LIAR',
 'pCO2_LIAR_QF',
 'pCO2_LIAR_QFA',
 'b_bp532',
 'b_bp532_QF',
 'b_bp532_QFA',
 'CDOM',
 'CDOM_QF',
 'CDOM_QFA',
 'Type',
 'mon_day_yr',
 'hh_mm',
 'Parameters',
 '

In [161]:
fd.Temperature

In [166]:
test = fd.Temperature
test

In [164]:
test[:,:] = np.NaN

In [168]:
test[:, ::-1] =np.NaN
test.rename('test')

In [155]:
fd.Lat.values

array([-56.946, -56.918, -57.001, -57.074, -57.167, -57.193, -57.026,
       -56.883, -56.819, -56.736, -56.53 , -56.323, -56.474, -55.665,
       -54.633, -54.241,     nan,     nan,     nan,     nan])

In [158]:
fd.Chl_a_QFA[:, ::-1].values in [4,8].any()

AttributeError: 'list' object has no attribute 'any'

In [160]:
fd.Chl_a.values[ fd.Chl_a_QFA[:, ::-1].values in [4,8]]=np.nan


ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

In [None]:
fd.Chl_a.values[ fd.Chl_a_QFA[:, ::-1].values==8]=np.nan