# Import SOCCOM BGC-Argo floats

Uses the `argopy` package to fetch the float data as an xarray dataset.

*Note, this only needs to be run if the dataset hasn't already been downloaded*

In [1]:
import os                                            # list the files in the data folder
import re                                            # regex to keep only numeric values in filename string
import pandas as pd                                  # for converting datetime
import numpy  as np                                  # for linspace (standard pressure levels) and min/max checks
from   datetime              import date             # for date-stamping the saved file names
import gsw                                           # to calculate depth from pressure, plus salinity/temperature/density conversions

# To get Argo data
from argopy import DataFetcher as ArgoDataFetcher
# Fetcher created with argopy's default options; it is only referenced by the
# (currently commented-out) download cell further down.
argo_loader = ArgoDataFetcher()

## Select WMOs corresponding to SOCCOM BGC-Argo floats

This is done by selecting the file names of data already downloaded to one of my folders; it could be improved to be more usable for others.

In [2]:
# Folder holding the already-downloaded SOCCOM netCDF files, and the folder this notebook writes to.
# NOTE: these are absolute paths on the original author's machine -- edit them before running elsewhere.
directory_with_data  = '/Users/hannah/Documents/UW-PMEL/Research/so_co2_flux_repo/data/01_raw/SOCCOM_bgc_argo_float_data/SOCCOM_HiResQC_LIAR_21Dec2021_netcdf/'
save_directory       = '/Users/hannah/Documents/UW-PMEL/Research/so_co2_flux_repo/data/02_intermediate/SOCCOM_bgc_argo_float_data/'
directory            = os.fsencode(directory_with_data)   # bytes form of the path; kept so the name stays defined


# # this set has questionably good data
# questionably_good = np.array([5904677, 5905991, 5905635, 5905375, 5905378, 5905636, 5906007, 5906031, 5906030, 
#                               5906002, 5906208, 5906207, 5906215, 5906206, 5906226, 5906222, 5906211, 5906210])
# # corresponding to the following float numbers:
# # quest_good = [9631; 12688; 12701; 12709; 12748; 12754; 12880; 12885; 12888; 12889; 17898; 18013; 18098; 18821; 18829; 18852; 18864; 18994]


### Loop through filenames to get floats to add in:
bgc_argo_float_WMOs = []

# os.listdir returns entries in arbitrary order; sorting keeps the WMO list
# (and therefore everything fetched from it) reproducible between runs.
for filename in sorted(os.listdir(directory_with_data)):

    # Only the netcdf files, and not the files without a WMO (only 2).
    # TODO: if we can figure out how to keep the NO_WMO files, that would be nice!
    if not filename.endswith(".nc") or filename.startswith('NO_WMO'):
        continue

    # The WMO is the first run of digits in the file name
    wmo_match = re.search(r'\d+', filename)
    if wmo_match is None:
        # A .nc file without any digits has no WMO to extract; skip it instead of raising IndexError
        continue

    # Add WMO to list
    bgc_argo_float_WMOs.append(int(wmo_match.group()))


## Retrieve Argo float data corresponding to selected WMOs

Note that this step takes somewhere around 20-30 minutes

In [6]:
# # commented out so that this isn't accidentally run
# NOTE: the line below is the only place `soccom_bgc_argo_ds_points` is defined, and the
# point-to-profile cell that follows reads it. On a fresh kernel this fetch has to be
# uncommented and run once before continuing.
# soccom_bgc_argo_ds_points = argo_loader.float(bgc_argo_float_WMOs).to_xarray()


# # Save original SOCCOM BGC Argo dataset:
# savedate = date.today().strftime('%Y%m%d')
# soccom_bgc_argo_ds_points.to_netcdf(save_directory + 'soccom_bgc_argo_ds_points.nc'.format('.nc'))
# soccom_bgc_argo_ds_points.to_netcdf(save_directory + 'soccom_bgc_argo_ds_points_' + savedate + '.nc'.format('.nc'))

# soccom_bgc_argo_ds_points

## Convert points to profiles using argopy function

Note that this step takes ~5 minutes

In [31]:
# Reshape the point-wise dataset into one row per profile.
# Requires `soccom_bgc_argo_ds_points` from the fetch cell above; that cell's code is commented
# out, so on a fresh kernel it has to be enabled and run first.
soccom_bgc_argo_ds_profiles = soccom_bgc_argo_ds_points.argo.point2profile()


# Save the profile dataset twice: under a fixed file name (overwritten on every run) and
# under a date-stamped file name.
# The former `'....nc'.format('.nc')` calls were no-ops (the strings contain no `{}` placeholder),
# so they are dropped; the resulting file names are unchanged.
savedate = date.today().strftime('%Y%m%d')
soccom_bgc_argo_ds_profiles.to_netcdf(save_directory + 'soccom_bgc_argo_ds_profiles.nc')
soccom_bgc_argo_ds_profiles.to_netcdf(save_directory + 'soccom_bgc_argo_ds_profiles_' + savedate + '.nc')

soccom_bgc_argo_ds_profiles

In [73]:
# Snapshot / restore point, so the processing cells below can be re-run from the untouched profiles.
#   - snapshot already exists : restore the profiles from it (same as before)
#   - no snapshot yet         : keep a reference to the current profiles as the snapshot
# Previously the snapshot line was commented out, so this cell raised NameError on a fresh kernel.
# NOTE: after re-running the point-to-profile cell, delete `soccom_bgc_argo_ds_profiles_saved`
# (or restart the kernel), otherwise the stale snapshot is restored here.
try:
    soccom_bgc_argo_ds_profiles = soccom_bgc_argo_ds_profiles_saved
except NameError:
    soccom_bgc_argo_ds_profiles_saved = soccom_bgc_argo_ds_profiles

In [34]:
# Display the profile dataset (bare last expression -> rich notebook repr)
soccom_bgc_argo_ds_profiles

*Note: before interpolating data, check for any flags that are >2, indicating data with problems. During interpolation, flags are dropped. Currently only PSAL_QC has a 2 flag, which is still okay data.*

## Calculate depth from pressure

In [74]:
# z_from_p is evaluated from pressure and latitude; its result is multiplied by -1
# before being stored as `depth` (GSW documents z as height, i.e. negative below the surface).
height_from_pressure = gsw.conversions.z_from_p(
    p   = soccom_bgc_argo_ds_profiles.PRES,
    lat = soccom_bgc_argo_ds_profiles.LATITUDE,
)
soccom_bgc_argo_ds_profiles = soccom_bgc_argo_ds_profiles.assign(depth = -1 * height_from_pressure)

In [37]:
# Display the dataset to check that the `depth` variable was added
soccom_bgc_argo_ds_profiles

## Interpolate data onto standard pressure levels

Note that these levels can be changed. They were selected to match what Dan(i) chose for the Weddell Gyre clusters

In [75]:
# Target levels for the interpolation: 100 evenly spaced values from 20 to 2000 (both ends included)
standard_pressure_levels = np.linspace(20, 2000, 100)

soccom_bgc_argo_ds_profiles = soccom_bgc_argo_ds_profiles.argo.interp_std_levels(standard_pressure_levels)

# Not done here: a depth for the interpolated levels could be derived from PRES_INTERPOLATED
# with gsw.conversions.z_from_p, in the same way `depth` was derived from PRES.

In [76]:
# Display the dataset after interpolation onto the standard pressure levels
soccom_bgc_argo_ds_profiles

## Rename variables

In [77]:
# Names needed by the GMM code (applied to what later becomes 'profiles_antarctic'):
# argopy name -> name used from here on
gmm_variable_names = {
    'N_PROF'            : 'profile',
    'LATITUDE'          : 'lat',
    'LONGITUDE'         : 'lon',
    'TIME'              : 'datetime',
    'PRES_INTERPOLATED' : 'pressure_interp',
    'PRES'              : 'pressure',
    'PSAL'              : 'practical_salinity',
    'TEMP'              : 'temperature',
}

soccom_bgc_argo_ds_profiles = soccom_bgc_argo_ds_profiles.rename(gmm_variable_names)

In [42]:
# Display the dataset to check the renamed variables
soccom_bgc_argo_ds_profiles

## Add year, month, day arrays to dataset

In [80]:
# Break each profile's `datetime` into separate variables.
profile_times = pd.to_datetime(soccom_bgc_argo_ds_profiles.datetime.values)

# new variable name -> values derived from the profile datetimes (added in this order)
datetime_parts = {
    'year'          : profile_times.year,
    'month'         : profile_times.month,
    'day'           : profile_times.day,
    'prof_YYYYMMDD' : profile_times.strftime("%Y%m%d"),   # Date: YYYYMMDD
    'prof_HHMMSS'   : profile_times.strftime("%H%M%S"),   # Time: HHMMSS
}

for part_name, part_values in datetime_parts.items():
    # Same three steps for every part: add a copy of `datetime` under the placeholder
    # name 'temp', overwrite its values, then rename it to its final name.
    soccom_bgc_argo_ds_profiles = soccom_bgc_argo_ds_profiles.assign(temp  = soccom_bgc_argo_ds_profiles.datetime)
    soccom_bgc_argo_ds_profiles.temp.values = part_values
    soccom_bgc_argo_ds_profiles = soccom_bgc_argo_ds_profiles.rename({'temp' : part_name})

# TODO: a 'season' variable (derived from month) was sketched here but never implemented.

In [89]:
# Display the dataset to check the added year / month / day / date / time variables
soccom_bgc_argo_ds_profiles

## Make changes for GMM code

In [82]:
# Derived variables for the GMM code are added under a separate name, `profiles_antarctic`
profiles_antarctic = soccom_bgc_argo_ds_profiles

# Absolute salinity (from practical salinity, pressure and position)
profiles_antarctic = profiles_antarctic.assign(
    prof_SA = gsw.conversions.SA_from_SP(
        SP  = profiles_antarctic['practical_salinity'],
        p   = profiles_antarctic['pressure'],
        lon = profiles_antarctic['lon'],
        lat = profiles_antarctic['lat'],
    )
)

# Conservative temperature (needs the absolute salinity computed above)
profiles_antarctic = profiles_antarctic.assign(
    prof_CT = gsw.conversions.CT_from_t(
        SA = profiles_antarctic['prof_SA'],
        t  = profiles_antarctic['temperature'],
        p  = profiles_antarctic['pressure'],
    )
)

# Potential density anomaly (needs both of the above)
profiles_antarctic = profiles_antarctic.assign(
    sig0 = gsw.density.sigma0(
        SA = profiles_antarctic['prof_SA'],
        CT = profiles_antarctic['prof_CT'],
    )
)

# Limit depth range to <= 1000 dbar:
# NOTE(review): Dataset.where applies this condition to every data variable, not just the
# ones that vary with pressure -- confirm that per-profile variables (year, month, ...)
# come out with the intended shape and dtype.
within_1000_dbar   = profiles_antarctic['pressure'] <= 1000
profiles_antarctic = profiles_antarctic.where(within_1000_dbar, drop=True)

# Limit latitude to <= -30:
at_or_south_of_30S = profiles_antarctic['lat'] <= -30
profiles_antarctic = profiles_antarctic.where(at_or_south_of_30S, drop=True)



In [83]:
# Display the filtered dataset that carries the GMM variables
profiles_antarctic

In [90]:
# Display absolute salinity with its dimensions transposed
profiles_antarctic.prof_SA.T

In [None]:
# for depth, can drop the profile dimension
# need to deal with any NaNs in the profile data -- the GMM can't take NaNs
# select a depth range and get rid of any profiles that are missing any value within that range (e.g. 0-1000 = no data on the shelf)
# missing values in the middle can be interpolated
# for NaN values at the top or bottom of the depth range there is no clear way to interpolate/extrapolate; replacing them with the mean value looks weird
# 

## Save SOCCOM BGC-Argo data

In [84]:
savedate = date.today().strftime('%Y%m%d')

# Each dataset is written twice: under a fixed file name (overwritten on every run) and under
# a date-stamped file name.
# The former `'....nc'.format('.nc')` calls were no-ops (the strings contain no `{}` placeholder),
# so they are dropped; the resulting file names are unchanged.

# Save dataset with formatting for GMM code:
profiles_antarctic.to_netcdf(save_directory + 'soccom_bgc_argo_ds_-180to180lon_-85to-30lat_20to1000depth.nc')
profiles_antarctic.to_netcdf(save_directory + 'soccom_bgc_argo_ds_-180to180lon_-85to-30lat_20to1000depth_' + savedate + '.nc')

# Save the full processed profile dataset. At this point it is no longer the original download:
# it has been interpolated, renamed and given date/time variables, but it has not been through
# the pressure/latitude filters applied to `profiles_antarctic`.
soccom_bgc_argo_ds_profiles.to_netcdf(save_directory + 'soccom_bgc_argo_ds.nc')
soccom_bgc_argo_ds_profiles.to_netcdf(save_directory + 'soccom_bgc_argo_ds_' + savedate + '.nc')

In [40]:
# Display the full profile dataset that was just written to disk
soccom_bgc_argo_ds_profiles

In [50]:
# Southernmost profile latitude.
# The coordinate is called 'lat' here: 'LATITUDE' was renamed in the rename step, so the
# old attribute name fails when the notebook is run top to bottom.
np.min(soccom_bgc_argo_ds_profiles.lat)

In [48]:
# Northernmost profile latitude.
# The coordinate is called 'lat' here: 'LATITUDE' was renamed in the rename step, so the
# old attribute name fails when the notebook is run top to bottom.
np.max(soccom_bgc_argo_ds_profiles.lat)

In [51]:
# Smallest profile longitude.
# The coordinate is called 'lon' here: 'LONGITUDE' was renamed in the rename step, so the
# old attribute name fails when the notebook is run top to bottom.
np.min(soccom_bgc_argo_ds_profiles.lon)

In [52]:
# Largest profile longitude.
# The coordinate is called 'lon' here: 'LONGITUDE' was renamed in the rename step, so the
# old attribute name fails when the notebook is run top to bottom.
np.max(soccom_bgc_argo_ds_profiles.lon)

In [None]:
# Scratch note (was a bare line, which is a SyntaxError in a code cell):
# 1901378_HRQC.nc  -- presumably the name of one of the SOCCOM netCDF files; float 1901378 is used in the tests below

In [93]:
# Scratch attempt: load float 1901378 through argopy's 'gdac' source, pointing `ftp` at the
# local SOCCOM netCDF folder.
# NOTE: this fails with FtpPathError (see the output below) -- the folder is not GDAC
# compliant (no `dac` folder with a legitimate sub-folder).
from argopy import DataFetcher
f = DataFetcher(src='gdac', ds='bgc', mode='expert', ftp='/Users/hannah/Documents/UW-PMEL/Research/so_co2_flux_repo/data/01_raw/SOCCOM_bgc_argo_float_data/SOCCOM_HiResQC_LIAR_21Dec2021_netcdf/').float(1901378)
f.load().data

FtpPathError: This path is not GDAC compliant (no `dac` folder with legitimate sub-folder):
/Users/hannah/Documents/UW-PMEL/Research/so_co2_flux_repo/data/01_raw/SOCCOM_bgc_argo_float_data/SOCCOM_HiResQC_LIAR_21Dec2021_netcdf/

In [95]:
import argopy
from argopy import DataFetcher as ArgoDataFetcher

In [96]:
# Open argopy's tutorial 'localftp' dataset; the first returned value is used below as the
# local FTP root (the second is presumably the list of files -- TODO confirm).
ftproot, flist = argopy.tutorial.open_dataset('localftp')

In [97]:
# Point argopy's `local_ftp` option at the tutorial data root obtained above
argopy.set_options(local_ftp=ftproot)

<argopy.options.set_options at 0x7f7d3bdd8730>

In [None]:
# Same call as the earlier local-folder attempt (float 1901378, `ftp` set to the SOCCOM folder).
# This cell has no execution count, i.e. it was not run in the saved notebook.
from argopy import DataFetcher
f = DataFetcher(src='gdac', ds='bgc', mode='expert', ftp='/Users/hannah/Documents/UW-PMEL/Research/so_co2_flux_repo/data/01_raw/SOCCOM_bgc_argo_float_data/SOCCOM_HiResQC_LIAR_21Dec2021_netcdf/').float(1901378)
f.load().data

In [98]:
# Scratch test of the 'localftp' source in expert mode: fetch one profile
# (arguments presumably are float WMO 6901929 and cycle number 2 -- TODO confirm)
# and print the names of the data variables it contains.
with argopy.set_options(mode='expert'):
    ds = ArgoDataFetcher(src='localftp').profile(6901929, 2).to_xarray()
    print(ds.data_vars)

  self.fetcher = self.Fetchers["float"](WMO=wmo, **self.fetcher_options)


NetCDF4FileNotFoundError: "Couldn't find NetCDF4 file: /Users/hannah/.argopy_tutorial_data/ftp/dac/*/1901378/1901378_prof.nc"