In [7]:
import argopy
from argopy import DataFetcher
import numpy as np
import os

# Use argopy to download data in a specified region
Adapted from argopy's Jupyter example notebook: https://nbviewer.org/github/euroargodev/argopy/blob/master/docs/examples/BGC_region_float_data.ipynb

To use argopy's bgc data set, the usermode needs to be set to 'expert' and the data source needs to be set to 'erddap'

In [8]:
# The BGC data set requires the 'expert' user mode and the 'erddap' data source.
# The trailing ';' keeps the returned options object out of the cell output.
argopy.set_options(
    mode='expert',
    src='erddap',
);

In [13]:
# region = {'LatS': -60,'LatN': -45, 'LonW': -180, 'LonE': 180,
#           'minP':0 ,'maxP': 100,
#           'startDate': '2023-01','endDate': '2023-08',
#           'BGCParams': ['BBP700', 'CHLA','DOXY', 'NITRATE']}

# Search box used by the cells below. There are no 'startDate'/'endDate' keys
# here, so the download cell searches the whole time series.
region = {
    # Latitude bounds (south, north)
    'LatS': -80,
    'LatN': -45,
    # Longitude bounds (west, east)
    'LonW': -180,
    'LonE': 180,
    # Pressure bounds (min, max)
    'minP': 0,
    'maxP': 2000,
    # BGC variables to request
    'BGCParams': [
        'BBP700',
        'CHLA',
        'DOXY',
        'NITRATE',
        'PH_IN_SITU_TOTAL',
        'DOWNWELLING_PAR',
    ],
}

Possible parameters
['BBP700',
 'CDOM',
 'CHLA',
 'DOWNWELLING_PAR',
 'DOWN_IRRADIANCE380',
 'DOWN_IRRADIANCE412',
 'DOWN_IRRADIANCE490',
 'DOXY',
 'NITRATE',
 'PH_IN_SITU_TOTAL']
 Notes:
 - PH_IN_SITU_TOTAL is tricky: many of its values are NaN.

### Specify that we want data where **ALL** listed BGCParams are not NaNs
Smallest possible data set

In [14]:
#BGCDataFetcher = DataFetcher(ds='bgc', measured=region['BGCParams'], parallel=True,
#                            progress=True,chunks_maxsize={'time': 30})

### Specify that we want **ALL** data for the listed BGCParams (NaNs included)
Largest possible data set

In [15]:
# Fetcher for the 'bgc' data set, requesting the variables listed in
# region['BGCParams'] through `params`. The fetch runs in parallel, shows a
# progress bar, and uses a chunks_maxsize of 30 for 'time'.
BGCDataFetcher = DataFetcher(
    ds='bgc',
    params=region['BGCParams'],
    parallel=True,
    progress=True,
    chunks_maxsize={'time': 30},
)

## Get data from specified region and output as a pandas dataframe
Notes: There is a timeout limit, so don't ask for too much data at once

In [16]:
# Box passed to the fetcher: lon west/east, lat south/north, pressure min/max
box = [region[key] for key in ('LonW', 'LonE', 'LatS', 'LatN', 'minP', 'maxP')]

# Date bounds are optional; without them the whole series is searched
if 'startDate' in region:
    box += [region['startDate'], region['endDate']]

ds = BGCDataFetcher.region(box).load()

# Flatten the loaded data into a pandas dataframe for the cells below
df = ds.data.to_dataframe()

Error: 503, message='Service Unavailable', url=URL('https://erddap.ifremer.fr/erddap/info/ArgoFloats-synthetic-BGC/index.json')


FileNotFoundError: https://erddap.ifremer.fr/erddap/info/ArgoFloats-synthetic-BGC/index.json

## Reformat argopy table to quality control the data

In [None]:
def QualityControlByParam(p, df):
    """Pick raw or adjusted values for one parameter and mask bad QC flags.

    Parameters
    ----------
    p : str
        Param name (ex: PRES). The columns ``p``, ``p + '_QC'``,
        ``p + '_ADJUSTED'``, ``p + '_ADJUSTED_QC'`` and ``p + '_DATA_MODE'``
        are read from ``df``.
    df : pandas.DataFrame
        argopy dataframe containing those five columns.

    Returns
    -------
    data : numpy.ndarray
        The raw value where the data mode is 'R', the adjusted value
        otherwise. Entries whose selected QC flag is not in the range
        0 < flag <= 2 are replaced by NaN.
    data_qc : numpy.ndarray
        The QC flag matching each selected value (raw QC where the data mode
        is 'R', adjusted QC otherwise). Flags are returned unmasked.
    """
    raw = df.loc[:, p].values
    raw_qc = df.loc[:, p + '_QC'].values
    adj = df.loc[:, p + '_ADJUSTED'].values
    adj_qc = df.loc[:, p + '_ADJUSTED_QC'].values
    dmode = df.loc[:, p + '_DATA_MODE'].values

    # Determine if use real-time or delayed-mode data: rows flagged 'R' take
    # the raw value and raw QC, every other mode takes the adjusted pair.
    use_raw = dmode == 'R'
    data = np.where(use_raw, raw, adj)
    data_qc = np.where(use_raw, raw_qc, adj_qc)

    # Quality control data: keep only flags with 0 < flag <= 2.
    # np.nan is used because the np.NaN alias was removed in NumPy 2.0.
    keep = np.logical_and(data_qc <= 2, data_qc > 0)
    data = np.where(keep, data, np.nan)

    return data, data_qc

In [None]:
# Reformat argopy data frame
# Columns carried over from the argopy table without any quality control
cnames = [
    'CONFIG_MISSION_NUMBER',
    'CYCLE_NUMBER',
    'DIRECTION',
    'PLATFORM_NUMBER',
    'POSITION_QC',
    'LATITUDE',
    'LONGITUDE',
    'TIME',
]

# Take an explicit copy: columns are added to df_new afterwards, and those
# assignments must never write through to (or warn about) the original df.
df_new = df.loc[:, cnames].copy()

In [None]:
# PRES, TEMP and PSAL are always processed, followed by the requested BGC params
core_params = ['PRES', 'TEMP', 'PSAL']
params = core_params + region['BGCParams']

for name in params:
    # Each parameter contributes a QC-filtered value column and its QC flag column
    df_new[name], df_new[name + '_QC'] = QualityControlByParam(name, df)

## Save data to csv file specifying data bounds in file name
The data will be saved in the `csv_output` directory.

In [None]:
# Rounded region bounds, rendered as text in the order they appear in the name
bounds = {key: str(np.round(region[key], 0))
          for key in ('LatN', 'LatS', 'LonE', 'LonW', 'minP', 'maxP')}

# Only the date part of the name depends on whether date bounds were given
if 'startDate' in region:
    date_tag = '_SDate_' + region['startDate'] + '_EDate_' + region['endDate'] + '_PARAMS_'
else:
    date_tag = '_Date_ALL_PARAMS_'

# NOTE(review): the 'ouput' spelling and the missing '_' after '_maxP' are kept
# exactly as they were so that previously written file names still match.
fname = ''.join([
    'argopy_ouput_LatN_', bounds['LatN'],
    '_LatS_', bounds['LatS'],
    '_LonE_', bounds['LonE'],
    '_LonW_', bounds['LonW'],
    '_minP_', bounds['minP'],
    '_maxP', bounds['maxP'],
    date_tag,
    '_'.join(region['BGCParams']),
    '.csv',
])

In [None]:
# Make sure the output directory exists, then write the table once.
# exist_ok=True replaces the separate exists() check, so there is no window
# between checking for the directory and creating it.
out_dir = 'csv_output/'
os.makedirs(out_dir, exist_ok=True)
df_new.to_csv(out_dir + fname)