# Download time-series data via Copernicus Marine Client (CMC)

## Import libraries, functions and define paths

In [1]:
import os # a library that allows us access to basic operating system commands like making directories
import xarray as xr
import copernicus_marine_client as copernicus_marine

In [2]:
def sort_dimension(dataset, dim_name):
    """
    Return `dataset` with the coordinate `dim_name` in ascending order.

    The coordinate values are inspected first and the dataset is only re-sorted
    when at least one value is smaller than its predecessor, so an axis that is
    already ascending is returned untouched.

    Parameters
    ----------
    dataset : object exposing `dataset[dim_name].values` and `dataset.sortby(...)`
        In this notebook, the xarray dataset returned by the Copernicus Marine Client.
    dim_name : str
        Name of the coordinate to check (e.g. 'latitude').

    Returns
    -------
    The same `dataset` object if the coordinate is already ascending (or holds
    fewer than two values), otherwise the result of `dataset.sortby(dim_name)`.
    """
    # Get the coordinate values for the specified dimension.
    coords = dataset[dim_name].values

    # A coordinate with fewer than two values is trivially sorted.
    # Otherwise compare every value with its predecessor: a single decrease is
    # enough to know the axis is not ascending (fully inverted or just shuffled).
    if coords.size > 1 and (coords[1:] < coords[:-1]).any():
        dataset = dataset.sortby(dim_name, ascending=True)

    return dataset

In [3]:
# Folder for the NetCDF outputs; it lives inside the current working directory
# and is created on first run (nothing happens if it already exists).
NAME_DOWNLOAD_DIRECTORY = "data_timeseries_CMC_april_nc"
full_path_download_dir = os.path.abspath(NAME_DOWNLOAD_DIRECTORY)
os.makedirs(name=full_path_download_dir, exist_ok=True)

## Download parameters

In [4]:
# Bounding box used to subset every dataset.
#
# Boundaries originally intended:
#   LONGITUDE_BOUNDS = [0.265, 1.801]
#   LATITUDE_BOUNDS = [53.758, 54.656]
#
# The data won't be subsetted at those exact values (too many digits), so the
# margins are widened to make sure the subset encompasses those lons and lats.
LONGITUDE_BOUNDS = [0.20, 1.89]
LATITUDE_BOUNDS = [53.70, 54.70]

# [min, max] pairs turned into slice objects (start=min, stop=max)
lon_range, lat_range = slice(*LONGITUDE_BOUNDS), slice(*LATITUDE_BOUNDS)

In [5]:
# List of datasets

# Dataset IDs passed to the Copernicus Marine Client, grouped by product.
# Entries that are commented out are currently disabled.
# NOTE: the download loop pairs this list with LIST_OUTPUT_NAMES position by
# position (via zip), so the active entries of both lists must stay in the same
# order and be enabled/disabled together.
LIST_DATASET_IDS = [

# L3 satellite observations, global, 4 km resolution, daily, OC-CCI algorithm
# Product ID: OCEANCOLOUR_GLO_BGC_L3_MY_009_107
    #"c3s_obs-oc_glo_bgc-plankton_my_l3-multi-4km_P1D",   # Chlorophyll a + Microphytoplankton + Nanophytoplankton + Picophytoplankton + uncertainty (%) + flags

# L3 satellite observations, global, various resolutions, daily, Copernicus-GlobColour algorithm
# Product ID: OCEANCOLOUR_GLO_BGC_L3_MY_009_103

    # 4 km resolution (multiple sensors merged)
    #"cmems_obs-oc_glo_bgc-plankton_my_l3-multi-4km_P1D", # Chlorophyll a + Diatoms + Dinoflagellates + Green algae + Haptophytes + Microphytoplankton + Nanophytoplankton + Picophytoplankton + Prochlorococcus + Prokaryotes (units: mg chla m-3) + uncertainty (%) + flags
    #"cmems_obs-oc_glo_bgc-optics_my_l3-multi-4km_P1D",   # Particulate backscattering coeff (m-1) + Absorption coeff due to CDOM and non-algal particles (m-1) + uncertainty (%) + flags
    #"cmems_obs-oc_glo_bgc-transp_my_l3-multi-4km_P1D",   # Diffuse attenuation coeff kd490 (m-1) + SPM (g m-3) + Secchi disk depth (m) + uncertainty (%) + flags
    # 4 km resolution (OLCI sensor)
    #"cmems_obs-oc_glo_bgc-plankton_my_l3-olci-4km_P1D",  # Chlorophyll a (mg chla m-3) + uncertainty (%) + flags
    #"cmems_obs-oc_glo_bgc-transp_my_l3-olci-4km_P1D",    # Diffuse attenuation coeff kd490 (m-1) + uncertainty (%) + flags
    # 300 m resolution (OLCI sensor)
    #"cmems_obs-oc_glo_bgc-plankton_my_l3-olci-300m_P1D", # Chlorophyll a (mg chla m-3) + uncertainty (%) + flags

# L3 satellite observations, Atlantic-European NWS, various resolutions, daily
# Product ID: OCEANCOLOUR_ATL_BGC_L3_MY_009_113

    # 1 km resolution (multiple sensors merged)
    #"cmems_obs-oc_atl_bgc-plankton_my_l3-multi-1km_P1D", # Chlorophyll a (mg chla m-3) + uncertainty (%) + flags
    #"cmems_obs-oc_atl_bgc-optics_my_l3-multi-1km_P1D",   # Particulate backscattering coeff (m-1) + Absorption coeff due to CDOM and non-algal particles (m-1) + uncertainty (%) + flags
    #"cmems_obs-oc_atl_bgc-transp_my_l3-multi-1km_P1D",   # Diffuse attenuation coeff kd490 (m-1) + SPM (g m-3) + Secchi disk depth (m) + uncertainty (%) + flags
    # 300 m resolution (OLCI sensor)
    #"cmems_obs-oc_atl_bgc-plankton_my_l3-olci-300m_P1D", # Chlorophyll a (mg chla m-3) + uncertainty (%) + flags

# Biogeochemical reanalysis, Atlantic-European NWS, 7 km horizontal resolution, daily
# Product ID: NWSHELF_MULTIYEAR_BGC_004_011
    "cmems_mod_nws_bgc-chl_my_7km-3D_P1D-m",             # Chlorophyll a (mg chla m-3)
    "cmems_mod_nws_bgc-pft_my_7km-3D-diato_P1D-m",       # Diatoms (mg chla m-3)
    "cmems_mod_nws_bgc-pft_my_7km-3D-dino_P1D-m",        # Dinoflagellates (mg chla m-3)
    "cmems_mod_nws_bgc-pft_my_7km-3D-nano_P1D-m",        # Nanophytoplankton (mg chla m-3)
    "cmems_mod_nws_bgc-pft_my_7km-3D-pico_P1D-m",        # Picophytoplankton (mg chla m-3)
    "cmems_mod_nws_bgc-phyc_my_7km-3D_P1D-m",            # Phytoplankton concentration (mmol C m-3)
    "cmems_mod_nws_bgc-pp_my_7km-3D_P1D-m",              # Net primary production (mg C m-3 d-1)
    "cmems_mod_nws_bgc-kd_my_7km-3D_P1D-m",              # Attenuation coefficient kd (m-1)
    "cmems_mod_nws_bgc-no3_my_7km-3D_P1D-m",             # Nitrate (mmol m-3)
    "cmems_mod_nws_bgc-po4_my_7km-3D_P1D-m",             # Phosphate (mmol m-3)
    "cmems_mod_nws_bgc-o2_my_7km-3D_P1D-m",              # Oxygen (mmol m-3)
    "cmems_mod_nws_bgc-ph_my_7km-3D_P1D-m",              # pH
    "cmems_mod_nws_bgc-spco2_my_7km-2D_P1D-m",           # pCO2 (Pa)

# Physical reanalysis, Atlantic-European NWS, 7 km horizontal resolution
# Product ID: NWSHELF_MULTIYEAR_PHY_004_009
    "cmems_mod_nws_phy-mld_my_7km-2D_P1D-m",             # MLD (m), defined using sigma theta (daily, but there's also hourly)
    "cmems_mod_nws_phy-s_my_7km-3D_P1D-m",               # Salinity (PSU) (daily, but there's also hourly)
    "cmems_mod_nws_phy-t_my_7km-3D_P1D-m",               # Temperature (ºC) (daily, but there's also hourly)
    "cmems_mod_nws_phy-ssh_my_7km-2D_P1D-m",             # SSH (m) (daily, but there's also hourly)
    "cmems_mod_nws_phy-uv_my_7km-3D_P1D-m"               # Eastward + Northward seawater velocity (m s-1) (daily, but there's also hourly)
]

# Base names (without the ".nc" extension) of the output files, one per dataset.
# NOTE: the download loop pairs this list with LIST_DATASET_IDS position by
# position (via zip): the n-th active name here is used for the n-th active
# dataset ID. Entries that are commented out are currently disabled and must be
# re-enabled together with their dataset ID.
LIST_OUTPUT_NAMES = [
   # Satellite observations, global (disabled)
   # "obs_satell_glob_occci_multi_4km_plk",
   # "obs_satell_glob_cmems_multi_4km_plk",
   # "obs_satell_glob_cmems_multi_4km_opt",
   # "obs_satell_glob_cmems_multi_4km_trns",
   # "obs_satell_glob_cmems_olci_4km_plk",
   # "obs_satell_glob_cmems_olci_4km_trns",
   # "obs_satell_glob_cmems_olci_300m_plk",
   # Satellite observations, regional (disabled)
   # "obs_satell_reg_cmems_multi_1km_plk",
   # "obs_satell_reg_cmems_multi_1km_opt",
   # "obs_satell_reg_cmems_multi_1km_trns",
   # "obs_satell_reg_cmems_olci_300m_plk",
    # Biogeochemical reanalysis
    "mod_bgc_reg_chl",
    "mod_bgc_reg_diat",
    "mod_bgc_reg_dino",
    "mod_bgc_reg_nano",
    "mod_bgc_reg_pico",
    "mod_bgc_reg_phy",
    "mod_bgc_reg_npp",
    "mod_bgc_reg_kd",
    "mod_bgc_reg_no3",
    "mod_bgc_reg_po4",
    "mod_bgc_reg_o2",
    "mod_bgc_reg_ph",
    "mod_bgc_reg_pco2",
    # Physical reanalysis
    "mod_phy_reg_mld",
    "mod_phy_reg_sal",
    "mod_phy_reg_temp",
    "mod_phy_reg_ssh",
    "mod_phy_reg_velo"
]

In [6]:
# Open a single product through the CMC (no spatial subsetting requested here)
# and display it, to inspect its variables and coordinates before the bulk download.
DS = copernicus_marine.load_xarray_dataset(
    vertical_dimension_as_originally_produced=True,
    dataset_id="cmems_mod_nws_bgc-phyc_my_7km-3D_P1D-m",
)
DS

## Download data

We can't use the CMC function `subset()` because it offers no way to re-sort an inverted (unsorted) axis. Instead, we must read the data via `load_xarray_dataset()` and sort the inverted axes first. For the moment, the majority of ocean-colour products are not in the Zarr format, so downloading through the **Copernicus Marine Client (CMC)** is equivalent to downloading via **OPeNDAP**.

In [7]:
# Uncomment the line below to list the parameters accepted by a CMC function
# help(copernicus_marine.subset)

In [22]:
%%time

# Download every dataset in LIST_DATASET_IDS, cropped to the bounding box, and
# save it as <output_name>.nc inside full_path_download_dir.

# The two lists are paired by position; zip() would silently ignore the unmatched
# tail if one of them were longer, so fail early instead.
if len(LIST_DATASET_IDS) != len(LIST_OUTPUT_NAMES):
    raise ValueError(
        f"LIST_DATASET_IDS has {len(LIST_DATASET_IDS)} entries but "
        f"LIST_OUTPUT_NAMES has {len(LIST_OUTPUT_NAMES)}; they must match one-to-one"
    )

# For some datasets the horizontal coordinates appear as 'lon'/'lat', for others
# as 'longitude'/'latitude'; the long names are used from here on.
COORDINATE_ALIASES = {'lon': 'longitude', 'lat': 'latitude'}

for dataset_id, output_name in zip(LIST_DATASET_IDS, LIST_OUTPUT_NAMES):

    print("Downloading dataset: ", dataset_id)

    # The following approach was created by David from Copernicus

    # Read dataset with CMC
    ds = copernicus_marine.load_xarray_dataset(
        dataset_id=dataset_id,
        vertical_dimension_as_originally_produced=True,
    )

    # Rename only the short coordinate names that are actually present
    renames = {short: full for short, full in COORDINATE_ALIASES.items() if short in ds.coords}
    if renames:
        ds = ds.rename(renames)

    # To keep only the surface layer of the model products, uncomment:
    # if 'depth' in ds.coords:
    #     ds = ds.isel(depth=0)

    # Sort axes that were inverted, otherwise the ascending slices below
    # would not select the intended range
    ds = sort_dimension(ds, 'latitude')
    ds = sort_dimension(ds, 'longitude')

    # Subset bounding box
    ds = ds.sel(longitude=lon_range, latitude=lat_range)

    # Save into .nc file
    ds.to_netcdf(os.path.join(full_path_download_dir, f"{output_name}.nc"))

# ---------------------------------------------------------------------------
# Alternative version of the loop body (kept for reference, it also ran fine).
# It relies on copernicus_marine.subset() and swaps the latitude bounds when the
# latitude axis is unsorted.
#
#     # Inspection of the data first
#     ds = copernicus_marine.load_xarray_dataset(
#         dataset_id = dataset_id,
#         minimal_longitude = LONGITUDE_BOUNDS[0],
#         maximal_longitude = LONGITUDE_BOUNDS[1],
#         minimal_latitude = LATITUDE_BOUNDS[0],
#         maximal_latitude = LATITUDE_BOUNDS[1]
#     )
#
#     # Rename latitude: for some datasets it appears as 'lat', for some others as 'latitude'
#     for coords in ds.coords:
#         if coords=='lon':
#             ds = ds.rename({'lon': 'longitude'})
#         if coords=='lat':
#             ds = ds.rename({'lat': 'latitude'})
#
#     # If the latitude dimension is unsorted, the above call to load_xarray_dataset()
#     # will generate an empty latitude vector. In that case, shift the order of the latitude call
#     if len(ds.coords['latitude'].values) == 0:
#         print(f"The latitude dimension has {len(ds.coords['latitude'].values)} entries, rearranging...")
#         minlat = LATITUDE_BOUNDS[1]
#         maxlat = LATITUDE_BOUNDS[0]
#     else:
#         print(f"The latitude dimension has {len(ds.coords['latitude'].values)} entries")
#         minlat = LATITUDE_BOUNDS[0]
#         maxlat = LATITUDE_BOUNDS[1]
#
#     #print(f"The longitude dimension has these entries {ds.coords['lon'].values}, which are included in our boundaries 0.265 to 1.801")
#
#     copernicus_marine.subset(
#         dataset_id = dataset_id,
#         minimal_longitude = LONGITUDE_BOUNDS[0],
#         maximal_longitude = LONGITUDE_BOUNDS[1],
#         minimal_latitude = minlat,
#         maximal_latitude = maxlat,
#         output_filename = f"{output_name}.nc",
#         output_directory = full_path_download_dir,
#         force_download = True, # skips prompt "Do you want to proceed with download? [Y/n]:"
#     )
# ---------------------------------------------------------------------------

print('All datasets downloaded!')

Downloading dataset:  cmems_mod_nws_bgc-chl_my_7km-3D_P1D-m
Downloading dataset:  cmems_mod_nws_bgc-pft_my_7km-3D-diato_P1D-m
Downloading dataset:  cmems_mod_nws_bgc-pft_my_7km-3D-dino_P1D-m
Downloading dataset:  cmems_mod_nws_bgc-pft_my_7km-3D-nano_P1D-m
Downloading dataset:  cmems_mod_nws_bgc-pft_my_7km-3D-pico_P1D-m
Downloading dataset:  cmems_mod_nws_bgc-phyc_my_7km-3D_P1D-m
Downloading dataset:  cmems_mod_nws_bgc-pp_my_7km-3D_P1D-m
Downloading dataset:  cmems_mod_nws_bgc-kd_my_7km-3D_P1D-m
Downloading dataset:  cmems_mod_nws_bgc-no3_my_7km-3D_P1D-m
Downloading dataset:  cmems_mod_nws_bgc-po4_my_7km-3D_P1D-m
Downloading dataset:  cmems_mod_nws_bgc-o2_my_7km-3D_P1D-m
Downloading dataset:  cmems_mod_nws_bgc-ph_my_7km-3D_P1D-m
Downloading dataset:  cmems_mod_nws_bgc-spco2_my_7km-2D_P1D-m
Downloading dataset:  cmems_mod_nws_phy-mld_my_7km-2D_P1D-m
Downloading dataset:  cmems_mod_nws_phy-s_my_7km-3D_P1D-m
Downloading dataset:  cmems_mod_nws_phy-t_my_7km-3D_P1D-m
Downloading dataset:  cm