# Download biogeochemical (BGC) and physical (PHYS) analysis products from CMEMS

**Last updated: 11/11/2024**

## Import libraries and define paths

In [5]:
import os 
from pathlib import Path
import copernicusmarine
from datetime import datetime

In [6]:
# Path to download directory
PATH_ROOT_DIR = Path.cwd().resolve().parents[0] # /absolute/path/to/two/levels/up
full_path_download_dir = os.path.join(PATH_ROOT_DIR,"data","raw","CMEMS")
os.makedirs(full_path_download_dir, exist_ok=True)

## Set the download parameters

In [7]:
# =====================================================
# List of datasets
# =====================================================

LIST_DATASET_IDS = [

# Global Ocean Colour (Copernicus-GlobColour), Bio-Geo-Chemical, L4 (monthly and interpolated) from Satellite Observations (1997-ongoing), 4 x 4 km, monthly (1997-2024)
# Product ID: OCEANCOLOUR_GLO_BGC_L4_MY_009_104
    "cmems_obs-oc_glo_bgc-plankton_my_l4-multi-4km_P1M",        # Chlorophyll (mg m-3)
    "cmems_obs-oc_glo_bgc-pp_my_l4-multi-4km_P1M",              # NPP (mg C m-2 d-1)
    "cmems_obs-oc_glo_bgc-transp_my_l4-multi-4km_P1M",          # kd (m-1)
    
# Global Ocean Physics Reanalysis, 0.083° × 0.083°, monthly climatology (1993-2016)
# Product ID: GLOBAL_MULTIYEAR_PHY_001_030
    "cmems_mod_glo_phy_my_0.083deg-climatology_P1M-m"           # MLD (m) (defined using sigma theta), sea ice fraction, temperature
]

# ===============================================================================
# List of output file names (should correspond to the variable names listed below)
# ===============================================================================

LIST_OUTPUT_NAMES = [
    "mod_bgc_glo_chla",
    "mod_bgc_glo_npp",
    "mod_bgc_glo_kd",
    "mod_phys_glo_mld",
    "mod_phys_glo_icefrac",
    "mod_phys_glo_temp"
]
 
# ===============================================================================
# List of variable names to download
# ===============================================================================

LIST_VARIABLES = [
    "CHL",
    "PP",
    "KD490",
    "mlotst",
    "siconc",
    "thetao"
]

#"PP_uncertainty",
#"KD490_uncertainty",

## Exploratory analysis of one dataset to get variable names

In [4]:
help(copernicusmarine.subset)

Help on function subset in module copernicusmarine.python_interface.subset:

subset(dataset_url: Optional[str] = None, dataset_id: Optional[str] = None, dataset_version: Optional[str] = None, dataset_part: Optional[str] = None, username: Optional[str] = None, password: Optional[str] = None, variables: Optional[List[str]] = None, minimum_longitude: Optional[float] = None, maximum_longitude: Optional[float] = None, minimum_latitude: Optional[float] = None, maximum_latitude: Optional[float] = None, minimum_depth: Optional[float] = None, maximum_depth: Optional[float] = None, vertical_dimension_as_originally_produced: bool = True, start_datetime: Union[datetime.datetime, str, NoneType] = None, end_datetime: Union[datetime.datetime, str, NoneType] = None, subset_method: Literal['nearest', 'strict'] = 'nearest', output_filename: Optional[str] = None, file_format: Literal['netcdf', 'zarr'] = 'netcdf', service: Optional[str] = None, request_file: Union[pathlib.Path, str, NoneType] = None, outp

In [9]:
DS = copernicusmarine.open_dataset(
    dataset_id = "cmems_obs-oc_glo_bgc-pp_my_l4-multi-4km_P1M"
)
DS

INFO - 2024-11-11T14:14:12Z - Dataset version was not specified, the latest one was selected: "202311"
INFO - 2024-11-11T14:14:12Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-11-11T14:14:13Z - Service was not specified, the default one was selected: "arco-geo-series"


## Download data

In [8]:
%%time

for dataset_id in LIST_DATASET_IDS:

    DS = copernicusmarine.open_dataset(dataset_id = dataset_id)
    
    for variable_name, output_name in zip(LIST_VARIABLES, LIST_OUTPUT_NAMES):
        
        if variable_name in DS.data_vars: # if the variable exists, do something with it

            print(f"Downloading {variable_name} from {dataset_id}")
            copernicusmarine.subset(
                dataset_id=dataset_id,
                variables={variable_name},
                minimum_longitude=-180,
                maximum_longitude=180,
                minimum_latitude=-90,
                maximum_latitude=90,
                output_filename=f"{output_name}.nc",
                output_directory=f"{full_path_download_dir}",
                force_download=True,
            )

print('All datasets downloaded!')

Fetching catalog: 100%|███████████████████████████| 3/3 [00:12<00:00,  4.31s/it]
INFO - 2024-12-02T09:31:16Z - Dataset version was not specified, the latest one was selected: "202411"
INFO - 2024-12-02T09:31:16Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-12-02T09:31:17Z - Service was not specified, the default one was selected: "arco-geo-series"


Downloading CHL from cmems_obs-oc_glo_bgc-plankton_my_l4-multi-4km_P1M


INFO - 2024-12-02T09:31:21Z - Dataset version was not specified, the latest one was selected: "202411"
INFO - 2024-12-02T09:31:21Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-12-02T09:31:22Z - Service was not specified, the default one was selected: "arco-geo-series"
INFO - 2024-12-02T09:31:23Z - Downloading using service arco-geo-series...
INFO - 2024-12-02T09:31:23Z - Estimated size of the dataset file is 46442.308 MB.
INFO - 2024-12-02T09:31:23Z - Writing to local storage. Please wait...


  0%|          | 0/52814 [00:00<?, ?it/s]

INFO - 2024-12-02T09:56:14Z - Successfully downloaded to /Users/Anna/LocalDocuments/Academic/Projects/ocean-data-lab/data/raw/CMEMS/mod_bgc_glo_chla.nc
INFO - 2024-12-02T09:56:16Z - Dataset version was not specified, the latest one was selected: "202311"
INFO - 2024-12-02T09:56:16Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-12-02T09:56:17Z - Service was not specified, the default one was selected: "arco-geo-series"


Downloading PP from cmems_obs-oc_glo_bgc-pp_my_l4-multi-4km_P1M


INFO - 2024-12-02T09:56:19Z - Dataset version was not specified, the latest one was selected: "202311"
INFO - 2024-12-02T09:56:19Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-12-02T09:56:20Z - Service was not specified, the default one was selected: "arco-geo-series"
INFO - 2024-12-02T09:56:21Z - Downloading using service arco-geo-series...
INFO - 2024-12-02T09:56:22Z - Estimated size of the dataset file is 46442.308 MB.
INFO - 2024-12-02T09:56:22Z - Writing to local storage. Please wait...


  0%|          | 0/52814 [00:00<?, ?it/s]

INFO - 2024-12-02T10:21:57Z - Successfully downloaded to /Users/Anna/LocalDocuments/Academic/Projects/ocean-data-lab/data/raw/CMEMS/mod_bgc_glo_npp.nc
INFO - 2024-12-02T10:21:58Z - Dataset version was not specified, the latest one was selected: "202311"
INFO - 2024-12-02T10:21:58Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-12-02T10:21:59Z - Service was not specified, the default one was selected: "arco-geo-series"


Downloading KD490 from cmems_obs-oc_glo_bgc-transp_my_l4-multi-4km_P1M


INFO - 2024-12-02T10:22:02Z - Dataset version was not specified, the latest one was selected: "202311"
INFO - 2024-12-02T10:22:02Z - Dataset part was not specified, the first one was selected: "default"
INFO - 2024-12-02T10:22:03Z - Service was not specified, the default one was selected: "arco-geo-series"
INFO - 2024-12-02T10:22:03Z - Downloading using service arco-geo-series...
INFO - 2024-12-02T10:22:04Z - Estimated size of the dataset file is 46442.308 MB.
INFO - 2024-12-02T10:22:04Z - Writing to local storage. Please wait...


  0%|          | 0/52814 [00:00<?, ?it/s]

INFO - 2024-12-02T10:48:36Z - Successfully downloaded to /Users/Anna/LocalDocuments/Academic/Projects/ocean-data-lab/data/raw/CMEMS/mod_bgc_glo_kd.nc


All datasets downloaded!
CPU times: user 15min, sys: 1h 5s, total: 1h 15min 6s
Wall time: 1h 17min 42s
