# EUMETSAT Archive API retrieval

Notes:
- 5 files, sequential processing: 1min 14s ± 8.45 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
- 5 files, parallel processing (5 threads): 38.2 s ± 3.88 s per loop (mean ± std. dev. of 7 runs, 1 loop each)

**TODO:** A sleep timer between parallel API calls may need to be added.

In [18]:
import datetime
import os
import shutil
import time

import eumdac
import requests
from pathos.threading import ThreadPool as Pool

## Set Paths

In [19]:
# Directory that downloaded product files are written to.
# Example of a project-specific location:
# OUTPUTDIR = '../../projects/4868_10/Datasets/MSG/raw/'
OUTPUTDIR = '.'

## Set API credentials
### The consumer key and secret can be found here: https://api.eumetsat.int/api-key/

In [20]:
# Read the personal consumer key and secret from the environment so that they
# are never stored in (or committed with) the notebook. Set
# EUMETSAT_CONSUMER_KEY and EUMETSAT_CONSUMER_SECRET before starting Jupyter.
# NOTE(review): a key pair that was previously written into this cell should be
# treated as exposed and regenerated on the API key page.
consumer_key = os.environ.get('EUMETSAT_CONSUMER_KEY', '')
consumer_secret = os.environ.get('EUMETSAT_CONSUMER_SECRET', '')

# Fail early with a clear message instead of a confusing authentication error later.
if not consumer_key or not consumer_secret:
    raise RuntimeError(
        "Set the EUMETSAT_CONSUMER_KEY and EUMETSAT_CONSUMER_SECRET "
        "environment variables before running this notebook."
    )

credentials = (consumer_key, consumer_secret)

token = eumdac.AccessToken(credentials)

# Formatting the token and its expiration is what is guarded against HTTP
# errors here, so the try block wraps the print rather than the constructor.
try:
    print(f"This token '{token}' expires {token.expiration}")
except requests.exceptions.HTTPError as error:
    print(f"Unexpected error: {error}")

This token '3c6b7c8f-45f2-39ef-81ba-83c52c2e5e27' expires 2024-09-10 12:30:38.385592


## Browse DataStore collections
### See: https://api.eumetsat.int/data/browse/collections

## Set configuration

In [21]:
# Identifier of the DataStore collection to query
collection = 'EO:EUM:DAT:MSG:HRSEVIRI'

# Sensing window: a half-hour span beginning at midnight on 2022-01-01
start = datetime.datetime(year=2022, month=1, day=1, hour=0, minute=0)
end = start + datetime.timedelta(minutes=30)

## Retrieve dataset info

In [22]:
# Select collection from datastore
datastore = eumdac.DataStore(token)

# Look up the configured collection and print its identifier and title.
# Failures are reported with print rather than raised.
try:
    selected_collection = datastore.get_collection(collection)
    print(f"{selected_collection} - {selected_collection.title}")
except eumdac.datastore.DataStoreError as error:
    print(f"Error related to the data store: '{error.msg}'")
except eumdac.collection.CollectionError as error:
    print(f"Error related to the collection: '{error.msg}'")
except requests.exceptions.ConnectionError as error:
    # requests exceptions carry no `msg` attribute; format the exception itself
    # so that this handler cannot fail with an AttributeError.
    print(f"Error related to the connection: '{error}'")
except requests.exceptions.RequestException as error:
    print(f"Unexpected error: {error}")

EO:EUM:DAT:MSG:HRSEVIRI - High Rate SEVIRI Level 1.5 Image Data - MSG - 0 degree


## Retrieve filenames

In [23]:
# Retrieve datasets that match our filter (sensing window from the configuration)
products = selected_collection.search(
    dtstart=start,
    dtend=end)

# List every matching product; errors are reported with print rather than raised.
for product in products:
    try:
        print(product)
    except eumdac.collection.CollectionError as error:
        print(f"Error related to the collection: '{error.msg}'")
    except requests.exceptions.ConnectionError as error:
        # requests exceptions carry no `msg` attribute; format the exception
        # itself so that this handler cannot fail with an AttributeError.
        print(f"Error related to the connection: '{error}'")
    except requests.exceptions.RequestException as error:
        print(f"Unexpected error: {error}")

MSG4-SEVI-MSG15-0100-NA-20220101002742.635000000Z-NA
MSG4-SEVI-MSG15-0100-NA-20220101001242.759000000Z-NA


In [7]:
# Number of products matched by the search above
print(len(products))

2


## Download datasets

### Linear processing

In [8]:
def download_api_products(products, download_dir):
    """Download every product in ``products`` into ``download_dir``.

    Each product is opened with ``product.open()`` and its byte stream is
    copied to ``{download_dir}/{fsrc.name}``. A failure for one product is
    reported with ``print`` and does not stop the remaining downloads.

    Parameters
    ----------
    products : iterable
        Objects exposing ``open()``, which must return a context manager that
        is readable as a binary stream and has a ``name`` attribute.
    download_dir : str
        Directory the files are written to. It is not created here, so it
        must already exist.

    Returns
    -------
    None
    """
    for product in products:
        try:
            with product.open() as fsrc, \
                    open(f'{download_dir}/{fsrc.name}', mode='wb') as fdst:
                shutil.copyfileobj(fsrc, fdst)
                print(f'Download of product {product} finished.')

        except eumdac.product.ProductError as error:
            print(f"Error related to the product '{product}' while trying to download it: '{error.msg}'")
        except requests.exceptions.ConnectionError as error:
            # requests exceptions carry no `msg` attribute; format the
            # exception itself so this handler cannot raise AttributeError
            # and abort the remaining downloads.
            print(f"Error related to the connection: '{error}'")
        except requests.exceptions.RequestException as error:
            print(f"Unexpected error: {error}")

### Parallel processing

In [9]:
def parallel_download_api_products(list_of_products, list_of_dirs, threads=6):
    """Run ``download_api_products`` over paired arguments on a thread pool.

    Parameters
    ----------
    list_of_products : list
        One ``products`` argument per worker call.
    list_of_dirs : list
        One ``download_dir`` argument per worker call, paired by position
        with ``list_of_products``.
    threads : int or None
        Passed to the pool as ``nodes``. With ``None`` the pool is built
        without arguments and uses its own default.

    Returns
    -------
    The value returned by ``pool.map``: one result per worker call.
    """
    # Only pass `nodes` when the caller asked for a specific pool size
    pool_kwargs = {} if threads is None else {'nodes': threads}
    worker_pool = Pool(**pool_kwargs)

    # map() feeds the i-th entry of each list to the i-th worker call
    return worker_pool.map(download_api_products, list_of_products, list_of_dirs)

In [10]:
# The pool hands one entry of each list to every worker call, so wrap each
# product in its own single-item list and pair it with the output directory.
nested_products = [[single_product] for single_product in products]
list_of_dirs = [OUTPUTDIR for _ in nested_products]

In [11]:
# Number of single-product lists, i.e. the number of worker calls the pool will make
print(len(nested_products))

2


In [12]:
# Parallel processing with timing
# Dedicated timer names are used so that the `start` datetime defining the
# search window is not overwritten; re-running the search cell stays valid.
# perf_counter() is a monotonic clock intended for measuring elapsed time.
download_t0 = time.perf_counter()
parallel_download_api_products(nested_products, list_of_dirs)
download_t1 = time.perf_counter()
print(f'Execution time (minutes): {(download_t1 - download_t0)/60}')

Download of product MSG4-SEVI-MSG15-0100-NA-20220101001242.759000000Z-NA finished.
Download of product MSG4-SEVI-MSG15-0100-NA-20220101002742.635000000Z-NA finished.
Execution time (minutes): 0.9396833221117655
