In [1]:
import requests
from datetime import datetime
import os
import json
from tqdm.notebook import tqdm
# Root output directory (relative path); extract() joins it with a
# per-date sub-directory name and writes the downloaded files there.
wd = '../data/ecmwf/ensemble'

In [11]:
def _parse_index(text):
    """Decode a newline-delimited JSON index into a list of dicts.

    Blank (or whitespace-only) lines are skipped.
    """
    return [json.loads(line) for line in text.split('\n') if line.strip()]


def _wind_byte_ranges(entries, level='1000'):
    """Build the value of an HTTP ``Range`` header for the u/v index entries.

    Only entries whose ``param`` is ``'u'`` or ``'v'`` and whose ``levelist``
    equals ``level`` are kept. Entries without a ``number`` key are filed
    under member 0. Ranges are emitted ordered by member, then ``u`` before
    ``v``. Members or parameters that are absent from the index are simply
    left out instead of producing a malformed ``0--1`` range.

    Returns an empty string when nothing matched.
    """
    found = {}
    for entry in entries:
        param = entry.get('param')
        if param not in ('u', 'v') or entry.get('levelist') != level:
            continue
        member = int(entry.get('number', 0))
        found[(member, param)] = (entry['_offset'], entry['_length'])

    parts = []
    for key in sorted(found):
        offset, length = found[key]
        if length <= 0:
            # A zero-length entry cannot be expressed as a valid byte range.
            continue
        # HTTP byte ranges are inclusive on both ends.
        parts.append(f'{offset}-{offset + length - 1}')
    return ', '.join(parts)


def extract(year, month, day, hour, step, proxies=None, timeout=120):
    """Download the level-'1000' u/v messages of one ensemble forecast step.

    The function first fetches the ``.index`` file describing the GRIB2 file
    of the requested step, collects the byte offsets of every ``u``/``v``
    entry at level list ``'1000'``, then requests only those bytes from the
    ``.grib2`` file and writes them to ``<wd>/<yyyymmdd>/<filename>``.

    Parameters
    ----------
    year, month, day, hour : int
        Forecast reference date/time. ``hour`` is rounded down to the
        6-hourly run it belongs to (0, 6, 12 or 18); that run hour is used
        both for the ``<HH>z`` directory and in the file name.
    step : int
        Forecast step, inserted in the file name as ``<step>h``.
    proxies : dict, optional
        Proxy mapping passed to ``requests.get``. Defaults to the
        ``http://proxy:3128`` proxy for both http and https; pass ``{}`` to
        connect without the default proxy mapping.
    timeout : float or tuple, optional
        Timeout passed to ``requests.get`` so a stalled connection cannot
        hang the notebook indefinitely.

    Raises
    ------
    requests.HTTPError
        If either HTTP request returns an error status.
    ValueError
        If the index holds no ``u``/``v`` entry at level list ``'1000'``.
    """
    d = datetime(year, month, day, hour)
    date_str = f'{d:%Y%m%d}'
    # Closest (earlier) 6-hourly run; the same hour must appear in the
    # directory name and in the file name, otherwise the URL cannot exist.
    hour_str = f'{(d.hour // 6) * 6:0>2}'
    run = f'{hour_str}z'

    dirpath = os.path.join(wd, date_str)
    # Creates missing parents too and is a no-op when the directory exists.
    os.makedirs(dirpath, exist_ok=True)

    if proxies is None:
        proxies = {'http': 'http://proxy:3128', 'https': 'http://proxy:3128'}

    stem = f'{date_str}{hour_str}0000-{step}h-enfo-ef'
    subpath = f'{date_str}/{run}/0p4-beta/enfo'

    # First get index to prepare precise extraction.
    # Alternative roots that were tried for the index:
    #   https://ecmwf-forecasts.s3.eu-central-1.amazonaws.com
    #   https://data.ecmwf.int/forecasts
    index_root = 'https://ai4edataeuwest.blob.core.windows.net/ecmwf'
    res = requests.get(f'{index_root}/{subpath}/{stem}.index',
                       proxies=proxies, timeout=timeout)
    res.raise_for_status()

    # One JSON document per line, each describing one message of the file.
    f_bytes = _wind_byte_ranges(_parse_index(res.text))
    if not f_bytes:
        raise ValueError(
            f"no 'u'/'v' entries at level '1000' found in index {stem}.index")

    # NOTE(review): the index comes from one host and the data from another;
    # this assumes both serve byte-identical files -- confirm.
    headers = {'Range': f'bytes={f_bytes}'}
    filename = f'{stem}.grib2'
    data_root = 'https://data.ecmwf.int/forecasts'
    res = requests.get(f'{data_root}/{subpath}/{filename}',
                       proxies=proxies, headers=headers, timeout=timeout)
    res.raise_for_status()

    # NOTE(review): a multi-range request may be answered with a
    # multipart/byteranges body; the payload is stored exactly as received --
    # confirm the downstream GRIB reader tolerates the part separators.
    with open(os.path.join(dirpath, filename), 'wb') as f:
        f.write(res.content)

In [16]:
# Reference date/time of the forecast to download.
year = 2023
month = 6
day = 19
hour = 6

# Fifteen forecast steps, three hours apart (0, 3, ..., 42).
for step in tqdm(range(15)):
    extract(year, month, day, hour, step * 3)

  0%|          | 0/15 [00:00<?, ?it/s]