Download data from DMI:
=========================

https://dmigw.govcloud.dk/v2/metObs/bulk/?api-key=d40321ee-7de5-4417-a0bf-108bd34061ab

https://dmigw.govcloud.dk/v2/climateData/bulk/?api-key=fa9056ec-2f41-4042-828b-91750e966966

This notebook opens and transforms bulk climateData from DMI (Danish Meteorological Institute) into a pandas DataFrame and then saves it as a CSV file for further analysis.

_Please note as this is loading the bulk data from DMI when applied to the full dataset it loads >5000 files with >170 gb of data and takes ~25 minutes to run_

In [5]:
import requests
import pandas as pd
import pygrib
import xarray as xr

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# api keys for different DMI services
forecast_api_key = 'e55dced6-70fb-44f1-a658-cdef62c74b6a'
climate_api_key = 'fa9056ec-2f41-4042-828b-91750e966966'
metobs_api_key = 'd40321ee-7de5-4417-a0bf-108bd34061ab'

In [95]:
url = 'https://dmigw.govcloud.dk/v1/forecastdata/collections/harmonie_dini_sf/items?api-key=e55dced6-70fb-44f1-a658-cdef62c74b6a'

# get the data
response = requests.get(url)

print(response.json())

{'type': 'FeatureCollection', 'features': [{'stac_version': '1.0.0', 'bbox': [-43.169833, 37.688397, 40.069849, 69.907191], 'geometry': {'coordinates': [[[-43.169833, 37.688397], [40.069849, 37.688397], [40.069849, 69.907191], [-43.169833, 69.907191], [-43.169833, 37.688397]]], 'type': 'Polygon'}, 'collection': 'harmonie_dini_sf', 'id': 'HARMONIE_DINI_SF_2024-10-19T090000Z_2024-10-19T090000Z.grib', 'asset': {'data': {'href': 'https://dmigw.govcloud.dk/v1/forecastdata/download/HARMONIE_DINI_SF_2024-10-19T090000Z_2024-10-19T090000Z.grib?api-key=e55dced6-70fb-44f1-a658-cdef62c74b6a', 'title': 'Forecast file download resource', 'type': 'application/x-grib', 'roles': ['data']}}, 'type': 'Feature', 'properties': {'created': '2024-10-19T10:43:46.31Z', 'datetime': '2024-10-19T09:00:00Z', 'modelRun': '2024-10-19T09:00:00Z'}}, {'stac_version': '1.0.0', 'bbox': [-43.169833, 37.688397, 40.069849, 69.907191], 'geometry': {'coordinates': [[[-43.169833, 37.688397], [40.069849, 37.688397], [40.069849,

In [12]:
grib_file = '/Users/johan/Downloads/HARMONIE_DINI_ML_2024-10-21T060000Z_2024-10-23T180000Z.grib'

grbs = pygrib.open(grib_file)

: 

In [None]:
# Initialize an empty list to store DataFrames
data_frames = []

# Iterate over all messages in the GRIB file
for grb in grbs:
    # Extract data and metadata
    data, lats, lons = grb.data()
    parameter_name = grb.name
    parameter_units = grb.units
    level = grb.level
    short_name = grb.shortName
    valid_date = grb.validDate

    # Flatten the data arrays
    data_flat = data.flatten()
    lats_flat = lats.flatten()
    lons_flat = lons.flatten()

    # Create a DataFrame for the current parameter
    df = pd.DataFrame({
        'latitude': lats_flat,
        'longitude': lons_flat,
        'value': data_flat,
        'parameter': parameter_name,
        'units': parameter_units,
        'level': level,
        'short_name': short_name,
        'valid_date': valid_date
    })

    # Append the DataFrame to the list
    data_frames.append(df)

# Concatenate all DataFrames into one
full_df = pd.concat(data_frames, ignore_index=True)

# Display the first few rows
display(full_df.head())

In [6]:
grib_file = '/Users/johan/Downloads/HARMONIE_DINI_ML_2024-10-21T060000Z_2024-10-23T180000Z.grib'

grbs = pygrib.open(grib_file)

In [8]:
for grb in grbs:
    print(grb.keys())

['globalDomain', 'GRIBEditionNumber', 'tablesVersionLatestOfficial', 'tablesVersionLatest', 'grib2divider', 'angleSubdivisions', 'missingValue', 'ieeeFloats', 'isHindcast', 'section0Length', 'identifier', 'discipline', 'editionNumber', 'totalLength', 'sectionNumber', 'section1Length', 'numberOfSection', 'centre', 'centreDescription', 'subCentre', 'tablesVersion', 'masterDir', 'localTablesVersion', 'significanceOfReferenceTime', 'year', 'month', 'day', 'hour', 'minute', 'second', 'dataDate', 'julianDay', 'dataTime', 'productionStatusOfProcessedData', 'typeOfProcessedData', 'md5Section1', 'selectStepTemplateInterval', 'selectStepTemplateInstant', 'stepType', 'is_chemical', 'is_chemical_distfn', 'is_chemical_srcsink', 'is_aerosol', 'is_aerosol_optical', 'setCalendarId', 'deleteCalendarId', 'sectionNumber', 'grib2LocalSectionPresent', 'deleteLocalDefinition', 'sectionNumber', 'gridDescriptionSectionPresent', 'section3Length', 'numberOfSection', 'sourceOfGridDefinition', 'numberOfDataPoints

In [11]:
for grb in grbs:
    print(grb.name)

In [109]:
full_df['parameter'].unique()

array(['High cloud cover', 'Low cloud cover', 'Medium cloud cover',
       'Fraction of cloud cover', 'Temperature', '2 metre temperature',
       'Maximum temperature at 2 metres since previous post-processing',
       'Minimum temperature at 2 metres since previous post-processing',
       '2 metre dewpoint temperature', 'Visibility',
       '10 metre U wind component', 'U component of wind',
       '100 metre U wind component', '10 metre V wind component',
       'V component of wind', '100 metre V wind component',
       '10 metre wind speed', '10 metre wind direction',
       'Total Precipitation', 'Snow depth water equivalent',
       'Time integral of total solid precipitation flux', 'Pressure',
       '2 metre specific humidity', 'Specific humidity',
       'Total column vertically-integrated water vapour',
       'Total column cloud ice water', 'Geopotential',
       'Geometrical height', 'Mixed layer depth',
       'Convective available potential energy', 'Convective inhibiti

In [110]:
display(full_df[full_df['parameter']=='Temperature'] & )

Unnamed: 0,latitude,longitude,value,parameter,units,level,short_name,valid_date
15305180,39.671000,-25.422000,293.777740,Temperature,K,0,t,2024-10-19 09:00:00
15305181,39.675306,-25.400131,293.779694,Temperature,K,0,t,2024-10-19 09:00:00
15305182,39.679607,-25.378258,293.781647,Temperature,K,0,t,2024-10-19 09:00:00
15305183,39.683902,-25.356381,293.787506,Temperature,K,0,t,2024-10-19 09:00:00
15305184,39.688193,-25.334501,293.801178,Temperature,K,0,t,2024-10-19 09:00:00
...,...,...,...,...,...,...,...,...
39793463,62.713052,39.950011,,Temperature,K,2,t,2024-10-19 09:00:00
39793464,62.701702,39.980009,,Temperature,K,2,t,2024-10-19 09:00:00
39793465,62.690345,40.009985,,Temperature,K,2,t,2024-10-19 09:00:00
39793466,62.678982,40.039940,,Temperature,K,2,t,2024-10-19 09:00:00


In [98]:
grb = grbs.select(name='2 metre temperature')[0]


In [101]:
grbs.values()

AttributeError: 'pygrib._pygrib.open' object has no attribute 'values'