# Packages

In [2]:
import numpy as np
import pandas as pd
import requests

# API - test metObsAPI

In [106]:
# Settings
api_key = 'a5dfc496-b64b-4a35-9b04-685462e6e426'
# metObsAPI: 'a5dfc496-b64b-4a35-9b04-685462e6e426'
# climateDataAPI: 'cfaf5acf-a58b-44a2-af7c-793ca531edf1'
DMI_URL = 'https://dmigw.govcloud.dk/v2/metObs/collections/observation/items'

start_time = pd.Timestamp(year = 2022, month = 1, day = 1, hour = 0) # år, måned, dag
end_time = pd.Timestamp(year = 2022, month = 1, day = 2, hour = 0)
datetime_str = start_time.tz_localize('UTC').isoformat() + '/' + end_time.tz_localize('UTC').isoformat()

stationIds = ['06030'] # Aalborg Lufthavn
parameterIds = ['temp_dry', 'wind_speed'] # Check

In [107]:
# Check available parameters
r = requests.get('https://dmigw.govcloud.dk/v2/metObs/collections/station/items', params={'api-key': api_key, 'stationId': ['06030']})
df = pd.json_normalize(r.json()['features'])

parameter_ids = np.unique(df['properties.parameterId'][0])  # Generate a list of unique parameter ids
print(parameter_ids)  # Print all unique parameter ids

['cloud_cover' 'cloud_height' 'humidity' 'humidity_past1h'
 'precip_dur_past10min' 'precip_dur_past1h' 'precip_past10min'
 'precip_past1h' 'precip_past1min' 'pressure' 'pressure_at_sea' 'temp_dew'
 'temp_dry' 'temp_grass' 'temp_grass_max_past1h' 'temp_grass_mean_past1h'
 'temp_grass_min_past1h' 'temp_max_past12h' 'temp_max_past1h'
 'temp_mean_past1h' 'temp_min_past12h' 'temp_min_past1h'
 'visib_mean_last10min' 'visibility' 'weather' 'wind_dir'
 'wind_dir_past1h' 'wind_gust_always_past1h' 'wind_max'
 'wind_max_per10min_past1h' 'wind_min' 'wind_min_past1h' 'wind_speed'
 'wind_speed_past1h']


In [108]:
parameterIds = ['temp_dry', 'wind_speed'] # Based on check

In [109]:
dfs = []
for station in stationIds:
    for parameter in parameterIds:
        # Specify query parameters
        params = {
            'api-key' : api_key,
            'datetime' : datetime_str,
            'stationId' : station,
            'parameterId' : parameter,
            'limit' : '300000',  # max limit
        }

        # Submit GET request with url and parameters
        r = requests.get(DMI_URL, params=params)
        # Extract JSON object
        json = r.json() # Extract JSON object
        # Convert JSON object to a MultiIndex DataFrame and add to list
        dfi = pd.json_normalize(json['features'])
        if dfi.empty is False:
            dfi['time'] = pd.to_datetime(dfi['properties.observed'])
            # Drop other columns
            dfi = dfi[['time', 'properties.value', 'properties.stationId', 'properties.parameterId']]
            # Rename columns, e.g., 'properties.stationId' becomes 'stationId'
            dfi.columns = [c.replace('properties.', '') for c in dfi.columns]
            # Drop identical rows (considers both value and time stamp)
            dfi = dfi[~dfi.duplicated()]
            dfi = dfi.set_index(['parameterId', 'stationId', 'time'])
            dfi = dfi['value'].unstack(['stationId','parameterId'])
            dfs.append(dfi)

df = pd.concat(dfs, axis='columns').sort_index()
df.head()

stationId,06030,06030
parameterId,temp_dry,wind_speed
time,Unnamed: 1_level_2,Unnamed: 2_level_2
2022-01-01 00:00:00+00:00,6.1,5.1
2022-01-01 00:10:00+00:00,6.0,4.6
2022-01-01 00:20:00+00:00,6.3,5.1
2022-01-01 00:30:00+00:00,6.3,5.7
2022-01-01 00:40:00+00:00,6.0,5.1


In [110]:
df

stationId,06030,06030
parameterId,temp_dry,wind_speed
time,Unnamed: 1_level_2,Unnamed: 2_level_2
2022-01-01 00:00:00+00:00,6.1,5.1
2022-01-01 00:10:00+00:00,6.0,4.6
2022-01-01 00:20:00+00:00,6.3,5.1
2022-01-01 00:30:00+00:00,6.3,5.7
2022-01-01 00:40:00+00:00,6.0,5.1
...,...,...
2022-01-01 23:20:00+00:00,8.1,5.1
2022-01-01 23:30:00+00:00,8.0,5.1
2022-01-01 23:40:00+00:00,7.9,5.7
2022-01-01 23:50:00+00:00,7.8,5.7


# API - climateAPI

In [111]:
# Settings 10kmGridValue
api_key = 'cfaf5acf-a58b-44a2-af7c-793ca531edf1'
# metObsAPI: 'a5dfc496-b64b-4a35-9b04-685462e6e426'
# climateDataAPI: 'cfaf5acf-a58b-44a2-af7c-793ca531edf1'
DMI_URL = 'https://dmigw.govcloud.dk/v2/climateData/collections/10kmGridValue/items'

start_time = pd.Timestamp(year = 2022, month = 1, day = 1, hour = 0) # år, måned, dag
end_time = pd.Timestamp(year = 2022, month = 1, day = 2, hour = 4)
datetime_str = start_time.tz_localize('UTC').isoformat() + '/' + end_time.tz_localize('UTC').isoformat()

cellIds = ['10km_621_72'] # Helsingør
parameterIds = ['temp_dry', 'wind_speed'] # Check
time_resolutions = ['day']

In [112]:
r = requests.get(DMI_URL, params={'api-key': api_key, 'cellId': '10km_621_72', 'parameterId': 'mean_wind_speed', 'timeResolution': time_resolutions})
print(r)

<Response [200]>


In [113]:
r = requests.get(DMI_URL, params={'api-key': api_key})
print(r)

df = pd.json_normalize(r.json()['features'])
parameter_ids = np.unique(df['properties.parameterId'])  # Generate a list of unique parameter ids
print(parameter_ids)  # Print all unique parameter ids

<Response [200]>
['acc_precip' 'bright_sunshine' 'drought_index' 'leaf_moisture'
 'max_precip_30m' 'max_temp_w_date' 'max_wind_speed_10min'
 'max_wind_speed_3sec' 'mean_cloud_cover' 'mean_pressure' 'mean_radiation'
 'mean_relative_hum' 'mean_temp' 'mean_wind_dir' 'mean_wind_speed'
 'min_temp' 'no_days_acc_precip_01' 'no_days_acc_precip_1'
 'no_days_acc_precip_10' 'no_ice_days' 'no_summer_days'
 'no_tropical_nights' 'pot_evaporation_makkink' 'snow_depth' 'temp_grass'
 'temp_soil_10' 'temp_soil_30' 'vapour_pressure_deficit_mean']


In [114]:
parameterIds = ['mean_temp', 'mean_wind_speed', 'no_ice_days'] # Based on check

In [115]:
dfs = []
for cellId in cellIds:
    for parameter in parameterIds:
        # Specify query parameters
        params = {
            'api-key' : api_key,
            'datetime' : datetime_str,
            'cellId' : cellId,
            'parameterId' : parameter,
            'limit' : '300000'  # max limit
            , 'timeResolution' : time_resolutions
        }

        # Submit GET request with url and parameters
        r = requests.get(DMI_URL, params=params)
        # Extract JSON object
        json = r.json() # Extract JSON object
        # Convert JSON object to a MultiIndex DataFrame and add to list
        dfi = pd.json_normalize(json['features'])
        if dfi.empty is False:
            dfi['time'] = pd.to_datetime(dfi['properties.from'])
            # Drop other columns
            dfi = dfi[['time', 'properties.value', 'properties.cellId', 'properties.parameterId']]
            # Rename columns, e.g., 'properties.stationId' becomes 'stationId'
            dfi.columns = [c.replace('properties.', '') for c in dfi.columns]
            # Drop identical rows (considers both value and time stamp)
            dfi = dfi[~dfi.duplicated()]
            dfi = dfi.set_index(['parameterId', 'cellId', 'time'])
            dfi = dfi['value'].unstack(['cellId','parameterId'])
            dfs.append(dfi)

df = pd.concat(dfs, axis='columns').sort_index()
df.head()

cellId,10km_621_72,10km_621_72,10km_621_72
parameterId,mean_temp,mean_wind_speed,no_ice_days
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2022-01-02 00:00:00.001000+01:00,8.5,3.4,0.0


In [116]:
df

cellId,10km_621_72,10km_621_72,10km_621_72
parameterId,mean_temp,mean_wind_speed,no_ice_days
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
2022-01-02 00:00:00.001000+01:00,8.5,3.4,0.0


# Create columns in CSV

In [134]:
data = pd.read_parquet(path = "Data/DSB_BDK_trainingset.parquet")
data = data.reset_index()


In [137]:
data[['dato', 'station']]

Unnamed: 0,dato,station
0,2016-01-01,0
1,2016-01-01,0
2,2016-01-01,1
3,2016-01-01,2
4,2016-01-01,2
...,...,...
199441,2022-12-31,44
199442,2022-12-31,45
199443,2022-12-31,46
199444,2022-12-31,46


In [143]:
stations_data = pd.read_csv('Data/Stationskoder.csv', sep = ';')
stations_data = stations_data.rename(columns={'Nummer': 'station'})

In [144]:
stations_data

Unnamed: 0,Forkortelse,station,Stationsnavn,10km_cell
0,AB,0,Aalborg,10km_632_55
1,ABL,1,Aalborg Lufthavn,10km_632_55
2,AR,2,Aarhus H,10km_622_57
3,BM,3,Bramming,10km_614_48
4,CPH,4,Københavns Lufthavn,10km_616_72
5,ES,5,Esbjerg,10km_614_46
6,FA,6,Fredericia,10km_615_54
7,FH,7,Frederikshavn,10km_636_59
8,HB,8,Hobro,10km_627_54
9,HG,9,Helsingør,10km_621_72


In [149]:
merged = pd.merge(data[['dato', 'station']], stations_data[['station', '10km_cell']], on='station')

In [160]:
merged['dato'][0] + pd.DateOffset(days=-1)

Timestamp('2015-12-31 00:00:00')

In [162]:
# Settings 10kmGridValue
api_key = 'cfaf5acf-a58b-44a2-af7c-793ca531edf1'
# metObsAPI: 'a5dfc496-b64b-4a35-9b04-685462e6e426'
# climateDataAPI: 'cfaf5acf-a58b-44a2-af7c-793ca531edf1'
DMI_URL = 'https://dmigw.govcloud.dk/v2/climateData/collections/10kmGridValue/items'
parameterIds = parameterIds = ['mean_temp', 'mean_wind_speed'] # Based on check
time_resolutions = ['day']

for index, row in merged.iterrows():
    datetime_str = (row['dato'] + pd.DateOffset(days=-1)).tz_localize('UTC').isoformat() + '/' + (row['dato']).tz_localize('UTC').isoformat()
    cellId = row['10km_cell']
    dfs = []
    for parameter in parameterIds:
        # Specify query parameters
        params = {
            'api-key' : api_key,
            'datetime' : datetime_str,
            'cellId' : cellId,
            'parameterId' : parameter,
            'limit' : '300000'  # max limit
            , 'timeResolution' : time_resolutions
        }

        # Submit GET request with url and parameters
        r = requests.get(DMI_URL, params=params)
        # Extract JSON object
        json = r.json() # Extract JSON object
        # Convert JSON object to a MultiIndex DataFrame and add to list
        dfi = pd.json_normalize(json['features'])
        if dfi.empty is False:
            dfi['time'] = pd.to_datetime(dfi['properties.from'])
            # Drop other columns
            dfi = dfi[['time', 'properties.value', 'properties.cellId', 'properties.parameterId']]
            # Rename columns, e.g., 'properties.stationId' becomes 'stationId'
            dfi.columns = [c.replace('properties.', '') for c in dfi.columns]
            # Drop identical rows (considers both value and time stamp)
            dfi = dfi[~dfi.duplicated()]
            dfi = dfi.set_index(['parameterId', 'cellId', 'time'])
            dfi = dfi['value'].unstack(['cellId','parameterId'])
            dfs.append(dfi)

df = pd.concat(dfs, axis='columns').sort_index()
df.head()

#cellIds = ['10km_621_72'] # Helsingør


KeyboardInterrupt: 

In [163]:
dfi

cellId,10km_609_68
parameterId,mean_wind_speed
time,Unnamed: 1_level_2
2016-01-01 00:00:00.001000+01:00,2.9
