# Upload HYRAS data

In [1]:
from metacatalog import api, ext
import pandas as pd
import xarray as xr
import datetime
import os

In [2]:
# Set to False for a dry run: existing records are looked up, nothing new is added.
UPLOAD = True
# Name of the metacatalog connection to use; switch to "default" for the default connection.
CONNECTION = "test"

In [3]:
# Open a database session for the configured connection and show the engine it is bound to.
session = api.connect_database(CONNECTION)
print('Using:', session.bind)

Using: Engine(postgresql://postgres:***@localhost:5432/test)


In [4]:
# Check whether the IO extension is activated; if the lookup fails, activate and register it.
try:
    print(ext.extension('io'))
except AttributeError:
    # NOTE(review): presumably ext.extension raises AttributeError when 'io' is not registered yet — confirm.
    ext.activate_extension('io', 'metacatalog.ext.io', 'IOExtension')
    from metacatalog.ext.io import IOExtension
    ext.extension('io', IOExtension)

<class 'metacatalog.ext.io.extension.IOExtension'>


## We have 6 variables

We build a DataFrame with a row for each variable, so we can loop over the rows at the end to add an Entry for each variable to metacatalog.

In [5]:
# Start with an empty frame; the following cells add the metadata columns one by one.
metadata = pd.DataFrame()

In [6]:
# Maps each HYRAS variable name used in this notebook (also the name of its data folder)
# to the name of the data variable that is read from the netCDF files.
var_mapping = dict(
    Humidity="hurs",
    Precipitation="pr",
    RadiationGlobal="rsds",
    TemperatureMax="tasmax",
    TemperatureMin="tasmin",
    TemperatureMean="tas",
)

In [7]:
# One row per variable: the notebook's variable name next to the netCDF variable name.
metadata['variable'] = list(var_mapping)
metadata['variable_in_nc'] = list(var_mapping.values())
metadata

Unnamed: 0,variable,variable_in_nc
0,Humidity,hurs
1,Precipitation,pr
2,RadiationGlobal,rsds
3,TemperatureMax,tasmax
4,TemperatureMin,tasmin
5,TemperatureMean,tas


## Title

In [8]:
# Title of each dataset, keyed by the notebook's variable name.
titles = {
    "TemperatureMax": "HYRAS-DE-TASMAX - Raster data set of daily maximum temperature in °C for Germany",
    "TemperatureMean": "HYRAS-DE-TAS - Raster data set of daily mean temperature in °C for Germany",
    "TemperatureMin": "HYRAS-DE-TASMIN - Raster data set of daily minimum temperature in °C for Germany",
    "Humidity": "HYRAS-DE-HURS - Raster data set of daily mean relative humidity in % for Germany",
    "Precipitation": "HYRAS-DE-PRE - Raster data set of daily sums of precipitation in mm for Germany",
    "RadiationGlobal": "HYRAS-DE-RSDS - Raster data set of daily mean global radiation in W/m^2 for Germany",
}
metadata["title"] = metadata["variable"].map(titles)

metadata

Unnamed: 0,variable,variable_in_nc,title
0,Humidity,hurs,HYRAS-DE-HURS - Raster data set of daily mean ...
1,Precipitation,pr,HYRAS-DE-PRE - Raster data set of daily sums o...
2,RadiationGlobal,rsds,HYRAS-DE-RSDS - Raster data set of daily mean ...
3,TemperatureMax,tasmax,HYRAS-DE-TASMAX - Raster data set of daily max...
4,TemperatureMin,tasmin,HYRAS-DE-TASMIN - Raster data set of daily min...
5,TemperatureMean,tas,HYRAS-DE-TAS - Raster data set of daily mean t...


## Author

In [9]:
# Look up the data provider and create the organisation only when uploading is enabled.
dwd_name = 'Deutscher Wetterdienst'
author = api.find_organisation(session, organisation_name=dwd_name, return_iterator=True).first()

if UPLOAD and author is None:
    author = api.add_organisation(session, organisation_name=dwd_name, organisation_abbrev='DWD')

print(author)

Deutscher Wetterdienst (Org.) <ID=7>


In [10]:
# add to metadata
# The organisation is only created when UPLOAD is enabled, so it may still be missing here;
# fail with a clear message instead of an AttributeError on None.
if author is None:
    raise RuntimeError("Organisation 'Deutscher Wetterdienst' not found; set UPLOAD = True to create it.")
metadata['author_id'] = author.id
metadata

Unnamed: 0,variable,variable_in_nc,title,author_id
0,Humidity,hurs,HYRAS-DE-HURS - Raster data set of daily mean ...,7
1,Precipitation,pr,HYRAS-DE-PRE - Raster data set of daily sums o...,7
2,RadiationGlobal,rsds,HYRAS-DE-RSDS - Raster data set of daily mean ...,7
3,TemperatureMax,tasmax,HYRAS-DE-TASMAX - Raster data set of daily max...,7
4,TemperatureMin,tasmin,HYRAS-DE-TASMIN - Raster data set of daily min...,7
5,TemperatureMean,tas,HYRAS-DE-TAS - Raster data set of daily mean t...,7


## Spatial and temporal scale

In [11]:
def _midnight(time_value):
    """Return the calendar day of a time coordinate value as a datetime at 00:00:00.

    The value is converted via its string representation, exactly as the
    min/max values of the time coordinate are handled below.
    """
    stamp = pd.to_datetime(str(time_value)).to_pydatetime()
    return datetime.datetime(stamp.year, stamp.month, stamp.day, 0, 0, 0)


for var, var_nc in var_mapping.items():
    # rows of the metadata frame that belong to this variable
    rows = metadata['variable'] == var

    # open all files of the variable; the context manager closes them again
    # once the extents have been read
    with xr.open_mfdataset(f"data/{var}/*.nc") as ds:
        data = ds[var_nc]

        # get bounding box for spatial extent
        min_lon = float(data.lon.min().values)
        min_lat = float(data.lat.min().values)
        max_lon = float(data.lon.max().values)
        max_lat = float(data.lat.max().values)

        # get temporal extent as datetime objects without time (00:00:00)
        min_time = _midnight(data.time.min().values)
        max_time = _midnight(data.time.max().values)

    # closed ring: the first corner is repeated at the end
    bbox = f"POLYGON(({min_lon} {min_lat},{min_lon} {max_lat},{max_lon} {max_lat},{max_lon} {min_lat}, {min_lon} {min_lat}))"

    # add to metadata
    metadata.loc[rows, 'bbox'] = bbox
    metadata.loc[rows, 'temporal_extent_min'] = min_time
    metadata.loc[rows, 'temporal_extent_max'] = max_time

# temporal resolution is always daily
metadata['temporal_resolution'] = '1440min'

# set spatial resolution to 1000 if variable is Precipitation, else 5000
is_precipitation = metadata['variable'] == 'Precipitation'
metadata.loc[is_precipitation, 'spatial_resolution'] = 1000
metadata.loc[~is_precipitation, 'spatial_resolution'] = 5000

metadata


Unnamed: 0,variable,variable_in_nc,title,author_id,bbox,temporal_extent_min,temporal_extent_max,temporal_resolution,spatial_resolution
0,Humidity,hurs,HYRAS-DE-HURS - Raster data set of daily mean ...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0
1,Precipitation,pr,HYRAS-DE-PRE - Raster data set of daily sums o...,7,POLYGON((1.8346999883651733 45.089900970458984...,1931-01-01,2023-11-15,1440min,1000.0
2,RadiationGlobal,rsds,HYRAS-DE-RSDS - Raster data set of daily mean ...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0
3,TemperatureMax,tasmax,HYRAS-DE-TASMAX - Raster data set of daily max...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0
4,TemperatureMin,tasmin,HYRAS-DE-TASMIN - Raster data set of daily min...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0
5,TemperatureMean,tas,HYRAS-DE-TAS - Raster data set of daily mean t...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0


## Location

In [12]:
metadata["location"] = None  # no location is set; the extent of the datasource's spatial scale is used to locate the spatial data

## License

HYRAS data is licensed under the `GeoNutzV`: 
- https://www.dwd.de/DE/service/copyright/copyright_artikel
- https://www.gesetze-im-internet.de/geonutzv/GeoNutzV.pdf  


### Caveat: the license attribute in the netCDF files differs
netCDF files have attribute license:  
*The HYRAS data set is freely available for research and education as well as for governmental purposes under the condition that Deutscher Wetterdienst is cited as the data source (see https://www.dwd.de/EN/service/copyright/copyright_artikel.html) together with the data set version. However, the data should only be used for the designated project and must not be stored and reused for any other purpose. **Redistribution to third parties** and **commercial use** of these data are specifically prohibited.*  

This deviates from the GeoNutzV, which according to the sources above is the license of the data.

In [13]:
# Verordnung zur Festlegung der Nutzungsbestimmungen für die Bereitstellung von Geodaten des Bundes (GeoNutzV)
# The title is the lookup key, so it is defined once and shared by the search and the insert.
# NOTE: the name `license` shadows Python's interactive `license` helper; it is kept
# because the result is read as `license.id` further down in the notebook.
license_title = 'Verordnung zur Festlegung der Nutzungsbestimmungen für die Bereitstellung von Geodaten des Bundes (GeoNutzV)'
license = api.find_license(session, title=license_title, return_iterator=True).first()

# create the license only if it does not exist yet and uploading is enabled
if license is None and UPLOAD:
    license = api.add_license(session,
                              short_title='GeoNutzV',
                              title=license_title,
                              link='https://www.bmuv.de/fileadmin/Daten_BMU/Download_PDF/Strategien_Bilanzen_Gesetze/130309_geonutzv_bgbi_englisch_bf.pdf',
                              summary='You are free to copy, print, present, alter, process and transmit this work to third parties for commercial and non-commercial use, to merge this work with own data and with the data of others and to combine it to form new and independent datasets, to integrate this work in internal and external business processes, products and applications in public and non-public electronic networks under the following conditions: You have to guarantee that all source notices, which are in the geodata, metadata or geodata service, or other legal notices are recognizable integrated in the optical background, You have to guarantee that all changes, deratived, new designs, or other variants be provided with a changing notice in the source notice, You have to guarantee that, if geodata holding authority requests this, the source notice will be deleted', 
                              full_text="""# Ordinance to Determine the Conditions for Use for the Provision of Spatial Data of the Federation (Verordnung zur Festlegung der Nutzungsbestimmungen für die Bereitstellung von Geodaten  – GeoNutzV)

Of 19 March 2013
On the basis of section 14 number 2 in conjunction with section 11 subs. 3 of the Spatial Data Access Act, of which section 14 number 2 was re-drafted by Article 1 number 4 and section 11 subs. 3 by Article 1 number 2 of the Act of 7 November 2012 (Federal Law Gazette [BGBl.] Part I p. 2289), the Federal Government herewith issues the following ordinance:

## Section 1: Objective and scope

This ordinance governs the conditions under which spatial data and services, including the appropriate metadata, in accordance with section 11 subs. 1 and 2 of the Spatial Data Access Act, are provided by the agencies holding spatial data in accordance with section 2 subs. 1 in conjunction with section 3 subs. 8 of the Spatial Data Access Act.


## Section 2: Uses

(1) Spatial data and services, including the appropriate metadata, shall be provided free of charge for all currently known purposes and for all purposes becoming known in the future of commercial and non-commercial use unless provided otherwise by special legal provision or third-party contractual or statutory rights stand in the way of such arrangement.


(2) The spatial data and metadata provided may in particular
1. be duplicated, printed out, presented, altered, processed and forwarded to third parties;
2. be placed together with own data and third-party data and combined to produce separate, new data sets;
3. be incorporated into internal and external business processes, products and applications in public and non-public electronic networks.

(3) The spatial data services provided may in particular
1. be combined with own services and third-party services;
2. be incorporated into internal and external business processes, products and applications in public and non-public electronic networks.


## Section 3: Sources references

Users shall ensure that

1. all source references and other legal information enclosed with the spatial data, metadata
    and spatial services are incorporated recognisably and optically linked;
2. alterations, processing, new designs or other adaptations are affixed with an indication of
    the alteration or, where the agency holding the spatial data so requires, the enclosed
    source reference is deleted.

## Section 4: Limited liability

If the agency holding the spatial data violates an obligation under public law
incumbent on it vis-à-vis the user, its financing entity shall not be liable towards the user for
the damage resulting therefrom if the agency holding the spatial data is only guilty of
negligence. This shall not apply in the event of an injury to life, limb and health.

## Section 5: Entry into force
This ordinance shall come into force on the day after its promulgation.

Berlin, 19 March 2013
The Federal Chancellor
Dr. Angela Merkel
The Federal Minister
for the Environment, Nature Conservation and Nuclear Safety)
Peter Altmaier



"""
)

print(license)

Verordnung zur Festlegung der Nutzungsbestimmungen für die Bereitstellung von Geodaten des Bundes (GeoNutzV) <ID=10001>


In [14]:
# add to metadata
# The license is only created when UPLOAD is enabled, so it may still be missing here;
# fail with a clear message instead of an AttributeError on None.
if license is None:
    raise RuntimeError("License 'GeoNutzV' not found; set UPLOAD = True to create it.")
metadata["license_id"] = license.id

## Variable & Unit

- All air temperature products (min, max, mean) are mapped to the same metacatalog variable, `air temperature` (open question: should min and max get their own variables?).
- RadiationGlobal is shortwave radiation -> add variable

In [15]:
def _require(found, what):
    """Return `found`, or raise a descriptive LookupError if a database lookup returned None."""
    if found is None:
        raise LookupError(f"{what} not found in the database.")
    return found


def _variable_search_name(var):
    """Return the name used to look up the metacatalog variable for a notebook variable name."""
    if "Temperature" in var:
        # min, max and mean temperature all share the 'air temperature' variable
        return "air temperature"
    if var == "RadiationGlobal":
        return "shortwave radiation"
    # NOTE(review): the '*' characters presumably act as wildcards in find_variable — confirm
    return f"*{var.lower()}*"


# precipitation could be missing
var_precipitation = api.find_variable(session, name='precipitation', return_iterator=True).first()
if var_precipitation is None and UPLOAD:
    keyword_precipitation_id = _require(
        api.find_keyword(session, value='PRECIPITATION AMOUNT', return_iterator=True).first(),
        "Keyword 'PRECIPITATION AMOUNT'",
    ).id
    var_precipitation = api.add_variable(session, name='precipitation', symbol='P', column_names=['precipitation'], unit="millimeter", keyword=keyword_precipitation_id)

# global radiation (here: shortwave radiation) could be missing
var_radiation = api.find_variable(session, name='shortwave radiation', return_iterator=True).first()
if var_radiation is None and UPLOAD:
    keyword_radiation_id = _require(
        api.find_keyword(session, full_path="EARTH SCIENCE > ATMOSPHERE > ATMOSPHERIC RADIATION > SHORTWAVE RADIATION", return_iterator=True).first(),
        "Keyword 'EARTH SCIENCE > ATMOSPHERE > ATMOSPHERIC RADIATION > SHORTWAVE RADIATION'",
    ).id
    var_radiation = api.add_variable(session, name='shortwave radiation', symbol='SR', column_names=['shortwave_radiation'], unit="watt per squaremeter", keyword=keyword_radiation_id)

for var in var_mapping.keys():
    search_name = _variable_search_name(var)
    # a missing variable (e.g. when UPLOAD is False) is reported by name
    # instead of failing with an AttributeError on None
    variable = _require(
        api.find_variable(session, name=search_name, return_iterator=True).first(),
        f"Variable matching '{search_name}'",
    )

    # add to metadata
    metadata.loc[metadata["variable"] == var, "variable_id"] = variable.id

# the column is filled row by row above, so it is cast to integer only once it is complete
metadata["variable_id"] = metadata["variable_id"].astype(int)
metadata

Unnamed: 0,variable,variable_in_nc,title,author_id,bbox,temporal_extent_min,temporal_extent_max,temporal_resolution,spatial_resolution,location,license_id,variable_id
0,Humidity,hurs,HYRAS-DE-HURS - Raster data set of daily mean ...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0,,10001,6
1,Precipitation,pr,HYRAS-DE-PRE - Raster data set of daily sums o...,7,POLYGON((1.8346999883651733 45.089900970458984...,1931-01-01,2023-11-15,1440min,1000.0,,10001,10007
2,RadiationGlobal,rsds,HYRAS-DE-RSDS - Raster data set of daily mean ...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0,,10001,10008
3,TemperatureMax,tasmax,HYRAS-DE-TASMAX - Raster data set of daily max...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0,,10001,1
4,TemperatureMin,tasmin,HYRAS-DE-TASMIN - Raster data set of daily min...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0,,10001,1
5,TemperatureMean,tas,HYRAS-DE-TAS - Raster data set of daily mean t...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0,,10001,1


## Abstract

In [16]:
# Sentence shared by all six abstracts (note the leading space).
usage_note = " The data set can be used, for example, for the analysis of past climate, for bias adjustment of regionalized climate projection data and as input data for hydrological modeling."

# Variable-specific first sentence of each abstract, keyed by the notebook's variable name.
abstract_leads = {
    "TemperatureMax": "HYRAS-DE-TASMAX is a temperature product for Germany in a 5 km x 5 km grid for the period 1951-2020 and is based on daily measured values of temperature maxima.",
    "TemperatureMean": "HYRAS-DE-TAS is a temperature product for Germany in a 5 km x 5 km grid for the period 1951-2020 and is based on daily measured values of temperature.",
    "TemperatureMin": "HYRAS-DE-TASMIN is a temperature product for Germany in a 5 km x 5 km grid for the period 1951-2020 and is based on daily measured values of minimum temperature.",
    "Humidity": "HYRAS-DE-HURS is a relative humidity product for Germany in a 5 km x 5 km grid for the period 1951-2020 and is based on daily measured values of relative humidity.",
    "Precipitation": "HYRAS-DE-PRE is a precipitation product for Germany in a 1 km x 1 km grid for the period 1931 to the previous day and is based on daily measured values of precipitation height.",
    "RadiationGlobal": "HYRAS-DE-RSDS is a global radiation product for Germany in a 5 km x 5 km grid for the period 1951-2020 and is based on daily measured values of sunshine duration and global radiation.",
}

# add data origin to all abstracts
# NOTE(review): this text only describes temperature and relative humidity, yet it is also
# appended to the precipitation and radiation abstracts — confirm that this is intended.
data_origin = "  \nData Origin: Temperature (TAS, TASMIN, TASMAX) and relative humidity (HURS) are based on a combination of nonlinear temperature profiles with non-Euclidean residual interpolation (Krähenmann et al., 2019). The creation of the background field is based on a nonlinear regression at each time step (estimation of regional vertical profiles for 13 subregions subdivided based on weather divides, coastal distance and north-south extent). This also allows temperature inversions to be taken into account. Cold pole stations were identified separately for each time step and excluded for the profile determination. For the residual interpolation, a 5-dimensional inverse distance weighting (5D-IDW cf. Eiselt et al., 2017) is applied, which depends on the geographical longitude and latitude, altitude, coastal distance and heat island effect. For the interpolation of the minimum (TASMIN) and maximum (TASMAX) temperature, the deviations between extreme temperature and mean temperature are interpolated (non-linear regression + residual interpolation) as in TAS and then added to the mean temperature field. This ensures the consistency of the temperature fields with each other (TASMIN <= TAS <= TASMAX). For the interpolation of the relative humidity, the temperature station data was first converted into the dew point temperature and then interpolated in the same way as the temperature data and finally calculated back into the relative humidity using the temperature grid data. During the interpolation, it is ensured afterwards that a maximum humidity of 100 % is achieved"

abstracts = {name: lead + usage_note + data_origin for name, lead in abstract_leads.items()}
metadata["abstract"] = metadata["variable"].map(abstracts)

metadata

Unnamed: 0,variable,variable_in_nc,title,author_id,bbox,temporal_extent_min,temporal_extent_max,temporal_resolution,spatial_resolution,location,license_id,variable_id,abstract
0,Humidity,hurs,HYRAS-DE-HURS - Raster data set of daily mean ...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0,,10001,6,HYRAS-DE-HURS is a relative humidity product f...
1,Precipitation,pr,HYRAS-DE-PRE - Raster data set of daily sums o...,7,POLYGON((1.8346999883651733 45.089900970458984...,1931-01-01,2023-11-15,1440min,1000.0,,10001,10007,HYRAS-DE-PRE is a precipitation product for Ge...
2,RadiationGlobal,rsds,HYRAS-DE-RSDS - Raster data set of daily mean ...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0,,10001,10008,HYRAS-DE-RSDS is a global radiation product fo...
3,TemperatureMax,tasmax,HYRAS-DE-TASMAX - Raster data set of daily max...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0,,10001,1,HYRAS-DE-TASMAX is a temperature product for G...
4,TemperatureMin,tasmin,HYRAS-DE-TASMIN - Raster data set of daily min...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0,,10001,1,HYRAS-DE-TASMIN is a temperature product for G...
5,TemperatureMean,tas,HYRAS-DE-TAS - Raster data set of daily mean t...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0,,10001,1,HYRAS-DE-TAS is a temperature product for Germ...


## Citation

The citations are taken from the dataset description PDF files on the DWD open data server.

In [17]:
# Citation string of each dataset (including its version), keyed by the notebook's variable name.
citations = {
    "TemperatureMax": "Raster data set of daily maximum temperature in °C for Germany - HYRAS-DE-TASMAX, Version v5.0",
    "TemperatureMean": "Raster data set of daily mean temperature in °C for Germany - HYRAS-DE-TAS, Version v5.0",
    "TemperatureMin": "Raster data set of daily minimum temperature in °C for Germany - HYRAS-DE-TASMIN, Version v5.0",
    "Humidity": "Raster data set of daily mean relative humidity in % for Germany - HYRAS-DE-HURS, Version v5.0",
    "Precipitation": "Raster data set of daily sums of precipitation in mm for Germany - HYRAS-DE-PRE, Version v5.0",
    "RadiationGlobal": "Raster data set of daily mean global radiation in W/m^2 for Germany - HYRAS-DE-RSDS, Version v3.0",
}
metadata["citation"] = metadata["variable"].map(citations)

metadata

Unnamed: 0,variable,variable_in_nc,title,author_id,bbox,temporal_extent_min,temporal_extent_max,temporal_resolution,spatial_resolution,location,license_id,variable_id,abstract,citation
0,Humidity,hurs,HYRAS-DE-HURS - Raster data set of daily mean ...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0,,10001,6,HYRAS-DE-HURS is a relative humidity product f...,Raster data set of daily mean relative humidit...
1,Precipitation,pr,HYRAS-DE-PRE - Raster data set of daily sums o...,7,POLYGON((1.8346999883651733 45.089900970458984...,1931-01-01,2023-11-15,1440min,1000.0,,10001,10007,HYRAS-DE-PRE is a precipitation product for Ge...,Raster data set of daily sums of precipitation...
2,RadiationGlobal,rsds,HYRAS-DE-RSDS - Raster data set of daily mean ...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0,,10001,10008,HYRAS-DE-RSDS is a global radiation product fo...,Raster data set of daily mean global radiation...
3,TemperatureMax,tasmax,HYRAS-DE-TASMAX - Raster data set of daily max...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0,,10001,1,HYRAS-DE-TASMAX is a temperature product for G...,Raster data set of daily maximum temperature i...
4,TemperatureMin,tasmin,HYRAS-DE-TASMIN - Raster data set of daily min...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0,,10001,1,HYRAS-DE-TASMIN is a temperature product for G...,Raster data set of daily minimum temperature i...
5,TemperatureMean,tas,HYRAS-DE-TAS - Raster data set of daily mean t...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0,,10001,1,HYRAS-DE-TAS is a temperature product for Germ...,Raster data set of daily mean temperature in °...


## External ID

In [18]:
# External identifier (URN) of each dataset, keyed by the notebook's variable name.
external_ids = {
    "TemperatureMax": "urn:x-wmo:md:de.dwd.cdc::GRD_DEU_P1D_T2M-X_HYRAS-DE",
    "TemperatureMean": "urn:x-wmo:md:de.dwd.cdc::GRD_DEU_P1D_T2M_HYRAS-DE",
    "TemperatureMin": "urn:x-wmo:md:de.dwd.cdc::GRD_DEU_P1D_T2M-N_HYRAS-DE",
    "Humidity": "urn:x-wmo:md:de.dwd.cdc::GRD_DEU_P1D_RH_HYRAS-DE",
    "Precipitation": "urn:x-wmo:md:de.dwd.cdc::GRD_DEU_P1D_RR_HYRAS-DE",
    "RadiationGlobal": "urn:x-wmo:md:de.dwd.cdc::GRD_DEU_P1D_RAD-G_HYRAS-DE",
}
metadata["external_id"] = metadata["variable"].map(external_ids)

metadata

Unnamed: 0,variable,variable_in_nc,title,author_id,bbox,temporal_extent_min,temporal_extent_max,temporal_resolution,spatial_resolution,location,license_id,variable_id,abstract,citation,external_id
0,Humidity,hurs,HYRAS-DE-HURS - Raster data set of daily mean ...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0,,10001,6,HYRAS-DE-HURS is a relative humidity product f...,Raster data set of daily mean relative humidit...,urn:x-wmo:md:de.dwd.cdc::GRD_DEU_P1D_RH_HYRAS-DE
1,Precipitation,pr,HYRAS-DE-PRE - Raster data set of daily sums o...,7,POLYGON((1.8346999883651733 45.089900970458984...,1931-01-01,2023-11-15,1440min,1000.0,,10001,10007,HYRAS-DE-PRE is a precipitation product for Ge...,Raster data set of daily sums of precipitation...,urn:x-wmo:md:de.dwd.cdc::GRD_DEU_P1D_RR_HYRAS-DE
2,RadiationGlobal,rsds,HYRAS-DE-RSDS - Raster data set of daily mean ...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0,,10001,10008,HYRAS-DE-RSDS is a global radiation product fo...,Raster data set of daily mean global radiation...,urn:x-wmo:md:de.dwd.cdc::GRD_DEU_P1D_RAD-G_HYR...
3,TemperatureMax,tasmax,HYRAS-DE-TASMAX - Raster data set of daily max...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0,,10001,1,HYRAS-DE-TASMAX is a temperature product for G...,Raster data set of daily maximum temperature i...,urn:x-wmo:md:de.dwd.cdc::GRD_DEU_P1D_T2M-X_HYR...
4,TemperatureMin,tasmin,HYRAS-DE-TASMIN - Raster data set of daily min...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0,,10001,1,HYRAS-DE-TASMIN is a temperature product for G...,Raster data set of daily minimum temperature i...,urn:x-wmo:md:de.dwd.cdc::GRD_DEU_P1D_T2M-N_HYR...
5,TemperatureMean,tas,HYRAS-DE-TAS - Raster data set of daily mean t...,7,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31,1440min,5000.0,,10001,1,HYRAS-DE-TAS is a temperature product for Germ...,Raster data set of daily mean temperature in °...,urn:x-wmo:md:de.dwd.cdc::GRD_DEU_P1D_T2M_HYRAS-DE


## Thesaurus

We do not use any additional keywords here, as the relevant keywords are already connected to the **variable** of each entry.

## Details

In [19]:
# Details attached to every entry. Despite its name, `details_dict` is a list with
# one dict (key / value / description) per detail.
details_dict = [
    {
        "key": "Projection",
        "value": "EPSG:3034",
        "description": "Projection of the grid"
    },
    {
        "key": "Format description",
        "value": "The grids are written to a NetCDF file. The name of the NetCDF file is defined as follows: parameter_productname_resolution(in km)_year_version_region.nc (e.g. tasmax_hyras_5_2020_v5-0_de.nc)",
        "description": "Description of the format"
    },
    {
        "key": "Uncertainties",
        "value": "Uncertainties may result from the interpolation method used. Incorrect measurements also result in uncertainties in the grid field. For the interpolation of the grids, a different number of stations were used over time, as the measurement network has changed. This must be considered when comparing different years.",
        "description": "Uncertainties of the data"
    },
    {
        "key": "Literature",
        # author list completed: the initial of the third author (Höpp, S.) was missing
        "value": "Razafimaharo, C., Krähenmann, S., Höpp, S., Rauthe, M., Deutschländer, T. (2020): New high-resolution gridded dataset of daily mean, minimum, and maximum temperature and relative humidity for Central Europe (HYRAS). Theor Appl Climatol 142, 1531–1553",
        "description": "Literature describing the data"
    },
]

details_dict

[{'key': 'Projection',
  'value': 'EPSG:3034',
  'description': 'Projection of the grid'},
 {'key': 'Format description',
  'value': 'The grids are written to a NetCDF file. The name of the NetCDF file is defined as follows: parameter_productname_resolution(in km)_year_version_region.nc (e.g. tasmax_hyras_5_2020_v5-0_de.nc)',
  'description': 'Description of the format'},
 {'key': 'Uncertainties',
  'value': 'Uncertainties may result from the interpolation method used. Incorrect measurements also result in uncertainties in the grid field. For the interpolation of the grids, a different number of stations were used over time, as the measurement network has changed. This must be considered when comparing different years.',
  'description': 'Uncertainties of the data'},
 {'key': 'Literature',
  'value': 'Razafimaharo, C., Krähenmann, S., Höpp, Rauthe, M., Deutschländer, T. (2020): New high-resolution gridded dataset of daily mean, minimum, and maximum temperature and relative humidity for

## Add Entries with details and datasource

In [20]:
# Create one metacatalog entry per metadata row, including its details, its datasource
# and the spatial and temporal scale. Entries that already exist (matched by title) are
# left as they are; nothing is created unless UPLOAD is enabled.
for _, row in metadata.iterrows():

    entry = api.find_entry(session, title=row["title"], return_iterator=True).first()

    if UPLOAD and entry is None:
        # add the entry
        entry_fields = dict(
            title=row["title"],
            author=row["author_id"],
            abstract=row["abstract"],
            location=row["location"],
            variable=row["variable_id"],
            citation=row["citation"],
            license=row["license_id"],
            external_id=row["external_id"],
            is_partial=False,
            embargo=False,
        )
        entry = api.add_entry(session, **entry_fields)

        # add details to entry
        api.add_details_to_entries(session, entry, details_dict)

        # datasource: path where .nc files are stored
        datasource_path = f"/data/qt7760/hyras/{row['variable']}"
        entry.create_datasource(
            type="netCDF",
            path=os.path.abspath(datasource_path),
            datatype='raster',
            commit=True,
        )

        # spatial scale first, then temporal scale: (dimension, resolution, extent)
        scales = (
            ('spatial', row['spatial_resolution'], row['bbox']),
            ('temporal', row['temporal_resolution'], (row['temporal_extent_min'], row['temporal_extent_max'])),
        )
        for dimension, resolution, extent in scales:
            entry.datasource.create_scale(
                resolution=resolution,
                extent=extent,
                support=1.0,  # TODO: no one knows
                scale_dimension=dimension,
                commit=True,
            )

    print(entry)

<ID=55 HYRAS-DE-HURS - Rast [relative humidity] >
<ID=56 HYRAS-DE-PRE - Raste [precipitation] >
<ID=57 HYRAS-DE-RSDS - Rast [shortwave radiation] >
<ID=58 HYRAS-DE-TASMAX - Ra [air temperature] >
<ID=59 HYRAS-DE-TASMIN - Ra [air temperature] >
<ID=60 HYRAS-DE-TAS - Raste [air temperature] >
