# Upload HYRAS data

In [1]:
from metacatalog import api, ext
import pandas as pd
import xarray as xr
from glob import glob
import datetime

In [2]:
# Notebook-wide switches.
# UPLOAD gates the creation of missing records (author, license) further down.
UPLOAD = True
# CONNECTION is handed to api.connect_database; the alternative used here is 'default'.
CONNECTION = "test_dem"

In [3]:
# Open a session for the configured connection and report the engine it is bound to,
# so it is obvious which database subsequent cells will write to.
session = api.connect_database(CONNECTION)
print(f'Using: {session.bind}')

Using: Engine(postgresql://postgres:***@localhost:5432/test_dem)


In [4]:
# Check whether the IO extension is activated.
# If looking it up raises AttributeError, activate it, import the class and
# register it under the name 'io'.
try:
    # prints the registered extension class when it is already available
    print(ext.extension('io'))
except AttributeError:
    ext.activate_extension('io', 'metacatalog.ext.io', 'IOExtension')
    # imported here (not at the top) because it is only needed on this fallback path
    from metacatalog.ext.io import IOExtension
    ext.extension('io', IOExtension)

<class 'metacatalog.ext.io.extension.IOExtension'>


## We have 6 variables

We build a DataFrame with a row for each variable, so we can loop over the rows at the end to add an Entry for each variable to metacatalog.

In [5]:
# Start with an empty table; the following cells add one column at a time.
metadata = pd.DataFrame()

In [6]:
# Maps each variable name (as used for the data folders and the metadata rows)
# to the name of the corresponding data variable inside the netCDF files.
var_mapping = dict(
    Humidity="hurs",
    Precipitation="pr",
    RadiationGlobal="rsds",
    TemperatureMax="tasmax",
    TemperatureMin="tasmin",
    TemperatureMean="tas",
)

In [7]:
# Seed the table with one row per variable: the variable name and, next to it,
# the name of that variable inside the netCDF files (same order as var_mapping).
metadata['variable'] = list(var_mapping)
metadata['variable_in_nc'] = list(var_mapping.values())
metadata

Unnamed: 0,variable,variable_in_nc
0,Humidity,hurs
1,Precipitation,pr
2,RadiationGlobal,rsds
3,TemperatureMax,tasmax
4,TemperatureMin,tasmin
5,TemperatureMean,tas


## Author

In [8]:
# Look up the organisation record for the data provider; the result is
# compared against None below to detect a missing record.
author = api.find_organisation(
    session,
    organisation_name='Deutscher Wetterdienst',
    return_iterator=True,
).first()

# Only create the record when uploads are enabled and nothing was found.
if UPLOAD and author is None:
    author = api.add_organisation(
        session,
        organisation_name='Deutscher Wetterdienst',
        organisation_abbrev='DWD',
    )

print(author)

Deutscher Wetterdienst (Org.) <ID=2>


In [9]:
# add to metadata
# `author` is None when the lookup found nothing and UPLOAD is False; fail with
# an explicit message instead of an AttributeError on `author.id`.
if author is None:
    raise RuntimeError(
        "Organisation 'Deutscher Wetterdienst' was not found in the database; "
        "set UPLOAD = True to create it before building the metadata table."
    )

# The scalar id is broadcast to every row: all variables share the same author.
metadata['author_id'] = author.id
metadata

Unnamed: 0,variable,variable_in_nc,author_id
0,Humidity,hurs,2
1,Precipitation,pr,2
2,RadiationGlobal,rsds,2
3,TemperatureMax,tasmax,2
4,TemperatureMin,tasmin,2
5,TemperatureMean,tas,2


## Spatial and temporal scale

In [29]:
# Determine the spatial bounding box and the temporal extent of every variable.
#
# The values are collected per variable and mapped onto the matching rows
# afterwards. Assigning a scalar to metadata["bbox"] inside the loop would
# overwrite the whole column on every iteration, leaving all rows with the
# extent of the last variable processed.
bbox_by_var = {}
extent_min_by_var = {}
extent_max_by_var = {}

for var, var_nc in var_mapping.items():
    # the context manager closes the underlying netCDF files after each variable
    with xr.open_mfdataset(f"data/{var}/*.nc") as ds:
        # get bounding box for spatial extent
        min_lon = float(ds[var_nc].lon.min().values)
        min_lat = float(ds[var_nc].lat.min().values)
        max_lon = float(ds[var_nc].lon.max().values)
        max_lat = float(ds[var_nc].lat.max().values)

        # get temporal extent
        min_time = ds[var_nc].time.min().values
        max_time = ds[var_nc].time.max().values

    # closed WKT ring: the first corner is repeated as the last one
    bbox=f"POLYGON(({min_lon} {min_lat},{min_lon} {max_lat},{max_lon} {max_lat},{max_lon} {min_lat}, {min_lon} {min_lat}))"
    bbox_by_var[var] = bbox

    min_time = pd.to_datetime(str(min_time)).to_pydatetime()
    max_time = pd.to_datetime(str(max_time)).to_pydatetime()

    # keep the date only, as datetime object without time (00:00:00)
    extent_min_by_var[var] = datetime.datetime(min_time.year, min_time.month, min_time.day, 0, 0, 0)
    extent_max_by_var[var] = datetime.datetime(max_time.year, max_time.month, max_time.day, 0, 0, 0)

# add to metadata: each row receives the values computed for its own variable
metadata["bbox"] = metadata["variable"].map(bbox_by_var)
metadata["temporal_extent_min"] = metadata["variable"].map(extent_min_by_var)
metadata["temporal_extent_max"] = metadata["variable"].map(extent_max_by_var)

metadata


Unnamed: 0,variable,variable_in_nc,author_id,bbox,temporal_extent_min,temporal_extent_max
0,Humidity,hurs,2,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31
1,Precipitation,pr,2,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31
2,RadiationGlobal,rsds,2,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31
3,TemperatureMax,tasmax,2,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31
4,TemperatureMin,tasmin,2,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31
5,TemperatureMean,tas,2,POLYGON((1.8707000017166138 45.110599517822266...,1951-01-01,2020-12-31


## Location

In [None]:
# No location is set for these entries; the column is filled with None for every row.
# NOTE(review): the original remark suggests Datasource.spatial_scale.extent is meant
# to be used to locate spatial data instead — confirm against metacatalog.
metadata["location"] = None # Datasource.spatial_scale.extent to locate spatial data

## License

HYRAS data is licensed under the `GeoNutzV`: 
- https://www.dwd.de/DE/service/copyright/copyright_artikel
- https://www.gesetze-im-internet.de/geonutzv/GeoNutzV.pdf  


### Caveat: conflicting license statement in the netCDF files
The netCDF files themselves carry a `license` attribute that reads:  
*The HYRAS data set is freely available for research and education as well as for governmental purposes under the condition that Deutscher Wetterdienst is cited as the data source (see https://www.dwd.de/EN/service/copyright/copyright_artikel.html) together with the data set version. However, the data should only be used for the designated project and must not be stored and reused for any other purpose. **Redistribution to third parties** and **commercial use** of these data are specifically prohibited.*  

This deviates from the GeoNutzV, which is the license according to the sources listed above.

In [None]:
# Ordinance to Determine the Conditions for Use for the Provision of Spatial Data of the Federation (GeoNutzV)
# Look up the license record by its full (German) title; the result is compared
# against None below to detect a missing record.
# NOTE(review): the name `license` is reused by the following cell (`license.id`).
license = api.find_license(session, title='Verordnung zur Festlegung der Nutzungsbestimmungen für die Bereitstellung von Geodaten des Bundes (GeoNutzV)', return_iterator=True).first()

# Create the license record only when uploads are enabled and nothing was found.
# `summary` is a short English paraphrase, `full_text` holds the English text of
# the ordinance formatted as Markdown.
if license is None and UPLOAD:
    license = api.add_license(session,
                              short_title='GeoNutzV',
                              title='Verordnung zur Festlegung der Nutzungsbestimmungen für die Bereitstellung von Geodaten des Bundes (GeoNutzV)',
                              link='https://www.bmuv.de/fileadmin/Daten_BMU/Download_PDF/Strategien_Bilanzen_Gesetze/130309_geonutzv_bgbi_englisch_bf.pdf',
                              summary='You are free to copy, print, present, alter, process and transmit this work to third parties for commercial and non-commercial use, to merge this work with own data and with the data of others and to combine it to form new and independent datasets, to integrate this work in internal and external business processes, products and applications in public and non-public electronic networks under the following conditions: You have to guarantee that all source notices, which are in the geodata, metadata or geodata service, or other legal notices are recognizable integrated in the optical background, You have to guarantee that all changes, deratived, new designs, or other variants be provided with a changing notice in the source notice, You have to guarantee that, if geodata holding authority requests this, the source notice will be deleted', 
                              full_text="""# Ordinance to Determine the Conditions for Use for the Provision of Spatial Data of the Federation (Verordnung zur Festlegung der Nutzungsbestimmungen für die Bereitstellung von Geodaten  – GeoNutzV)

Of 19 March 2013
On the basis of section 14 number 2 in conjunction with section 11 subs. 3 of the Spatial Data Access Act, of which section 14 number 2 was re-drafted by Article 1 number 4 and section 11 subs. 3 by Article 1 number 2 of the Act of 7 November 2012 (Federal Law Gazette [BGBl.] Part I p. 2289), the Federal Government herewith issues the following ordinance:

## Section 1: Objective and scope

This ordinance governs the conditions under which spatial data and services, including the appropriate metadata, in accordance with section 11 subs. 1 and 2 of the Spatial Data Access Act, are provided by the agencies holding spatial data in accordance with section 2 subs. 1 in conjunction with section 3 subs. 8 of the Spatial Data Access Act.


## Section 2: Uses

(1) Spatial data and services, including the appropriate metadata, shall be provided free of charge fo r all currently known purposes and for all purposes becoming known in the future of commercial and non-commercial use unless provided otherwise by special legal provision or third-party contractual or statutory rights stand in the way of such arrangement.


(2) The spatial data and metadata provided may in particular
1. be duplicated, printed out, presented, altered, processed and forwarded to third parties;
2. be placed together with own data and third-party data and combined to produce separate, new data sets;
3. be incorporated into internal and external business processes, products and applications in public and non-public electronic networks.

(3) The spatial data services provided may in particular
1. be combined with own services and third-party services;
2. be incorporated into internal and external business processes, products and applications in public and non-public electronic networks.


## Section 3: Sources references

Users shall ensure that

1. all source references and other legal information enclosed with the spatial data, metadata
    and spatial services are incorporated recognisably and optically linked;
2. alterations, processing, new designs or other adaptations are affixed with an indication of
    the alteration or, where the agency holding the spatial data so requires, the enclosed
    source reference is deleted.

## Section 4: Limited liability

If the agency holding the spatial data violates an obligation under public law
incumbent on it vis-à-vis the user, its financing entity shall not be liable towards the user for
the damage resulting therefrom if the agency holding the spatial data is only guilty of
negligence. This shall not apply in the event of an injury to life, limb and health.

## Section 5: Entry into force
This ordinance shall come into force on the day after its promulgation.

Berlin, 19 March 2013
The Federal Chancellor
Dr. Angela Merkel
The Federal Minister
for the Environment, Nature Conservation and Nuclear Safety)
Peter Altmaier



"""
    )

In [None]:
# add to metadata
# `license` is None when the lookup found nothing and UPLOAD is False; fail with
# an explicit message instead of an AttributeError on `license.id`.
if license is None:
    raise RuntimeError(
        "License 'GeoNutzV' was not found in the database; "
        "set UPLOAD = True to create it before building the metadata table."
    )

# The scalar id is broadcast to every row: all variables share the same license.
metadata["license_id"] = license.id

## Variable & Unit

In [43]:
# Try to find an existing metacatalog variable for every HYRAS variable and
# print what the lookup returns (None means nothing matched).
for var in var_mapping:
    if "Temperature" in var:
        # min, max and mean temperature are all looked up as plain "air temperature"
        search_name = "air temperature"
    else:
        # otherwise search for the lower-cased name with wildcards on both sides
        lowered = var.lower()
        print(lowered)
        search_name = f"*{lowered}*"

    variable = api.find_variable(session, name=search_name, return_iterator=True).first()
    print(variable)

humidity
relative humidity [%] <ID=6>
precipitation
None
radiationglobal
None
air temperature [C] <ID=1>
air temperature [C] <ID=1>
air temperature [C] <ID=1>


In [34]:
# Same wildcard search without return_iterator: the cell output shows the result as a list.
api.find_variable(session, name='*humidity*')

[<metacatalog.models.variable.Variable at 0x7fc6d0264050>]

In [32]:
# Print every variable returned by an unfiltered lookup, one per line, to see
# which names are available for matching the HYRAS variables.
known_variables = api.find_variable(session)
for v in known_variables:
    print(v)

air temperature [C] <ID=1>
soil temperature [C] <ID=2>
water temperature [C] <ID=3>
discharge [m3/s] <ID=4>
air pressure [10^2*Pa] <ID=5>
relative humidity [%] <ID=6>
daily rainfall sum [mm/d] <ID=7>
rainfall intensity [mm/h] <ID=8>
solar irradiance [W/m2] <ID=9>
net radiation [W/m2] <ID=10>
gravimetric water content [kg/kg] <ID=11>
volumetric water content [cm3/cm3] <ID=12>
precision [-] <ID=13>
sap flow [cm^3/cm^2h] <ID=14>
matric potential [MPa] <ID=15>
bulk electrical conductivity [mS/cm] <ID=16>
specific electrical conductivity [mS/cm] <ID=17>
river water level [m] <ID=18>
evapotranspiration [mm/d] <ID=19>
drainage [mm/d] <ID=20>
surface elevation [m] <ID=10001>
elevation [m] <ID=10002>


In [38]:
# Display the mapping again for reference while matching variables.
var_mapping

{'Humidity': 'hurs',
 'Precipitation': 'pr',
 'RadiationGlobal': 'rsds',
 'TemperatureMax': 'tasmax',
 'TemperatureMin': 'tasmin',
 'TemperatureMean': 'tas'}

In [49]:
# Open a single RadiationGlobal file to inspect its contents.
# NOTE(review): the dataset is opened without being closed and the path is hard-coded.
xr.open_dataset("./data/RadiationGlobal/rsds_hyras_5_1951_v3-0_de.nc")