In [1]:
import requests
import pandas as pd
import json
import datetime
from datetime import datetime
import netCDF4

print(netCDF4.__version__)

1.5.3


### Use Thredds and opendap for fetching metadata

In [2]:
opendap_url = 'https://thredds.nilu.no/thredds/dodsC/ebas/NO0042G.20180101000000.20190508191242.dmps.particle_number_size_distribution.pm10.1y.1h.NO01L_NILU_DMPSmodel2_ZEP.NO01L_dmps_DMPS_ZEP01.lev2.nc'
dataset = netCDF4.Dataset(opendap_url)

In [3]:
# Fetch ebas specific metadata
ebas_metadata = json.loads(dataset.getncattr('comment'))

### get country name
Function to fetch country name using gaw id and the gawsis API.
Example on using external apis to enrich the data

In [4]:

def get_country_name(gawid):

    station_identifier = gawid
    wigos_api = 'https://oscardepl.wmo.int/surface/rest/api/search/station?wigosId=0-20008-0-{0}'.format(station_identifier) 
    response = requests.get(wigos_api)
    wigos_md = response.json()  
    wigos_md = wigos_md[0]
    country_name = wigos_md['territory']
    
    return country_name

### md_metadata
Metadata related to the metadata

In [5]:
md_metadata = {}

file_identifier = dataset.id
language = 'en'
hierarchy_level = 'dataset' 
datestamp = datetime.strptime(dataset.getncattr('date_metadata_modified'),'%Y-%m-%dT%H:%M:%S UTC').isoformat()

contact = []
person = {}

person['first_name'] = 'Markus'
person['last_name'] = 'Fiebig'
person['organisation_name'] = 'Norwegian Institute for Air Research (NILU)'
person['position_name'] = 'Senior Scientist'
person['role_code'] = ['custodian']
person['delivery_point'] = 'Insituttveien 18'
person['address_city'] = 'Kjeller'
person['administrative_area'] = 'Viken'
person['postal_code'] = '2007'
person['country'] = 'Norway'
person['email'] = 'ebas@nilu.no'

contact.append(person)

online_resource = {'linkage':'https://www.nilu.no/'}

md_metadata['file_identifier'] = file_identifier
md_metadata['language'] = language
md_metadata['hierarchy_level'] = hierarchy_level
md_metadata['datestamp'] = datestamp
md_metadata['contact'] = contact
md_metadata['online_resource'] = online_resource

print(md_metadata)

{'file_identifier': 'NO0042G.20180101000000.20190508191242.dmps.particle_number_size_distribution.pm10.1y.1h.NO01L_NILU_DMPSmodel2_ZEP.NO01L_dmps_DMPS_ZEP01.lev2.nc', 'language': 'en', 'hierarchy_level': 'dataset', 'datestamp': '2019-05-08T19:12:42', 'contact': [{'first_name': 'Markus', 'last_name': 'Fiebig', 'organisation_name': 'Norwegian Institute for Air Research (NILU)', 'position_name': 'Senior Scientist', 'role_code': ['custodian'], 'delivery_point': 'Insituttveien 18', 'address_city': 'Kjeller', 'administrative_area': 'Viken', 'postal_code': '2007', 'country': 'Norway', 'email': 'ebas@nilu.no'}], 'online_resource': {'linkage': 'https://www.nilu.no/'}}


### md_identification
Metadata related to the dataset

In [6]:
md_identification = {}

abstract = dataset.getncattr('summary')
title = dataset.getncattr('title')
identifier = 'https://doi.org/10.21336/some.doi.1' # This should be some sort of PID, like DOI or similar
date = datetime.strptime(dataset.getncattr('date_created'),'%Y-%m-%dT%H:%M:%S UTC').isoformat()
date_type = 'creation'

# creator = originator
#contact

contacts = []

i = 0

for person in dataset.getncattr('creator_name').split(','):
    
    _dict = {}
    
    creator_name = dataset.getncattr('creator_name').split(',')[i]
    creator_name = creator_name.lstrip()
    creator_name = creator_name.split(' ')
    
    creator_email = dataset.getncattr('creator_email').split(',')[i]
    
    if not creator_email:
        creator_email = None
    else:
        pass
    
    first_name = creator_name[0]
    last_name = creator_name[1]
    organisation_name = dataset.getncattr('creator_institution').split(',')[i]
        
    if not organisation_name:
        organisation_name = 'Organization name not available'
    else:
        pass
        
    position_name = None
    role_code = 'creator'
    delivery_point = None
    address_city = None
    administrative_area = None
    postal_code = None
    country = 'Norway' #get_country_name(ebas_metadata['Station GAW-ID'])
    
    
    _dict['first_name'] = first_name
    _dict['last_name'] = last_name
    _dict['organisation_name'] = organisation_name     
    _dict['position_name'] = position_name
    _dict['role_code'] = ['principalInvestigator']
    _dict['delivery_point'] = delivery_point
    _dict['address_city'] = address_city
    _dict['administrative_area'] = administrative_area
    _dict['postal_code'] = postal_code
    _dict['country'] = country
    _dict['email'] = creator_email
    
    contacts.append(_dict)
    
    i+=1

online_resource = {'linkage':'http://ebas.nilu.no/'}

md_identification['abstract'] = abstract
md_identification['title'] = title
md_identification['identifier'] = identifier
md_identification['date'] = date
md_identification['date_type'] = date_type
md_identification['contact'] = contacts
md_identification['online_resource'] = online_resource

print(md_identification)

{'abstract': 'Ground based in situ observations of particle_number_size_distribution at Zeppelin mountain (Ny-Ålesund) (NO0042G) using dmps. These measurements are gathered as a part of the following projects ACTRIS, EMEP, NILU, GAW-WDCA and they are stored in the EBAS database (http://ebas.nilu.no/). Parameters measured are: particle_number_size_distribution in pm10', 'title': 'Ground based in situ observations of particle_number_size_distribution at Zeppelin mountain (Ny-Ålesund) (NO0042G) using dmps', 'identifier': 'https://doi.org/10.21336/some.doi.1', 'date': '2019-05-08T19:12:42', 'date_type': 'creation', 'contact': [{'first_name': 'Markus', 'last_name': 'Fiebig', 'organisation_name': '"Norwegian Institute for Air Research', 'position_name': None, 'role_code': ['principalInvestigator'], 'delivery_point': None, 'address_city': None, 'administrative_area': None, 'postal_code': None, 'country': 'Norway', 'email': 'Markus.Fiebig@nilu.no'}, {'first_name': 'Chris', 'last_name': 'Lunder

### md_constraints
Metadata describing access constraints, data policy and licence

In [7]:
md_constraints = {}

access_constraints = 'otherRestrictions'
use_constraints = 'otherRestrictions'
other_constraints = dataset.getncattr('license')
data_licence = None
metadata_licence = None

md_constraints['access_constraints'] = access_constraints
md_constraints['use_constraints'] = use_constraints
md_constraints['other_constraints'] = other_constraints
md_constraints['data_licence'] = data_licence
md_constraints['metadata_licence'] = metadata_licence

print(md_constraints)

{'access_constraints': 'otherRestrictions', 'use_constraints': 'otherRestrictions', 'other_constraints': 'ACTRIS: http://actris.nilu.no/Content/Documents/DataPolicy.pdf, EMEP: Public open access. We encourage contacting data originators if substatial use of individual time series is planned (fair use data policy)., NILU: Public open access. We encourage contacting data originators if substatial use of individual time series is planned (fair use data policy)., GAW-WDCA: ', 'data_licence': None, 'metadata_licence': None}


### md_keywords
Keywords for describing the data

In [8]:
md_keywords = {}

keywords = dataset.getncattr('keywords').split()
keywords = [k[:60] for k in keywords]

md_keywords['keywords'] =  keywords

print(md_keywords)

{'keywords': ['NO0042G,', 'Zeppelin', 'mountain', '(Ny-Ålesund),', 'pm10,', 'particle_number_size_distribution,', 'GAW-WDCA,', 'ACTRIS,', 'EMEP,', 'NILU']}


### md_data_identification
Metadata related to the spatial and temporal information of the data generation

In [9]:
try:

    # get station metadata

    station_identifier = ebas_metadata['Station GAW-ID']

    wigos_api = 'https://oscardepl.wmo.int/surface/rest/api/search/station?wigosId=0-20008-0-{0}'.format(station_identifier) 

    # here we use the wigos API to get country name
    response = requests.get(wigos_api)
    wigos_md = response.json()  
    wigos_md = wigos_md[0]
    
except:
    pass

In [10]:
md_data_identification = {}

language = 'en'
topic_category = 'climatologyMeteorologyAtmosphere'
description = 'time series of point measurements at surface'
station_wmo_region = ebas_metadata['Station WMO region']
country_name = 'Norway' #wigos_md['territory'] # currently now direct mapping of country, this could be a part of station list in the future

try:
    station_name = ebas_metadata['Station GAW-Name']
except:
    station_name = wigos_md['name']
    
station_identifier = ebas_metadata['Station GAW-ID']

md_data_identification['language'] = language
md_data_identification['topic_category'] = topic_category
md_data_identification['description'] = description
md_data_identification['station_wmo_region'] = station_wmo_region
md_data_identification['country_name'] = country_name
md_data_identification['station_name'] = station_name
md_data_identification['station_identifier'] = station_identifier

print(md_data_identification)

{'language': 'en', 'topic_category': 'climatologyMeteorologyAtmosphere', 'description': 'time series of point measurements at surface', 'station_wmo_region': '6', 'country_name': 'Norway', 'station_name': 'Zeppelin Mountain (Ny Ålesund)', 'station_identifier': 'ZEP'}


### ex_geographic_bounding_box
Metadata related to the spatial and temporal information of the data generation

In [11]:
ex_geographic_bounding_box = {}

west_bound_longitude = dataset.getncattr('geospatial_lon_min')
east_bound_longitude = dataset.getncattr('geospatial_lon_max')
south_bound_latitude = dataset.getncattr('geospatial_lat_min')
north_bound_latitude = dataset.getncattr('geospatial_lat_max')

ex_geographic_bounding_box['west_bound_longitude'] = west_bound_longitude
ex_geographic_bounding_box['east_bound_longitude'] = east_bound_longitude
ex_geographic_bounding_box['south_bound_latitude'] = south_bound_latitude
ex_geographic_bounding_box['north_bound_latitude'] = north_bound_latitude

print(ex_geographic_bounding_box)

{'west_bound_longitude': 11.88934, 'east_bound_longitude': 11.88934, 'south_bound_latitude': 78.90669, 'north_bound_latitude': 78.90669}


### ex_temporal_extent
Metadata describing the temporal information on the content of the dataset

In [12]:
ex_temporal_extent = {}

time_period_begin = datetime.strptime(dataset.getncattr('time_coverage_start'),'%Y-%m-%dT%H:%M:%S UTC').isoformat()
time_period_end = datetime.strptime(dataset.getncattr('time_coverage_end'),'%Y-%m-%dT%H:%M:%S UTC').isoformat()

ex_temporal_extent['time_period_begin'] = time_period_begin
ex_temporal_extent['time_period_end'] = time_period_end

print(ex_temporal_extent)

{'time_period_begin': '2018-01-01T00:00:00', 'time_period_end': '2019-01-01T00:00:00'}


### ex_vertical_extent
Metadata describing the vertical information of instrument

In [13]:
ex_vertical_extent = {}

minimum_value = dataset.getncattr('geospatial_vertical_min')
maximum_value = dataset.getncattr('geospatial_vertical_max')
unit_of_measure = 'm above sea level'
#consider including geospatial_vertical_positive. E.g. geospatial_vertical_positive = "up" ;

ex_vertical_extent['minimum_value'] = None #minimum_value
ex_vertical_extent['maximum_value'] = None #maximum_value
ex_vertical_extent['unit_of_measure'] = unit_of_measure

print(ex_vertical_extent)

{'minimum_value': None, 'maximum_value': None, 'unit_of_measure': 'm above sea level'}


### md_content_information
Metadata describing the list of parameters

In [14]:
md_content_information = {}

try:
    attribute_description = ebas_metadata['Component']
except:
    attribute_description = 'Not available'
    
content_type = 'physicalMeasurement'

md_content_information['attribute_description'] = [attribute_description]
md_content_information['content_type'] = content_type

print(md_content_information)

{'attribute_description': ['particle_number_size_distribution'], 'content_type': 'physicalMeasurement'}


### md_distribution_information
Metadata describing how to access the file, and if there are restrictions

In [15]:
md_distribution_information = {}

data_format = dataset.file_format
version_data_format = dataset.file_format
transfersize = None
dataset_url = dataset.filepath()
protocol = 'http'
description = 'Direct download of data file'
function = 'download'
restriction = {"set": False, "description_url": 'https://ebas-submit.nilu.no/Data-Policy'}

md_distribution_information['data_format'] = data_format
md_distribution_information['version_data_format'] = version_data_format
md_distribution_information['transfersize'] = transfersize
md_distribution_information['dataset_url'] = dataset_url
md_distribution_information['protocol'] = protocol
md_distribution_information['description'] = description
md_distribution_information['function'] = function
md_distribution_information['restriction'] = restriction

print(md_distribution_information)

{'data_format': 'NETCDF3_CLASSIC', 'version_data_format': 'NETCDF3_CLASSIC', 'transfersize': None, 'dataset_url': 'https://thredds.nilu.no/thredds/dodsC/ebas/NO0042G.20180101000000.20190508191242.dmps.particle_number_size_distribution.pm10.1y.1h.NO01L_NILU_DMPSmodel2_ZEP.NO01L_dmps_DMPS_ZEP01.lev2.nc', 'protocol': 'http', 'description': 'Direct download of data file', 'function': 'download', 'restriction': {'set': False, 'description_url': 'https://ebas-submit.nilu.no/Data-Policy'}}


### dq_data_quality_information
Metadata on data quality and collection procedures

In [16]:
dq_data_quality_information = {}

level = 'dataset'
statement = 'Data collected according to instrument specific standard operating procedures, checked on import into data base.'
description = dataset.getncattr('processing_level')

dq_data_quality_information['level'] = level
dq_data_quality_information['statement'] = statement
dq_data_quality_information['description'] = description

print(dq_data_quality_information)

{'level': 'dataset', 'statement': 'Data collected according to instrument specific standard operating procedures, checked on import into data base.', 'description': 'processing_level_test'}


### md_actris_specific
ACTRIS specific metadata, beyond what is required in the WIS profile

In [17]:
md_actris_specific = {}

projects = dataset.getncattr('project')


if 'ACTRIS' in projects:
    legacy = False
else:
    legacy = True

projects = projects.split(',')


platform_type = 'surface_station'
product_type = 'observation'
matrix = 'particle'
sub_matrix = ebas_metadata['Matrix']
instrument_type = ebas_metadata['Instrument type']
program_affiliation = projects
legacy_data = legacy
data_level = ebas_metadata['Data level']
data_sublevel = None
data_product = 'quality assured data'

md_actris_specific['platform_type'] = platform_type
md_actris_specific['product_type'] = product_type
md_actris_specific['matrix'] = matrix
md_actris_specific['sub_matrix'] = sub_matrix
md_actris_specific['instrument_type'] = [instrument_type]
md_actris_specific['program_affiliation'] = ['ACTRIS']
md_actris_specific['legacy_data'] = legacy_data
md_actris_specific['data_level'] = int(data_level)
md_actris_specific['data_sublevel'] = data_sublevel
md_actris_specific['data_product'] = data_product

print(md_actris_specific)

{'platform_type': 'surface_station', 'product_type': 'observation', 'matrix': 'particle', 'sub_matrix': 'pm10', 'instrument_type': ['dmps'], 'program_affiliation': ['ACTRIS'], 'legacy_data': False, 'data_level': 2, 'data_sublevel': None, 'data_product': 'quality assured data'}


### create json
Merge all the metadata into a dictionary

In [18]:
metadata = {}

metadata['md_metadata'] = md_metadata
metadata['md_identification'] = md_identification
metadata['md_constraints'] = md_constraints
metadata['md_keywords'] = md_keywords
metadata['md_data_identification'] = md_data_identification
metadata['ex_geographic_bounding_box'] = ex_geographic_bounding_box
metadata['ex_temporal_extent'] = ex_temporal_extent
metadata['ex_vertical_extent'] = ex_vertical_extent
metadata['md_content_information'] = md_content_information
metadata['md_distribution_information'] = md_distribution_information
metadata['dq_data_quality_information'] = dq_data_quality_information
metadata['md_actris_specific'] = md_actris_specific

# Convert python dictionary to json
md_json = json.dumps(metadata)

print(md_json)

{"md_metadata": {"file_identifier": "NO0042G.20180101000000.20190508191242.dmps.particle_number_size_distribution.pm10.1y.1h.NO01L_NILU_DMPSmodel2_ZEP.NO01L_dmps_DMPS_ZEP01.lev2.nc", "language": "en", "hierarchy_level": "dataset", "datestamp": "2019-05-08T19:12:42", "contact": [{"first_name": "Markus", "last_name": "Fiebig", "organisation_name": "Norwegian Institute for Air Research (NILU)", "position_name": "Senior Scientist", "role_code": ["custodian"], "delivery_point": "Insituttveien 18", "address_city": "Kjeller", "administrative_area": "Viken", "postal_code": "2007", "country": "Norway", "email": "ebas@nilu.no"}], "online_resource": {"linkage": "https://www.nilu.no/"}}, "md_identification": {"abstract": "Ground based in situ observations of particle_number_size_distribution at Zeppelin mountain (Ny-\u00c5lesund) (NO0042G) using dmps. These measurements are gathered as a part of the following projects ACTRIS, EMEP, NILU, GAW-WDCA and they are stored in the EBAS database (http://eb

### Add metadata to API
Post metadata to API

In [19]:
url = "https://dev-actris-md.nilu.no/metadata/add"

headers = {'Content-type': 'application/json', 'Accept': 'application/json'}
r = requests.post(url, json=metadata, headers=headers)

print(r.headers)
#print(r.text)
#print(r.status_code)

{'Server': 'nginx/1.10.3 (Ubuntu)', 'Date': 'Fri, 31 Jan 2020 08:12:24 GMT', 'Content-Length': '0', 'Connection': 'keep-alive', 'Location': 'http://dev-actris-md.nilu.no/Metadata/398', 'Strict-Transport-Security': 'max-age=31536; includeSubDomains'}


# List metadata
Note that not all metadata is available in the metadata view, will implement the rest of the elements likely during next week (end of January)

In [24]:
url = r.headers['Location']

In [25]:
response = requests.get(url)

metadata = response.json()

In [26]:
print(metadata)

[{'md_keywords': {'keywords': ['NO0042G,', 'Zeppelin', 'mountain', '(Ny-Ålesund),', 'pm10,', 'particle_number_size_distribution,', 'GAW-WDCA,', 'ACTRIS,', 'EMEP,', 'NILU']}, 'md_metadata': {'id': 349, 'file_identifier': 'NO0042G.20180101000000.20190508191242.dmps.particle_number_size_distribution.pm10.1y.1h.NO01L_NILU_DMPSmodel2_ZEP.NO01L_dmps_DMPS_ZEP01.lev2.nc', 'language': 'en', 'hierarchy_level': 'dataset', 'online_resource': {'linkage': 'https://www.nilu.no/'}, 'contact': [{'first_name': 'Markus', 'last_name': 'Fiebig', 'organisation_name': 'Norwegian Institute for Air Research (NILU)', 'role_code': ['custodian'], 'country': 'Norway', 'delivery_point': 'Insituttveien 18', 'address_city': 'Kjeller', 'administrative_area': 'Viken', 'postal_code': 2007, 'email': 'ebas@nilu.no', 'position_name': 'Senior Scientist'}], 'datestamp': '2019-05-08T17:12:42.0000000Z'}}]


In [27]:
# all metadata

url = 'http://dev-actris-md.nilu.no/Metadata'

response = requests.get(url)

metadata = response.json()

In [28]:
print(metadata)

[{'md_keywords': {'keywords': ['NO0042G', 'Zeppelin mountain (Ny-Ålesund)', 'pm10', 'particle_number_size_distribution', 'GAW-WDCA', 'ACTRIS', 'EMEP', 'NILU']}, 'md_metadata': {'id': 214, 'file_identifier': 'EBAS240209145', 'language': 'en', 'hierarchy_level': 'dataset', 'online_resource': {'linkage': 'https://www.nilu.no/'}, 'contact': [{'first_name': 'Markus', 'last_name': 'Fiebig', 'organisation_name': 'Norwegian Institute for Air Research', 'role_code': ['custodian', 'originator', 'processor'], 'country': 'Norway', 'delivery_point': 'Insituttveien 18', 'address_city': 'Kjeller', 'administrative_area': 'Viken', 'postal_code': 2007, 'email': 'some.name@email.com', 'position_name': 'Senior Scientist'}], 'datestamp': '2012-05-20T07:45:00.0000000Z'}}, {'md_keywords': {'keywords': ['NO0042G', 'Zeppelin mountain (Ny-Ålesund)', 'pm10', 'particle_number_size_distribution', 'GAW-WDCA', 'ACTRIS', 'EMEP', 'NILU']}, 'md_metadata': {'id': 215, 'file_identifier': 'EBAS24020914123', 'language': 'e