In [1]:
import numpy as np
import pandas as pd
import h5py

In [2]:
from pycmlh5.io import _load_metadata

# Check metadata CSV files 

In [3]:
# Preview the root-level metadata definition table (semicolon-separated,
# first column used as the index).
pd.read_csv('metadata_def_root_level.csv', sep=';', index_col=0)

Unnamed: 0_level_0,Units,Type,Mandatory,Description
Metadata name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
file_format,,string,True,This must always be set to ‘cmlh5’
file_format_version,,string,True,"examples: ‘0.1, ‘1.2’, ..."
author_name,,string,False,
author_email,,string,False,


In [4]:
# Preview the CML-level metadata definition table (semicolon-separated,
# first column used as the index).
pd.read_csv('metadata_def_cml_level.csv', sep=';', index_col=0)

Unnamed: 0_level_0,Units,Type,Mandatory,Description
#Metadata name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
site_a_latitude,Decimal degrees,float32,True,
site_a_longitude,Decimal degrees,float32,True,
site_a_altitude,Meter,float32,False,
site_a_antenna_above_ground,Meter,float32,False,
site_a_id,,string,False,
site_b_latitude,Decimal degrees,float32,True,
site_b_longitude,Decimal degrees,float32,True,
site_b_altitude,Meter,float32,False,
site_b_antenna_above_ground,Meter,float32,False,
site_b_id,,string,False,


In [5]:
# Preview the channel-level metadata definition table (semicolon-separated,
# first column used as the index).
pd.read_csv('metadata_def_channel_level.csv', sep=';', index_col=0)

Unnamed: 0_level_0,Units,Type,Mandatory,Description
Metadata name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
frequency,GHz,float32,True,
polarization,"[‘V’, ‘H’, ‘v’, ‘h’]",string,True,
tx_site,,string,False,
rx_site,,string,False,
channel_id,,string,True,
atpc,"[‘True’, ‘False’]",bool,True,
tx_quantization,dBm,float32,False,
rx_quantization,dBm,float32,False,
tx_const,dBm,float32,False,value of the TX level if ATPC is off
channel_name,,string,False,


# Parse all metadata definitions into a dict 

In [6]:
# Load the metadata definitions via the pycmlh5 helper. The functions below
# index the result as metadata_def[level][metadata_name]['Type'] and
# metadata_def[level][metadata_name]['Mandatory'].
# NOTE(review): presumably built from the three CSV files previewed above --
# confirm against pycmlh5.io._load_metadata.
metadata_def = _load_metadata()

# Define functions to check metadata 

In [76]:
def _read_metadata(h5_group, level):
    """Read and validate the metadata attributes of one HDF5 group.

    For every metadata name defined for `level` in the module-level
    `metadata_def`, the attribute is read from `h5_group.attrs`, its
    missing-value markers are converted and its type is checked.

    Parameters
    ----------
    h5_group : object with an `attrs` mapping
        The group whose attributes are read (e.g. an h5py group).
    level : str
        Key into `metadata_def`; the wrappers in this notebook pass
        'root', 'cml' or 'channel'.

    Returns
    -------
    dict
        Maps metadata name to the converted value for every defined entry
        that is present on the group. Optional entries that are absent are
        left out.

    Raises
    ------
    KeyError
        If a mandatory attribute is missing from `h5_group.attrs`, or if
        `level` / a definition field is missing from `metadata_def`.
    TypeError
        Propagated from `_check_metadata_type` for values of the wrong type.
    """
    level_def = metadata_def[level]
    metadata = {}
    for metadata_name in level_def:
        entry_def = level_def[metadata_name]

        # Guard only the attribute lookup. Wrapping the conversion and type
        # check as well would turn any KeyError raised there (e.g. a broken
        # definition table) into a silently skipped "missing" entry.
        try:
            metadata_entry = h5_group.attrs[metadata_name]
        except KeyError:
            # `== True` is kept on purpose: the flag comes from the parsed
            # definition table and may not be a plain Python bool, so a bare
            # truthiness test could change which entries count as mandatory.
            if entry_def['Mandatory'] == True:
                raise KeyError('Mandatory metadata `%s` is missing' % metadata_name)
            continue

        type_str = entry_def['Type']
        metadata_entry = _convert_missing_values(metadata_entry, type_str)
        _check_metadata_type(metadata_name, metadata_entry, type_str)
        metadata[metadata_name] = metadata_entry
    return metadata

def _read_root_metadata(root_g):
    """Return the validated metadata dict for the file's root group."""
    root_metadata = _read_metadata(root_g, 'root')
    return root_metadata

def _read_cml_metadata(cml_g):
    """Return the validated metadata dict for one CML group."""
    cml_metadata = _read_metadata(cml_g, 'cml')
    return cml_metadata

def _read_channel_metadata(chan_g):
    """Return the validated metadata dict for one channel group."""
    channel_metadata = _read_metadata(chan_g, 'channel')
    return channel_metadata

def _check_metadata_type(metadata_name, metadata, type_str):
    if type_str == 'float32':
        if not np.isnan(metadata):
            if not type(metadata) == np.float64:
                raise TypeError('Metadata `%s` is `%s` with type `%s` which should be np.float32' %
                                (metadata_name, metadata, type(metadata)))
    elif type_str == 'string':
        if not metadata is None:
            if not ((type(metadata) == np.string_) or 
                    (type(metadata) == str)):
                raise TypeError('Metadata `%s` is `%s` with type `%s` which should be a string' %
                                (metadata_name, metadata, type(metadata)))

def _convert_missing_values(value, type_str):
    if type_str == 'float32':
        if (value == 'NA') or (value == 'NaN') or (value == 'nan'):
            value = np.nan
    if type_str == 'string':
        if value == 'NA':
            value = None
    return value

In [79]:
# NOTE(review): both paths are absolute and user-specific. The first
# assignment is immediately overwritten by the second, so only the second
# file is actually opened.
fn = '/Users/chwala-c/Desktop/cml_martin2.h5'

fn = '/Users/chwala-c/code/dfg_imap/data/kit/cml/cmls_158_20150620_20150701.h5'

# The file handle is not closed in this cell.
h5_reader = h5py.File(fn, mode='r')

# Build one single-key dict per CML group:
#   {cml_name: {'metadata': {...}, channel_name: {'metadata': {...}}, ...}}
cml_list = []
for cml_g_name in h5_reader['/']:
    cml_g = h5_reader['/' + cml_g_name]
    cml_dict = {cml_g_name: {'metadata': _read_cml_metadata(cml_g)}}
    for chan_g_name, chan_g in cml_g.items():
        cml_dict[cml_g_name][chan_g_name] = {'metadata': _read_channel_metadata(chan_g)}
    cml_list.append(cml_dict)

In [80]:
# Display the per-CML metadata dicts collected in `cml_list`.
cml_list

[{u'cml_0': {u'channel_1': {'metadata': {'frequency': 18.085000000000001}},
   u'channel_2': {'metadata': {'frequency': 19.094999999999999}},
   'metadata': {'id': 'MY2301_2_MY3044_2',
    'length': 9.9664763732756949,
    'site_a_latitude': 47.773000000000003,
    'site_a_longitude': 10.7721,
    'site_b_latitude': 47.819899999999997,
    'site_b_longitude': 10.885899999999999,
    'system_manufacturer': 'Ericsson',
    'system_model': 'MINI LINK Traffic Node'}}},
 {u'cml_1': {u'channel_1': {'metadata': {'frequency': 18.195}},
   u'channel_2': {'metadata': {'frequency': 19.204999999999998}},
   'metadata': {'id': 'MY0551_2_MY2105_2',
    'length': 13.196548723922792,
    'site_a_latitude': 48.484299999999998,
    'site_a_longitude': 11.2067,
    'site_b_latitude': 48.4876,
    'site_b_longitude': 11.0276,
    'system_manufacturer': 'Ericsson',
    'system_model': 'MINI LINK Traffic Node'}}},
 {u'cml_10': {u'channel_1': {'metadata': {'frequency': nan}},
   u'channel_2': {'metadata': {'