In [1]:
# !pip install cfchecker
# !pip install cfunits 
# !pip install cfunits --upgrade

import xarray as xr
import numpy as np
import warnings

# import cfchecker
# import cfunits

# CF Checker
This notebook collects the modules used to test the CF compliance of a given netCDF file.

In [2]:
# Bind the checker module under its full name: every cell in this notebook
# calls it as `cf_conventions.<function>`, so importing it only under an alias
# raises NameError on a fresh kernel.
from zutils import cf_conventions
zcf = cf_conventions  # short alias bound by the original import, kept for compatibility
# Note: only the last expression of a cell is displayed, so this dir() call
# produces no visible output.
dir(cf_conventions)
cf_conventions.get_canonical_units()


{'temperature': ['degree', 'deg', 'Kelvin', 'K'],
 'air_temperature': ['degree', 'deg', 'Kelvin', 'K'],
 'seawater_temperature': ['degree', 'deg', 'Kelvin', 'K'],
 'velocity': ['m/s', 'm s-1'],
 'speed': ['m/s', 'm s-1'],
 'seawater_velocity': ['m/s', 'm s-1'],
 'seawater_speed': ['m/s', 'm s-1'],
 'eastward_seawater_velocity': ['m/s', 'm s-1'],
 'northward_seawater_velocity': ['m/s', 'm s-1'],
 'wind_velocity': ['m/s', 'm s-1'],
 'wind_speed': ['m/s', 'm s-1'],
 'easterly_wind_velocity': ['m/s', 'm s-1'],
 'northerly_wind_speed': ['m/s', 'm s-1'],
 'thermocline_depth_below_sea_surface': ['m'],
 'pressure_sensor_depth_below_sea_surface': ['m'],
 'sensor_depth_below_sea_surfacesea_floor_depth_below_sea_surface': ['m'],
 'distance': ['m'],
 'distance_from_instrument': ['m'],
 'distance_from_seabed': ['m'],
 'distance_from_surface': ['m'],
 'sea_surface_wave_directional_spread': ['degree', 'deg'],
 'sea_surface_wave_directional_spread_at_variance_spectral_density_maximum': ['degree',
  'd

In [3]:

# """
# Module for checking CF compliance of netcdf files or xarray Datasets. 

# The conventions are not coded in their entirety, and are not coded perfectly. This may lead to both false positives and false negatives.
#     e.g. 1: Only canonical units are coded, so a height of millimetres (mm) would fail.
#     e.g. 2: Some additional Standard Names have been added which are not (yet) in the CF Conventions.

# Currently variables called time are not checked for CF compliance, because xarray alters time variables when it opens a file. TODO: decide how these should be checked.

# Variables with the attribute cf_compliant set to False will not be checked for cf_compliance. This is typically 
# used for intermediate processing quantities, QAQC flags, and plotting aid tools. 

# Data variables used to assist with QAQC, plotting, or processing can take a cf_compliant attribute. If this 
# is set to boolean False, the variable will not be checked for CF compliance. 

# """

# class not_implemented_error(Exception):
#     pass

# class cf_version_error(Exception):
#     pass

# class attribute_error(Exception):
#     pass

# class unit_error(Exception):
#     pass

# def check_all(file, CF_version):
#     """
#     Pass a netcdf file and a CF version to check compliance.
    
#     If the netcdf file is a path to a file it will be opened as an xarray Dataset. 
    
#     If the netcdf can also be passed as an xarray Dataset directly.
    
#     Passing xarray DataArrays and netCDF4 objects is not yet implemented. 
#     """
    
#     if isinstance(file, str):
#         print('Filename given, opening dataset')
#         ds = xr.open_dataset(file)
#     elif isinstance(file, xr.Dataset):
#         ds = file
#     elif isinstance(file, xr.DataArray):
#         raise(not_implemented_error('Passing xarray DataArrays has not yet been implemented'))
              
#     check_var_names(ds, CF_version)
#     check_global_attrs(ds, CF_version)
#     check_var_attrs(ds, CF_version)
    
#     print('All checks passed')
    
# def check_global_attrs(ds, CF_version):

#     CF_version = CF_version.lower()
    
#     required_attrs = get_global_attrs(CF_version)
    
#     for required_attr in required_attrs:
#         if not required_attr in ds.attrs.keys():
#             raise(attribute_error('No global attribute "{}"'.format(required_attr)))
#         elif ds.attrs[required_attr] in ['?', '', 'blank']:
#             warnings.warn('Global attribute {} is {}'.format(required_attr, ds.attrs[required_attr]))
    
#     print('Global attribute checks passed')
    
# def check_var_names(ds, CF_version):
    
#     CF_version = CF_version.lower()
    
#     if CF_version in ['1.7']:
#         "There are no particular requirements for variable names in this version."
#         pass
#     else:
#         raise(cf_version_error("CF checker module has not been developed for version {}".format(CF_version)))
        
#     print('Variable name checks passed')
    
# def check_var_attrs(ds, CF_version):
#     """
#     Checks that all data variables and coordinates are CF compliant.
    
#     NOTE: if a variable has an attribute cf_compliant, and this is False, the variable will not be checked.
#     """
    
#     stardard_name_to_long_name(ds)
    
#     CF_version = CF_version.lower()
    
#     required_attrs = get_var_attrs(CF_version)
    
#     keys = list(ds.data_vars.keys()) + list(ds.coords.keys())
    
#     if CF_version in ['1.7']:
#         for key in keys:
            
#             if key == 'time':
#                 print('Variable {} is not being checked for CF compliance'.format(key))
#                 continue            
            
#             if 'cf_compliant' in ds[key].attrs.keys() and not ds[key].attrs['cf_compliant']:
#                 print('Variable {} is not being checked for CF compliance'.format(key))
#                 continue
                
#             for required_attr in required_attrs:
#                 if not required_attr in ds[key].attrs.keys():
#                     raise(attribute_error('No attribute "{}" in variable {}'.format(required_attr, key))) 
                
                
#                 if ds[key].attrs[required_attr] in ['?', '', 'blank']:
#                     warnings.warn('Attribute {} in variable {} is {}'.format(required_attr, key, ds[key].attrs[required_attr]))
         
#             check_standard_names_and_canonical_units(ds[key])
#     else:
#         raise(cf_version_error("CF checker module has not been developed for version {}".format(CF_version)))
        
#     print('Variable name checks passed')

#     ds.coords
    
# def check_standard_names_and_canonical_units(da):
#     """
#     For a given DataArray we check that the standard name exists within the subset of standard names defined in this module. 
#     We will also check whether the units are equal to or at least equivalent to canonical units.
    
    
#     NOTE: THESE ARE NOT REAL CF STANDARD NAMES. MANY ARE MADE UP. 
#     """
    
#     standard_name = da.attrs['standard_name']
#     units = da.attrs['units']
    
#     temperature_list = ['temperature', 'air_temperature', 'seawater_temperature']
#     velocity_list = ['velocity', 
#                      'speed', 
#                      'seawater_velocity',
#                      'seawater_speed',
#                      'eastward_seawater_velocity',
#                      'northward_seawater_velocity',
#                      'wind_velocity',
#                      'wind_speed',
#                      'easterly_wind_velocity',
#                      'northerly_wind_speed']
    
# #     depth_list = ['depth',
# #                   'water_depth',
# #                   'total_water_depth',
# #                   'thermocline_depth',
# #                   'pressure_sensor_depth',
# #                   'sensor_depth']
    
#     depth_list = ['thermocline_depth_below_sea_surface',
#                   'pressure_sensor_depth_below_sea_surface',
#                   'sensor_depth_below_sea_surface',
#                   'sea_floor_depth_below_sea_surface']
    
#     distance_list = ['distance',
#                   'distance_from_instrument',
#                   'distance_from_seabed',
#                   'distance_from_surface']
    
#     surface_wave_types = ['sea_surface_wave', 
#                   'sea_surface_wind_wave', 
#                   'sea_surface_swell_wave', 
#                   'sea_surface_primary_swell_wave', 
#                   'sea_surface_secondary_swell_wave', 
#                   'sea_surface_tertiary_swell_wave', 
#                   'sea_surface_infra_gravity_wave']
    
#     ## DEFINED FOR EACH WAVE TYPE
#     surface_wave_direction_calc_list = [
#         'directional_spread',
#         'directional_spread_at_variance_spectral_density_maximum',
#         'energy_at_variance_spectral_density_maximum',
#         'from_direction',
#         'from_mean_direction',
#         'from_direction_at_variance_spectral_density_maximum', # Peak period
#     ]
    
#     surface_wave_height_calc_list = [
#             'significant_height',
#             'mean_height',
#             'mean_height_of_highest_third',
#             'mean_height_of_highest_tenth',
#             'maximum_height',
#             'maximum_crest_height',
#             'maximum_trough_depth',
#     ]
    

#     surface_wave_period_calc_list = [
#         'mean_period',
#         'mean_period_from_variance_spectral_density_first_frequency_moment',
#         'mean_period_from_variance_spectral_density_inverse_frequency_moment',
#         'mean_period_from_variance_spectral_density_second_frequency_moment',
#         'mean_period_of_highest_third',
#         'mean_period_of_highest_tenth',
#         'significant_period',
#         'period_at_variance_spectral_density_maximum',
#         'period_of_highest_wave',
#         'maximum_period',
#     ]

    
#     surface_wave_slope_calc_list = [
#         'maximum_steepness',
#     ]
    
#     wavedirection_list = []
#     for wave in surface_wave_types:
#         for calc in surface_wave_direction_calc_list:
#             wavedirection_list.append(wave + '_' + calc)
            
#     waveheight_list = []
#     for wave in surface_wave_types:
#         for calc in surface_wave_height_calc_list:
#             waveheight_list.append(wave + '_' + calc)
            
#     waveperiod_list = []
#     for wave in surface_wave_types:
#         for calc in surface_wave_period_calc_list:
#             waveperiod_list.append(wave + '_' + calc)
            
#     waveslope_list = []
#     for wave in surface_wave_types:
#         for calc in surface_wave_slope_calc_list:
#             waveslope_list.append(wave + '_' + calc)
        
    
#     if standard_name in temperature_list:
#         if not units in ['degree', 'deg','Kelvin', 'K']:
#             raise(Exception('{} is not a recognised unit for {} [Check use of Capitals]'.format(units, standard_name)))
#         else:
#             return
    
#     if standard_name in velocity_list:
#         if not units in ['m/s', 'm s-1']:
#             raise(Exception('{} is not a recognised unit for {} [Check use of Capitals]'.format(units, standard_name)))
#         else:
#             return
        
#     if standard_name in depth_list:
#         if not units in ['m']:
#             raise(Exception('{} is not a recognised unit for {} [Check use of Capitals]'.format(units, standard_name)))
#         else:
#             return
        
#     if standard_name in wavedirection_list:
#         if not units in ['degree', 'deg']:
#             raise(Exception('{} is not a recognised unit for {} [Check use of Capitals]'.format(units, standard_name)))
#         else:
#             return
        
#     if standard_name in waveheight_list:
#         if not units in ['m']:
#             raise(Exception('{} is not a recognised unit for {} [Check use of Capitals]'.format(units, standard_name)))
#         else:
#             return
        
#     if standard_name in waveperiod_list:
#         if not units in ['s']:
#             raise(Exception('{} is not a recognised unit for {} [Check use of Capitals]'.format(units, standard_name)))
#         else:
#             return
        
#     if standard_name in waveslope_list:
#         if not units in ['1']:
#             raise(Exception('{} is not a recognised unit for {} [Check use of Capitals]'.format(units, standard_name)))
#         else:
#             return

#     raise(Exception('{} is not a recognised standard_name'.format(standard_name)))
    
# """
# GET_* methods. Generally return lists.
# """
# def get_global_attrs(CF_version):
    
#     if CF_version == '1.7':
#         required_attrs = ['Conventions', 'title', 'institution', 'source', 'history', 'references', 'comment']
#     else:
#         raise(cf_version_error("CF checker module has not been developed for version {}".format(CF_version)))
        
#     return required_attrs

# def get_var_attrs(CF_version):
    
#     if CF_version in ['1.7']:
#         required_attrs = ['units', 'long_name', 'standard_name']
#     else:
#         raise(cf_version_error("CF checker module has not been developed for version {}".format(CF_version)))
        
#     return required_attrs

# """
# FORCE_*_COMPLIANCE methods. Prepare Netcdfs for CF compliance. Generally add the members of GET Method lists to some other lists.
# """
# def force_compliance(ds, CF_version):
    
#     pass
    
# def force_global_attrs_compliance(ds, CF_version):
    
#     required_attrs = get_global_attrs(CF_version)
    
#     for required_attr in required_attrs:
#         if not required_attr in ds.attrs.keys():
#             ds.attrs[required_attr] = '?'
            
#     ds.attrs['Conventions'] = 'CF-' + CF_version
        
# def force_var_attrs_compliance(ds, CF_version):
    
#     required_attrs = get_var_attrs(CF_version)
    
#     for data_var in ds.data_vars:
#         for required_attr in required_attrs:
#             if not required_attr in ds[data_var].attrs.keys():
#                 ds[data_var].attrs[required_attr] = '?'
                    
# """
# Add methods. Append timestamped strings to attributes.
# """
# def add_history(ds, author, string):
        
#     attr = 'history'
#     add_string(ds, attr, author, string)
        
# def add_comment(ds, author, string):
        
#     attr = 'comment'
#     add_string(ds, attr, author, string)
        
# def add_string(ds, attr, author, string):
    
#     new_string = datetime.datetime.now().isoformat() + ': ' + '[{}]'.format(author) + ' ' + string
    
#     if not attr in ds.attrs.keys():
#         ds.attrs[attr] = new_string
#     elif ds.attrs[attr] in ['', '?', 'blank']:
#         ds.attrs[attr] = new_string
#     else:
#         ds.attrs[attr] = ds.attrs[attr] + ';' + new_string
        
#     return new_string

# """
# Misc methods
# """
# def stardard_name_to_long_name(ds):
#     """
#     Every variable with a standard name but no long_name will have the long name set to the standard name.
#     """

#     keys = list(ds.data_vars.keys()) + list(ds.coords.keys())
    
#     for key in keys:
#         if 'standard_name' in ds[key].attrs:
#             if not 'long_name' in ds[key].attrs:
#                 ds[key].attrs['long_name'] = ds[key].attrs['standard_name']
#             elif  ds[key].attrs['long_name'] in ['', '?', 'blank']:
#                 ds[key].attrs['long_name'] = ds[key].attrs['standard_name']


# Check the file as is
The file is not CF compliant, so the check will fail early. 

In [10]:
# Run the full CF-1.7 check on the raw currents file. The file has not been
# prepared yet, so the check is expected to fail; the failure is the point of
# this cell. Catch and display it so that Restart & Run All can continue past
# this demonstration instead of halting here.
fn = 'SBMD_SB_FT02_Sunset 1_currents.nc'
try:
    cf_conventions.check_all(fn, '1.7')
except Exception as err:
    # Show the error type and message, e.g. the first missing attribute.
    print('{}: {}'.format(type(err).__name__, err))

Filename given, opening dataset
Variable name checks passed


attribute_error: No global attribute "Conventions"

# Actually make the file compliant
There should be minimal warnings now. 

In [12]:
# Re-open the currents file named by `fn` and prepare its metadata for the
# CF-1.7 check. Order matters: the global attributes are forced first, then a
# history entry is appended, then the per-variable check is run.
ds = xr.open_dataset(fn)
cf_conventions.force_global_attrs_compliance(ds, '1.7')
# Per-variable forcing is currently disabled:
# force_var_attrs_compliance(ds, '1.7')

# Still need to give real values to the attributes
# (placeholder values below are disabled until the real ones are known)
# ds.attrs['title'] = 'Measured data from an RDI ADCP'
# ds.attrs['institution'] = 'O2 Metocean'
# ds.attrs['source'] = 'RDI ADCP'
# ds.attrs['history'] = ''
# ds.attrs['references'] = 'O2 Metocean QAQC COnventions; CF Conventions version 1.7'
# ds.attrs['comment'] = ''
# Record this metadata change in the dataset (author tag 'AZ').
cf_conventions.add_history(ds, 'AZ', 'Modified metadata')

# Disabled: standard_name / long_name / units for the measured variables.
# ds.U.attrs['standard_name'] = 'eastward_seawater_velocity' 
# ds.U.attrs['long_name'] = 'eastward_seawater_velocity' 
# ds.U.attrs['units'] = 'm/s' 

# ds.V.attrs['standard_name'] = 'northward_seawater_velocity' 
# ds.V.attrs['long_name'] = 'northward_seawater_velocity' 
# ds.V.attrs['units'] = 'm/s' 

# ds.depth.attrs['standard_name'] = 'pressure_sensor_depth_below_sea_surface' 
# ds.depth.attrs['long_name'] = 'pressure_sensor_depth_below_sea_surface' 
# ds.depth.attrs['units'] = 'm' 

# # Specify variables which do not need to be CF compliant. 
# ds.flag.attrs['cf_compliant'] = False
# ds.depth_last_good.attrs['cf_compliant'] = False
# ds.depth_blank.attrs['cf_compliant'] = False
# ds.distance.attrs['cf_compliant'] = False
# ds.time.attrs['cf_compliant'] = False

# Check the per-variable attributes. NOTE(review): with the assignments above
# still commented out, the recorded run of this cell stops here with
# 'No attribute "units" in variable flag'.
cf_conventions.check_var_attrs(ds, '1.7')

# Full check, disabled until the variable attributes above are filled in:
# check_all(ds, '1.7')


attribute_error: No attribute "units" in variable flag

In [13]:
# Display the dataset so its current variables and attributes can be inspected.
ds

In [367]:
# Make the waves file pass the CF-1.7 check:
#   1. force the required global and per-variable attributes to exist,
#   2. give the global attributes real values and log the change,
#   3. label every measured variable with a standard_name and units,
#   4. exempt helper / QAQC variables from the check,
#   5. run the full check.
# The checker functions are called through the `cf_conventions` module, as in
# the other cells; the bare names only exist in the commented-out reference
# copy and raise NameError on a fresh kernel.
ds = xr.open_dataset('SBMD_SB_FT05_Sunset 1_waves.nc')
cf_conventions.force_global_attrs_compliance(ds, '1.7')
cf_conventions.force_var_attrs_compliance(ds, '1.7')

ds.attrs.update({
    'title': 'Measured data from an RDI ADCP',
    'institution': 'O2 Metocean',
    'source': 'RDI ADCP',
    'history': '',
    'references': 'O2 Metocean QAQC COnventions; CF Conventions version 1.7',
    'comment': '',
})
cf_conventions.add_history(ds, 'AZ', 'Modified metadata')
cf_conventions.add_comment(ds, 'AZ', 'Modified metadata')

# variable name -> (standard_name, units)
# Sea (wind-wave) and swell variables use their own wave-type prefix; the
# peak-direction entries previously reused the total-sea name for all three.
wave_var_metadata = {
    'Hs': ('sea_surface_wave_significant_height', 'm'),
    'Hs_Sea': ('sea_surface_wind_wave_significant_height', 'm'),
    'Hs_Swell': ('sea_surface_swell_wave_significant_height', 'm'),

    'Tp': ('sea_surface_wave_period_at_variance_spectral_density_maximum', 's'),
    'Tp_Sea': ('sea_surface_wind_wave_period_at_variance_spectral_density_maximum', 's'),
    'Tp_Swell': ('sea_surface_swell_wave_period_at_variance_spectral_density_maximum', 's'),

    'Dp': ('sea_surface_wave_from_direction_at_variance_spectral_density_maximum', 'deg'),
    'Dp_Sea': ('sea_surface_wind_wave_from_direction_at_variance_spectral_density_maximum', 'deg'),
    'Dp_Swell': ('sea_surface_swell_wave_from_direction_at_variance_spectral_density_maximum', 'deg'),

    'Depth': ('pressure_sensor_depth_below_sea_surface', 'm'),

    'Hmax': ('sea_surface_wave_maximum_height', 'm'),
    'Hmean': ('sea_surface_wave_mean_height', 'm'),
    'H_1_3': ('sea_surface_wave_mean_height_of_highest_third', 'm'),
    'H_1_10': ('sea_surface_wave_mean_height_of_highest_tenth', 'm'),

    'Tmax': ('sea_surface_wave_maximum_period', 's'),
    'Tmean': ('sea_surface_wave_mean_period', 's'),
    'T_1_3': ('sea_surface_wave_mean_period_of_highest_third', 's'),
    'T_1_10': ('sea_surface_wave_mean_period_of_highest_tenth', 's'),

    'Dmean': ('sea_surface_wave_from_mean_direction', 'deg'),

    'Temp': ('seawater_temperature', 'deg'),
}
for var_name, (standard_name, units) in wave_var_metadata.items():
    ds[var_name].attrs['standard_name'] = standard_name
    ds[var_name].attrs['units'] = units

# Variables which do not need to be CF compliant: the per-depth-level
# magnitude/direction pairs (the level count is read from the first entry of
# `bins`), plus the QAQC flags and the burst / timestamp component variables.
n_depth_levels = int(ds.bins.values[0])
non_cf_vars = []
for level in range(1, n_depth_levels + 1):
    non_cf_vars.append('depthlevel{}Magnitude'.format(level))
    non_cf_vars.append('depthlevel{}Direction'.format(level))
non_cf_vars += [
    'bins',
    'flag', 'flag_Hs', 'flag_Hs_Sea', 'flag_Hs_Swell',
    'SPflag_Hs', 'SPflag_Hs_Sea', 'SPflag_Hs_Swell',
    'burst', 'yy', 'mm', 'dd', 'HH', 'MM', 'SS', 'CC',
]
for var_name in non_cf_vars:
    ds[var_name].attrs['cf_compliant'] = False

cf_conventions.check_all(ds, '1.7')

Variable name checks passed
Global attribute checks passed
Variable burst is not being checked for CF compliance
Variable yy is not being checked for CF compliance
Variable mm is not being checked for CF compliance
Variable dd is not being checked for CF compliance
Variable HH is not being checked for CF compliance
Variable MM is not being checked for CF compliance
Variable SS is not being checked for CF compliance
Variable CC is not being checked for CF compliance
Variable bins is not being checked for CF compliance
Variable depthlevel1Magnitude is not being checked for CF compliance
Variable depthlevel1Direction is not being checked for CF compliance
Variable depthlevel2Magnitude is not being checked for CF compliance
Variable depthlevel2Direction is not being checked for CF compliance
Variable depthlevel3Magnitude is not being checked for CF compliance
Variable depthlevel3Direction is not being checked for CF compliance
Variable depthlevel4Magnitude is not being checked for CF compl

In [376]:
# Scratch check that `ds.attrs.update` accepts each of the usual dict.update
# call forms. Note that this adds six throwaway `test*` keys to the dataset.
# Form 1: keyword arguments.
ds.attrs.update(test=1, test2=2)
# Form 2: an inline dict literal.
ds.attrs.update({'test3':3, 'test4':4})
# Form 3: a previously built dict.
dic = {'test5':5, 'test6':6}
ds.attrs.update(dic)

# Display the resulting global attributes.
ds.attrs

{'project': 'Sunset Beach Metocean Deployments',
 'trip_deployed': 'SB_FT04',
 'trip_recovered': 'SB_FT05',
 'site': 'Sunset 1',
 'instrument_make': 'RDI',
 'instrument_model': 'Workhorse',
 'root': 'C:\\Users\\AZulberti\\O2 Marine\\OneDrive - Documents\\DATA\\PROJECTS\\19MET-0010 Sunset Beach Metocean Measurements\\4_Analysis',
 'Conventions': 'CF-1.7',
 'title': 'Measured data from an RDI ADCP',
 'institution': 'O2 Metocean',
 'source': 'RDI ADCP',
 'history': '2021-02-24T18:24:17.073342: [AZ] Modified metadata',
 'references': 'O2 Metocean QAQC COnventions; CF Conventions version 1.7',
 'comment': '2021-02-24T18:24:17.073342: [AZ] Modified metadata',
 'test': 1,
 'test2': 2,
 'test3': 3,
 'test4': 4,
 'test5': 5,
 'test6': 6}