In [18]:
import os
import xarray as xr
import pandas as pd
import datetime
import numpy as np

### Load ISMIP Variables

In [19]:
ismip  = pd.read_csv('/mnt/d/1_protect/0_sanity_check/ISMIP6/ismip_compliance_check.csv',delimiter=';',decimal=",")
ismip_meta = ismip.to_dict('records')
# get the list of variables
ismip_var = [dic['variable'] for dic in ismip_meta]
ismip.head()

Unnamed: 0,variable,dim,type,variable_name,standard_name,alias_name,units,mandatory,min_value,max_value
0,acabf,"x,y,t",FL,Surface mass balance flux,land_ice_surface_specific_mass_balance_flux,,kg m-2 s-1,1,-0.0001,10
1,base,"x,y,t",ST,Base elevation,base_altitude,,m,1,-3000.0,4000
2,dlithkdt,"x,y,t",FL,Ice thickness imbalance,tendency_of_land_ice_thickness,,m s-1,0,-5.5e-08,55
3,hfgeoubed,"x,y,t",FL,Geothermal heat flux,upward_geothermal_heat_flux_in_land_ice,upward_geothermal_heat_flux_at_ground_level,W m-2,0,0.0,3
4,libmassbffl,"x,y,t",FL,Basal mass balance flux beneath floating ice,land_ice_basal_specific_mass_balance_flux,,kg m-2 s-1,1,-0.004,1


### Experiments list

In [221]:
experiments =[{'experiment':'hist', 'startinf':datetime.datetime(1979, 6, 30),'startsup':datetime.datetime(1980, 1, 1),'endinf':datetime.datetime(2014, 6, 30),'endsup':datetime.datetime(2015, 1, 1),'duration':35},
                  {'experiment':'ctrl', 'startinf':datetime.datetime(1980, 1, 1),'startsup':datetime.datetime(1980, 1, 1),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':120},
                  {'experiment':'ctrl_proj', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,7,1),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp01', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp02', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp03', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp04', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp05', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp06', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp07', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp08', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp09', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp10', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp11', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp12', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp13', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86}
]

## Explore directory

In [209]:
source_path = '/mnt/d/1_protect/0_sanity_check/IMAU/IMAUICE1'

In [222]:
def list_files(path):
    for root,dirs,files in os.walk(path):
        level = root.replace(path,'').count(os.sep)
        indent = ' ' * 4 *(level)
        print(f'{indent}{os.path.basename(root)}/')
        subindent = '  '*4*(level+1)
        for f in files:
            print(f'{subindent}{f}')

def files_and_subdirectories(root_path):
    files = []
    directories = []
    for f in os.listdir(root_path):
        if os.path.isfile(f):
            files.append(f)
        elif os.path.isdir(f):
            directories.append(f)
    return directories, files

directories,files = files_and_subdirectories(source_path)

print(directories)

#list_files(source_path)

[]


## Check a single file

In [211]:
# load ISMIP6 compliance parameters
ismip  = pd.read_csv('/mnt/d/1_protect/0_sanity_check/ISMIP6/ismip_variable.csv',delimiter=';')
ismip_meta = ismip.to_dict('records')
ismip_var = [dic['variable'] for dic in ismip_meta]
#print(ismip_meta)

In [230]:


#file = '/mnt/d/1_protect/0_sanity_check/IMAU/IMAUICE1/hist_32/dlithkdt_AIS_IMAU_IMAUICE1_hist_std.nc'
#file = '/mnt/d/1_protect/0_sanity_check/IMAU/IMAUICE1/ctrl_proj_std_32/libmassbfgr_AIS_IMAU_IMAUICE1_ctrl_proj_std.nc'
file = '/mnt/d/1_protect/0_sanity_check/IMAU/IMAUICE1/ctrl_32/dlithkdt_AIS_IMAU_IMAUICE1_ctrl.nc'
#file = '/mnt/d/1_protect/0_sanity_check/IMAU/IMAUICE1/exp10_32/ligroundf_AIS_IMAU_IMAUICE1_exp10.nc'
split_path=os.path.normpath(file).split(os.sep)
print(split_path)
file_name = split_path[-1]
file_name_split = file_name.split('_')
print(file_name_split)

considered_variable = file_name_split[0]
region = file_name_split[1]
group  = file_name_split[2]
model = file_name_split[3]
file_extention = file_name_split[len(file_name_split)-1][-2:]
experiment_name = split_path[-2][:-3]
grid_resolution = int(split_path[-2][-2:])

print('resolution:',grid_resolution)

print('file_name:',file_name)
print('region:',region)
print('group:',group)
print('file_extention:',file_extention)
print('experiment_name:',experiment_name)


['', 'mnt', 'd', '1_protect', '0_sanity_check', 'IMAU', 'IMAUICE1', 'ctrl_32', 'dlithkdt_AIS_IMAU_IMAUICE1_ctrl.nc']
['dlithkdt', 'AIS', 'IMAU', 'IMAUICE1', 'ctrl.nc']
resolution: 32
file_name: dlithkdt_AIS_IMAU_IMAUICE1_ctrl.nc
region: AIS
group: IMAU
file_extention: nc
experiment_name: ctrl


In [231]:
# Load netcdf file
ds = xr.open_dataset(file)

file_variables = list(ds.data_vars)

#initialise counters
errors = 0
warnings = 0

# test extention
if file_extention != 'nc':
    print(' - !! ',file_name,' is not a NETCDF file. Compliance check is skipped.','\n')
else: 
    # test naming
    print('FILE:',file_name,'\n')
    print('NAMING Tests')
    if region.upper() in ['AIS', 'GIS']:
        print(' - Studied Region:',region)
    else: 
        print('- ERROR: Region ',region,'not recognized. It should be AIS or GIS.')
        errors += 1

    print('VARIABLE Tests')
    for ivar in file_variables:
        if ivar in ismip_var:
            print('**',ivar, 'is a valid name.')
            # get index in the ismip_var list
            var_index = [k for k in range(len(ismip_var)) if ismip_var[k]==ivar]
        # NUMERIC TESTS
            print(ivar,': NUMERIC Tests')
            # check the unit
            if ds[ivar].attrs['units'] == ismip_meta[var_index[0]]['units']:
                print(' - The unit is correct:',ds[ivar].attrs['units'])

            else:
                print(' - ERROR: The current variable\'s unit is',ds[ivar].attrs['units'],'and should be',ismip_meta[var_index[0]]['units'])
                errors += 1 
            # check the min value
            if ds[ivar].min()>ismip_meta[var_index[0]]['min_value']:
                print(' - The minimum value successfully verified.')
            else:
                print(' - ERROR: The minimum value(', round(ds[ivar].min().values.item(0),5),') is out of range.')
                errors = errors + 1 
            # check the max value
            if ds[ivar].max()>ismip_meta[var_index[0]]['max_value']:
                    print(' - The maximum value successfully verified.')
            else:
                print(' - ERROR: The maximum value(', round(ds[ivar].max().values.item(0),5),') is out of range.')
                errors = errors + 1

        # SPATIAL TESTS
            # SPATIAL:Check spatial extent of the grid
            print(ivar,': SPATIAL Tests')
            if region == 'AIS':
                # grid_exten[Xbottomleft,Ybottomleft,Xtopright,Ytopright]
                grid_extent = [-3040000,-3040000,3040000,3040000]
                possible_resolution = [1,2,4,8,16,32] 
            else: 
                # GIS Grid
                grid_extent = [-720000,-3450000,960000,-570000]
                possible_resolution = [1,2,4,5,10,20]

            # get the grid from the file
            coords = ds.coords.to_dataset()
            Xbottomleft=int(min(coords['x']).values.item())
            Ybottomleft=int(min(coords['y']).values.item())
            Xtopright=int(max(coords['x']).values.item())
            Ytopright=int(max(coords['y']).values.item())

            if Xbottomleft == grid_extent[0] & Ybottomleft == grid_extent[1]:
                print(' - Grid: Lowest left corner is well defined.')
            else:    
                print(' - ERROR: Lowest left corner of the grid is not well defined. [-3040000,-3040000] Expected')
                errors = errors + 1
            if Xtopright == grid_extent[2] & Ytopright == grid_extent[3]:
                print(' - Grid: Upper right corner is well defined.')
            else:    
                print(' - ERROR: Upper rigth corner of the grid is not well defined. [3040000,3040000] Expected')
                errors = errors + 1

            #SPATIAL:check the spatial resolution
            Xresolution = (coords['x'][1].values-coords['x'][0].values)/1000
            Yresolution = (coords['y'][1].values-coords['y'][0].values)/1000
            if Xresolution in set(possible_resolution) and Yresolution in set(possible_resolution):
                if Xresolution == grid_resolution and Yresolution == grid_resolution:
                    print(' - The spatial resolution (grid size) was successfully verified.')
                else:
                    print(' - ERROR: The grid resolution ( ', Xresolution,'or',Yresolution,') is different of ',grid_resolution,'declared in the file name.')
                    error = error + 1
            else:
                print(' - Error: x:',Xresolution,',y:',Yresolution,' is not an authorized grid resolution.')
                error = error + 1

        # TIME TESTS
            print(ivar,': TIME Tests')
            start_exp = pd.to_datetime(min(ds['time']).values)
            end_exp  = pd.to_datetime(max(ds['time']).values)
            avgyear = 365.2425        # pedants definition of a year length with leap years
            duration_days = (end_exp - start_exp)
            duration_years = round(pd.to_numeric(duration_days.days / avgyear))

            index_exp=[dic['experiment'] for dic in experiments].index(experiment_name)
            # test starting date
            if experiments[index_exp]['startinf'] <= start_exp <= experiments[index_exp]['startsup']:
                print(" - Experiment starts correctly on", start_exp.strftime('%Y-%m-%d'),'.')
            else:
                print(' - ERROR: the experiment starts the',start_exp.strftime('%Y-%m-%d'),'. The date should be comprised between',experiments[index_exp]['startinf'].strftime('%Y-%m-%d'),'and',experiments[index_exp]['startsup'].strftime('%Y-%m-%d'))
            #test ending date
            if experiments[index_exp]['endinf'] <= end_exp <= experiments[index_exp]['endsup']:
                print(" - Experiment ends correctly on", end_exp.strftime('%Y-%m-%d'),'.')
            else:
                print(' - ERROR: the experiment ends on',end_exp.strftime('%Y-%m-%d'),'. The date should be comprised between',experiments[index_exp]['endinf'].strftime('%Y-%m-%d'),'and',experiments[index_exp]['endsup'].strftime('%Y-%m-%d'))
            
            # Duration
            if experiments[index_exp]['duration']-1 <= duration_years <= experiments[index_exp]['duration']:
                print(" - Experiment lasts", duration_years,'years.')
            else:
                print(' - ERROR: the experiment lasts', duration_years,'years. The duration should be ',experiments[index_exp]['duration'],'years')
        else:
            print('!! ',ivar,'isn\'t a known variable name. Its verification has been ignored.')
    print('\n','************************************************************')
    print(file_name)
    if errors > 0:
        print(errors,'critical error(s). Please review before sharing.')
    else:
        print('no errors.')
    if warnings > 0:
        print(warnings,'warning(s). Please review before sharing.')
    else:
        print('no warnings.')
    print('************************************************************')

FILE: dlithkdt_AIS_IMAU_IMAUICE1_ctrl.nc 

NAMING Tests
 - Studied Region: AIS
VARIABLE Tests
** dlithkdt is a valid name.
dlithkdt : NUMERIC Tests
 - ERROR: The current variable's unit is kg m-2 s-1 and should be m s-1
 - ERROR: The minimum value( -0.0 ) is out of range.
 - ERROR: The maximum value( 0.0 ) is out of range.
dlithkdt : SPATIAL Tests
 - Grid: Lowest left corner is well defined.
 - Grid: Upper right corner is well defined.
 - The spatial resolution (grid size) was successfully verified.
dlithkdt : TIME Tests
 - ERROR: the experiment starts the 1979-07-01 . The date should be comprised between 1980-01-01 and 1980-01-01
 - Experiment ends correctly on 2100-07-01 .
 - ERROR: the experiment lasts 121 years. The duration should be  120 years
!!  mapping isn't a known variable name. Its verification has been ignored.
!!  time_bounds isn't a known variable name. Its verification has been ignored.

 ************************************************************
dlithkdt_AIS_IMAU_IMA