In [10]:
import os
import xarray as xr
import pandas as pd
import datetime
import numpy as np

### Load ISMIP Variables

In [11]:
# load csv :
ismip  = pd.read_csv('/mnt/d/1_protect/0_sanity_check/ISMIP6/ismip_compliance_check.csv',delimiter=';',decimal=",")
ismip_meta = ismip.to_dict('records')
# get the list of variables
ismip_var = [dic['variable'] for dic in ismip_meta]
# get the mandatory variables
ismip_mandatory_var = ismip['variable'][ismip.mandatory==1].tolist()

ismip.head()

Unnamed: 0,variable,dim,type,variable_name,standard_name,alias_name,units,mandatory,min_value_ais,max_value_ais,min_value_gis,max_value_gis
0,acabf,x-y-t,FL,Surface mass balance flux,land_ice_surface_specific_mass_balance_flux,,kg m-2 s-1,1,-0.0001,0.001,-0.0001,0.001
1,base,x-y-t,ST,Base elevation,base_altitude,,m,1,-3000.0,4000.0,-3000.0,4000.0
2,dlithkdt,x-y-t,FL,Ice thickness imbalance,tendency_of_land_ice_thickness,,m s-1,0,-5.5e-08,5.5e-08,-5.5e-08,5.5e-08
3,hfgeoubed,x-y-t,FL,Geothermal heat flux,upward_geothermal_heat_flux_in_land_ice,upward_geothermal_heat_flux_at_ground_level,W m-2,0,0.0,0.3,0.0,0.3
4,libmassbffl,x-y-t,FL,Basal mass balance flux beneath floating ice,land_ice_basal_specific_mass_balance_flux,,kg m-2 s-1,1,-0.004,0.001,-0.004,0.001


### Experiments list

In [12]:
experiments_ismip6 =[{'experiment':'hist', 'startinf':datetime.datetime(1979, 6, 30),'startsup':datetime.datetime(1980, 1, 1),'endinf':datetime.datetime(2014, 6, 30),'endsup':datetime.datetime(2015, 1, 1),'duration':35},
                  {'experiment':'ctrl', 'startinf':datetime.datetime(1979, 6, 30),'startsup':datetime.datetime(1980, 1, 1),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':120},
                  {'experiment':'ctrl_proj', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,7,1),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp01', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp02', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp03', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp04', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp05', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp06', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp07', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp08', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp09', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp10', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp11', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp12', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86},
                  {'experiment':'exp13', 'startinf':datetime.datetime(2015, 1, 2),'startsup':datetime.datetime(2016, 1, 2),'endinf':datetime.datetime(2100,6,30),'endsup':datetime.datetime(2101, 1, 1),'duration':86}
]
experiments = experiments_ismip6

### Data directory

In [34]:
source_path = '/mnt/d/1_protect/0_sanity_check/IMAU/IMAUICE1'
model = os.path.basename(source_path)

In [14]:
def list_files(path):
    for root,dirs,files in os.walk(path):
        level = root.replace(path,'').count(os.sep)
        indent = ' ' * 4 *(level)
        print(f'{indent}{os.path.basename(root)}/')
        subindent = '  '*4*(level+1)
        for f in files:
            print(f'{subindent}{f}')

def files_and_subdirectories(path):
    files = []
    directories = []
    for f in os.listdir(path):
        if os.path.isfile(os.path.join(path, f)):
             files.append(f)
        elif os.path.isdir(os.path.join(path, f)):
            directories.append(f)
    return directories, files

def italic(txt):
    return f"\033[3m{txt}\033[0m"


### Experiment Analysis
 - verification of the experiments name (i.e. subfolders)
 - verification of the mandatory variables for each experiment.

In [35]:
experiment_directories,files = files_and_subdirectories(source_path)

print('Model:',model)
for xp in experiment_directories:

    exp_dir,exp_files = files_and_subdirectories(os.path.join(source_path, xp))

    for i in exp_files:
        file_name_split = i.split('_')
        variable = file_name_split[0]
        temp_ismip_mandatory_var = ismip_mandatory_var
        if  variable in ismip_mandatory_var:
            temp_ismip_mandatory_var.remove(variable)
        
    if not temp_ismip_mandatory_var:
        print(xp,':all mandatory variables exist')
    else:
        print('ERROR:',xp,': the following mandatory variable(s) is (are) missing :',temp_ismip_mandatory_var)


Model: IMAUICE1
ERROR: abmb_32 : the following mandatory variable(s) is (are) missing : ['strbasemag']
ERROR: asmb_32 : the following mandatory variable(s) is (are) missing : ['strbasemag']
ERROR: ctrl_32 : the following mandatory variable(s) is (are) missing : ['strbasemag']
ERROR: ctrl_proj_std_32 : the following mandatory variable(s) is (are) missing : ['strbasemag']
ERROR: exp05_32 : the following mandatory variable(s) is (are) missing : ['strbasemag']
ERROR: exp06_32 : the following mandatory variable(s) is (are) missing : ['strbasemag']
ERROR: exp07_32 : the following mandatory variable(s) is (are) missing : ['strbasemag']
ERROR: exp08_32 : the following mandatory variable(s) is (are) missing : ['strbasemag']
ERROR: exp09_32 : the following mandatory variable(s) is (are) missing : ['strbasemag']
ERROR: exp10_32 : the following mandatory variable(s) is (are) missing : ['strbasemag']
ERROR: exp11_32 : the following mandatory variable(s) is (are) missing : ['strbasemag']
ERROR: exp1

### Compliance Check - single file

In [61]:

#file = '/mnt/d/1_protect/0_sanity_check/IMAU/IMAUICE1/hist_32/dlithkdt_AIS_IMAU_IMAUICE1_hist_std.nc'
#file = '/mnt/d/1_protect/0_sanity_check/IMAU/IMAUICE1/ctrl_proj_std_32/libmassbfgr_AIS_IMAU_IMAUICE1_ctrl_proj_std.nc'
#file = '/mnt/d/1_protect/0_sanity_check/IMAU/IMAUICE1/ctrl_32/dlithkdt_AIS_IMAU_IMAUICE1_ctrl.nc'
#file = '/mnt/d/1_protect/0_sanity_check/IMAU/IMAUICE1/exp10_32/ligroundf_AIS_IMAU_IMAUICE1_exp10.nc'
file = '/mnt/d/1_protect/0_sanity_check/IMAU/IMAUICE1/exp10_32/orog_AIS_IMAU_IMAUICE1_exp10.nc'


split_path=os.path.normpath(file).split(os.sep)
print(split_path)
file_name = split_path[-1]
file_name_split = file_name.split('_')
print(file_name_split)

considered_variable = file_name_split[0]
region = file_name_split[1]
group  = file_name_split[2]
model = file_name_split[3]
file_extention = file_name_split[len(file_name_split)-1][-2:]
experiment_name = split_path[-2][:-3]
grid_resolution = int(split_path[-2][-2:])

print('resolution:',grid_resolution)

print('file_name:',file_name)
print('region:',region)
print('group:',group)
print('file_extention:',file_extention)
print('experiment_name:',experiment_name)


['', 'mnt', 'd', '1_protect', '0_sanity_check', 'IMAU', 'IMAUICE1', 'exp10_32', 'orog_AIS_IMAU_IMAUICE1_exp10.nc']
['orog', 'AIS', 'IMAU', 'IMAUICE1', 'exp10.nc']
resolution: 32
file_name: orog_AIS_IMAU_IMAUICE1_exp10.nc
region: AIS
group: IMAU
file_extention: nc
experiment_name: exp10


In [62]:
# Load netcdf file
ds = xr.open_dataset(file)
np.isnan(ds['orog'].values)

array([[[False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        ...,
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False]],

       [[False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        ...,
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False]],

       [[False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, False, False],
        ...,
        [False, False, False, ..., False, False, False],
        [False, False, False, ..., False, Fal

In [50]:

# Load variables included in file
file_variables = list(ds.data_vars)

#initialise counters
errors = 0
warnings = 0

# test  experiement naming
if experiment_name  in [dic['experiment'] for dic in experiments]:
    print('Experiment:',experiment_name,'.')
    if file_extention != 'nc':
        print(' - !! ',file_name,' is not a NETCDF file. The compliance check is ignored.','\n')
    else: 
        # test  file naming
        print('FILE:',file_name,'\n')
        if region.upper() in ['AIS', 'GIS']:
            print('Studied Region:',region)
            if region == 'AIS':
                # grid_exten[Xbottomleft,Ybottomleft,Xtopright,Ytopright]
                grid_extent = [-3040000,-3040000,3040000,3040000]
                possible_resolution = [1,2,4,8,16,32] 
            else: 
                # GIS Grid
                grid_extent = [-720000,-3450000,960000,-570000]
                possible_resolution = [1,2,4,5,10,20]


            for ivar in file_variables:
                if ivar in ismip_var:
                    print('Tested VARIABLE:',ivar)
                    # get index in the ismip_var list
                    var_index = [k for k in range(len(ismip_var)) if ismip_var[k]==ivar]
                # NUMERICAL TESTS
                    print('NUMERICAL Tests')
                    # check the unit
                    if ds[ivar].attrs['units'] == ismip_meta[var_index[0]]['units']:
                        print(' - The unit is correct:',ds[ivar].attrs['units'])
                    else:
                        print(' - ERROR: The unit of the variable is',ds[ivar].attrs['units'],'and should be',ismip_meta[var_index[0]]['units'])
                        errors += 1 
                    # check the min value
                    if ds[ivar].min().item()>=ismip_meta[var_index[0]]['min_value_'+region.lower()]:
                        print(' - The minimum value successfully verified.')
                    else:
                        print(' - ERROR: The minimum value(', ds[ivar].min().values.item(0),') is out of range. Min value accepted :',ismip_meta[var_index[0]]['min_value_'+region.lower()])
                        errors += 1 
                    # check the max value
                    if ds[ivar].max().item()<=ismip_meta[var_index[0]]['max_value_'+region.lower()]:
                            print(' - The maximum value successfully verified.')
                    else:
                        print(' - ERROR: The maximum value(', ds[ivar].max().values.item(0),') is out of range.Min value accepted :',ismip_meta[var_index[0]]['max_value_'+region.lower()])
                        errors += 1
                    # check NAN
                    

                # SPATIAL TESTS
                    # SPATIAL:Check spatial extent of the grid
                    print('SPATIAL Tests')
                    # get the grid from the file
                    coords = ds.coords.to_dataset()
                    Xbottomleft=int(min(coords['x']).values.item())
                    Ybottomleft=int(min(coords['y']).values.item())
                    Xtopright=int(max(coords['x']).values.item())
                    Ytopright=int(max(coords['y']).values.item())

                    if Xbottomleft == grid_extent[0] & Ybottomleft == grid_extent[1]:
                        print(' - Grid: Lowest left corner is well defined.')
                    else:    
                        print(' - ERROR: Lowest left corner of the grid [',Xbottomleft,',',Ybottomleft,'] is not well defined. [',grid_extent[0],',',grid_extent[1],'] Expected')
                        errors += 1
                    if Xtopright == grid_extent[2] & Ytopright == grid_extent[3]:
                        print(' - Grid: Upper right corner is well defined.')
                    else:    
                        print(' - ERROR: Upper rigth corner of the grid [',Xtopright,',',Ytopright,']is not well defined. [',grid_extent[0],',',grid_extent[1],'] Expected')
                        errors += 1

                    #SPATIAL:check the spatial resolution
                    Xresolution = round((coords['x'][1].values-coords['x'][0].values)/1000,0)
                    Yresolution = round((coords['y'][1].values-coords['y'][0].values)/1000,0)
                    if Xresolution in set(possible_resolution) and Yresolution in set(possible_resolution):
                        if Xresolution == grid_resolution and Yresolution == grid_resolution:
                            print(' - The grid resolution (',Xresolution,') was successfully verified.')
                        else:
                            print(' - ERROR: The grid resolution ( ', Xresolution,'or',Yresolution,') is different of ',grid_resolution,'declared in the file name.')
                            errors += 1
                    else:
                        print(' - Error: x:',Xresolution,',y:',Yresolution,' is not an authorized grid resolution.')
                        errors += 1

                # TIME TESTS
                    print('TIME Tests')
                    start_exp = pd.to_datetime(min(ds['time']).values)
                    end_exp  = pd.to_datetime(max(ds['time']).values)
                    avgyear = 365.2425        # pedants definition of a year length with leap years
                    duration_days = (end_exp - start_exp)
                    duration_years = round(pd.to_numeric(duration_days.days / avgyear))

                    index_exp=[dic['experiment'] for dic in experiments].index(experiment_name)
                    # test starting date
                    if experiments[index_exp]['startinf'] <= start_exp <= experiments[index_exp]['startsup']:
                        print(" - Experiment starts correctly on", start_exp.strftime('%Y-%m-%d'),'.')
                    else:
                        print(' - ERROR: the experiment starts the',start_exp.strftime('%Y-%m-%d'),'. The date should be comprised between',experiments[index_exp]['startinf'].strftime('%Y-%m-%d'),'and',experiments[index_exp]['startsup'].strftime('%Y-%m-%d'))
                        errors += 1
                    #test ending date
                    if experiments[index_exp]['endinf'] <= end_exp <= experiments[index_exp]['endsup']:
                        print(" - Experiment ends correctly on", end_exp.strftime('%Y-%m-%d'),'.')
                    else:
                        print(' - ERROR: the experiment ends on',end_exp.strftime('%Y-%m-%d'),'. The date should be comprised between',experiments[index_exp]['endinf'].strftime('%Y-%m-%d'),'and',experiments[index_exp]['endsup'].strftime('%Y-%m-%d'))
                        errors += 1
                    # Duration
                    if experiments[index_exp]['duration']-1 <= duration_years <= experiments[index_exp]['duration']:
                        print(" - Experiment lasts", duration_years,'years.')
                    else:
                        print(' - ERROR: the experiment lasts', duration_years,'years. The duration should be',experiments[index_exp]['duration'],'years')
                        errors += 1
                    #Time step
                    time_step = (ds['time'].values[11]-ds['time'].values[10]).astype('timedelta64[D]').item()

                    if 360<time_step.days<367:
                        print(' - Time step:',time_step.days,'days','\n')
                    else:
                      print(' - ERROR: the time step(',time_step.days,') should be comprised between [360,367].')
                      errors += 1

        else: 
            print('- ERROR: Region ',region,'not recognized. It should be AIS or GIS. The compliance check has been interrupted.')
            errors += 1
else:
    print('WARNING: Experiment',italic(experiment_name),'is not in [hist, ctrl, ctrl_proj, exp01, exp02, exp03, exp04, exp05, exp06, exp07, exp08, exp09, exp10, exp11, exp12, exp13]. Compliance check is ignored.')
    warnings +=1

print('************************************************************')
print(experiment_name,' - ',file_name)
if errors > 0:
    print(errors,'critical error(s). Please review before sharing.')
else:
    print('No errors. Good job.')
if warnings > 0:
    print(warnings,'warning(s). Please review before sharing.')
else:
    print('No warnings.')
print('************************************************************')
    
        

Experiment: exp10 .
FILE: ligroundf_AIS_IMAU_IMAUICE1_exp10.nc 

Studied Region: AIS
Tested VARIABLE: ligroundf
NUMERICAL Tests
 - The unit is correct: kg m-2 s-1
 - The minimum value successfully verified.
 - The maximum value successfully verified.
SPATIAL Tests
 - Grid: Lowest left corner is well defined.
 - Grid: Upper right corner is well defined.
 - The grid resolution ( 32.0 ) was successfully verified.
TIME Tests
 - Experiment starts correctly on 2015-07-01 .
 - Experiment ends correctly on 2100-07-01 .
 - Experiment lasts 85 years.
 - Time step: 365 days 

************************************************************
exp10  -  ligroundf_AIS_IMAU_IMAUICE1_exp10.nc
No errors. Good job.
************************************************************
