In [1]:
import json
from collections import defaultdict
import os
import sys


In [2]:
# Load the data request information that the rest of the notebook indexes
# via data_request_info['Compound Name'].
# The encoding is given explicitly so the result does not depend on the
# platform's locale default.
with open('01_output.json', encoding='utf-8') as fh:
    data_request_info = json.load(fh)

In [3]:
# Load the raw HadGEM3 mappings table (converted to a lookup dict later on).
# The encoding is given explicitly so the result does not depend on the
# platform's locale default.
with open('HadGEM3_mappings_table.json', encoding='utf-8') as fh:
    HadGEM3_mapping_table = json.load(fh)

In [4]:
# Load the raw UKESM1 mappings table (converted to a lookup dict later on).
# The encoding is given explicitly so the result does not depend on the
# platform's locale default.
with open('UKESM1_mappings_table.json', encoding='utf-8') as fh:
    UKESM1_mapping_table = json.load(fh)

In [5]:
# Display one data request entry ('3hr.ps') to see which fields each entry carries
data_request_info['Compound Name']['3hr.ps']

{'cell_measures': 'area: areacella',
 'cell_methods': 'area: mean time: point',
 'cmip6_differences': '"frequency" was "3hrPt"\n\n"comment" was "surface pressure (not mean sea-level pressure), 2-D field to calculate the 3-D pressure field from hybrid coordinates"\n',
 'cmip7_compound_name': 'atmos.3hr.ps_tpt-u-hxy-u',
 'comment': 'sampled synoptically to diagnose atmospheric tides, this is better than mean sea level pressure.',
 'dimensions': 'longitude latitude time1',
 'frequency': '3hr',
 'long_name': 'Surface Air Pressure',
 'modeling_realm': 'atmos',
 'positive': '',
 'processing_note': 'CHANGE SINCE CMIP6 in CMIP7 Frequency - CMIP6:3hrPt CMIP7:3hr,\n Changes since reverted/fixed. Variable now matches CMIP6 definition.',
 'remap': '',
 'standard_name': 'surface_air_pressure',
 'table': '3hr',
 'units': 'Pa',
 'variable_status': 'CMIP6 CMOR variable'}

In [6]:
def mappings_dict_from_table(table):
    """Convert a mappings table into a dict keyed by compound name.

    The first row of ``table`` is skipped. Every remaining row is read
    positionally: column 0 is the variable name, column 1 a
    whitespace-separated list of MIP tables, column 2 the expression,
    column 3 an iterable of note strings, and columns 4-7 the dimensions,
    model units, component and status.

    One entry is produced for each MIP table named in a row, under the key
    ``"<mip_table>.<variable>"``. Each entry is its own dict, even when
    several of them come from the same row. If two rows yield the same key,
    the later row wins.

    Runs of whitespace in the expression are collapsed to single spaces.
    The notes are joined with ``;`` and then collapsed the same way.
    """
    result = {}
    data_rows = table[1:]
    for row in data_rows:
        for mip_table in row[1].split():
            key = '{}.{}'.format(mip_table, row[0])
            # A fresh dict per key, so entries never alias one another.
            result[key] = {
                'expression': ' '.join(row[2].split()),
                'notes': " ".join(';'.join(row[3]).split()),
                'dimensions': row[4],
                'model_units': row[5],
                'component': row[6],
                'status': row[7],
            }
    return result

In [7]:
# Turn each model's raw table into a lookup keyed by compound name
HadGEM3_mappings = mappings_dict_from_table(table=HadGEM3_mapping_table)
UKESM1_mappings = mappings_dict_from_table(table=UKESM1_mapping_table)


In [8]:
# Load the old mapping tables and convert them to lookups in the same way
# as the current ones, so the two can be compared.
# The encoding is given explicitly so the result does not depend on the
# platform's locale default.
with open('old_HadGEM3_mappings.json', encoding='utf-8') as fh:
    old_HadGEM3_mappings = mappings_dict_from_table(json.load(fh))
with open('old_UKESM_mappings.json', encoding='utf-8') as fh:
    old_UKESM1_mappings = mappings_dict_from_table(json.load(fh))

In [9]:

def check_mappings_against_old(old_mappings, mappings, requested_names=None):
    """Print the differences between an old and a new set of mappings.

    Parameters
    ----------
    old_mappings : dict
        Old mappings, keyed by compound name; each value is a dict of fields.
    mappings : dict
        New mappings with the same structure.
    requested_names : container of str, optional
        Compound names to check; old entries whose name is not in this
        container are skipped. Defaults to the keys of the notebook-level
        ``data_request_info['Compound Name']``.

    For every checked name that is absent from ``mappings`` a line
    ``missing <name>`` is printed. Otherwise each field whose value differs
    is printed as the name and field followed by the old and new values,
    except that:

    * ``dimensions`` differences are never reported;
    * ``model_units`` differences are not reported when the old value is "-".

    Returns None; the report is written to standard output.
    """
    if requested_names is None:
        # Fall back to the data request loaded at the top of the notebook.
        requested_names = data_request_info['Compound Name']

    for name, old_info in old_mappings.items():
        if name not in requested_names:
            continue

        if name not in mappings:
            print("missing", name)
            continue

        new_info = mappings[name]
        for field, old_value in old_info.items():
            new_value = new_info[field]
            if old_value == new_value:
                continue
            if field == 'model_units' and old_value == "-":
                continue
            if field == 'dimensions':
                continue
            print(name, field,)
            print("\t", old_value)
            print("\t", new_value)
            

In [10]:
# Report how the current HadGEM3 mappings differ from the old ones
check_mappings_against_old(old_HadGEM3_mappings, HadGEM3_mappings)

E1hrClimMon.rlut status
	 -
	 ok
E1hrClimMon.rlutcs status
	 -
	 ok
E1hrClimMon.rsdt status
	 -
	 ok
E1hrClimMon.rsut status
	 -
	 ok
E1hrClimMon.rsutcs status
	 -
	 ok
Eday.evspsblpot notes
	 -;Merge with common mappings when approved
	 -;-
Eday.evspsblpot component
	 -
	 land
Eday.evspsblpot status
	 embargoed
	 ok
AERday.ua10 expression
	 m01s30i201[blev=P10, lbproc=128] / m01s30i301[blev=P10, lbproc=128]
	 m01s30i201[blev=P10, lbproc=128]
AERday.ua10 notes
	 -;-
	 A division by the Heaviside function, as used in UKESM1-0-LL has been removed from the mapping used to calculate this variable. As the 10 hPa level is well above the surface everywhere this should have no impact on the scientific content of this variable;-
missing SImon.siflsaltbot


In [11]:
# Report how the current UKESM1 mappings differ from the old ones
check_mappings_against_old(old_UKESM1_mappings, UKESM1_mappings)

E1hrClimMon.rlut status
	 -
	 ok
E1hrClimMon.rlutcs status
	 -
	 ok
E1hrClimMon.rsdt status
	 -
	 ok
E1hrClimMon.rsut status
	 -
	 ok
E1hrClimMon.rsutcs status
	 -
	 ok
Eday.evspsblpot notes
	 -;Merge with common mappings when approved
	 -;-
Eday.evspsblpot component
	 -
	 land
Eday.evspsblpot status
	 embargoed
	 ok


In [12]:
def check_mapping_consistency(dr_entry, mapping):
    """Return the names of the fields on which a mapping disagrees with its data request entry.

    Only the ``dimensions`` field is compared at present. The comparison is
    plain equality of the two stored values. An empty list means no
    inconsistency was found.
    """
    fields_to_check = ('dimensions',)
    return [
        field
        for field in fields_to_check
        if mapping[field] != dr_entry[field]
    ]
        



# Attach to every data request entry the mapping each model has for it,
# together with the list of consistency issues found for that entry.
model_mappings = {
    'HadGEM3-GC31': HadGEM3_mappings,
    'UKESM1': UKESM1_mappings,
}

for compound_name, entry in data_request_info['Compound Name'].items():
    # A non-empty 'remap' names the compound whose mapping should be used instead.
    lookup_key = entry['remap'] if entry['remap'] != "" else compound_name
    entry['mappings'] = {}
    for model_name, model_table in model_mappings.items():
        if lookup_key not in model_table:
            continue
        # Copy the mapping before adding 'issues': several entries can resolve
        # to the same lookup_key, and writing into the shared dict would let
        # the last entry's issues overwrite everyone else's (and would also
        # modify the model lookup itself).
        mapping = dict(model_table[lookup_key])
        mapping['issues'] = check_mapping_consistency(entry, mapping)
        entry['mappings'][model_name] = mapping




    

In [13]:
# Display one entry ('Lmon.baresoilFrac') to check the 'mappings' field that was attached to it
data_request_info['Compound Name']['Lmon.baresoilFrac']

{'cell_measures': 'area: areacella',
 'cell_methods': 'area: time: mean',
 'cmip6_differences': '"cell_methods" was "area: mean where land over all_area_types time: mean"\n',
 'cmip7_compound_name': 'land.mon.baresoilFrac_tavg-u-hxy-u',
 'comment': 'Percentage of entire grid cell  that is covered by bare soil.',
 'dimensions': 'longitude latitude time typebare',
 'frequency': 'mon',
 'long_name': 'Bare Soil Percentage Area Coverage',
 'modeling_realm': 'land',
 'positive': '',
 'processing_note': 'Note that if this variable is independent of time, it should be stored only for a single time (user choice).\nCHANGE SINCE CMIP6 in Dimensions - CMIP6:longitude latitude time typebare CMIP7:longitude latitude time  typebare,\n \nCHANGE SINCE CMIP6 in Cell Methods - CMIP6:area: mean where land over all_area_types time: mean CMIP7:area: time: mean,',
 'remap': '',
 'standard_name': 'area_fraction',
 'table': 'Lmon',
 'units': '%',
 'variable_status': 'CMIP6 CMOR variable',
 'mappings': {'HadGEM

In [14]:
# Mapping fields whose differences between the two models are not reported
ignore = ['notes']

# For every data request entry that has a mapping for both models, print each
# field whose value differs between the HadGEM3-GC31 and UKESM1 mappings.
for key, entry in data_request_info['Compound Name'].items():
    entry_mappings = entry.get('mappings', {})
    if 'HadGEM3-GC31' not in entry_mappings or 'UKESM1' not in entry_mappings:
        continue
    uk = entry_mappings['UKESM1']
    for j, v in entry_mappings['HadGEM3-GC31'].items():
        if j in ignore:
            continue
        if j not in uk:
            # Report the gap explicitly rather than silently abandoning the
            # remaining fields of this entry.
            print(key, j, "\n\t", v, "\n\t", "<missing from UKESM1 mapping>")
        elif v != uk[j]:
            print(key, j, "\n\t", v, "\n\t", uk[j])


6hrLev.ec550aer dimensions 
	 longitude latitude alevel lambda550nm time1 
	 longitude latitude alevel lambda500nm time1
AERday.ua10 expression 
	 m01s30i201[blev=P10, lbproc=128] 
	 m01s30i201[blev=P10, lbproc=128] / m01s30i301[blev=P10, lbproc=128]
Emon.treeFracBdlDcd expression 
	 land_class_area(m01s03i317[lbproc=128], m01s03i395[lbproc=128], land_class='broadLeafTreeDeciduous') 
	 land_class_area(m01s19i013[lbtim_ia=240,lbproc=128], m01s03i395[lbproc=128], land_class='broadLeafTreeDeciduous')
Emon.treeFracNdlEvg expression 
	 land_class_area(m01s03i317[lbproc=128], m01s03i395[lbproc=128], land_class='needleLeafTreeEvergreen') 
	 land_class_area(m01s19i013[lbtim_ia=240,lbproc=128], m01s03i395[lbproc=128], land_class='needleLeafTreeEvergreen')
Emon.vegFrac expression 
	 land_class_area(m01s03i317[lbproc=128], m01s03i395[lbproc=128], land_class='natural') 
	 land_class_area(m01s19i013[lbtim_ia=240,lbproc=128], m01s03i395[lbproc=128], land_class='veg')
LImon.acabfIs expression 
	 land

In [15]:
# List every HadGEM3 mapping key that contains the substring 'tas'
[name for name in HadGEM3_mappings if 'tas' in name]


['3hr.tas',
 'AERhr.tas',
 'APday.tasmax',
 'APday.tasmin',
 'CFsubhr.tas',
 'GCAmon6hr.tas',
 'LImon.tasIs',
 'day.tasmax',
 'day.tasmin',
 '6hrPlev.tas',
 'Amon.tas',
 'day.tas',
 'CresAERday.tas',
 'Cres1HrMn.tas',
 'AP6hr.tas',
 'APday.tas',
 'APmon.tas',
 'mon.tas',
 '1hr.tas',
 'Amon.tasmax',
 'Amon.tasmin']