In [1]:
import json
from collections import defaultdict
import os
import sys


In [2]:
# Load the data-request information from 01_output.json.
# JSON text is UTF-8, so decode explicitly instead of relying on the
# platform's locale default (which is not UTF-8 on every system).
with open('01_output.json', encoding='utf-8') as fh:
    data_request_info = json.load(fh)

In [3]:
# Load the HadGEM3 mappings table.
# Decode explicitly as UTF-8 rather than relying on the locale default.
with open('HadGEM3_mappings_table.json', encoding='utf-8') as fh:
    HadGEM3_mapping_table = json.load(fh)

# Peek at the row at index 1 to see how a table row is laid out.
HadGEM3_mapping_table[1]

['huss',
 '3hr',
 'm01s03i237[lbproc=0]',
 ['-', '-'],
 'longitude latitude height2m time1',
 '1',
 '-',
 'ok',
 ['huss', 'HadGEM3_3hr_mappings.cfg']]

In [4]:
# Load the UKESM1 mappings table.
# Decode explicitly as UTF-8 rather than relying on the locale default.
with open('UKESM1_mappings_table.json', encoding='utf-8') as fh:
    UKESM1_mapping_table = json.load(fh)

In [5]:
# Display one data-request entry ('3hr.ps') to see which fields each entry carries.
data_request_info['Compound Name']['3hr.ps']

{'cell_measures': 'area: areacella',
 'cell_methods': 'area: mean time: point',
 'cmip6_differences': '"frequency" was "3hrPt"\n\n"comment" was "surface pressure (not mean sea-level pressure), 2-D field to calculate the 3-D pressure field from hybrid coordinates"\n',
 'cmip7_compound_name': 'atmos.3hr.ps_tpt-u-hxy-u',
 'comment': 'sampled synoptically to diagnose atmospheric tides, this is better than mean sea level pressure.',
 'dimensions': 'longitude latitude time1',
 'frequency': '3hr',
 'long_name': 'Surface Air Pressure',
 'modeling_realm': 'atmos',
 'positive': '',
 'processing_note': 'CHANGE SINCE CMIP6 in CMIP7 Frequency - CMIP6:3hrPt CMIP7:3hr,\n Changes since reverted/fixed. Variable now matches CMIP6 definition.',
 'remap': '',
 'standard_name': 'surface_air_pressure',
 'table': '3hr',
 'units': 'Pa',
 'variable_status': 'CMIP6 CMOR variable'}

In [6]:
def mappings_dict_from_table(table):
    """Reshape a mappings table (list of rows) into a dict keyed by compound name.

    The first row of ``table`` is skipped (presumably a header row). Every
    other row is read positionally:

    * ``row[0]`` -- variable name
    * ``row[1]`` -- whitespace-separated MIP table names
    * ``row[2]`` -- mapping expression (internal whitespace is collapsed)
    * ``row[3]`` -- two-item sequence ``(comment, notes)``; ``"-"`` means empty
    * ``row[4]`` -- dimensions
    * ``row[5]`` -- model units
    * ``row[6]`` -- component
    * ``row[7]`` -- status

    Returns:
        dict mapping ``"<mip_table>.<variable>"`` to a dict with the keys
        ``expression``, ``comment``, ``notes``, ``dimensions``,
        ``model_units``, ``component`` and ``status``. A row listing several
        MIP tables produces one independent dict per table. If the same
        compound name occurs more than once, the last occurrence wins.
    """
    def _blank_if_dash(text):
        # The table uses "-" as a placeholder for "no value".
        return "" if text == "-" else text

    result = {}
    for row in table[1:]:
        for table_name in row[1].split():
            key = '{}.{}'.format(table_name, row[0])
            raw_comment, raw_notes = row[3]
            # A fresh dict per compound name, so entries are never shared.
            result[key] = {
                'expression': ' '.join(row[2].split()),
                'comment': _blank_if_dash(raw_comment),
                'notes': _blank_if_dash(raw_notes),
                'dimensions': row[4],
                'model_units': row[5],
                'component': row[6],
                'status': row[7],
            }
    return result

In [7]:
# Convert both raw mapping tables into dicts keyed by compound name.
HadGEM3_mappings, UKESM1_mappings = map(
    mappings_dict_from_table,
    (HadGEM3_mapping_table, UKESM1_mapping_table),
)


In [8]:
# Spot-check the reshaped HadGEM3 mapping for a single compound name.
HadGEM3_mappings['Amon.tas']

{'expression': 'm01s03i236[lbproc=128]',
 'comment': '',
 'notes': '',
 'dimensions': 'longitude latitude height2m time',
 'model_units': 'K',
 'component': 'boundary-layer',
 'status': 'ok'}

In [9]:
def check_mapping_consistency(dr_entry, mapping):
    """Compare the dimensions of a data-request entry with those of a mapping.

    Both arguments are dicts whose ``'dimensions'`` value is a
    whitespace-separated string of dimension names. The comparison ignores
    order and repeated names.

    Args:
        dr_entry: data-request entry; only ``dr_entry['dimensions']`` is read.
        mapping: mapping record; only ``mapping['dimensions']`` is read.

    Returns:
        A list of issue strings. It is empty when both sides name the same
        set of dimensions. Otherwise it holds one message listing the
        dimensions present only in the data request ("gained") and those
        present only in the mapping ("lost").
    """
    issues = []

    dr_dims = set(dr_entry['dimensions'].split())
    mapping_dims = set(mapping['dimensions'].split())

    if dr_dims != mapping_dims:
        # Sort the differences: set iteration order for strings varies between
        # interpreter runs (hash randomisation), which would otherwise make
        # the generated message -- and the JSON written from it --
        # non-reproducible.
        gained = sorted(dr_dims - mapping_dims)
        lost = sorted(mapping_dims - dr_dims)
        issues.append('dimensions gained: "{}", dimensions lost: "{}"'.format(
            gained, lost))

    return issues
        



def _mapping_summary(model, info, consistency):
    """Build the ``mapping_info`` record that is attached to a data-request entry.

    Args:
        model: label stored under ``'model'``.
        info: mapping dict as produced by ``mappings_dict_from_table``.
        consistency: list of issue strings from ``check_mapping_consistency``.
    """
    return {
        'model': model,
        'expression': info['expression'],
        'model_units': info['model_units'],
        'cmip6_status': info['status'],
        'comment': info['comment'],
        'notes': info['notes'],
        'consistency': consistency,
    }


# Attach a 'mapping_info' dict to every data-request entry (modified in place).
for compound_name, entry in data_request_info['Compound Name'].items():
    # A non-empty 'remap' names the compound to look up instead of the entry's own name.
    lookup_key = entry['remap'] if entry['remap'] != "" else compound_name

    hg3info = HadGEM3_mappings.get(lookup_key)
    ukinfo = UKESM1_mappings.get(lookup_key)

    if hg3info is None and ukinfo is None:
        # Neither model has a mapping for this variable.
        entry['mapping_info'] = {}
        continue

    if hg3info is None:
        entry['mapping_info'] = _mapping_summary(
            'UKESM1', ukinfo, check_mapping_consistency(entry, ukinfo))
        continue

    if ukinfo is None:
        entry['mapping_info'] = _mapping_summary(
            'HadGEM3-GC31', hg3info, check_mapping_consistency(entry, hg3info))
        continue

    # Both models have a mapping. Merge their consistency issues and drop
    # duplicates while keeping order, so an identical dimension mismatch is
    # reported once rather than twice.
    combined_issues = list(dict.fromkeys(
        check_mapping_consistency(entry, hg3info)
        + check_mapping_consistency(entry, ukinfo)))

    # NOTE(review): units, status, comment and notes are taken from the UKESM1
    # mapping only; any HadGEM3 differences in those fields are not recorded.
    if ukinfo['expression'] == hg3info['expression']:
        entry['mapping_info'] = _mapping_summary('BOTH', ukinfo, combined_issues)
    else:
        summary = _mapping_summary('PER_MODEL', ukinfo, combined_issues)
        # The expressions differ, so record both, labelled by model.
        summary['expression'] = 'UKESM1:"{}", HadGEM3:"{}"'.format(
            ukinfo['expression'], hg3info['expression'])
        entry['mapping_info'] = summary
            
        




    

In [10]:
# Display one entry again to confirm that 'mapping_info' has been attached.
data_request_info['Compound Name']['Amon.tas']

{'cell_measures': 'area: areacella',
 'cell_methods': 'area: time: mean',
 'cmip6_differences': '',
 'cmip7_compound_name': 'atmos.mon.tas_tavg-h2m-hxy-u',
 'comment': 'near-surface (usually, 2 meter) air temperature',
 'dimensions': 'longitude latitude time height2m',
 'frequency': 'mon',
 'long_name': 'Near-Surface Air Temperature',
 'modeling_realm': 'atmos',
 'positive': '',
 'processing_note': 'normally, the temperature should be reported at the 2 meter height',
 'remap': '',
 'standard_name': 'air_temperature',
 'table': 'Amon',
 'units': 'K',
 'variable_status': 'CMIP6 CMOR variable',
 'mapping_info': {'model': 'BOTH',
  'expression': 'm01s03i236[lbproc=128]',
  'model_units': 'K',
  'cmip6_status': 'ok',
  'comment': '',
  'notes': '',
  'consistency': []}}

In [11]:
# Write the annotated data-request information to 03_output.json.
# The encoding is stated explicitly so the file does not depend on the
# platform's locale default; keys are sorted to keep the file stable.
with open('03_output.json', 'w', encoding='utf-8') as fh:
    json.dump(data_request_info, fh, indent=2, sort_keys=True)