In [1]:
import json
import glob
import os
from collections import defaultdict

In [2]:
# Path of the data request JSON file that is opened and parsed further down
# (relative, so it is resolved against the current working directory).
DATA_REQUEST_JSON = "data_request_1.2.1.json"
# Directory that is globbed for CMIP6 MIP table "*.json" files.  The leading
# "~cdds" is expanded by os.path.expanduser to the home directory of the
# "cdds" user account.
CMIP6_TABLE_DIR = os.path.expanduser("~cdds/etc/mip_tables/CMIP6/01.00.31/")

In [3]:
# Parse the data request file into a dictionary.  JSON text is UTF-8, so the
# encoding is given explicitly rather than relying on the platform/locale
# default used by open().
with open(DATA_REQUEST_JSON, encoding="utf-8") as fh:
    data_request_info = json.load(fh)

In [4]:
# Display the 'Amon.cct' entry exactly as it was loaded from the data request file.
data_request_info['Compound Name']['Amon.cct']

{'frequency': 'mon',
 'modeling_realm': 'atmos',
 'standard_name': 'air_pressure_at_convective_cloud_top',
 'units': 'Pa',
 'cell_methods': 'area: time: mean where convective_cloud (weighted by total convective cloud area)',
 'cell_measures': 'area: areacella',
 'long_name': 'Air Pressure at Convective Cloud Top',
 'comment': 'Where convective cloud is present in the grid cell, the instantaneous cloud top altitude should be that of the top of the highest level containing convective cloud. Missing data should be reported in the absence of convective cloud. The time mean should be calculated from these quantities averaging over occasions when convective cloud is present only, and should contain missing data for occasions when no convective cloud is present during the meaning period.',
 'dimensions': 'longitude latitude time',
 'out_name': 'cct',
 'type': 'real',
 'positive': '',
 'spatial_shape': 'XY-na',
 'temporal_shape': 'time-intv',
 'table': 'Amon',
 'cmip7_compound_name': 'atmos.mo

In [5]:
# Paths of the CMIP6 MIP table files found in CMIP6_TABLE_DIR.
#
# JSON files whose *file name* contains one of the markers below are skipped.
# The markers are tested against the base name only, so that a directory whose
# path happens to contain e.g. "CV" does not cause every file to be excluded.
# The result is sorted because glob returns files in arbitrary order and a
# fixed order keeps re-runs of the notebook reproducible.
CMIP6_TABLES = [
    filename
    for filename in sorted(glob.glob(os.path.join(CMIP6_TABLE_DIR, "*.json")))
    if not any(
        marker in os.path.basename(filename)
        for marker in ('grids', 'input_example', 'formula_terms', 'CV', 'coordinate')
    )
]

In [6]:
def load_cmip6_mip_tables(table_files=None):
    """Read MIP table JSON files into a nested ``table -> variable`` mapping.

    Parameters
    ----------
    table_files : iterable of str, optional
        Paths of the MIP table JSON files to read.  When omitted, the
        module-level ``CMIP6_TABLES`` list is used, which is the behaviour
        of the original zero-argument call.

    Returns
    -------
    collections.defaultdict
        ``result[table_id][variable]`` is the ``variable_entry`` description
        taken from the file.  ``table_id`` is the last whitespace-separated
        word of the file's ``Header.table_id`` value (so a value such as
        ``"Table Amon"`` yields ``"Amon"``).

    Raises
    ------
    OSError
        If a file cannot be opened.
    json.JSONDecodeError
        If a file does not contain valid JSON.
    KeyError
        If a file lacks ``Header.table_id`` or ``variable_entry``.
    """
    if table_files is None:
        table_files = CMIP6_TABLES

    cmip6_variable_definitions = defaultdict(dict)

    for filename in table_files:
        # JSON text is UTF-8; do not depend on the locale's default encoding.
        with open(filename, encoding="utf-8") as fh:
            data = json.load(fh)
        # Keep only the last word of the table id.
        table_id = data['Header']['table_id'].split()[-1]
        for variable, description in data['variable_entry'].items():
            cmip6_variable_definitions[table_id][variable] = description
    return cmip6_variable_definitions

# Load every file listed in CMIP6_TABLES once; the result is indexed as
# cmip6_variable_definitions[table_id][variable].
cmip6_variable_definitions = load_cmip6_mip_tables()

In [7]:
# These variables have been renamed
# CMIP7 "compound name" : "CMIP6 equivalent"
#
# Both sides must have the form "<table>.<variable>" with exactly one dot:
# the comparison code splits the CMIP6 name on "." and uses the first two
# parts as table and variable, so a value with an extra dot can never match.
KNOWN_REMAPPINGS = {
    "3hrPt.uas": "3hr.uas",
    "3hrPt.vas": "3hr.vas",
    "6hrPlevPt.zg": "6hrPlevPt.zg500",
    "AERmon.od443aer": "AERmon.od440aer",
    # Was "AERmon.od870.aer": the stray dot meant the lookup used the variable
    # name "od870" and this entry was always reported as not present.
    "AERmon.od865aer": "AERmon.od870aer",
    "Amon.co2massCLim": "Amon.co2massClim",
    "fx.lat": "CFsubhr.latitude",
    "fx.lon": "CFsubhr.longitude",
    "Omon.hfxint": "Omon.hfx",
    "Omon.hfyint": "Omon.hfy",
    "SImon.sidmassgrowthsi": "SImon.sidmasssi",
    "SImon.sidmassmeltlat": "SImon.sidmasslat",
    "SImon.sifllattop": "SImon.sifllatstop",
    "SImon.siflsensbot": "SImon.siflsensupbot",
    "SImon.simpthick": "SImon.simpmass",
    "SImon.sishearvel": "SImon.sishevel",
    # NOTE(review): "sndmasdyn" has a single "s" unlike its "sndmass..."
    # neighbours below -- confirm against the CMIP6 SImon table.
    "SImon.sisndmassdyn": "SImon.sndmasdyn",
    "SImon.sisndmassmelt": "SImon.sndmassmelt",
    "SImon.sisndmasssi": "SImon.sndmasssi",
    "SImon.sisndmasssnf": "SImon.sndmasssnf",
    "SImon.sisndmasssubl": "SImon.sndmasssubl",
    "SImon.sisndmasswind": "SImon.sndmasswind",
    "SImon.sisnmassacrossline": "SImon.snmassacrossline",
    "SImon.sistressave": "SImon.sistresave",
    "SImon.sistressmax": "SImon.sistremax",
}
# Keys deleted from every data request entry before it is compared and written out.
ELEMENTS_TO_REMOVE = ['spatial_shape', 'temporal_shape', 'out_name', 'type', 'uid']

In [8]:
# Annotate every data request entry with how it differs from CMIP6.
#
# Each entry of data_request_info['Compound Name'] is modified in place:
#   * the ELEMENTS_TO_REMOVE keys are dropped,
#   * 'remap' is set to the CMIP6 name from KNOWN_REMAPPINGS ('' if none),
#   * 'cmip6_differences' is set to 'Not present' when the CMIP6 tables have
#     no matching variable, otherwise to one '"<key>" was "<CMIP6 value>"'
#     line per key whose value differs ('' when nothing differs).
for compound_name, entry in data_request_info['Compound Name'].items():
    for element in ELEMENTS_TO_REMOVE:
        entry.pop(element, None)

    entry['remap'] = KNOWN_REMAPPINGS.get(compound_name, '')

    # Compare against the remapped CMIP6 name when there is one.  partition()
    # copes with a name that has no ".", which is then simply not found
    # instead of raising IndexError.
    cmip6_table, _, cmip6_variable = (entry['remap'] or compound_name).partition(".")

    # .get() is used so that looking up an unknown table does not insert an
    # empty table into the defaultdict as a side effect.
    cmip6_entry = cmip6_variable_definitions.get(cmip6_table, {}).get(cmip6_variable)
    if cmip6_entry is None:
        entry['cmip6_differences'] = 'Not present'
        continue

    # Only keys present on both sides are compared.  Each item is already
    # unique (one per key), so the list is joined directly in CMIP6 table
    # order; going through set() only made the order vary between runs.
    # The trailing "\n" on each item is kept so the text layout (a blank
    # line between items) is unchanged.
    differences = [
        '"{}" was "{}"\n'.format(key, cmip6_value)
        for key, cmip6_value in cmip6_entry.items()
        if key in entry and entry[key] != cmip6_value
    ]
    entry['cmip6_differences'] = "\n".join(differences)
    

[]


In [9]:
# Display the 'Amon.cct' entry again: the comparison loop has by now removed
# the ELEMENTS_TO_REMOVE keys and added 'remap' and 'cmip6_differences'.
data_request_info['Compound Name']['Amon.cct']

{'frequency': 'mon',
 'modeling_realm': 'atmos',
 'standard_name': 'air_pressure_at_convective_cloud_top',
 'units': 'Pa',
 'cell_methods': 'area: time: mean where convective_cloud (weighted by total convective cloud area)',
 'cell_measures': 'area: areacella',
 'long_name': 'Air Pressure at Convective Cloud Top',
 'comment': 'Where convective cloud is present in the grid cell, the instantaneous cloud top altitude should be that of the top of the highest level containing convective cloud. Missing data should be reported in the absence of convective cloud. The time mean should be calculated from these quantities averaging over occasions when convective cloud is present only, and should contain missing data for occasions when no convective cloud is present during the meaning period.',
 'dimensions': 'longitude latitude time',
 'positive': '',
 'table': 'Amon',
 'cmip7_compound_name': 'atmos.mon.cct_tavg-u-hxy-ccl',
 'processing_note': 'CHANGE SINCE CMIP6 in Cell Methods - CMIP6:area: tim

In [10]:
# Save the annotated data request as pretty-printed JSON with sorted keys.
with open('01_output.json', 'w') as output_file:
    output_file.write(json.dumps(data_request_info, indent=2, sort_keys=True))