# RECMORISATION 

This notebook runs in a Conda environment created with CMOR v3.13, a recent version of Iris, and ipykernel (which allows the environment to be used as a kernel from JupyterHub).

In [1]:
import cmor  # used for writing files
import iris  # used for reading files -- netCDF4 or xarray could be used here based on preference
import json
import os
import warnings 

### CMOR input json

In [2]:
# Dataset-level metadata for this CMOR run. The dictionary is serialised to
# 'input.json' below, and that file is handed to cmor.dataset_json() in the
# "Setup CMOR" cell.
# NOTE(review): the input files read in the "Get hold of data" cell come from
# variant r1i1p1f2 on grid label "gn", whereas the values here label the output
# as f3 / "g99" -- confirm that relabelling is intended.
DATASET_INFO = {
    # entries prefixed with "_" name table / controlled-vocabulary files and an option value
    "_AXIS_ENTRY_FILE": "tables/CMIP7_coordinate.json",
    "_FORMULA_VAR_FILE": "tables/CMIP7_formula_terms.json",
    "_cmip7_option": 1,
    "_controlled_vocabulary_file": "test/esgvoc-integration-cmor-cvs-table.json",
    "activity_id": "CMIP",
    # branch information relating this experiment to its parent (see the parent_* keys below)
    "branch_method": "standard",
    "branch_time_in_child": 30.0,
    "branch_time_in_parent": 10800.0,
    "calendar": "360_day",
    "drs_specs": "MIP-DRS7",
    "experiment_id": "1pctCO2",
    "forcing_index": "f3",
    "grid": "N96",
    "grid_label": "g99",
    "initialization_index": "i1",
    "institution_id": "MOHC",
    "license_id": "CC-BY-4-0",
    "nominal_resolution": "100-km",
    # "." -- presumably the root directory for CMOR output; the file path returned by
    # cmor.close() in the write cells starts with "./"
    "outpath": ".",
    "parent_mip_era": "CMIP7",
    "parent_time_units": "days since 1850-01-01",
    "parent_activity_id": "CMIP",
    "parent_source_id": "UKESM1-0-LL",
    "parent_experiment_id": "piControl",
    "parent_variant_label": "r1i1p1f3",
    "physics_index": "p1",
    "realization_index": "r1",
    "source_id": "UKESM1-0-LL",
    "tracking_prefix": "hdl:21.14107",
    "host_collection": "CMIP7",
    # "frequency" and "region" are also read back in the write cells to build
    # the CMIP7 compound name of each variable
    "frequency": "mon",
    "region": "glb",
    "archive_id": "WCRP",
    "mip_era": "CMIP7",
}

# Write the metadata to 'input.json' (indent=2 keeps the file human-readable).
with open('input.json', 'w') as fh:
    json.dump(DATASET_INFO, fh, indent=2)

### Get hold of data

In [3]:
LOCAL_CMIP6_ROOT = '/badc/cmip6/data/'  # will be different in other places


def load_single_cube(relative_glob):
    """Load all files matching a glob below LOCAL_CMIP6_ROOT as one iris cube.

    Parameters
    ----------
    relative_glob : str
        File pattern relative to ``LOCAL_CMIP6_ROOT`` (it is appended to the
        root by plain string concatenation, so it must not start with "/").

    Returns
    -------
    iris.cube.Cube
        The single cube produced by ``CubeList.concatenate_cube()``.

    Raises
    ------
    Whatever ``iris.load`` / ``concatenate_cube`` raise, e.g. when the loaded
    cubes cannot be joined into exactly one cube.
    """
    # warnings related to missing CF netcdf or pyproj can be ignored.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        cubes = iris.load(LOCAL_CMIP6_ROOT + relative_glob)

    # Blank the attributes of every cube before concatenating -- presumably
    # because per-file attributes differ between files and would otherwise
    # prevent the cubes from being joined.
    for cube in cubes:
        cube.attributes = {}

    return cubes.concatenate_cube()


# Load all existing monthly tas data as a single iris cube
tas_cube = load_single_cube('CMIP6/CMIP/MOHC/UKESM1-0-LL/1pctCO2/r1i1p1f2/Amon/tas/gn/v20190406/tas*.nc')

# Load all existing monthly pr data as a single iris cube
pr_cube = load_single_cube('CMIP6/CMIP/MOHC/UKESM1-0-LL/1pctCO2/r1i1p1f2/Amon/pr/gn/v20190406/pr*.nc')

### Setup CMOR

In [4]:
# Initialise CMOR with "tables" as its input path and CMOR_REPLACE as the
# netCDF file action.
cmor.setup(inpath="tables", netcdf_file_action=cmor.CMOR_REPLACE)

# Hand CMOR the dataset-level metadata from the 'input.json' file written in the
# "CMOR input json" cell.
cmor.dataset_json('input.json')

0

In [5]:
# Realm name: selects the CMIP7 table loaded here and is reused in the write
# cells as the first element of each variable's compound name.
realm = "atmos"
cmor.load_table(f'CMIP7_{realm}.json')

# warnings in this can be ignored -- the metadata in the tables is not perfectly set up

0

### Write tas (50 record chunks)


In [6]:
# construct latitude and longitude axes from the coordinates of the tas cube
cmorlat = cmor.axis("latitude",
                    coord_vals=tas_cube.coord('latitude').points,
                    cell_bounds=tas_cube.coord('latitude').bounds,
                    units="degrees_north")
cmorlon = cmor.axis("longitude",
                    coord_vals=tas_cube.coord('longitude').points,
                    cell_bounds=tas_cube.coord('longitude').bounds,
                    units="degrees_east")
# construct the time axis without values; points and bounds are supplied
# chunk by chunk through cmor.write below
cmortime = cmor.axis("time", units="days since 1850-01-01")

# define CMOR variable object
axes = [cmortime, cmorlat, cmorlon]

variable = "tas_tavg-h2m-hxy-u"
cmortas = cmor.variable(variable, "K", axes)

# build the compound name used as the key into the auxiliary JSON tables:
# "<realm>.<variable name parts>.<frequency>.<region>"
region = DATASET_INFO['region']
frequency = DATASET_INFO['frequency']
cmip7_compound_name = ".".join([realm] + variable.split("_") + [frequency, region])

# manually apply cell measures (a KeyError here means the table has no entry
# for this compound name)
with open('tables/CMIP7_cell_measures.json') as fh:
    cell_measures = json.load(fh)

variable_cell_measures = cell_measures['cell_measures'][cmip7_compound_name]

cmor.set_variable_attribute(cmortas, "cell_measures", "c", variable_cell_measures)

# override the long name if the overrides table has an entry for this variable
with open('tables/CMIP7_long_name_overrides.json') as fh:
    long_name_overrides = json.load(fh)

if cmip7_compound_name in long_name_overrides['long_name_overrides']:
    new_long_name = long_name_overrides['long_name_overrides'][cmip7_compound_name]
    cmor.set_variable_attribute(cmortas, "long_name", "c", new_long_name)

# slice up data into N time record chunks and push through CMOR.write;
# the number of records is taken from the cube instead of being hard-coded,
# so a shorter or longer time series never yields empty or missing chunks
N = 50
n_times = len(tas_cube.coord('time').points)
for i in range(0, n_times, N):
    s = slice(i, i + N)
    print(s)
    cube_slice = tas_cube[s]
    cmor.write(
        cmortas,
        cube_slice.data,
        time_vals=cube_slice.coord('time').points,
        time_bnds=cube_slice.coord('time').bounds)

# close the file (sorts the full naming); file_name=True makes the call return
# the path of the written file, which is shown as the cell output
cmor.close(cmortas, file_name=True)

slice(0, 50, None)
slice(50, 100, None)
slice(100, 150, None)
slice(150, 200, None)
slice(200, 250, None)
slice(250, 300, None)
slice(300, 350, None)
slice(350, 400, None)
slice(400, 450, None)
slice(450, 500, None)
slice(500, 550, None)
slice(550, 600, None)
slice(600, 650, None)
slice(650, 700, None)
slice(700, 750, None)
slice(750, 800, None)
slice(800, 850, None)
slice(850, 900, None)
slice(900, 950, None)
slice(950, 1000, None)
slice(1000, 1050, None)
slice(1050, 1100, None)
slice(1100, 1150, None)
slice(1150, 1200, None)
slice(1200, 1250, None)
slice(1250, 1300, None)
slice(1300, 1350, None)
slice(1350, 1400, None)
slice(1400, 1450, None)
slice(1450, 1500, None)
slice(1500, 1550, None)
slice(1550, 1600, None)
slice(1600, 1650, None)
slice(1650, 1700, None)
slice(1700, 1750, None)
slice(1750, 1800, None)


'./MIP-DRS7//CMIP7//CMIP//MOHC//UKESM1-0-LL//1pctCO2//r1i1p1f3//glb//mon//tas//tavg-h2m-hxy-u//g99//v20260122/tas_tavg-h2m-hxy-u_mon_glb_g99_UKESM1-0-LL_1pctCO2_r1i1p1f3_185001-199912.nc'

In [7]:
# Recursively list the directory tree that now exists under the MIP-DRS7 output root
!ls -R MIP-DRS7

MIP-DRS7:
CMIP7

MIP-DRS7/CMIP7:
CMIP

MIP-DRS7/CMIP7/CMIP:
MOHC

MIP-DRS7/CMIP7/CMIP/MOHC:
UKESM1-0-LL

MIP-DRS7/CMIP7/CMIP/MOHC/UKESM1-0-LL:
1pctCO2

MIP-DRS7/CMIP7/CMIP/MOHC/UKESM1-0-LL/1pctCO2:
r1i1p1f3

MIP-DRS7/CMIP7/CMIP/MOHC/UKESM1-0-LL/1pctCO2/r1i1p1f3:
glb

MIP-DRS7/CMIP7/CMIP/MOHC/UKESM1-0-LL/1pctCO2/r1i1p1f3/glb:
mon

MIP-DRS7/CMIP7/CMIP/MOHC/UKESM1-0-LL/1pctCO2/r1i1p1f3/glb/mon:
tas

MIP-DRS7/CMIP7/CMIP/MOHC/UKESM1-0-LL/1pctCO2/r1i1p1f3/glb/mon/tas:
tavg-h2m-hxy-u

MIP-DRS7/CMIP7/CMIP/MOHC/UKESM1-0-LL/1pctCO2/r1i1p1f3/glb/mon/tas/tavg-h2m-hxy-u:
g99

MIP-DRS7/CMIP7/CMIP/MOHC/UKESM1-0-LL/1pctCO2/r1i1p1f3/glb/mon/tas/tavg-h2m-hxy-u/g99:
v20260122

MIP-DRS7/CMIP7/CMIP/MOHC/UKESM1-0-LL/1pctCO2/r1i1p1f3/glb/mon/tas/tavg-h2m-hxy-u/g99/v20260122:
tas_tavg-h2m-hxy-u_mon_glb_g99_UKESM1-0-LL_1pctCO2_r1i1p1f3_185001-199912.nc


In [8]:
# Print only the header (-h) of the tas file; the "*" stands in for the version directory
!ncdump -h ./MIP-DRS7/CMIP7/CMIP/MOHC/UKESM1-0-LL/1pctCO2/r1i1p1f3/glb/mon/tas/tavg-h2m-hxy-u/g99/*/tas_tavg-h2m-hxy-u_mon_glb_g99_UKESM1-0-LL_1pctCO2_r1i1p1f3_185001-199912.nc


netcdf tas_tavg-h2m-hxy-u_mon_glb_g99_UKESM1-0-LL_1pctCO2_r1i1p1f3_185001-199912 {
dimensions:
	time = UNLIMITED ; // (1800 currently)
	lat = 144 ;
	lon = 192 ;
	bnds = 2 ;
variables:
	double time(time) ;
		time:bounds = "time_bnds" ;
		time:units = "days since 1850-01-01" ;
		time:calendar = "360_day" ;
		time:axis = "T" ;
		time:long_name = "Time Intervals" ;
		time:standard_name = "time" ;
	double time_bnds(time, bnds) ;
	double lat(lat) ;
		lat:bounds = "lat_bnds" ;
		lat:units = "degrees_north" ;
		lat:axis = "Y" ;
		lat:long_name = "Latitude" ;
		lat:standard_name = "latitude" ;
	double lat_bnds(lat, bnds) ;
	double lon(lon) ;
		lon:bounds = "lon_bnds" ;
		lon:units = "degrees_east" ;
		lon:axis = "X" ;
		lon:long_name = "Longitude" ;
		lon:standard_name = "longitude" ;
	double lon_bnds(lon, bnds) ;
	double height ;
		height:units = "m" ;
		height:axis = "Z" ;
		height:positive = "up" ;
		height:long_name = "height" ;
		height:standard_name = "height" ;
	float tas(time, lat, lon)

### Write pr (50 record chunks)

In [9]:
# construct latitude and longitude axes from the coordinates of the pr cube
cmorlat = cmor.axis("latitude",
                    coord_vals=pr_cube.coord('latitude').points,
                    cell_bounds=pr_cube.coord('latitude').bounds,
                    units="degrees_north")
cmorlon = cmor.axis("longitude",
                    coord_vals=pr_cube.coord('longitude').points,
                    cell_bounds=pr_cube.coord('longitude').bounds,
                    units="degrees_east")
# construct the time axis without values; points and bounds are supplied
# chunk by chunk through cmor.write below
cmortime = cmor.axis("time",
                        units="days since 1850-01-01")
# define CMOR variable object
axes = [cmortime, cmorlat, cmorlon]
variable = "pr_tavg-u-hxy-u"
cmorpr = cmor.variable(variable, "kg m-2 s-1", axes)

# build the compound name used as the key into the auxiliary JSON tables:
# "<realm>.<variable name parts>.<frequency>.<region>"
region = DATASET_INFO['region']
frequency = DATASET_INFO['frequency']
cmip7_compound_name = ".".join([realm] + variable.split("_") + [frequency, region])


# manually apply cell measures (a KeyError here means the table has no entry
# for this compound name)
with open('tables/CMIP7_cell_measures.json') as fh:
    cell_measures = json.load(fh)

variable_cell_measures = cell_measures['cell_measures'][cmip7_compound_name]

cmor.set_variable_attribute(cmorpr, "cell_measures", "c", variable_cell_measures)

# override the long name if the overrides table has an entry for this variable
with open('tables/CMIP7_long_name_overrides.json') as fh:
    long_name_overrides = json.load(fh)

if cmip7_compound_name in long_name_overrides['long_name_overrides']:
    new_long_name = long_name_overrides['long_name_overrides'][cmip7_compound_name]
    cmor.set_variable_attribute(cmorpr, "long_name", "c", new_long_name)

# slice up data into N time record chunks and push through CMOR.write;
# the number of records is taken from the cube instead of being hard-coded
N = 50
n_times = len(pr_cube.coord('time').points)
for i in range(0, n_times, N):
    s = slice(i, i + N)
    print(s)
    cube_slice = pr_cube[s]
    # write the *pr* slice (data, time points and time bounds all come from
    # pr_cube, not tas_cube) so the pr file really contains precipitation
    cmor.write(
        cmorpr,
        cube_slice.data,
        time_vals=cube_slice.coord('time').points,
        time_bnds=cube_slice.coord('time').bounds)

# close the file (sorts the full naming); file_name=True makes the call return
# the path of the written file, which is shown as the cell output
cmor.close(cmorpr, file_name=True)

slice(0, 50, None)
slice(50, 100, None)
slice(100, 150, None)
slice(150, 200, None)
slice(200, 250, None)
slice(250, 300, None)
slice(300, 350, None)
slice(350, 400, None)
slice(400, 450, None)
slice(450, 500, None)
slice(500, 550, None)
slice(550, 600, None)
slice(600, 650, None)
slice(650, 700, None)
slice(700, 750, None)
slice(750, 800, None)
slice(800, 850, None)
slice(850, 900, None)
slice(900, 950, None)
slice(950, 1000, None)
slice(1000, 1050, None)
slice(1050, 1100, None)
slice(1100, 1150, None)
slice(1150, 1200, None)
slice(1200, 1250, None)
slice(1250, 1300, None)
slice(1300, 1350, None)
slice(1350, 1400, None)
slice(1400, 1450, None)
slice(1450, 1500, None)
slice(1500, 1550, None)
slice(1550, 1600, None)
slice(1600, 1650, None)
slice(1650, 1700, None)
slice(1700, 1750, None)
slice(1750, 1800, None)


'./MIP-DRS7//CMIP7//CMIP//MOHC//UKESM1-0-LL//1pctCO2//r1i1p1f3//glb//mon//pr//tavg-u-hxy-u//g99//v20260122/pr_tavg-u-hxy-u_mon_glb_g99_UKESM1-0-LL_1pctCO2_r1i1p1f3_185001-199912.nc'

In [10]:

# Print only the header (-h) of the pr file; the "*" stands in for the version directory
!ncdump -h ./MIP-DRS7/CMIP7/CMIP/MOHC/UKESM1-0-LL/1pctCO2/r1i1p1f3/glb/mon/pr/tavg-u-hxy-u/g99/*/pr_tavg-u-hxy-u_mon_glb_g99_UKESM1-0-LL_1pctCO2_r1i1p1f3_185001-199912.nc

netcdf pr_tavg-u-hxy-u_mon_glb_g99_UKESM1-0-LL_1pctCO2_r1i1p1f3_185001-199912 {
dimensions:
	time = UNLIMITED ; // (1800 currently)
	lat = 144 ;
	lon = 192 ;
	bnds = 2 ;
variables:
	double time(time) ;
		time:bounds = "time_bnds" ;
		time:units = "days since 1850-01-01" ;
		time:calendar = "360_day" ;
		time:axis = "T" ;
		time:long_name = "Time Intervals" ;
		time:standard_name = "time" ;
	double time_bnds(time, bnds) ;
	double lat(lat) ;
		lat:bounds = "lat_bnds" ;
		lat:units = "degrees_north" ;
		lat:axis = "Y" ;
		lat:long_name = "Latitude" ;
		lat:standard_name = "latitude" ;
	double lat_bnds(lat, bnds) ;
	double lon(lon) ;
		lon:bounds = "lon_bnds" ;
		lon:units = "degrees_east" ;
		lon:axis = "X" ;
		lon:long_name = "Longitude" ;
		lon:standard_name = "longitude" ;
	double lon_bnds(lon, bnds) ;
	float pr(time, lat, lon) ;
		pr:standard_name = "precipitation_flux" ;
		pr:long_name = "Precipitation" ;
		pr:units = "kg m-2 s-1" ;
		pr:cell_methods = "area: time: mean" ;
		pr:missi