In [1]:
import cmipld
import importlib
from collections import OrderedDict


# Identify the repository this notebook runs in and its GitHub Pages address.
repo_url = cmipld.utils.git.url()
io_url = cmipld.utils.git.url2io(repo_url)

# The reverse mapping is indexed by the GitHub Pages URL; the value is the
# repository prefix (e.g. 'cmip6plus').
whoami = cmipld.reverse_mapping()[io_url]

# Summary banner: one entry per output line.
print(
    '-'*50,
    f'Parsing repo: {whoami}',
    f'Location: {repo_url}',
    f'Github IO link: {io_url}',
    '-'*50,
    sep='\n',
)

'''
To keep this set of functions generic, we create separate files containing all relevant functions for a repository to be processed. 
When running this script we determine the parent repository, and consequently its prefix. This is then used to grab all relevant functions for processing. 

Example usage: getattr(this,'activity')({'activity':[{"label":1,"id":'one'}]})

'''
# The per-repository processing functions live in cmipld/cvs/<whoami>.py;
# import that module by name and keep a handle to it in `this`.
module_path = f"cmipld.cvs.{whoami}"
this = importlib.import_module(module_path)



make test for url2io
--------------------------------------------------
Parsing repo: cmip6plus
Location: https://github.com/WCRP-CMIP/CMIP6Plus_CVs
Github IO link: https://wcrp-cmip.github.io/CMIP6Plus_CVs/
--------------------------------------------------


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Offline override: serve local copies of the linked-data repositories so the
# JSON-LD loader does not have to reach the published GitHub Pages sites.

from cmipld.utils.offline import LD_server

ldpath = cmipld.utils.git.ldpath()

# Published GitHub Pages URL -> directory name used for that repository on
# the local server.
repos = dict([
    ('https://wcrp-cmip.github.io/WCRP-universe/', 'universal'),
    ('https://wcrp-cmip.github.io/MIP-variables/', 'variables'),
    ('https://wcrp-cmip.github.io/CMIP6Plus_CVs/', 'cmip6plus'),
])

# The listed repositories are handed to the server together with the local
# working copy at `ldpath`, which is copied in under the current prefix.
localserver = LD_server(
    repos=repos.items(),
    copy=[[ldpath, whoami]],
    override='y',
)

# Start serving on port 8089; the returned value is reused by later cells.
localhost = localserver.start_server(8089)


Cloning into '/var/folders/hc/s_7lggq12nndglbdyrn3f91m1l58yd/T/cmipld_local_y2fa9unb2025-02-04T21:45:08/universal'...


https://wcrp-cmip.github.io/WCRP-universe/ universal /var/folders/hc/s_7lggq12nndglbdyrn3f91m1l58yd/T/cmipld_local_y2fa9unb2025-02-04T21:45:08


https://wcrp-cmip.github.io/MIP-variables/ variables /var/folders/hc/s_7lggq12nndglbdyrn3f91m1l58yd/T/cmipld_local_y2fa9unb2025-02-04T21:45:08


Cloning into '/var/folders/hc/s_7lggq12nndglbdyrn3f91m1l58yd/T/cmipld_local_y2fa9unb2025-02-04T21:45:08/variables'...




https://wcrp-cmip.github.io/CMIP6Plus_CVs/ cmip6plus /var/folders/hc/s_7lggq12nndglbdyrn3f91m1l58yd/T/cmipld_local_y2fa9unb2025-02-04T21:45:08


Cloning into '/var/folders/hc/s_7lggq12nndglbdyrn3f91m1l58yd/T/cmipld_local_y2fa9unb2025-02-04T21:45:08/cmip6plus'...




Repositories cloned into <TemporaryDirectory '/var/folders/hc/s_7lggq12nndglbdyrn3f91m1l58yd/T/cmipld_local_y2fa9unb2025-02-04T21:45:08'>
Copying the repo into LocalServer  /Users/daniel.ellis/WIPwork/CMIP6Plus_CVs/src-data/ --> cmip6plus
Repositories copied into <TemporaryDirectory '/var/folders/hc/s_7lggq12nndglbdyrn3f91m1l58yd/T/cmipld_local_y2fa9unb2025-02-04T21:45:08'>
Created SSL certificates in: /var/folders/hc/s_7lggq12nndglbdyrn3f91m1l58yd/T/cmipld_local_y2fa9unb2025-02-04T21:45:08
Serving /var/folders/hc/s_7lggq12nndglbdyrn3f91m1l58yd/T/cmipld_local_y2fa9unb2025-02-04T21:45:08 at https://localhost:8089


..+......+.......+..+.........+++++++++++++++++++++++++++++++++++++++++++++*..+.+.....+.............+.....+.+...+...+...+..+...+.+......+...............+.........+...+..+++++++++++++++++++++++++++++++++++++++++++++*..........+.......................+...+.......+.....+.+......+...+........+...+..........+.....+.........+.+........................+......+............+.....+.........+....+...............+......+.....+....+.....+.........+...+....+......+..+............+......+..........+.........+...+.....+....................................+.......+.........+...............+...+......+..+...+......+..........+...........+...+...................+++++
...+.+..+.......+...+++++++++++++++++++++++++++++++++++++++++++++*............+.+........+....+..+....+...............+......+......+..+.+..+++++++++++++++++++++++++++++++++++++++++++++*.....+............+......+++++
-----


In [3]:
# Forward each published URL to its directory on the local server.
cmipld.processor.replace_loader(localhost, [[url, name] for url, name in repos.items()])

Setting up location forwarding for:
 -  https://wcrp-cmip.github.io/WCRP-universe/ >>> https://localhost:8089/universal/ 

 -  https://wcrp-cmip.github.io/MIP-variables/ >>> https://localhost:8089/variables/ 

 -  https://wcrp-cmip.github.io/CMIP6Plus_CVs/ >>> https://localhost:8089/cmip6plus/ 



In [4]:

# Begin with an empty loader cache and empty result containers.
cmipld.processor.clear_cache()

CV = OrderedDict()   # CV sections, in insertion order
es_data = dict()     # EmbeddedFrame objects, keyed by graph name

In [5]:


# Load the project graph once; every section below is framed out of it.
project = cmipld.processor.EmbeddedFrame(f'{io_url}project/graph.jsonld')
es_data['project'] = project

# Project-level sections, each published as '<name>-list' under project/.
project_sections = (
    'repo',
    'mip-era',
    'license',
    'activity',
    'drs',
    'product',
    'required-global-attributes',
    'tables',
)

for item in project_sections:
    print(item)

    result = project.frame({'id': f'{io_url}project/{item}-list'})

    # The handlers in the repository module (cmipld/cvs/<reponame>.py) use
    # underscores where the section names use hyphens; the same underscored
    # name is also the key under which the section is stored in the CV.
    item = item.replace('-', '_')
    handler = getattr(this, item)
    CV[item] = handler(result[0])
    

127.0.0.1 - - [04/Feb/2025 21:45:11] "GET /cmip6plus/project/graph.jsonld HTTP/1.1" 200 -
127.0.0.1 - - [04/Feb/2025 21:45:11] "GET /universal/activity/_context_ HTTP/1.1" 200 -
127.0.0.1 - - [04/Feb/2025 21:45:11] "GET /universal/_context_ HTTP/1.1" 200 -
127.0.0.1 - - [04/Feb/2025 21:45:11] "GET /universal/license/_context_ HTTP/1.1" 200 -
127.0.0.1 - - [04/Feb/2025 21:45:11] "GET /universal/mip/_context_ HTTP/1.1" 200 -
127.0.0.1 - - [04/Feb/2025 21:45:11] "GET /universal/resolution/_context_ HTTP/1.1" 200 -
127.0.0.1 - - [04/Feb/2025 21:45:11] "GET /universal/product/_context_ HTTP/1.1" 200 -
127.0.0.1 - - [04/Feb/2025 21:45:11] "GET /variables/tables/_context_ HTTP/1.1" 200 -
127.0.0.1 - - [04/Feb/2025 21:45:11] "GET /variables/_context_ HTTP/1.1" 200 -
127.0.0.1 - - [04/Feb/2025 21:45:11] "GET /variables/variables/_context_ HTTP/1.1" 200 -
127.0.0.1 - - [04/Feb/2025 21:45:12] "GET /cmip6plus/project/graph.jsonld HTTP/1.1" 200 -
127.0.0.1 - - [04/Feb/2025 21:45:12] "GET /universal

 Please check output - this is not     corrected for secondary nested items.
[4] https://wcrp-cmip.github.io/CMIP6Plus_CVs/project ['https://wcrp-cmip.github.io/WCRP-universe/product/graph.jsonld', 'https://wcrp-cmip.github.io/WCRP-universe/mip/graph.jsonld', 'https://wcrp-cmip.github.io/WCRP-universe/license/graph.jsonld', 'https://wcrp-cmip.github.io/MIP-variables/tables/graph.jsonld', 'https://wcrp-cmip.github.io/CMIP6Plus_CVs/project/graph.jsonld', 'https://wcrp-cmip.github.io/WCRP-universe/activity/graph.jsonld']


100%|██████████| 6/6 [00:00<00:00, 51.40it/s]

repo





mip-era
license
activity
drs
product
required-global-attributes
tables


In [6]:

# Merge the index properties into the CV.
# `io_url` already ends with '/', so the path is appended without a leading
# slash (as the other cells do); the previous form requested
# '<io_url>//project/index-list.json'.
print('index')
CV.update(cmipld.processor.get(f'{io_url}project/index-list.json')[0]['index'])

index


In [7]:
print('make a file for relevant realms and relevant realms')

# Sections read straight from the universal repository.
# ('grid-label' and 'nominal' were noted here but are not processed.)
for universe_item in ('realm', 'frequency'):
    print(universe_item)
    graph_url = '{}{}/graph.jsonld'.format(cmipld.mapping["universal"], universe_item)
    # Compact the graph against itself and hand its '@graph' entries to the
    # repository handler of the same name.
    graph = cmipld.jsonld.compact(graph_url, graph_url)['@graph']
    CV[universe_item] = getattr(this, universe_item)(graph)

make a file for relevant realms and relevant realms
realm
frequency


In [8]:
# Disabled: earlier inline construction of CV['nominal_resolution'] from the
# universal resolution graph ("<value> <unit>" strings sorted by numeric
# value, non-numeric values last). Kept for reference; nothing in this cell
# currently writes to the CV.
# path = f'{cmipld.mapping["universal"]}resolution/graph.jsonld'
# data  = cmipld.jsonld.compact(path,path)['@graph']

# CV['nominal_resolution'] = [f"{r['value']} {r['unit']}" for r in sorted(data, key=lambda x: float(x['value']) if x['value'].replace('.', '', 1).isdigit() else float('inf'))]

# Reminder printed in place of the disabled logic above.
print('make a file for relevant nominal resolution')

make a file for relevant nominal resolution


In [11]:
# Load the experiment and source graphs (with their dependencies) and build
# the corresponding CV sections.
print(ldpath,whoami)

# `es_data` is created in the initialisation cell and already holds the
# project frame; it is extended here rather than re-created so that entry is
# not discarded.
for value in ['experiment', 'source']:
    es_data[value] = cmipld.processor.EmbeddedFrame(f'{io_url}{value}/graph.jsonld')
    result = es_data[value].frame({'@type':[f'wcrp:{value}']})

    # Sanity check: show one framed entry. Guarded so that a graph with fewer
    # than two entries does not abort the cell with an IndexError.
    if len(result) > 1:
        print(result[1])

    CV[value] = getattr(this,value)(result)
    
    

/Users/daniel.ellis/WIPwork/CMIP6Plus_CVs/src-data/ cmip6plus
 Please check output - this is not     corrected for secondary nested items.
[0] https://wcrp-cmip.github.io/CMIP6Plus_CVs/experiment ['https://wcrp-cmip.github.io/CMIP6Plus_CVs/experiment/graph.jsonld', 'https://wcrp-cmip.github.io/CMIP6Plus_CVs/sub-experiment/graph.jsonld', 'https://wcrp-cmip.github.io/WCRP-universe/source-type/graph.jsonld', 'https://wcrp-cmip.github.io/WCRP-universe/activity/graph.jsonld']


100%|██████████| 4/4 [00:00<00:00, 35.16it/s]


{'id': 'cmip6plus:experiment/abrupt-4xco2', 'type': ['wcrp:experiment', 'cmip6plus'], 'activity': {'id': 'universal:activity/cmip', 'type': ['wcrp:activity', 'universal'], 'description': 'CMIP DECK: 1pctCO2, abrupt-4xCO2, amip, esm-piControl, esm-historical, historical, and piControl experiments', 'label': 'CMIP', 'url': 'https://gmd.copernicus.org/articles/9/1937/2016/gmd-9-1937-2016.pdf'}, 'description': 'DECK: abrupt-4xCO2', 'end': -999, 'label': 'abrupt-4xCO2', 'min_number_yrs_per_sim': 150, 'model_realms': [{'id': 'universal:source-type/aogcm', 'type': ['wcrp:source_type', 'universal'], 'description': 'coupled atmosphere-ocean global climate model, additionally including explicit representation of at least the land and sea ice', 'is_required': True, 'label': 'AOGCM'}, {'id': 'universal:source-type/aer', 'type': ['wcrp:source_type', 'universal'], 'description': 'aerosol treatment in an atmospheric model where concentrations are calculated based on emissions, transformation, and rem

100%|██████████| 4/4 [00:00<00:00, 35.69it/s]


{'id': 'cmip6plus:source/cesm2', 'type': ['wcrp:source', 'cmip6plus'], 'cohort': 'Published', 'label': 'CESM2', 'label_extended': 'CESM2', 'license': [{'id': 'universal:license/cc-by-4.0', 'type': ['wcrp:license', 'universal'], 'exceptions_contact': '@ucar.edu <- cesm_cmip6', 'label': 'CC BY 4.0', 'long_label': 'Creative Commons Attribution 4.0 International', 'source_specific_info': '', 'url': 'https://creativecommons.org/licenses/by/4.0/'}], 'model_component': [{'id': 'cmip6plus:source/mam4', 'type': 'wcrp:model-component', 'codebase': '', 'component-family': 'mam', 'configuration': '', 'coupled-with': '', 'description': 'MAM4 (same grid as atmos)', 'embedded-in': '', 'label': 'MAM4', 'model-family': '', 'native-horizontal-grid': '', 'native-nominal-resolution': '100km', 'native-vertical-grid': '', 'realm': ['aerosol', 'atmoschem']}, {'id': 'cmip6plus:source/cam6', 'type': 'wcrp:model-component', 'codebase': '', 'component-family': 'cam', 'configuration': '', 'coupled-with': '', 'des

In [14]:
# Frame the source graph for 'wcrp:source' nodes with '@explicit' set and an
# empty 'organisation' pattern, then pass the result to the repository's
# `organisation` handler.
organisation_handler = this.organisation
org = organisation_handler(
    es_data['source'].frame({
        '@type': 'wcrp:source',
        "@explicit": True,
        'organisation': {},
    })
)

# Flatten the per-source mappings into one; a key seen again keeps its
# position and takes the later value.
institutions = {}
for entry in org:
    institutions.update(entry.items())
CV['institution_id'] = institutions

print('organisations')

organisations


In [16]:
# Display the merged institution mapping for inspection.
# NOTE(review): in the saved output two keys are still unexpanded
# 'universal:organisation/...' ids with a None value - presumably
# organisations that were not resolved; TODO confirm and fix at the source.
CV['institution_id']

{'CCCma': 'Environment and Climate Change Canada',
 'NCAR': 'National Center for Atmospheric Research',
 'universal:organisation/cnrm-cerfacs': None,
 'universal:organisation/ec-earth-consortium': None,
 'NASA-GISS': 'Goddard Institute for Space Studies',
 'MOHC': 'Met Office',
 'MIROC': 'MIROC Consortium',
 'MPI-M': 'Max Planck Institute for Meteorology',
 'MRI': 'Japan Meteorological Agency',
 'NCC': 'NCC Consortium'}

In [None]:
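# NOTE: disabled one-off helper, kept for reference only. If re-enabled it
# rewrites every ../source/*.json file in place, replacing 'organisation' with
# a one-element list holding the last path segment of its first entry.
# import glob,json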
# files = glob.glob('../source/*.json')
# print(files)

# for f in files:
#     print(f)
#     data = json.load(open(f))
#     data['organisation'] = [data['organisation'][0].split('/')[-1]]
    
#     json.dump(data,open(f,'w'),indent=4)

# data