# A basic script showing how to frame results and the different options

#### We start by importing the Library

In [1]:
# import the library
from cmipld import * 

#### Next we can load the JSONLD compacted files: 

When loading the data we can use
- the built in load function to extract the latest versions using the github api
- absolute file locations
- file URLs including github raw links
- github repository (supplied by a list)
- A combination of the above

In [2]:
# we can load the latest cmpi6plus and mip table files from github
latest = await CMIPFileUtils.load_latest()

'''
############################################################
# If you want to manually specify the locations you may use:
############################################################

cmip6plus = '<your path>/CMIP6Plus_CVs/compiled/graph_data.json'
mip_cmor_tables = '<your path>/mip-cmor-tables/compiled/graph_data.json'

latest = await CMIPFileUtils.load([cmip6plus, mip_cmor_tables])
'''
''

Loading latest CMIP6Plus and MIP-CMOR-Tables files...


''

#### Deciding on a frame
Exctracting information requires us to frame our request. Here we can start with asking for all objects of `@type` mip:institution and mip:consortim. These make up our list of consortiums

In [3]:
frame = {

        # "@context": {},
        
        "@type": [
            "mip:institution",
            "mip:consortium"
        ],
      
        # we also want to ensure any results are embedded in the output
        "@embed":"@always"
}

To request this information we use the library Frame class:

In [4]:
data = Frame(latest,frame)

Lets use pprint to display the results

In [5]:
data.print

[{'@id': 'mip-cmor-tables:organisations/consortia/miroc',
  '@type': 'mip:consortium',
  'consortium:changes': '',
  'consortium:cmip-acronym': 'MIROC',
  'consortium:members': [{'@type': 'consortia:member',
                          'consortium:consortia:membership-type': 'prior',
                          'consortium:dates': {'consortium:from': 2016,
                                               'consortium:phase': 'CMIP6',
                                               'consortium:to': 2022},
                          'consortium:institution': {'@id': 'mip-cmor-tables:organisations/institutions/jamstec'}},
                         {'@type': 'consortia:member',
                          'consortium:consortia:membership-type': 'prior',
                          'consortium:dates': {'consortium:from': 2016,
                                               'consortium:phase': 'CMIP6',
                                               'consortium:to': 2022},
                          'consor

#### Cleaning up the returned object.
There are a number of ways we can clean up the data, some of these are:
- untag: removing the tags on keys
- rmld: removing any id and type tags to result in normal json
- missing: writes the word "missing" for substituted data which does not exists
- lower: changes keys to lowercase
- flatten: converts single entry lists into just the entry. 
- [more functionality being added]

In [6]:
# lets clean the data using the simplest cleanup methods: rmld and untag
data.clean(['rmld','untag'])

# lets view how the results have changed
data.print

[{'changes': '',
  'cmip-acronym': 'MIROC',
  'members': [{'consortia:membership-type': 'prior',
               'dates': {'from': 2016, 'phase': 'CMIP6', 'to': 2022},
               'institution': {}},
              {'consortia:membership-type': 'prior',
               'dates': {'from': 2016, 'phase': 'CMIP6', 'to': 2022},
               'institution': {}},
              {'consortia:membership-type': 'prior',
               'dates': {'from': 2016, 'phase': 'CMIP6', 'to': 2022},
               'institution': {}},
              {'consortia:membership-type': 'prior',
               'dates': {'from': 2016, 'phase': 'CMIP6', 'to': 2022},
               'institution': {}}],
  'name': 'MIROC Consortium',
  'status': 'active',
  'url': ''},
 {'changes': '',
  'cmip-acronym': 'NCC',
  'members': [{'dates': {'from': 2016, 'phase': 'CMIP6', 'to': 2022},
               'institution': {},
               'membership-type': 'prior'},
              {'dates': {'from': 2016, 'phase': 'CMIP6', 'to': 2022

## Using python to massage the data
Now we can view the keysm and edit items as needed. 

In [7]:
dict([[i['cmip-acronym'],f"{i['name']}: {i['url']}"] for i in data.data])

{'MIROC': 'MIROC Consortium: ',
 'NCC': 'NCC Consortium: ',
 'SOLARIS HEPPA': 'SOLARIS HEPPA Consortium: https://solarisheppa.geomar.de',
 'AER': 'Atmospheric and Environmental Research: http://www.aer.com/',
 'AoR': 'Astronomical Observatory of Rome: http://web.oa-roma.inaf.it/',
 'AS-RCEC': 'Research Center for Environmental Changes, Academia Sinica: http://www.rcec.sinica.edu.tw/',
 'AUoT': 'Aristotle University of Thessaloniki: https://www.auth.gr/en',
 'AWI': 'Alfred Wegener Institute for Polar and Marine Research: https://www.awi.de',
 'BAS': 'British Antarctic Survey: https://www.bas.ac.uk/',
 'BCC': 'Beijing Biocytogen (China): http://www.bbctg.com.cn/',
 'CAMS': 'Chinese Academy of Meteorological Sciences: http://www.camscma.cn/en.html',
 'CAS': 'Chinese Academy of Sciences: http://english.cas.cn/',
 'CCCma': 'Environment and Climate Change Canada: https://www.canada.ca/en/environment-climate-change.html',
 'CCCR-IITM': 'Indian Institute of Tropical Meteorology: http://www.tro

------

## Advanced Framing
We can actually do all of the above directly by using the `@explicit` tag. This returns only fields which we request directly. 

In [8]:
frame2 = {
    "@type": [
        "mip:institution",
        "mip:consortium"
    ],
    "@explicit": True,
    "consortium:cmip-acronym": "",
    "institution:cmip-acronym": "",
    "consortium:name": "",
    "institution:name": ""
    
}

In [9]:
# by default clean covers: ['rmld','rmnull','untag','flatten']
data2 = Frame(latest,frame2).clean().filter_empty
data2.data

[{'cmip-acronym': 'MIROC', 'name': 'MIROC Consortium'},
 {'cmip-acronym': 'NCC', 'name': 'NCC Consortium'},
 {'cmip-acronym': 'SOLARIS HEPPA', 'name': 'SOLARIS HEPPA Consortium'},
 {'cmip-acronym': 'AER', 'name': 'Atmospheric and Environmental Research'},
 {'cmip-acronym': 'AoR', 'name': 'Astronomical Observatory of Rome'},
 {'cmip-acronym': 'AS-RCEC',
  'name': 'Research Center for Environmental Changes, Academia Sinica'},
 {'cmip-acronym': 'AUoT', 'name': 'Aristotle University of Thessaloniki'},
 {'cmip-acronym': 'AWI',
  'name': 'Alfred Wegener Institute for Polar and Marine Research'},
 {'cmip-acronym': 'BAS', 'name': 'British Antarctic Survey'},
 {'cmip-acronym': 'BCC', 'name': 'Beijing Biocytogen (China)'},
 {'cmip-acronym': 'CAMS',
  'name': 'Chinese Academy of Meteorological Sciences'},
 {'cmip-acronym': 'CAS', 'name': 'Chinese Academy of Sciences'},
 {'cmip-acronym': 'CCCma', 'name': 'Environment and Climate Change Canada'},
 {'cmip-acronym': 'CCCR-IITM',
  'name': 'Indian Ins

In [10]:
dict({item.values() for item in data2.data})

{'MPI-M': 'Max Planck Institute for Meteorology',
 'PCMDI': 'Lawrence Livermore National Laboratory',
 'PNNL-JGCRI': 'Joint Global Change Research Institute',
 'LPC2E': 'Laboratoire de Physique et Chimie de l’Environnement et de l’Espace',
 'UCSB': 'University of California, Santa Barbara',
 'ECMWF': 'European Centre for Medium-Range Weather Forecasts',
 'ESSO': 'Indian National Centre for Ocean Information Services',
 'NERC': 'Natural Environment Research Council',
 'CCCR-IITM': 'Indian Institute of Tropical Meteorology',
 'BCC': 'Beijing Biocytogen (China)',
 'UoM': 'University of Melbourne',
 'SOLARIS HEPPA': 'SOLARIS HEPPA Consortium',
 'FUBerlin': 'Freie Universität Berlin',
 'NASA-GSFC': 'Goddard Space Flight Center',
 'AS-RCEC': 'Research Center for Environmental Changes, Academia Sinica',
 'NIWA': 'National Institute of Water and Atmospheric Research',
 'NOAA-NCEI': 'NOAA National Centers for Environmental Information',
 'AoR': 'Astronomical Observatory of Rome',
 'OSU': 'Orego

------


## Case Specific Example
#### Selecting institutions from CMIP6Plus only
Here we look at the source_id entries, and the organisations which have been references within those. 

In [11]:
frame3 = {
    "@context": {
    "@vocab": "source-id:"
    
  },
    "@type": "mip:source-id",
    "source-id:name":"",
    "source-id:organisation-id": {
        "@type": [
            "mip:institution",
            "mip:consortium"
        ],
        "@explicit": True,
        "consortium:cmip-acronym": "",
        "institution:cmip-acronym": "",
        "consortium:name": "",
        "institution:name": "",
        
    },
    "@explicit": True
}

In [12]:
data3 = Frame(latest,frame3).clean()
data3.print

[{'name': 'GISS-E2-1-G',
  'organisation-id': {'cmip-acronym': 'NASA-GISS',
                      'name': 'Goddard Institute for Space Studies'}},
 {'name': 'HadGEM3-GC31-LL',
  'organisation-id': {'cmip-acronym': 'MOHC', 'name': 'Met Office'}},
 {'name': 'MIROC6',
  'organisation-id': {'cmip-acronym': 'MIROC', 'name': 'MIROC Consortium'}},
 {'name': 'MPI-ESM1-2-HR',
  'organisation-id': {'cmip-acronym': 'MPI-M',
                      'name': 'Max Planck Institute for Meteorology'}},
 {'name': 'MPI-ESM1-2-LR',
  'organisation-id': {'cmip-acronym': 'MPI-M',
                      'name': 'Max Planck Institute for Meteorology'}},
 {'name': 'NorESM2-LM',
  'organisation-id': {'cmip-acronym': 'NCC', 'name': 'NCC Consortium'}}]


In [13]:
dict(set([i['organisation-id'].values() for i in data3.data]))

{'MPI-M': 'Max Planck Institute for Meteorology',
 'NASA-GISS': 'Goddard Institute for Space Studies',
 'NCC': 'NCC Consortium',
 'MIROC': 'MIROC Consortium',
 'MOHC': 'Met Office'}