In [1]:
from gemmi import cif
import matplotlib.pyplot as plt
import numpy as np
import sys
import os
import pprint
import requests
import json
import isodbtools



## Equivalency Tables

| AIF      |      JSON      |
|----------|:-------------:|
| _exptl_adsorptive |  adsorbate|
| _exptl_temperature |  temperature |
| _sample_material_id |  adsorbent |
| _article_DOI |    DOI   |
| _article_source| articleSource |
| _units_loading| adsorptionUnits |
| _units_pressure| pressureUnits |
| _units_composition| compositionType |
| _date| date |
| _digitizer| digitizer |
| _measurement_type | category |

In [2]:
equivalency_table = [
    {'AIF': '_exptl_adsorption', 'JSON': 'adsorbate', 'dtype': str},
    {'AIF': '_exptl_temperature', 'JSON': 'temperature', 'dtype': float},
    {'AIF': '_sample_material_id', 'JSON': 'adsorbent', 'dtype': str},
    {'AIF': '_article_DOI', 'JSON': 'DOI', 'dtype': str},
    {'AIF': '_article_source', 'JSON': 'articleSource', 'dtype': str},
    {'AIF': '_units_loading', 'JSON': 'adsorptionUnits', 'dtype': str},
    {'AIF': '_units_pressure', 'JSON': 'pressureUnits', 'dtype': str},
    {'AIF': '_units_composition', 'JSON': 'compositionType', 'dtype': str},
    {'AIF': '_measurement_type', 'JSON': 'category', 'dtype': str},
    {'AIF': '_exptl_adsorptive', 'JSON': 'adsorbate', 'dtype': str},
    {'AIF': '_exptl_sample_mass', 'JSON': 'exptl_sample_mass', 'dtype': float},
    #{'AIF': '', 'JSON': ''},
    #{'AIF': '', 'JSON': ''},
        
    # AIF Keys without JSON equivalents
    # _exptl_operator
    # _exptl_instrument
    # _exptl_date
    
    # JSON Keys without AIF equivalents
    # digitizer
    
]

def crossreference_keys(table,key,informat):
    """
    Input syntax:
    table: list of cross-referenced AIF<->JSON equivalencies
    key: key to cross reference
    informat: format of input key, either AIF or JSON
    
    Output syntax:
    outformat: format of the output key (opposite the informat)
    Returns the outformat value of the informat key
    
    If the informat key is not in the equivalency table, the
    script uses a fallback handler:
       JSON key "inkey" -> AIF key "_inkey"
       AIF key "_inkey" -> JSON key "inkey"
    """
    
    if informat == 'AIF':
        outformat = 'JSON'
    elif informat == 'JSON':
        outformat = 'AIF'
    else:
        raise AssertionError('Unknown informat: '+informat)

    # Cross reference the input format against the list
    #  IS THERE A BETTER WAY TO DO THIS???
    tmp_list = [ x[informat] for x in table ]
    if key in tmp_list:
        index = tmp_list.index(key)
        #print(table[index])
        return table[index][outformat], table[index]['dtype']
    
    # Handler for unknown keys:
    if informat == 'AIF':
        #strip leading "_"
        return key[1:], str
    elif informat == 'JSON':
        #add leading "_"
        return "_"+key, str
    
print(crossreference_keys(equivalency_table,'temperature','JSON'))
print(crossreference_keys(equivalency_table,'_temperature','AIF'))

('_exptl_temperature', <class 'float'>)
('temperature', <class 'str'>)


## Example Conversion of AIF to JSON

In [14]:
filename = '../example.aif'

def aif2json(infile):

    data = cif.read(infile).sole_block()

    data_dict = {}

    # wrapper for metadata
    for item in data:
        if item.pair is not None:
            #print('a', item.pair)
            inkey = item.pair[0]
            outkey, dtype = crossreference_keys(equivalency_table,inkey,informat='AIF')
            if dtype == float:
                data_dict[outkey] = float(item.pair[-1])
            elif dtype == str:
                data_dict[outkey] = str(item.pair[-1])
            elif dtype == int:
                data_dict[outkey] = int(item.pair[-1])

    # wrapper for isotherm loop
    isotherm_data = []
    ads_press = np.array(data.find_loop('_adsorp_pressure'), dtype=float)
    ads_amount = np.array(data.find_loop('_adsorp_amount'), dtype=float)
    try:
        ads_p0 = np.array(data.find_loop('_adsorp_p0'), dtype=float)
        output_p0 = True
    except:
        output_p0 = False

    adsorbate = data_dict['adsorbate']
    for p, a in zip(ads_press, ads_amount):
        isotherm_data.append({'pressure': p,
                              'branch': 'adsorp',
                              'species_data': [
                                 {'name': adsorbate,
                                 'composition': 1.0,
                                 'adsorption': a}
                             ]})
        if output_p0:
            pindex = np.where(ads_press==p)
            isotherm_data[-1]["p0"] = ads_p0[pindex][0]
    data_dict["isotherm_data"] = isotherm_data

    return data_dict
    
pprint.pprint(aif2json(filename))

{'adsorbate': 'Nitrogen',
 'adsorbent': "'DUT-6'",
 'adsorptionUnits': 'mmol/g',
 'exptl_date': '2009-09-17T19:56:00',
 'exptl_instrument': "'Autosorb Station 1'",
 'exptl_operator': "'Nicole'",
 'exptl_sample_mass': 0.0339,
 'isotherm_data': [{'branch': 'adsorp',
                    'p0': 101860.98004799998,
                    'pressure': 0.269367243408,
                    'species_data': [{'adsorption': 0.006484305926579284,
                                      'composition': 1.0,
                                      'name': 'Nitrogen'}]},
                   {'branch': 'adsorp',
                    'p0': 101860.98004799998,
                    'pressure': 0.27275763204,
                    'species_data': [{'adsorption': 0.008217577878132952,
                                      'composition': 1.0,
                                      'name': 'Nitrogen'}]},
                   {'branch': 'adsorp',
                    'p0': 101860.98004799998,
                    'pressure': 0.27

## Example Conversion of JSON to AIF

In [5]:
URL = 'https://adsorption.nist.gov/isodb/api/isotherm/10.1021Jp400480q.Isotherm2.json'
isotherm_dict = requests.get(URL).json()

In [17]:
def json2aif(in_dict):
        # initialize aif block
    d = cif.Document()
    d.add_new_block(isotherm_dict['filename'])  #fix this

    block = d.sole_block()
    
    for inkey in in_dict:
        if inkey != 'isotherm_data':
            outkey, dtype = crossreference_keys(equivalency_table,inkey,informat='JSON')
            if in_dict[inkey] == '':
                #Ignore blank keys
                continue
            elif inkey == 'adsorbates':
                # Temporary kludge for adsorptives
                if len(in_dict[inkey]) == 1:
                    outkey = '_exptl_adsorptive'
                    outstring = in_dict[inkey][0]['name']
                    block.set_pair(outkey, outstring)
                else:
                    raise Exception('This script is only for pure component adsorption right now')
            elif type(in_dict[inkey]) in [str,float,int]:
                block.set_pair(outkey, str(in_dict[inkey]))
            elif type(in_dict[inkey]) == dict:
                # Temporary kludge for adsorbents
                if 'name' in in_dict[inkey]:
                    block.set_pair(outkey, str(in_dict[inkey]['name']))
            else:
                print(inkey, in_dict[inkey], outkey)
                raise Exception('Script unable to handle this key set')
    
    # Measurements
    #   Default to adsorption branch, state as desorption ONLY if specified
    pressure_adsorp = []
    amount_adsorp = []
    pressure_desorp = []
    amount_desorp = []
    for point in isotherm_dict['isotherm_data']:
        if 'branch' in point:
            if point['branch'] == 'adsorp':
                pressure_adsorp.append(point['pressure'])
                amount_adsorp.append(point['species_data'][0]['adsorption'])
            elif point['branch'] == 'desorp':
                pressure_desorp.append(point['pressure'])
                amount_desorp.append(point['species_data'][0]['adsorption'])
            else:
                raise Exception('ERROR: unknown branch type:', point['branch'])
        else: #default=adsorp
            pressure_adsorp.append(point['pressure'])
            amount_adsorp.append(point['species_data'][0]['adsorption'])
    
    loop_ads = block.init_loop('_adsorp_', ['pressure', 'amount'])
    loop_ads.set_all_values([
        list(np.array(pressure_adsorp).astype(str)),
        list(np.array(amount_adsorp).astype(str))
    ])
    
    if len(pressure_desorp) != 0:
        loop_ads = block.init_loop('_desorp_', ['pressure', 'amount'])
        loop_ads.set_all_values([
            list(np.array(pressure_desorp).astype(str)),
            list(np.array(amount_desorp).astype(str))
        ])
    
    d.write_file('test.aif')
    string = open('test.aif',mode='r').read()
    print()
    print(string)
    
json2aif(isotherm_dict)


data_10.1021Jp400480q.Isotherm2
_filename 10.1021Jp400480q.Isotherm2
_article_DOI 10.1021/Jp400480q
_article_source UNKNOWN
_date 1000-01-01
_digitizer UNKNOWN
_sample_material_id MWCNT
_exptl_adsorptive Argon
_exptl_temperature 87
_tabular_data 0
_units_loading kmol/m3
_units_pressure bar
_units_composition molefraction

loop_
_adsorp_pressure
_adsorp_amount
0.00374842 12.7703
0.0173326 15.3716
0.0332298 17.6577
0.042291 19.3131
0.0628041 21.1261
0.0810402 22.7027
0.0901066 24.2793
0.103815 24.9887
0.131262 25.9347
0.151837 26.8018
0.186192 27.2748
0.232015 27.6689
0.270965 27.9842
0.316798 28.2207
0.383263 28.4572
0.477242 28.6149
0.743142 28.9302
0.912776 29.009
0.988426 29.009

