In [1]:
import json
import yaml

In [2]:
# Load the downloaded annotation export; only the value under its 'rows' key
# is used by the rest of the notebook.
# JSON text is UTF-8, so decode it explicitly instead of relying on the
# platform default encoding (which is not UTF-8 on every system).
with open('downloaded_data/PMID25545329.json', 'rt', encoding='utf-8') as jsonin:
    annotations = json.load(jsonin)['rows']

In [3]:
# annotations

In [4]:
def fix_funcexp(tags):
    """Yield each tag, rewriting 'FuncExp...' tags to the form 'FuncExp:<id>'.

    Interim clean-up until the tags themselves are fixed/standardized: for a
    tag that starts with 'FuncExp', the field right after the first ':' is
    taken as the experiment id and a trailing 'Results' is stripped from it
    (presumably so result tags share an id with their experiment description).

    Args:
        tags: iterable of tag strings.

    Yields:
        The tags in their original order. Tags that do not start with
        'FuncExp' are passed through unchanged, and so is a 'FuncExp...' tag
        that carries no ':' (previously this raised IndexError).
    """
    suffix = 'Results'
    for t in tags:
        if t.startswith('FuncExp') and ':' in t:
            # keep only the field between the first and second ':'
            v = t.split(':')[1]
            if v.endswith(suffix):
                v = v[:-len(suffix)]
            t = 'FuncExp:%s' % v
        yield t

In [5]:
def tags2dict(tags):
    """Convert a list of 'key:value' tag strings into a dict.

    Tags are first passed through fix_funcexp(). Each tag is split on its
    FIRST ':' only, so a value that itself contains ':' stays intact rather
    than making dict() fail on a 3-field item. A tag with no ':' becomes a
    key whose value is None. If a key occurs more than once, the last
    occurrence wins.

    Args:
        tags: iterable of tag strings.

    Returns:
        dict mapping tag key -> value string (or None for bare tags).
    """
    # FIXME: should be a multidict (e.g., if multiple materials are used)
    return dict(t.split(':', 1) if ':' in t else (t, None) for t in fix_funcexp(tags))

In [6]:
def show_annotation(ann):
    """Print one annotation for manual inspection.

    Writes, in order: the annotation's 'cgtype' entry (blank when absent),
    a 'tags:' header, the tags tab-indented one per line, a separator line,
    and finally the annotation text.

    Args:
        ann: mapping with 'tags' (sequence of strings) and 'text' entries,
            and optionally 'cgtype'.
    """
    cgtype_value = ann.get('cgtype', '')
    print("cgtype: %s" % cgtype_value)
    print('tags:')
    # each tag goes on its own tab-indented line
    tag_lines = '\n\t'.join(ann['tags'])
    print('\t' + tag_lines)
    print('-------')
    body_text = ann['text']
    print(body_text)

In [7]:
# Index the annotations by the values of their 'Variant' / 'FuncExp' tags.
# Each annotation's 'CGType' tag decides which table it goes into.
paper_variants = {}
paper_experiments = {}
paper_results_by_allele = {}
paper_results_by_experiment = {}
for a in annotations:
    td = tags2dict(a['tags'])
    a['tagdict'] = td  # keep the parsed tags on the annotation itself
    cgtype = td.get('CGType', None)
    try:
        if 'VariantID' == cgtype:
            paper_variants[td['Variant']] = {
                'label': a['text'].strip(),
                'tags': td
            }
        elif 'ExperimentDescription' == cgtype:
            paper_experiments[td['FuncExp']] = {
                'assayType': 'BAO:%s'%td.get('BAO', ''),
                'assayMaterial': 'CLO:%s'%td['CLO'],
                'description': a['text'].strip(),
                'tags': td
            }
        elif 'ExperimentResult' == cgtype:
            # hack for now-- fix the tags!
            experiment = td['FuncExp']
            variant = td['Variant']
            expresult = {
                'description': a['text'].strip()
            }
            # NOTE(review): a later result with the same variant or the same
            # experiment replaces the earlier one, so each table keeps only
            # the last result per key -- confirm this is intended.
            paper_results_by_allele[variant] = expresult
            paper_results_by_experiment[experiment] = expresult
    except KeyError:
        # A required tag is missing: show the annotation so its tags can be
        # fixed, and carry on with the remaining annotations.
        show_annotation(a)

print(yaml.dump({
    'variants': paper_variants,
    'experiments': paper_experiments,
    'by_allele': paper_results_by_allele,
    'by_experiment': paper_results_by_experiment
}))

# link together what we can...
# Variants/experiments with no recorded result are skipped instead of
# raising KeyError.
for v in paper_variants:
    if v in paper_results_by_allele:
        paper_results_by_allele[v]['evaluatedVariant'] = paper_variants[v]
for e in paper_experiments:
    if e in paper_results_by_experiment:
        paper_results_by_experiment[e]['experimentDescription'] = paper_experiments[e]

by_allele:
  '1':
    description: '**Variant 1 Experiment 3.B Results**


      Variant 1 (C116R)


      **AssayResult:** 36+/-9; LDL uptake is lower compared to wild type


      **ExperimentResultAssertion:** abnormal'
  '2':
    description: '**Variant 2 Experiment 3.B Results**


      Variant 2 (D168N)


      **AssayResult:** 36+/-3; LDL uptake is lower compared to wild type


      **ExperimentResultAssertion:** abnormal'
  '3':
    description: '**Variant 3 Experiment 3.B Results**


      Variant 3 (D172N)


      **AssayResult:** 29+/-5; LDL uptake is lower compared to wild type


      **ExperimentResultAssertion:** abnormal'
  '4':
    description: '**Variant 4 Experiment 3.B Results**


      Variant 4 (R257W)


      **AssayResult:** 99+/-6; LDL uptake similar to wild type


      **ExperimentResultAssertion**: normal'
  '5':
    description: '**Variant 5 Experiment 3.B Results**


      Variant 5 (R300G)


      **AssayResult:** 45+/-3;  LDL uptake is lower compared to