Skip to content

Commit

Permalink
Refactored CLI script nanopub write utility
Browse files Browse the repository at this point in the history
  • Loading branch information
wshayes committed Jan 20, 2018
1 parent fc59552 commit 492fdc2
Show file tree
Hide file tree
Showing 2 changed files with 71 additions and 0 deletions.
39 changes: 39 additions & 0 deletions bel/nanopub/files.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import yaml
import re
import copy
import sys
from typing import Mapping, Any, List, Iterable, Tuple
import gzip

Expand Down Expand Up @@ -82,6 +83,44 @@ def write_nanopubs(nanopubs: Mapping[str, Any], filename: str, jsonlines: bool =
pass


def create_nanopubs_fh(output_fn: str):
    """Create Nanopubs output filehandle

    The output format is selected from the file name only; directory names
    in the path are ignored so that e.g. ``yaml_exports/out.json`` is
    treated as JSON rather than YAML.

    If output fn ends in gz, it will be written as a gzip file
    If output fn has *.yaml* or *.yml*, it will be written as a YAML file
    If output fn has *.jsonl*, it will be written as a JSONLines file
    If output fn has *.json*, it will be written as a JSON file

    Args:
        output_fn: Name of output file; when empty/None, sys.stdout is
            returned and all format flags stay False

    Returns:
        (filehandle, yaml_flag, jsonl_flag, json_flag)
    """
    # Local import: the enclosing module's import block is not fully visible
    import os.path

    json_flag, jsonl_flag, yaml_flag = False, False, False

    # No file name given: stream to stdout and leave the format to the caller
    if not output_fn:
        return (sys.stdout, yaml_flag, jsonl_flag, json_flag)

    if re.search(r'gz$', output_fn):
        out_fh = gzip.open(output_fn, 'wt')
    else:
        out_fh = open(output_fn, 'wt')

    # Only inspect the final path component so that parent directory names
    # containing 'yaml' / 'json' cannot pick the wrong output format
    basename = os.path.basename(output_fn)

    # Order matters: 'jsonl' must be tested before its prefix 'json'
    if re.search(r'ya?ml', basename):
        yaml_flag = True
    elif 'jsonl' in basename:
        jsonl_flag = True
    elif 'json' in basename:
        json_flag = True

    return (out_fh, yaml_flag, jsonl_flag, json_flag)


def read_edges(fn):

jsonl_flag, json_flag, yaml_flag = False, False, False
Expand Down
32 changes: 32 additions & 0 deletions bel/scripts.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,6 +238,38 @@ def convert_belscript(ctx, input_fn, output_fn):
fout.close()


@nanopub.command(name="stats")
@click.argument('input_fn')
@pass_context
def nanopub_stats(ctx, input_fn):
    """Collect statistics on nanopub file
    input_fn can be json, jsonl or yaml and additionally gzipped
    """

    # Per-relation tallies, kept separately for readability and attached to
    # the printed structure below
    relation_counts = {}
    counts = {
        'nanopubs': 0,
        'assertions': {'total': 0, 'subject_only': 0, 'nested': 0, 'relations': relation_counts},
    }
    assertion_counts = counts['assertions']

    for np in bnf.read_nanopubs(input_fn):
        # Records without a 'nanopub' key are ignored
        if 'nanopub' not in np:
            continue

        assertions = np['nanopub']['assertions']
        counts['nanopubs'] += 1
        assertion_counts['total'] += len(assertions)

        for assertion in assertions:
            relation = assertion.get('relation')

            # An assertion without a relation consists of a subject only
            if relation is None:
                assertion_counts['subject_only'] += 1
                continue

            # An object starting with '(' is counted as a nested statement
            if re.match(r'\s*\(', assertion.get('object') or ''):
                assertion_counts['nested'] += 1

            relation_counts[relation] = relation_counts.get(relation, 0) + 1

    # Keep the tallies and order them by relation name; calling sorted() on
    # the dict itself would return only the keys and drop every count
    assertion_counts['relations'] = dict(sorted(relation_counts.items()))

    print('DumpVar:\n', json.dumps(counts, indent=4))


@belc.group()
def stmt():
"""BEL Statement specific commands"""
Expand Down

0 comments on commit 492fdc2

Please sign in to comment.