Skip to content

Commit

Permalink
v0.10.0.dev0: improvements to display of search results
Browse files Browse the repository at this point in the history
* display search results `--as-text`
* select search results to save to file using the `--filter-rows` option
* when concatenating search results to an existing text file, the `--no-header` option omits the header so that only result rows are appended
* new classes to `sfftk.notes.find`: `CSVTable` and `CSVRow`
* associated unit tests with minor cleanup
  • Loading branch information
paulkorir committed Oct 24, 2023
1 parent 51807db commit bcddd64
Show file tree
Hide file tree
Showing 9 changed files with 328 additions and 86 deletions.
2 changes: 1 addition & 1 deletion sfftk/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,4 +3,4 @@

BASE_DIR = os.path.dirname(__file__)

SFFTK_VERSION = 'v0.9.0.dev2'
SFFTK_VERSION = 'v0.10.0.dev0'
40 changes: 35 additions & 5 deletions sfftk/core/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -678,6 +678,19 @@
search_notes_parser.add_argument(
'--rows', type=int, default=10, help="number of rows [default: 10]"
)
search_notes_parser.add_argument(
'--as-text', action='store_true', help="output as CSV [default: False]"
)
search_notes_parser.add_argument(
'--filter-rows', nargs='*',
help="space-separated list of search result rows to display; "
"only works when --as-text flag is selected [default: False]"
)
search_notes_parser.add_argument(
'--no-header', action='store_true',
help="do not show CSV header (useful when concatenating "
"search results to an existing file which already has the header) [default: False]"
)
ols_parser = search_notes_parser.add_argument_group(
title='EBI Ontology Lookup Service (OLS)',
description='The Ontology Lookup Service (OLS) is a repository for biomedical ontologies that aims to provide a '
Expand Down Expand Up @@ -1388,17 +1401,17 @@ def parse_args(_args, use_shlex=False):
print_date(f"error: mergemask can handle at most 255 masks ({len(args.masks)} provided)")
return 64, configs
if not _masks_exist(args):
print_date(f"error: one or more masks missing; please verify that all paths are correct")
print_date("error: one or more masks missing; please verify that all paths are correct")
return 65, configs
if not _mask_all_correct_files(args):
print_date(f"error: one or more invalid file formats; please retry")
print_date("error: one or more invalid file formats; please retry")
return 65, configs
if not _masks_have_same_dimensions(args):
print_date(
f"error: inhomogeneous masks: dimension differs between masks (use --verbose to view details)")
"error: inhomogeneous masks: dimension differs between masks (use --verbose to view details)")
return 65, configs
if not _masks_have_mode_zero(args):
print_date(f"error: mode must be zero (0); please run `sff prep binmap` first on all masks")
print_date("error: mode must be zero (0); please run `sff prep binmap` first on all masks")
return 65, configs
# starsplit
elif args.prep_subcommand == 'starsplit':
Expand All @@ -1412,7 +1425,7 @@ def parse_args(_args, use_shlex=False):
if args.output is None:
args.output = f"{pathlib.Path(args.star_file).stem}_{args.infix}_{args.rows}.star"
if args.rows <= 0:
print_date(f"error: rows must be positive")
print_date("error: rows must be positive")
return 65, configs

# view
Expand Down Expand Up @@ -1629,6 +1642,23 @@ def parse_args(_args, use_shlex=False):
):
print_date("Invalid usage: -O, -x, -o, -L, -l can only be used with -R ols")
return 64, None
if args.as_text:
if args.filter_rows:
# make sure all the values are digits
try:
assert all(map(lambda i: i.isdecimal(), args.filter_rows))
except AssertionError:
print_date("Invalid filter rows: {}; should be a comma-separated list of digits".format(
args.filter_rows))
return 64, configs
# now validate the filter rows against --start and --rows
# valid values will ensure that there is a non-empty intersection between the set of --filter-row values and the range {--start,..,(--start+--rows)-1}
filter_row_values = set(list(map(int, args.filter_rows)))
valid_index_values = set(range(args.start, args.start + args.rows))
if len(filter_row_values.intersection(valid_index_values)) == 0:
print_date(
f"Invalid filter rows: {args.filter_rows}; should be in range {args.start}-{args.start + args.rows - 1}")
return 64, configs
elif args.notes_subcommand == "show":
if args.segment_id is not None:
args.segment_id = list(map(int, args.segment_id.split(',')))
Expand Down
27 changes: 15 additions & 12 deletions sfftk/core/prep.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,16 +108,19 @@ class MergedMask:
new_labels = {15, 18, 19}
label_tree[15] = [5, 10]
label_tree[18] = [8, 10]
label_tree[19] = [9, 10] # => {1: 0, 2: 0, 3: [1, 2], 4: 0, 5: 0, 8: [3, 5], 9: [4, 5], 10: 0, 15: [5, 10], 18: [8, 10], 19: [9, 10]}
label_tree[19] = [9, 10] # => {1: 0, 2: 0, 3: [1, 2], 4: 0, 5: 0, 8: [3, 5], 9: [4, 5], 10: 0, 15: [5, 10],
18: [8, 10], 19: [9, 10]}
label_set = {1, 2, 3, 4, 5, 10, 15, 18, 19}
label = numpy.amax(merge_mask) + 1 = 20
# mask 6
merged_mask = [10, 18, 19, 15] + [1, 0, 1, 0] * 20 = [30, 18, 39, 15]
label_set = {1, 2, 3, 4, 5, 10, 15, 18, 19, 20}
label_tree[20] = 0 # => {1: 0, 2: 0, 3: [1, 2], 4: 0, 5: 0, 8: [3, 5], 9: [4, 5], 10: 0, 15: [5, 10], 18: [8, 10], 19: [9, 10], 20: 0}
label_tree[20] = 0 # => {1: 0, 2: 0, 3: [1, 2], 4: 0, 5: 0, 8: [3, 5], 9: [4, 5], 10: 0, 15: [5, 10],
18: [8, 10], 19: [9, 10], 20: 0}
new_labels = {30, 39}
label_tree[30] = [10, 20]
label_tree[39] = [19, 20] # => {1: 0, 2: 0, 3: [1, 2], 4: 0, 5: 0, 8: [3, 5], 9: [4, 5], 10: 0, 15: [5, 10], 18: [8, 10], 19: [9, 10], 20: 0, 30: [10, 20], 39: [19, 20]}
label_tree[39] = [19, 20] # => {1: 0, 2: 0, 3: [1, 2], 4: 0, 5: 0, 8: [3, 5], 9: [4, 5], 10: 0, 15: [5, 10],
18: [8, 10], 19: [9, 10], 20: 0, 30: [10, 20], 39: [19, 20]}
label_set = {1, 2, 3, 4, 5, 10, 15, 18, 19, 20, 30, 39}
label = numpy.amax(merge_mask) + 1 = 40
Expand Down Expand Up @@ -516,28 +519,28 @@ def mergemask(args, configs):
# fail fast: ensure the output does not exist
outfile = pathlib.Path(f"{args.output_prefix}.{args.mask_extension}")
if not args.overwrite and outfile.exists():
print_date(f"error: the file already exists; use --overwrite to overwrite the existing merged_mask or set a "
f"new output prefix using --output-prefix")
print_date("error: the file already exists; use --overwrite to overwrite the existing merged_mask or set a "
"new output prefix using --output-prefix")
return 64
# ensure that the files are binary
if args.skip_assessment:
print_date("info: skipping mask assessment; assuming all masks are binary...")
elif not _masks_all_binary(args, configs) and not args.skip:
print_date(f"error: one or more masks are non-binary; use --verbose to view details")
print_date("error: one or more masks are non-binary; use --verbose to view details")
return 65
# todo: allow cases where one or more files are non-binary
# ensure that they don't overlap each other
if not _masks_no_overlap(args, configs) and not args.allow_overlap:
print_date(f"error: one or more masks overlap; use --verbose to view details")
print_date(f"info: if overlapping segments are expected re-run with the --allow-overlap argument; "
f"see 'sff prep mergemask' for more information")
print_date("error: one or more masks overlap; use --verbose to view details")
print_date("info: if overlapping segments are expected re-run with the --allow-overlap argument; "
"see 'sff prep mergemask' for more information")
return 65
# now we can merge masks
if args.verbose:
print_date(f"info: proceeding to merge masks...")
print_date("info: proceeding to merge masks...")
merged_mask = _mergemask(args.masks)
if args.verbose:
print_date(f"info: merge complete...")
print_date("info: merge complete...")
if args.verbose:
print_date(f"info: attempting to write output to '{args.output_prefix}.{args.mask_extension}'...")
with mrcfile.new(f"{args.output_prefix}.{args.mask_extension}", overwrite=args.overwrite) as mrc:
Expand All @@ -556,7 +559,7 @@ def mergemask(args, configs):
print_date(f"info: mask metadata:\n{data}")
print(data, file=label_file)
if args.verbose:
print_date(f"info: merge complete!")
print_date("info: merge complete!")
return 0


Expand Down
133 changes: 123 additions & 10 deletions sfftk/notes/find.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@ def search(self, *args, **kwargs):
R = requests.get(url)
if R.status_code == 200:
self._response = R.text
return SearchResults(self)
return SearchResults(self, *args, **kwargs)
else:
print_date("Error: server responded with {}".format(R.text))
return None
Expand Down Expand Up @@ -397,6 +397,7 @@ def pc(self):

def render(self, row_data, index):
"""Render this field"""
text = ''
if self.is_index:
text = _str(index)
elif self._key is not None:
Expand Down Expand Up @@ -492,6 +493,37 @@ def __str__(self):
return string


class CSVRow(Table):
    """A single tab-separated row of search results.

    Each field is rendered through its ``TableField.render`` method, which may
    produce several wrapped lines; this class unwraps them back into one cell
    value per field.
    """
    column_separator = "\t"

    def __init__(self, row_data, fields, index, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self._row_data = row_data
        self._fields = fields
        self._index = index
        self._rendered = self._render()

    def _render(self):
        """Undo the wrapping: return one stripped cell string per field."""
        cells = []
        for field in self._fields:
            wrapped_lines = field.render(self._row_data, self._index)
            stripped_lines = [line.strip() for line in wrapped_lines]
            # leading/trailing whitespace is dropped on every wrapped line;
            # free-text descriptions keep a single space between rejoined lines
            if str(field).strip() in ['description']:
                cells.append(' '.join(stripped_lines).strip())
            else:
                cells.append(''.join(stripped_lines))
        return cells

    def __bytes__(self):
        return str(self).encode('utf-8')

    def __str__(self):
        return self.column_separator.join(self._rendered).strip('\n')


class ResultsTable(Table):
"""Class that formats search results as a table"""

Expand Down Expand Up @@ -641,6 +673,65 @@ def __str__(self):
return _str(string)


class CSVTable(Table):
    """Class that formats search results as a CSV"""
    # NOTE(review): despite the CSV name, cells are separated by tabs (TSV)
    column_separator = "\t"

    def __init__(self, search_results, fields, width='auto', *args, **kwargs):
        """Build a text table over *search_results* using the given *fields*.

        :param search_results: a ``SearchResults`` instance; ``header``/``body``
            read its ``search_args`` (``no_header``, ``filter_rows``, ``start``)
            and its ``results`` iterable
        :param fields: sequence of field objects rendered per row via ``CSVRow``
        :param width: accepted for interface parity but not stored here;
            ``self._width`` used in ``body`` presumably comes from the base
            class or a class attribute — TODO confirm
        """
        super().__init__(*args, **kwargs)
        self._search_results = search_results
        self._fields = fields

    @property
    def header(self):
        """Tab-separated header line, or '' when --no-header was requested."""
        header = ""
        if self._search_results.search_args.no_header:
            return header
        field_names = list(map(lambda f: str(f).strip(), self._fields))
        header += self.column_separator.join(field_names) + self.row_separator
        return header

    @property
    def body(self):
        """All result rows as tab-separated lines.

        When ``--filter-rows`` is in effect only rows whose 1-based display
        index (starting at ``search_args.start``) matches a requested value are
        emitted; otherwise every row is emitted.
        """
        # display index of the first row is the --start value, not zero
        index = self._search_results.search_args.start
        if self._search_results.results is not None:
            body = ""
            for row_data in self._search_results.results:
                # if the user is using --filter-rows then only include rows with the specified indexes
                if self._search_results.search_args.filter_rows:
                    # filter_rows holds strings, hence the str(index) comparison
                    if str(index) in self._search_results.search_args.filter_rows:
                        row_string = str(CSVRow(row_data, self._fields, index)) + self.row_separator
                        body += row_string
                        index += 1
                        continue
                    # row not selected: still advance the display index
                    index += 1
                else:
                    # otherwise display all rows
                    row_string = str(CSVRow(row_data, self._fields, index)) + self.row_separator
                    body += row_string
                    index += 1
        else:
            # no results payload: emit a centred placeholder message
            body = '\nNo data found at this time. Please try again in a few minutes.'.center(
                self._width) + self.row_separator
            body += self.row_separator
            # NOTE(review): this dashed rule relies on self._width being an int
            body += "-" * self._width + self.row_separator
        return body.strip('\n')

    @property
    def footer(self):
        """CSV output has no footer."""
        return ""

    def __bytes__(self):
        return self.__str__().encode('utf-8')

    def __str__(self):
        string = ""
        string += self.header
        string += self.body
        string += self.footer
        return _str(string)


class SearchResults(object):
"""SearchResults class"""
_width = 180 # unreasonable default
Expand All @@ -651,7 +742,7 @@ class SearchResults(object):
DESCRIPTION_WIDTH = 80
TYPE_WIDTH = 18

def __init__(self, resource):
def __init__(self, resource, as_text=False):
self._resource = resource # the resource that was searched
self._raw_response = resource.response
self._structured_response = self._structure_response()
Expand All @@ -660,6 +751,7 @@ def __init__(self, resource):
self._width = terminal_size.columns
else:
self._width = self._width
self.as_text = as_text

@property
def structured_response(self):
Expand Down Expand Up @@ -704,9 +796,9 @@ def __bytes__(self):
return self.__str__().encode('utf-8')

def __str__(self):
return self.tabulate()
return self.tabulate(as_text=self.as_text)

def tabulate(self):
def tabulate(self, as_text=False):
"""Tabulate the search results"""
table = Styled("[[ ''|fg-yellow:no-end ]]") # ""
if self._resource.name == 'OLS':
Expand Down Expand Up @@ -751,7 +843,11 @@ def tabulate(self):
TableField('accession', key='short_form', pc=10, justify='center'),
TableField('description', key='description', pc=40, is_iterable=True),
]
table += _str(ResultsTable(self, fields=fields))
if as_text:
# exclude colour decoration
table = str(CSVTable(self, fields=fields))
else:
table += _str(ResultsTable(self, fields=fields))
elif self._resource.name == 'GO':
fields = [
TableField('index', key='index', pc=5, is_index=True, justify='right'),
Expand All @@ -761,7 +857,10 @@ def tabulate(self):
TableField('accession', key='short_form', pc=10, justify='center'),
TableField('description', key='description', pc=40, is_iterable=True),
]
table += _str(ResultsTable(self, fields=fields))
if as_text:
table = str(CSVTable(self, fields=fields))
else:
table += _str(ResultsTable(self, fields=fields))
elif self._resource.name == 'EMDB':
fields = [
TableField('index', key='index', pc=5, is_index=True, justify='right'),
Expand All @@ -771,7 +870,10 @@ def tabulate(self):
TableField('accession', key='emdb_id', pc=10, _format='{}', justify='center'),
TableField('description', key=['admin', 'title'], pc=40),
]
table += _str(ResultsTable(self, fields=fields))
if as_text:
table = str(CSVTable(self, fields=fields))
else:
table += _str(ResultsTable(self, fields=fields))
elif self._resource.name == "UniProt":
fields = [
TableField('index', pc=5, is_index=True, justify='right'),
Expand All @@ -793,7 +895,10 @@ def tabulate(self):
# TableField('title', key='organism_scientific_name', pc=20, is_iterable=True),
TableField('description', key='title', pc=40),
]
table += _str(ResultsTable(self, fields=fields))
if as_text:
table = str(CSVTable(self, fields=fields))
else:
table += _str(ResultsTable(self, fields=fields))
elif self._resource.name == 'Europe PMC':
fields = [
TableField('index', pc=5, is_index=True, justify='right'),
Expand All @@ -804,7 +909,10 @@ def tabulate(self):
TableField('description (title)', key='title', pc=25),
# TableField('iri (doi)', key='doi', _format='https://doi.org/{}', pc=30)
]
table += _str(ResultsTable(self, fields=fields))
if as_text:
table = str(CSVTable(self, fields=fields))
else:
table += _str(ResultsTable(self, fields=fields))
elif self._resource.name == 'EMPIAR':
fields = [
TableField('index', pc=5, is_index=True, justify='right'),
Expand All @@ -822,8 +930,13 @@ def tabulate(self):
self.structured_response[empiar_accession]['empiarid'] = empiar_accession
structured_response.append(self.structured_response[empiar_accession])
self._structured_response = structured_response
table += _str(ResultsTable(self, fields=fields))
if as_text:
table = str(CSVTable(self, fields=fields))
else:
table += _str(ResultsTable(self, fields=fields))
# close style
if as_text:
return table
table += Styled("[[ ''|reset ]]")
return _str(table)

Expand Down
5 changes: 3 additions & 2 deletions sfftk/sff.py
Original file line number Diff line number Diff line change
Expand Up @@ -208,9 +208,10 @@ def handle_notes_search(args, configs):
# query
resource = find.SearchResource(args, configs)
# fixme: use print_date
print(resource)
if not args.as_text:
print(resource)
# search
result = resource.search()
result = resource.search(as_text=args.as_text)
if result is not None:
# fixme: use print_date
print(result)
Expand Down

0 comments on commit bcddd64

Please sign in to comment.