From 943cc278d29897de3d86748ffb538cf54f395457 Mon Sep 17 00:00:00 2001 From: Richard Challis Date: Wed, 3 Mar 2021 08:22:48 +0000 Subject: [PATCH 1/5] begin adding NCBI datasets summary parser --- src/genomehubs/lib/ncbi.py | 66 +++++++++++++++++++++++++++++++++++++ src/genomehubs/lib/parse.py | 8 +++++ 2 files changed, 74 insertions(+) diff --git a/src/genomehubs/lib/ncbi.py b/src/genomehubs/lib/ncbi.py index 8ab16b1d..a2acac17 100644 --- a/src/genomehubs/lib/ncbi.py +++ b/src/genomehubs/lib/ncbi.py @@ -3,7 +3,9 @@ import gzip import re +import sys from collections import Counter +from subprocess import check_output import ujson from Bio import SeqIO @@ -243,3 +245,67 @@ def ncbi_genome_parser(directory, opts): record = ujson.loads(line) parse_ncbi_datasets_record(record, parsed) return [value for value in parsed.values()] + + +def parse_ncbi_datasets_summary(record, parsed): + """Parse a single NCBI datasets summary.""" + obj = {} + return + for key in ("taxId", "speciesName", "commonName", "isolate", "sex"): + obj[key] = record.get(key, None) + assemblyInfo = record.get("assemblyInfo", {}) + for key in ("assembly_category", "assembly_level"): + obj[key] = assemblyInfo.get(key, None) + # "assembly_accession": "GCF_900239965.1", + if obj["refseqAssmAccession"] == "na": + obj["refseqAssmAccession"] = None + obj["refseqCategory"] = None + annotationInfo = record.get("annotationInfo", {}) + if annotationInfo: + annot = {} + for key in ("name", "releaseDate", "reportUrl", "source"): + annot["annotation%s" % key.capitalize()] = annotationInfo.get(key, None) + if annot and "stats" in annotationInfo: + geneCounts = annotationInfo["stats"].get("geneCounts", None) + for key in ("nonCoding", "proteinCoding", "pseudogene", "total"): + annot["geneCount%s" % key.capitalize()] = geneCounts.get(key, None) + if obj["genbankAssmAccession"] in parsed: + parsed[obj["genbankAssmAccession"]].update(annot) + return + obj.update(annot) + bioprojects = [] + for lineage in assemblyInfo.get("bioprojectLineage", []): + for bioproject in lineage["bioprojects"]: + bioprojects.append(bioproject["accession"]) + obj["bioProjectAccession"] = ";".join(bioprojects) if bioprojects else None + assemblyStats = record.get("assemblyStats", {}) + obj.update(assemblyStats) + wgsInfo = record.get("wgsInfo", {}) + for key in ("masterWgsUrl", "wgsContigsUrl", "wgsProjectAccession"): + obj[key] = wgsInfo.get(key, None) + parsed[obj["genbankAssmAccession"]] = obj + + +def ncbi_datasets_summary_parser(_params, opts): + """Fetch and parse NCBI Datasets summary.""" + parsed = {} + datasets = check_output( + ["datasets", "summary", "genome", "taxon", opts["ncbi-datasets-summary"]] + ) + data = ujson.loads(datasets) + if "assemblies" not in data: + LOGGER.error("unable to fetch assemblies for %s", opts["ncbi-datasets-summary"]) + print(data) + sys.exit(1) + for record in data["assemblies"]: + parse_ncbi_datasets_summary(record, parsed) + print(parsed) + quit() + # parsed = {} + # with tofile.open_file_handle( + # "%s/ncbi_dataset/data/assembly_data_report.jsonl" % directory + # ) as report: + # for line in report: + # record = ujson.loads(line) + # parse_ncbi_datasets_record(record, parsed) + return [value for value in parsed.values()] diff --git a/src/genomehubs/lib/parse.py b/src/genomehubs/lib/parse.py index cbef1219..43e47401 100644 --- a/src/genomehubs/lib/parse.py +++ b/src/genomehubs/lib/parse.py @@ -7,6 +7,7 @@ genomehubs parse [--btk] [--btk-root STRING...] [--wikidata PATH] [--wikidata-root STRING...] [--wikidata-xref STRING...] [--gbif] [--gbif-root STRING...] [--gbif-xref STRING...] + [--ncbi-datasets-summary INT] [--ncbi-datasets-genome PATH] [--outfile PATH] [--refseq-mitochondria] [--refseq-organelles] [--refseq-plastids] [--refseq-root NAME] @@ -21,6 +22,7 @@ --wikidata PATH Parse taxa in WikiData dump --wikidata-root STRING WikiData taxon ID of root taxon --wikidata-xref STRING Include link to external reference from WikiData (e.g. NBN, BOLD) + --ncbi-datasets-summary INT Fetch and parse NCBI Datasets summary for a root taxId --ncbi-datasets-genome PATH Parse NCBI Datasets genome directory --outfile PATH Save parsed output to file --refseq-mitochondria Parse mitochondrial genomes from the NCBI RefSeq @@ -47,6 +49,7 @@ from .gbif import gbif_parser from .hub import load_types from .hub import order_parsed_fields +from .ncbi import ncbi_datasets_summary_parser from .ncbi import ncbi_genome_parser from .ncbi import refseq_organelle_parser from .version import __version__ @@ -62,6 +65,11 @@ "params": None, "types": "assembly", }, + "ncbi-datasets-summary": { + "func": ncbi_datasets_summary_parser, + "params": None, + "types": "assembly", + }, "refseq-mitochondria": { "func": refseq_organelle_parser, "params": ("mitochondrion"), From b03e3cac75edeb3d4141cf940acb9c2b7244c39d Mon Sep 17 00:00:00 2001 From: Richard Challis Date: Thu, 8 Apr 2021 13:48:08 +0100 Subject: [PATCH 2/5] update default ncbi datasets config --- src/genomehubs/lib/ncbi.py | 124 +++++++++---------- src/genomehubs/lib/parse.py | 15 ++- src/genomehubs/templates/assembly.types.yaml | 40 +++++- 3 files changed, 104 insertions(+), 75 deletions(-) diff --git a/src/genomehubs/lib/ncbi.py b/src/genomehubs/lib/ncbi.py index a2acac17..f8c0589a 100644 --- a/src/genomehubs/lib/ncbi.py +++ b/src/genomehubs/lib/ncbi.py @@ -3,9 +3,7 @@ import gzip import re -import sys from collections import Counter -from subprocess import check_output import ujson from Bio import SeqIO @@ -191,7 +189,7 @@ def refseq_organelle_parser(collections, opts): def parse_ncbi_datasets_record(record, parsed): """Parse a single NCBI datasets record.""" obj = {} - for key in ("taxId", "speciesName", "commonName", "isolate", "sex"): + for key in ("taxId", "organismName", "commonName", "isolate", "sex"): obj[key] = record.get(key, None) assemblyInfo = record.get("assemblyInfo", {}) for key in ( @@ -247,65 +245,65 @@ def ncbi_genome_parser(directory, opts): return [value for value in parsed.values()] -def parse_ncbi_datasets_summary(record, parsed): - """Parse a single NCBI datasets summary.""" - obj = {} - return - for key in ("taxId", "speciesName", "commonName", "isolate", "sex"): - obj[key] = record.get(key, None) - assemblyInfo = record.get("assemblyInfo", {}) - for key in ("assembly_category", "assembly_level"): - obj[key] = assemblyInfo.get(key, None) - # "assembly_accession": "GCF_900239965.1", - if obj["refseqAssmAccession"] == "na": - obj["refseqAssmAccession"] = None - obj["refseqCategory"] = None - annotationInfo = record.get("annotationInfo", {}) - if annotationInfo: - annot = {} - for key in ("name", "releaseDate", "reportUrl", "source"): - annot["annotation%s" % key.capitalize()] = annotationInfo.get(key, None) - if annot and "stats" in annotationInfo: - geneCounts = annotationInfo["stats"].get("geneCounts", None) - for key in ("nonCoding", "proteinCoding", "pseudogene", "total"): - annot["geneCount%s" % key.capitalize()] = geneCounts.get(key, None) - if obj["genbankAssmAccession"] in parsed: - parsed[obj["genbankAssmAccession"]].update(annot) - return - obj.update(annot) - bioprojects = [] - for lineage in assemblyInfo.get("bioprojectLineage", []): - for bioproject in lineage["bioprojects"]: - bioprojects.append(bioproject["accession"]) - obj["bioProjectAccession"] = ";".join(bioprojects) if bioprojects else None - assemblyStats = record.get("assemblyStats", {}) - obj.update(assemblyStats) - wgsInfo = record.get("wgsInfo", {}) - for key in ("masterWgsUrl", "wgsContigsUrl", "wgsProjectAccession"): - obj[key] = wgsInfo.get(key, None) - parsed[obj["genbankAssmAccession"]] = obj +# def parse_ncbi_datasets_summary(record, parsed): +# """Parse a single NCBI datasets summary.""" +# obj = {} +# return +# for key in ("taxId", "speciesName", "commonName", "isolate", "sex"): +# obj[key] = record.get(key, None) +# assemblyInfo = record.get("assemblyInfo", {}) +# for key in ("assembly_category", "assembly_level"): +# obj[key] = assemblyInfo.get(key, None) +# # "assembly_accession": "GCF_900239965.1", +# if obj["refseqAssmAccession"] == "na": +# obj["refseqAssmAccession"] = None +# obj["refseqCategory"] = None +# annotationInfo = record.get("annotationInfo", {}) +# if annotationInfo: +# annot = {} +# for key in ("name", "releaseDate", "reportUrl", "source"): +# annot["annotation%s" % key.capitalize()] = annotationInfo.get(key, None) +# if annot and "stats" in annotationInfo: +# geneCounts = annotationInfo["stats"].get("geneCounts", None) +# for key in ("nonCoding", "proteinCoding", "pseudogene", "total"): +# annot["geneCount%s" % key.capitalize()] = geneCounts.get(key, None) +# if obj["genbankAssmAccession"] in parsed: +# parsed[obj["genbankAssmAccession"]].update(annot) +# return +# obj.update(annot) +# bioprojects = [] +# for lineage in assemblyInfo.get("bioprojectLineage", []): +# for bioproject in lineage["bioprojects"]: +# bioprojects.append(bioproject["accession"]) +# obj["bioProjectAccession"] = ";".join(bioprojects) if bioprojects else None +# assemblyStats = record.get("assemblyStats", {}) +# obj.update(assemblyStats) +# wgsInfo = record.get("wgsInfo", {}) +# for key in ("masterWgsUrl", "wgsContigsUrl", "wgsProjectAccession"): +# obj[key] = wgsInfo.get(key, None) +# parsed[obj["genbankAssmAccession"]] = obj -def ncbi_datasets_summary_parser(_params, opts): - """Fetch and parse NCBI Datasets summary.""" - parsed = {} - datasets = check_output( - ["datasets", "summary", "genome", "taxon", opts["ncbi-datasets-summary"]] - ) - data = ujson.loads(datasets) - if "assemblies" not in data: - LOGGER.error("unable to fetch assemblies for %s", opts["ncbi-datasets-summary"]) - print(data) - sys.exit(1) - for record in data["assemblies"]: - parse_ncbi_datasets_summary(record, parsed) - print(parsed) - quit() - # parsed = {} - # with tofile.open_file_handle( - # "%s/ncbi_dataset/data/assembly_data_report.jsonl" % directory - # ) as report: - # for line in report: - # record = ujson.loads(line) - # parse_ncbi_datasets_record(record, parsed) - return [value for value in parsed.values()] +# def ncbi_datasets_summary_parser(_params, opts): +# """Fetch and parse NCBI Datasets summary.""" +# parsed = {} +# datasets = check_output( +# ["datasets", "summary", "genome", "taxon", opts["ncbi-datasets-summary"]] +# ) +# data = ujson.loads(datasets) +# if "assemblies" not in data: +# LOGGER.error("unable to fetch assemblies for %s", opts["ncbi-datasets-summary"]) +# print(data) +# sys.exit(1) +# for record in data["assemblies"]: +# parse_ncbi_datasets_summary(record, parsed) +# print(parsed) +# quit() +# # parsed = {} +# # with tofile.open_file_handle( +# # "%s/ncbi_dataset/data/assembly_data_report.jsonl" % directory +# # ) as report: +# # for line in report: +# # record = ujson.loads(line) +# # parse_ncbi_datasets_record(record, parsed) +# return [value for value in parsed.values()] diff --git a/src/genomehubs/lib/parse.py b/src/genomehubs/lib/parse.py index 43e47401..64f80dbd 100644 --- a/src/genomehubs/lib/parse.py +++ b/src/genomehubs/lib/parse.py @@ -7,7 +7,6 @@ genomehubs parse [--btk] [--btk-root STRING...] [--wikidata PATH] [--wikidata-root STRING...] [--wikidata-xref STRING...] [--gbif] [--gbif-root STRING...] [--gbif-xref STRING...] - [--ncbi-datasets-summary INT] [--ncbi-datasets-genome PATH] [--outfile PATH] [--refseq-mitochondria] [--refseq-organelles] [--refseq-plastids] [--refseq-root NAME] @@ -22,7 +21,6 @@ --wikidata PATH Parse taxa in WikiData dump --wikidata-root STRING WikiData taxon ID of root taxon --wikidata-xref STRING Include link to external reference from WikiData (e.g. NBN, BOLD) - --ncbi-datasets-summary INT Fetch and parse NCBI Datasets summary for a root taxId --ncbi-datasets-genome PATH Parse NCBI Datasets genome directory --outfile PATH Save parsed output to file --refseq-mitochondria Parse mitochondrial genomes from the NCBI RefSeq @@ -49,7 +47,8 @@ from .gbif import gbif_parser from .hub import load_types from .hub import order_parsed_fields -from .ncbi import ncbi_datasets_summary_parser + +# from .ncbi import ncbi_datasets_summary_parser from .ncbi import ncbi_genome_parser from .ncbi import refseq_organelle_parser from .version import __version__ @@ -65,11 +64,11 @@ "params": None, "types": "assembly", }, - "ncbi-datasets-summary": { - "func": ncbi_datasets_summary_parser, - "params": None, - "types": "assembly", - }, + # "ncbi-datasets-summary": { + # "func": ncbi_datasets_summary_parser, + # "params": None, + # "types": "assembly", + # }, "refseq-mitochondria": { "func": refseq_organelle_parser, "params": ("mitochondrion"), diff --git a/src/genomehubs/templates/assembly.types.yaml b/src/genomehubs/templates/assembly.types.yaml index cb716fb8..8c3c210e 100644 --- a/src/genomehubs/templates/assembly.types.yaml +++ b/src/genomehubs/templates/assembly.types.yaml @@ -8,8 +8,8 @@ file: taxonomy: taxon_id: header: taxId - species: - header: speciesName + taxon: + header: organismName names: common_name: header: commonName @@ -133,10 +133,12 @@ attributes: taxon_name: assembly_span taxon_key: assembly_span taxon_display_name: Assembly span - taxon_summary: median + taxon_summary: + - median + - min + - max taxon_traverse: median taxon_traverse_direction: both - taxon_traverse_limit: superkingdom taxon_display_level: 1 taxon_bins: min: 6 @@ -161,6 +163,21 @@ attributes: header: contigN50 type: long units: bases + taxon_display_group: assembly + taxon_name: contig_n50 + taxon_key: contig_n50 + taxon_summary: + - median + - min + - max + taxon_traverse: median + taxon_traverse_direction: both + taxon_display_level: 2 + taxon_bins: + min: 4 + max: 9 + count: 10 + scale: log10 contig_l50: display_group: metrics display_level: 2 @@ -179,6 +196,21 @@ attributes: header: scaffoldN50 type: long units: bases + taxon_display_group: assembly + taxon_name: scaffold_n50 + taxon_key: scaffold_n50 + taxon_summary: + - median + - min + - max + taxon_traverse: median + taxon_traverse_direction: both + taxon_display_level: 2 + taxon_bins: + min: 4 + max: 9 + count: 10 + scale: log10 scaffold_l50: display_group: metrics display_level: 2 From a3baf237c5d4b8b0049e0386881c0a755cf83ee7 Mon Sep 17 00:00:00 2001 From: Richard Challis Date: Thu, 8 Apr 2021 15:32:45 +0100 Subject: [PATCH 3/5] set refseq as primary value --- src/genomehubs/lib/hub.py | 9 ++++++--- src/genomehubs/lib/ncbi.py | 14 ++++++++++---- src/genomehubs/templates/assembly.types.yaml | 5 +++++ 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/src/genomehubs/lib/hub.py b/src/genomehubs/lib/hub.py index a068f9a1..0dd5a0bd 100644 --- a/src/genomehubs/lib/hub.py +++ b/src/genomehubs/lib/hub.py @@ -550,9 +550,12 @@ def process_row(types, names, row): taxon_data = {} taxon_types = {} if "is_primary_value" in data["metadata"]: - data["metadata"]["is_primary_value"] = bool( - int(data["metadata"]["is_primary_value"]) - ) + try: + data["metadata"]["is_primary_value"] = bool( + int(data["metadata"]["is_primary_value"]) + ) + except ValueError: + data["metadata"]["is_primary_value"] = False for attr_type in list(["attributes", "identifiers"]): if attr_type in data and data[attr_type]: ( diff --git a/src/genomehubs/lib/ncbi.py b/src/genomehubs/lib/ncbi.py index 26272348..32b4c5ec 100644 --- a/src/genomehubs/lib/ncbi.py +++ b/src/genomehubs/lib/ncbi.py @@ -168,7 +168,7 @@ def parse_listing(listing, collection, opts): return parsed -def refseq_organelle_parser(collections, opts, *args, **kwargs): +def refseq_organelle_parser(collections, opts): """Fetch and parse RefSeq organelle collections.""" parsed = [] if isinstance(collections, tuple): @@ -190,7 +190,7 @@ def parse_ncbi_datasets_record(record, parsed): """Parse a single NCBI datasets record.""" obj = {} for key in ("taxId", "organismName", "commonName", "isolate", "sex"): - obj[key] = record.get(key, None) + obj[key] = record.get(key, "None") assemblyInfo = record.get("assemblyInfo", {}) for key in ( "assemblyLevel", @@ -204,9 +204,15 @@ def parse_ncbi_datasets_record(record, parsed): "submitter", ): obj[key] = assemblyInfo.get(key, None) + if key == "refseqCategory": + if obj[key] == "representative genome": + obj["primaryValue"] = 1 + else: + obj["primaryValue"] = None if obj["refseqAssmAccession"] == "na": obj["refseqAssmAccession"] = None obj["refseqCategory"] = None + obj["primaryValue"] = None annotationInfo = record.get("annotationInfo", {}) if annotationInfo: annot = {} @@ -233,11 +239,11 @@ def parse_ncbi_datasets_record(record, parsed): parsed[obj["genbankAssmAccession"]] = obj -def ncbi_genome_parser(directory, opts, *args, **kwargs): +def ncbi_genome_parser(_params, opts, *, types=None, names=None): """Parse NCBI Datasets genome report.""" parsed = {} with tofile.open_file_handle( - "%s/ncbi_dataset/data/assembly_data_report.jsonl" % directory + "%s/ncbi_dataset/data/assembly_data_report.jsonl" % opts["ncbi-datasets-genome"] ) as report: for line in report: record = ujson.loads(line) diff --git a/src/genomehubs/templates/assembly.types.yaml b/src/genomehubs/templates/assembly.types.yaml index 8c3c210e..78f39216 100644 --- a/src/genomehubs/templates/assembly.types.yaml +++ b/src/genomehubs/templates/assembly.types.yaml @@ -134,6 +134,7 @@ attributes: taxon_key: assembly_span taxon_display_name: Assembly span taxon_summary: + - primary - median - min - max @@ -167,6 +168,7 @@ attributes: taxon_name: contig_n50 taxon_key: contig_n50 taxon_summary: + - primary - median - min - max @@ -200,6 +202,7 @@ attributes: taxon_name: scaffold_n50 taxon_key: scaffold_n50 taxon_summary: + - primary - median - min - max @@ -274,3 +277,5 @@ attributes: metadata: source_slug: header: genbankAssmAccession + is_primary_value: + header: primaryValue From 11c5ede5b34265f103ab2b31d0f5641138218488 Mon Sep 17 00:00:00 2001 From: Richard Challis Date: Fri, 9 Apr 2021 13:39:17 +0100 Subject: [PATCH 4/5] set source when taxon data derived from assembly --- src/genomehubs/lib/attributes.py | 53 +++++++++++++++++++++++++++++--- src/genomehubs/lib/hub.py | 5 ++- 2 files changed, 53 insertions(+), 5 deletions(-) diff --git a/src/genomehubs/lib/attributes.py b/src/genomehubs/lib/attributes.py index aefc5e61..0d191e91 100644 --- a/src/genomehubs/lib/attributes.py +++ b/src/genomehubs/lib/attributes.py @@ -37,18 +37,63 @@ def index(es, group, attributes, opts, *, index_type="attribute"): return template, stream +# def fetch_types(es, opts): +# """Fetch all existing types.""" +# template = index_template(opts, index_type="attribute") +# body = { +# "id": "attribute_types", +# "params": {}, +# } +# entries = stream_template_search_results( +# es, index=template["index_name"], body=body +# ) +# return {entry["key"]: entry for entry in entries} + + +def add_attribute_sources(name, obj, attributes): + """Generate a list of attribute sources.""" + for key, value in attributes[name].items(): + if key.startswith("source"): + if key in obj: + if not isinstance(obj[key], list): + obj[key] = [obj[key]] + obj[key].append(value) + else: + obj[key] = value + + def index_types(es, types_name, types, opts): """Index types into Elasticsearch.""" + # TODO: fetch existing types to allow new sources to add, not overwrite + try: + attributes = fetch_types(es, types_name, opts) + except Exception: + attributes = {} if "attributes" in types: - if "defaults" in types and "attributes" in types["defaults"]: - for key, value in types["attributes"].items(): + new_attributes = {} + existing_attributes = {} + for key, value in types["attributes"].items(): + if "defaults" in types and "attributes" in types["defaults"]: value = {**types["defaults"]["attributes"], **value} - types["attributes"][key] = value + # types["attributes"][key] = value + if key in attributes: + existing_attributes[key] = value + add_attribute_sources(key, value, attributes) + else: + new_attributes[key] = value template, stream = index( - es, types_name, types["attributes"], opts, index_type="attribute" + es, types_name, new_attributes, opts, index_type="attribute" + ) + template, update_stream = index( + es, + types_name, + existing_attributes, + opts, + index_type="attribute", ) load_mapping(es, template["name"], template["mapping"]) index_stream(es, template["index_name"], stream) + index_stream(es, template["index_name"], update_stream, _op_type="update") if "taxon_names" in types: if "defaults" in types and "taxon_names" in types["defaults"]: for key, value in types["names"].items(): diff --git a/src/genomehubs/lib/hub.py b/src/genomehubs/lib/hub.py index 0dd5a0bd..6950ddd8 100644 --- a/src/genomehubs/lib/hub.py +++ b/src/genomehubs/lib/hub.py @@ -356,7 +356,9 @@ def add_attributes( else: attribute = {"identifier": validated, "class": key} attribute.update(meta) - if source is not None: + if "source" in types[key]: + attribute.update({"source": types[key]["source"]}) + elif source is not None: attribute.update({"source": source}) attributes.append(attribute) if attribute_values: @@ -470,6 +472,7 @@ def validate_types_file(types_file, dir_path): if key.startswith("display") or key.startswith("taxon"): defaults["attributes"].update({key: value}) elif key.startswith("source"): + defaults["attributes"].update({key: value}) defaults["metadata"].update({key: value}) types.update({"defaults": defaults}) data = tofile.open_file_handle(Path(dir_path) / types["file"]["name"]) From 4f3650737a4cd6a2ce06a671b04a2f0304998d42 Mon Sep 17 00:00:00 2001 From: Richard Challis Date: Fri, 9 Apr 2021 13:58:28 +0100 Subject: [PATCH 5/5] show assembly level in taxon index --- src/genomehubs/templates/assembly.types.yaml | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/genomehubs/templates/assembly.types.yaml b/src/genomehubs/templates/assembly.types.yaml index 78f39216..22228938 100644 --- a/src/genomehubs/templates/assembly.types.yaml +++ b/src/genomehubs/templates/assembly.types.yaml @@ -63,7 +63,11 @@ attributes: taxon_display_group: assembly taxon_name: gene_count taxon_key: gene_count - taxon_summary: median + taxon_summary: + - primary + - median + - min + - max taxon_display_level: 2 taxon_bins: min: 0 @@ -103,7 +107,9 @@ attributes: taxon_name: sample_sex taxon_key: sample_sex taxon_display_name: Sample sex - taxon_summary: list + taxon_summary: + - primary + - list taxon_display_level: 2 isolate: display_level: 2 @@ -115,6 +121,14 @@ attributes: header: assemblyLevel display_name: Assembly level type: keyword + taxon_display_group: assembly + taxon_name: assembly_level + taxon_key: assembly_level + taxon_display_name: Assembly level + taxon_summary: + - primary + - list + taxon_display_level: 2 assembly_type: display_level: 2 header: assemblyType