From 7494f324c8b1fa698c4f773cd95d43cb70ac104f Mon Sep 17 00:00:00 2001
From: dustine32 <dustine32@gmail.com>
Date: Mon, 22 Mar 2021 15:23:16 -0700
Subject: [PATCH 1/3] Gocamgen reports: Logical def errors and GO rules

---
 bin/validate.py                          | 11 ++++++++---
 ontobio/rdfgen/gocamgen/errors.py        | 14 +++++++++++++-
 ontobio/rdfgen/gocamgen/gocam_builder.py |  6 ++++--
 ontobio/rdfgen/gocamgen/gocamgen.py      |  5 ++++-
 4 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/bin/validate.py b/bin/validate.py
index 1c51541d..fabbdeee 100644
--- a/bin/validate.py
+++ b/bin/validate.py
@@ -566,9 +566,11 @@ def gpad2gocams(ctx, gpad_path, gpi_path, target, ontology, ttl):
     gpad_basename = os.path.basename(gpad_path)
     gpad_basename_root, gpad_ext = os.path.splitext(gpad_basename)
     output_basename = "{}.nq".format(gpad_basename_root)
-    report_basename = "{}.gocamgen.report".format(gpad_basename_root)
+    parse_report_basename = "{}.parser.report".format(gpad_basename_root)
+    model_report_basename = "{}.gocamgen.report".format(gpad_basename_root)
     output_path = os.path.join(absolute_target, output_basename)
-    report_path = os.path.join(absolute_target, report_basename)
+    parse_report_path = os.path.join(absolute_target, parse_report_basename)
+    model_report_path = os.path.join(absolute_target, model_report_basename)
 
     builder = GoCamBuilder(parser_config=parser_config)
 
@@ -580,7 +582,10 @@ def gpad2gocams(ctx, gpad_path, gpi_path, target, ontology, ttl):
     if not ttl:
         builder.write_out_store_to_nquads(filepath=output_path)
 
-    builder.write_report(report_filepath=report_path)
+    # Reports
+    builder.write_report(report_filepath=model_report_path)
+    with open(parse_report_path, 'w') as prp:
+        prp.write(extractor.gpad_parser.report.to_markdown())
 
 
 @cli.command()
diff --git a/ontobio/rdfgen/gocamgen/errors.py b/ontobio/rdfgen/gocamgen/errors.py
index 2aa2cda3..70d281d2 100644
--- a/ontobio/rdfgen/gocamgen/errors.py
+++ b/ontobio/rdfgen/gocamgen/errors.py
@@ -1,5 +1,8 @@
+from ontobio.rdfgen.gocamgen import collapsed_assoc
+
+
 class GocamgenException(Exception):
-    def __init__(self, message):
+    def __init__(self, message: str):
         self.message = message
 
     def __str__(self):
@@ -14,6 +17,15 @@ class ShexException(GocamgenException):
     pass
 
 
+class CollapsedAssocGocamgenException(GocamgenException):
+    def __init__(self, message: str, assoc: collapsed_assoc.CollapsedAssociation):
+        self.message = message
+        self.assoc = assoc
+
+    def __str__(self):
+        return "{}\n{}".format(self.message, "\n".join([l.source_line for l in self.assoc.lines]))
+
+
 class GeneErrorSet:
     def __init__(self):
         self.errors = {}
diff --git a/ontobio/rdfgen/gocamgen/gocam_builder.py b/ontobio/rdfgen/gocamgen/gocam_builder.py
index 629450b4..ebd1d878 100644
--- a/ontobio/rdfgen/gocamgen/gocam_builder.py
+++ b/ontobio/rdfgen/gocamgen/gocam_builder.py
@@ -76,6 +76,8 @@ def make_model(self, gene, annotations: List[GoAssociation], output_directory=No
                     model.write(out_filename)
                     logger.info("Model for {} written to {} in {} sec".format(gene, out_filename,
                                                                               (time.time() - start_time)))
+                for err in model.errors:
+                    self.errors.add_error(gene, err)
                 return model
             except GocamgenException as ex:
                 self.errors.add_error(gene, ex)
@@ -156,13 +158,13 @@ def parse_gpi(gpi_file):
 class AssocExtractor:
     def __init__(self, gpad_file, parser_config: AssocParserConfig):
         self.assocs = []
-        gpad_parser = gpadparser.GpadParser(config=parser_config)
+        self.gpad_parser = gpadparser.GpadParser(config=parser_config)
         with open(gpad_file) as sg:
             lines = sum(1 for line in sg)
 
         with open(gpad_file) as gf:
             click.echo("Making products...")
-            with click.progressbar(iterable=gpad_parser.association_generator(file=gf, skipheader=True),
+            with click.progressbar(iterable=self.gpad_parser.association_generator(file=gf, skipheader=True),
                                    length=lines) as associations:
                 self.assocs = list(associations)
 
diff --git a/ontobio/rdfgen/gocamgen/gocamgen.py b/ontobio/rdfgen/gocamgen/gocamgen.py
index ffb07026..222830bc 100644
--- a/ontobio/rdfgen/gocamgen/gocamgen.py
+++ b/ontobio/rdfgen/gocamgen/gocamgen.py
@@ -463,6 +463,7 @@ def __init__(self, modeltitle, assocs: List[GoAssociation], config: AssocParserC
         self.provided_bys = set()
         self.graph.bind("GOREL", GOREL)  # Because GOREL isn't in context.jsonld's
         self.gpi_entities = gpi_entities
+        self.errors: List[errors.GocamgenException] = []
         ncbi_taxon = self.taxon_id_from_entity(str(assocs[0].subject.id))
         # Emit model-level in_taxon triple from ncbi_taxon
         if ncbi_taxon:
@@ -615,7 +616,9 @@ def translate(self):
                                     ext_target_n = annot_subgraph.add_instance_of_class(ext_target)
                                     annot_subgraph.add_edge(regulated_term_n, chained_rel, ext_target_n)
                                 else:
-                                    logger.warning("Couldn't get regulates relation from LD of: {}".format(term))
+                                    err_msg = "Couldn't get regulates relation from LD of: {}".format(term)
+                                    logger.warning(err_msg)
+                                    self.errors.append(errors.CollapsedAssocGocamgenException(err_msg, a))
                             elif ext_relation in HAS_REGULATION_TARGET_RELATIONS:
                                 if aspect == 'P':
                                     # For BP annotations, translate 'has regulation target' to 'has input'.

From bee51e36f814ce688430144a3831d85358c6860d Mon Sep 17 00:00:00 2001
From: dustine32 <dustine32@gmail.com>
Date: Mon, 22 Mar 2021 15:33:32 -0700
Subject: [PATCH 2/3] Rm gpad_extensions_mapper and referencing code

---
 ontobio/rdfgen/gocamgen/gocamgen.py           | 244 ++++---
 .../rdfgen/gocamgen/gpad_extensions_mapper.py | 662 ------------------
 2 files changed, 119 insertions(+), 787 deletions(-)
 delete mode 100644 ontobio/rdfgen/gocamgen/gpad_extensions_mapper.py

diff --git a/ontobio/rdfgen/gocamgen/gocamgen.py b/ontobio/rdfgen/gocamgen/gocamgen.py
index 222830bc..4cf24d02 100644
--- a/ontobio/rdfgen/gocamgen/gocamgen.py
+++ b/ontobio/rdfgen/gocamgen/gocamgen.py
@@ -547,133 +547,127 @@ def translate(self):
                     annot_subgraph = self.translate_primary_annotation(a)
 
                     intersection_extensions: List[ExtensionUnit] = dedupe_extensions(uo.elements)
-                    # is_cool = self.extensions_mapper.annot_following_rules(intersection_extensions, aspect, term)
-                    is_cool = True  # Open the flood gates
-                    if is_cool:
-                        logger.debug("GOOD: {}".format(ext_str))
-                        # Nesting repeated extension relations (i.e. occurs_in, part_of)
-                        ext_rels_to_nest = ['occurs_in', 'part_of']  # Switch to turn on/off extension nesting
-                        for ertn in ext_rels_to_nest:
-                            nest_exts = [ext for ext in intersection_extensions if relation_equals(ext.relation, ertn)]
-                            if len(nest_exts) > 1:
-                                # Sort by specific term to general term
-                                sorted_nest_ext_terms = sort_terms_by_ontology_specificity(
-                                    [ne.term for ne in nest_exts])
-                                # Translate
-                                loc_subj_n = annot_subgraph.get_anchor()
-                                for idx, ne_term in enumerate(sorted_nest_ext_terms):
-                                    # location_relation could be part_of, occurs_in, or located_in
-                                    # Figure out what types of classes these are
-                                    # Use case here is matching to ShEx shape class
-                                    subj_shape = SHEX_HELPER.shape_from_class(AnnotationSubgraph.node_class(loc_subj_n),
-                                                                              self.go_aspector)
-                                    loc_obj_n = annot_subgraph.add_instance_of_class(ne_term)
-                                    obj_shape = SHEX_HELPER.shape_from_class(str(ne_term),
-                                                                             self.go_aspector)
-                                    # location_relation = "BFO:0000050"  # part_of
-                                    location_relation = SHEX_HELPER.relation_lookup(subj_shape, obj_shape)
-                                    if idx == 0 and ertn == "occurs_in":
-                                        location_relation = "BFO:0000066"  # occurs_in - because MF -> @<AnatomicalEntity> OR @<CellularComponent>
-                                    annot_subgraph.add_edge(loc_subj_n, location_relation, loc_obj_n)
-
-                                    loc_subj_n = loc_obj_n  # For next iteration
-                                # Remove from intersection_extensions because this is now already translated
-                                [intersection_extensions.remove(ext) for ext in nest_exts]
-                        for rel in intersection_extensions:
-                            ext_relation = relations.lookup_uri(expand_uri_wrapper(str(rel.relation)), default="")
-                            if ext_relation == "":
-                                # AttributeError: 'NoneType' object has no attribute 'replace'
-                                error_msg = "Relation '{}' not found in relations lookup. Skipping annotation translation.".format(str(rel.relation))
-                                raise errors.GocamgenException(error_msg)
-                            ext_target = str(rel.term)
-                            if ext_relation not in list(INPUT_RELATIONS.keys()) + list(HAS_REGULATION_TARGET_RELATIONS.keys()):
-                                # No RO term yet. Try looking up in RO
-                                relation_term = self.translate_relation_to_ro(ext_relation)
-                                if relation_term:
-                                    # print("Ext relation {} auto-mapped to {} in {}".format(ext_relation, relation_term, a.subject_id()))
-                                    INPUT_RELATIONS[ext_relation] = relation_term
-                            if ext_relation in INPUT_RELATIONS:
+
+                    # Nesting repeated extension relations (i.e. occurs_in, part_of)
+                    ext_rels_to_nest = ['occurs_in', 'part_of']  # Switch to turn on/off extension nesting
+                    for ertn in ext_rels_to_nest:
+                        nest_exts = [ext for ext in intersection_extensions if relation_equals(ext.relation, ertn)]
+                        if len(nest_exts) > 1:
+                            # Sort by specific term to general term
+                            sorted_nest_ext_terms = sort_terms_by_ontology_specificity(
+                                [ne.term for ne in nest_exts])
+                            # Translate
+                            loc_subj_n = annot_subgraph.get_anchor()
+                            for idx, ne_term in enumerate(sorted_nest_ext_terms):
+                                # location_relation could be part_of, occurs_in, or located_in
+                                # Figure out what types of classes these are
+                                # Use case here is matching to ShEx shape class
+                                subj_shape = SHEX_HELPER.shape_from_class(AnnotationSubgraph.node_class(loc_subj_n),
+                                                                          self.go_aspector)
+                                loc_obj_n = annot_subgraph.add_instance_of_class(ne_term)
+                                obj_shape = SHEX_HELPER.shape_from_class(str(ne_term),
+                                                                         self.go_aspector)
+                                # location_relation = "BFO:0000050"  # part_of
+                                location_relation = SHEX_HELPER.relation_lookup(subj_shape, obj_shape)
+                                if idx == 0 and ertn == "occurs_in":
+                                    location_relation = "BFO:0000066"  # occurs_in - because MF -> @<AnatomicalEntity> OR @<CellularComponent>
+                                annot_subgraph.add_edge(loc_subj_n, location_relation, loc_obj_n)
+
+                                loc_subj_n = loc_obj_n  # For next iteration
+                            # Remove from intersection_extensions because this is now already translated
+                            [intersection_extensions.remove(ext) for ext in nest_exts]
+                    for rel in intersection_extensions:
+                        ext_relation = relations.lookup_uri(expand_uri_wrapper(str(rel.relation)), default="")
+                        if ext_relation == "":
+                            # AttributeError: 'NoneType' object has no attribute 'replace'
+                            error_msg = "Relation '{}' not found in relations lookup. Skipping annotation translation.".format(str(rel.relation))
+                            raise errors.GocamgenException(error_msg)
+                        ext_target = str(rel.term)
+                        if ext_relation not in list(INPUT_RELATIONS.keys()) + list(HAS_REGULATION_TARGET_RELATIONS.keys()):
+                            # No RO term yet. Try looking up in RO
+                            relation_term = self.translate_relation_to_ro(ext_relation)
+                            if relation_term:
+                                # print("Ext relation {} auto-mapped to {} in {}".format(ext_relation, relation_term, a.subject_id()))
+                                INPUT_RELATIONS[ext_relation] = relation_term
+                        if ext_relation in INPUT_RELATIONS:
+                            ext_target_n = annot_subgraph.add_instance_of_class(ext_target)
+                            # Need to find what mf we're talking about
+                            anchor_n = annot_subgraph.get_anchor()
+                            annot_subgraph.add_edge(anchor_n, INPUT_RELATIONS[ext_relation], ext_target_n)
+                        # elif ext_relation in REGULATES_CHAIN_RELATIONS:
+                        elif ext_relation.startswith("regulates_o_"):
+                            # Get target MF from primary term (BP) e.g. GO:0007346 regulates some mitotic cell cycle
+                            regulates_rel, regulated_term = self.get_rel_and_term_in_logical_definitions(term)
+                            if regulates_rel:
+                                # Need to derive chained relation (e.g. "occurs_in") from this ext rel. Just replace("regulates_o_", "")?
+                                chained_rel_label = ext_relation.replace("regulates_o_", "")
+                                chained_rel = INPUT_RELATIONS.get(chained_rel_label)
+                                if chained_rel is None:
+                                    chained_rel = self.translate_relation_to_ro(chained_rel_label)
+                                if chained_rel is None:
+                                    # We have tried all we can for this poor thing. Skip translating this extension.
+                                    continue
+                                regulated_term_n = annot_subgraph.add_instance_of_class(regulated_term)
+                                anchor_n = annot_subgraph.get_anchor()
+                                annot_subgraph.add_edge(anchor_n, regulates_rel, regulated_term_n)
+                                ext_target_n = annot_subgraph.add_instance_of_class(ext_target)
+                                annot_subgraph.add_edge(regulated_term_n, chained_rel, ext_target_n)
+                            else:
+                                err_msg = "Couldn't get regulates relation from LD of: {}".format(term)
+                                logger.warning(err_msg)
+                                self.errors.append(errors.CollapsedAssocGocamgenException(err_msg, a))
+                        elif ext_relation in HAS_REGULATION_TARGET_RELATIONS:
+                            if aspect == 'P':
+                                # For BP annotations, translate 'has regulation target' to 'has input'.
                                 ext_target_n = annot_subgraph.add_instance_of_class(ext_target)
-                                # Need to find what mf we're talking about
                                 anchor_n = annot_subgraph.get_anchor()
-                                annot_subgraph.add_edge(anchor_n, INPUT_RELATIONS[ext_relation], ext_target_n)
-                            # elif ext_relation in REGULATES_CHAIN_RELATIONS:
-                            elif ext_relation.startswith("regulates_o_"):
-                                # Get target MF from primary term (BP) e.g. GO:0007346 regulates some mitotic cell cycle
-                                regulates_rel, regulated_term = self.get_rel_and_term_in_logical_definitions(term)
-                                if regulates_rel:
-                                    # Need to derive chained relation (e.g. "occurs_in") from this ext rel. Just replace("regulates_o_", "")?
-                                    chained_rel_label = ext_relation.replace("regulates_o_", "")
-                                    chained_rel = INPUT_RELATIONS.get(chained_rel_label)
-                                    if chained_rel is None:
-                                        chained_rel = self.translate_relation_to_ro(chained_rel_label)
-                                    if chained_rel is None:
-                                        # We have tried all we can for this poor thing. Skip translating this extension.
-                                        continue
-                                    regulated_term_n = annot_subgraph.add_instance_of_class(regulated_term)
-                                    anchor_n = annot_subgraph.get_anchor()
-                                    annot_subgraph.add_edge(anchor_n, regulates_rel, regulated_term_n)
-                                    ext_target_n = annot_subgraph.add_instance_of_class(ext_target)
-                                    annot_subgraph.add_edge(regulated_term_n, chained_rel, ext_target_n)
-                                else:
-                                    err_msg = "Couldn't get regulates relation from LD of: {}".format(term)
-                                    logger.warning(err_msg)
-                                    self.errors.append(errors.CollapsedAssocGocamgenException(err_msg, a))
-                            elif ext_relation in HAS_REGULATION_TARGET_RELATIONS:
-                                if aspect == 'P':
-                                    # For BP annotations, translate 'has regulation target' to 'has input'.
-                                    ext_target_n = annot_subgraph.add_instance_of_class(ext_target)
-                                    anchor_n = annot_subgraph.get_anchor()
-                                    annot_subgraph.add_edge(anchor_n, INPUT_RELATIONS['has_input'], ext_target_n)
-                                else:
-                                    buckets = has_regulation_target_bucket(self.ontology, term)
-                                    if len(buckets) > 0:
-                                        bucket = buckets[0]  # Or express all buckets?
-                                        # Four buckets
-                                        if bucket in ["a", "d"]:
-                                            regulates_rel, regulated_mf = self.get_rel_and_term_in_logical_definitions(term)
-                                            if regulates_rel and regulated_mf:
-                                                # [GP-A]<-enabled_by-[root MF]-regulates->[molecular function Z]-enabled_by->[GP-B]
-                                                ext_target_n = annot_subgraph.add_instance_of_class(ext_target)
-                                                regulated_mf_n = annot_subgraph.add_instance_of_class(regulated_mf)
-                                                annot_subgraph.add_edge(regulated_mf_n, ro.enabled_by, ext_target_n)
-                                                anchor_n = annot_subgraph.get_anchor()
-                                                annot_subgraph.add_edge(anchor_n, regulates_rel, regulated_mf_n)
-                                                # TODO: Suppress/delete (GP-A)<-enabled_by-(root MF)-part_of->(term) aka involved_in_translated
-                                                # Remove (anchor_uri, None, term)
-                                                # Is this anchor_uri always going to the root_mf?
-                                                # Will the term individual be used for anything else?
-                                                #   Other comma-delimited extensions on same annotation?
-                                                # has_during? occurs_in?
-                                                # WB:WBGene00001173 GO:0051343 ['has_regulation_target', 'occurs_in'] ['a']
-                                                # WB:WBGene00006652 GO:0045944 ['has_regulation_target', 'occurs_in'] ['b']
-                                                # WB:WBGene00003639 GO:0036003 ['happens_during', 'has_regulation_target'] ['b']
-                                                # Other comma-delimited extensions (e.g. happens_during, has_input) need this triple?
-                                                # WB:WBGene00002335 GO:1902685 ['has_regulation_target', 'occurs_in'] ['d']
-                                            else:
-                                                logger.warning("Couldn't get regulates relation and/or regulated term from LD of: {}".format(term))
-                                        elif bucket in ["b", "c"]:
-                                            regulates_rel, regulated_term = self.get_rel_and_term_in_logical_definitions(term)
-                                            if regulates_rel:
-                                                # find 'Y subPropertyOf regulates_rel' in RO where Y will be `causally
-                                                # upstream of` relation
-                                                # Ex. GO:0045944 -> RO:0002213 -> RO:0002304
-                                                # edges(RO:0002213) only returns subProperties. Need superProperties
-                                                # Gettin super properties
-                                                causally_upstream_relation = self.get_causally_upstream_relation(regulates_rel)
-                                                # GP-A<-enabled_by-[root MF]-part_of->[regulation of Z]-has_input->GP-B,-causally upstream of (positive/negative effect)->[root MF]-enabled_by->GP-B
-                                                ext_target_n = annot_subgraph.add_instance_of_class(ext_target)
-                                                anchor_n = annot_subgraph.get_anchor()  # TODO: Gotta find MF. MF no longer anchor if primary term is BP
-                                                annot_subgraph.add_edge(anchor_n, INPUT_RELATIONS["has input"], ext_target_n)
-                                                root_mf_b_n = annot_subgraph.add_instance_of_class(upt.molecular_function)
-                                                annot_subgraph.add_edge(anchor_n, causally_upstream_relation, root_mf_b_n)
-                                                annot_subgraph.add_edge(root_mf_b_n, ro.enabled_by, ext_target_n)
-                                                # WB:WBGene00001574 GO:1903363 ['happens_during', 'has_regulation_target'] ['c', 'd']
-                                            else:
-                                                logger.warning("Couldn't get regulates relation from LD of: {}".format(term))
-
-                    else:
-                        logger.debug("BAD: {}".format(ext_str))
+                                annot_subgraph.add_edge(anchor_n, INPUT_RELATIONS['has_input'], ext_target_n)
+                            else:
+                                buckets = has_regulation_target_bucket(self.ontology, term)
+                                if len(buckets) > 0:
+                                    bucket = buckets[0]  # Or express all buckets?
+                                    # Four buckets
+                                    if bucket in ["a", "d"]:
+                                        regulates_rel, regulated_mf = self.get_rel_and_term_in_logical_definitions(term)
+                                        if regulates_rel and regulated_mf:
+                                            # [GP-A]<-enabled_by-[root MF]-regulates->[molecular function Z]-enabled_by->[GP-B]
+                                            ext_target_n = annot_subgraph.add_instance_of_class(ext_target)
+                                            regulated_mf_n = annot_subgraph.add_instance_of_class(regulated_mf)
+                                            annot_subgraph.add_edge(regulated_mf_n, ro.enabled_by, ext_target_n)
+                                            anchor_n = annot_subgraph.get_anchor()
+                                            annot_subgraph.add_edge(anchor_n, regulates_rel, regulated_mf_n)
+                                            # TODO: Suppress/delete (GP-A)<-enabled_by-(root MF)-part_of->(term) aka involved_in_translated
+                                            # Remove (anchor_uri, None, term)
+                                            # Is this anchor_uri always going to the root_mf?
+                                            # Will the term individual be used for anything else?
+                                            #   Other comma-delimited extensions on same annotation?
+                                            # has_during? occurs_in?
+                                            # WB:WBGene00001173 GO:0051343 ['has_regulation_target', 'occurs_in'] ['a']
+                                            # WB:WBGene00006652 GO:0045944 ['has_regulation_target', 'occurs_in'] ['b']
+                                            # WB:WBGene00003639 GO:0036003 ['happens_during', 'has_regulation_target'] ['b']
+                                            # Other comma-delimited extensions (e.g. happens_during, has_input) need this triple?
+                                            # WB:WBGene00002335 GO:1902685 ['has_regulation_target', 'occurs_in'] ['d']
+                                        else:
+                                            logger.warning("Couldn't get regulates relation and/or regulated term from LD of: {}".format(term))
+                                    elif bucket in ["b", "c"]:
+                                        regulates_rel, regulated_term = self.get_rel_and_term_in_logical_definitions(term)
+                                        if regulates_rel:
+                                            # find 'Y subPropertyOf regulates_rel' in RO where Y will be `causally
+                                            # upstream of` relation
+                                            # Ex. GO:0045944 -> RO:0002213 -> RO:0002304
+                                            # edges(RO:0002213) only returns subProperties. Need superProperties
+                                            # Gettin super properties
+                                            causally_upstream_relation = self.get_causally_upstream_relation(regulates_rel)
+                                            # GP-A<-enabled_by-[root MF]-part_of->[regulation of Z]-has_input->GP-B,-causally upstream of (positive/negative effect)->[root MF]-enabled_by->GP-B
+                                            ext_target_n = annot_subgraph.add_instance_of_class(ext_target)
+                                            anchor_n = annot_subgraph.get_anchor()  # TODO: Gotta find MF. MF no longer anchor if primary term is BP
+                                            annot_subgraph.add_edge(anchor_n, INPUT_RELATIONS["has input"], ext_target_n)
+                                            root_mf_b_n = annot_subgraph.add_instance_of_class(upt.molecular_function)
+                                            annot_subgraph.add_edge(anchor_n, causally_upstream_relation, root_mf_b_n)
+                                            annot_subgraph.add_edge(root_mf_b_n, ro.enabled_by, ext_target_n)
+                                            # WB:WBGene00001574 GO:1903363 ['happens_during', 'has_regulation_target'] ['c', 'd']
+                                        else:
+                                            logger.warning("Couldn't get regulates relation from LD of: {}".format(term))
                     # For annots w/ extensions, this is where we write subgraph to model
                     annot_subgraph.write_to_model(self, evidences)
         # self.extensions_mapper.go_aspector.write_cache()
diff --git a/ontobio/rdfgen/gocamgen/gpad_extensions_mapper.py b/ontobio/rdfgen/gocamgen/gpad_extensions_mapper.py
deleted file mode 100644
index 088bcebb..00000000
--- a/ontobio/rdfgen/gocamgen/gpad_extensions_mapper.py
+++ /dev/null
@@ -1,662 +0,0 @@
-# from ontobio.io.gafparser import GafParser
-from ontobio.io.gpadparser import GpadParser
-from ontobio.io.assocparser import SplitLine
-from ontobio.ontol_factory import OntologyFactory
-# from prefixcommons import curie_util
-from ontobio.ecomap import EcoMap
-from ontobio.util.go_utils import GoAspector
-from ontobio.rdfgen.assoc_rdfgen import prefix_context
-from .filter_rule import *
-from .collapsed_assoc import extract_properties
-import json
-import csv
-import os
-import argparse
-import logging
-import datetime
-from pathlib import Path
-
-
-DISTINCT_EXTENSIONS = {}
-
-# logging.basicConfig(level=logging.INFO)
-logger = logging.getLogger(__name__)
-logger.setLevel("INFO")
-
-parser = argparse.ArgumentParser()
-parser.add_argument("-f", "--filename")
-parser.add_argument("-d", "--dir")
-parser.add_argument("-c", "--constraints_yaml")
-parser.add_argument("-o", "--out_file")
-parser.add_argument("-l", "--leftovers_out_file")
-parser.add_argument("-p", "--pattern")
-parser.add_argument("-q", "--pattern_outfile")
-parser.add_argument("-r", "--pattern_sourcefile")
-parser.add_argument("-m", "--mod")
-parser.add_argument("-e", "--extensions_list", action='store_const', const=True,
-                    help="Print out distinct extensions list")
-
-ontology_prefixes = []
-for k, v in prefix_context.items():
-    if v.startswith("http://purl.obolibrary.org/obo/"):
-        ontology_prefixes.append(k)
-ontology_prefixes.append("Pfam")
-
-mod_prefixes = [
-    "PomBase",
-    "UniProtKB",
-]
-
-acceptable_evidence_codes = [
-    "EXP",
-    "IDA",
-    "IPI",
-    "IMP",
-    "IGI",
-    "IEP"
-]
-ecomap = EcoMap()
-
-
-def date_fname(fname):
-    date_str = datetime.date.today().isoformat()
-    fname_and_ext = os.path.splitext(fname)
-    new_fname = fname_and_ext[0]
-    new_fname += "_{}".format(date_str.replace("-", ""))
-    new_fname += fname_and_ext[1]
-    return new_fname
-
-class ExtRelationPattern:
-    def __init__(self, ext_relation, ext_namespaces, primary_term_roots=None, max_allowed=None):
-        self.ext_relation = ext_relation
-        self.ext_namespaces = ext_namespaces
-        self.max_allowed = max_allowed  # cardinality?
-        self.primary_term_roots = primary_term_roots if isinstance(primary_term_roots, list) else [primary_term_roots]
-        # TODO: Also handle primary_term_roots == None
-
-
-class ExtRelationValidPattern(ExtRelationPattern):
-    def __init__(self, ext_relation, ext_namespaces, primary_term_roots=None, max_allowed=None):
-        ExtRelationPattern.__init__(self, ext_relation, ext_namespaces, primary_term_roots, max_allowed)
-        self.is_valid = True
-
-
-class ExtRelationInvalidPattern(ExtRelationPattern):
-    def __init__(self, ext_relation, ext_namespaces, primary_term_roots=None, max_allowed=None):
-        ExtRelationPattern.__init__(self, ext_relation, ext_namespaces, primary_term_roots, max_allowed)
-        self.is_valid = False
-
-
-# Extension rules - These should live in separate files
-FUNCTION_SINGLES_ONLY = [
-    "occurs_in(GO:C)",
-    "occurs_in(CL)",
-    "occurs_in(UBERON)",
-    "occurs_in(EMAPA)",
-    "has_input(geneID)",
-    "has_input(CHEBI)",
-    "has_direct_input(geneID)",
-    "has_direct_input(CHEBI)",
-    "happens_during(GO:P)",
-    "part_of(GO:P)",
-    "has_regulation_target(geneID)",
-    "activated_by(CHEBI)",
-    "inhibited_by(CHEBI)"
-]
-COMPONENT_SINGLES_ONLY = [
-    "part_of(GO:C)",
-    "part_of(CL)",
-    "part_of(UBERON)",
-    "part_of(EMAPA)"
-]
-PROCESS_SINGLES_ONLY = [
-    "occurs_in(GO:C)",
-    "occurs_in(CL)",
-    "occurs_in(UBERON)",
-    "occurs_in(EMAPA)",
-    "occurs_in(WBbt)",
-    "has_input(geneID)",
-    "has_input(CHEBI)",
-    "has_direct_input(geneID)",
-    "has_direct_input(CHEBI)",
-    "has_regulation_target(geneID)",
-    "part_of(GO:P)"
-]
-EXTENSION_RELATION_UNIVERSE = []  # Should this also be aspect-specific?
-for r in FUNCTION_SINGLES_ONLY + COMPONENT_SINGLES_ONLY + PROCESS_SINGLES_ONLY:
-    if r not in EXTENSION_RELATION_UNIVERSE:
-        EXTENSION_RELATION_UNIVERSE.append(r)
-# Add remaining extensions not having cardinality restrictions yet we know how to translate them
-EXTENSION_RELATION_UNIVERSE.append([
-    "has_regulation_target(geneID)"
-])
-
-
-gaf_indices = {}
-gpad_indices = {
-
-}
-
-# TODO: Check where Aspector is used in gocamgen. If only for validating extensions we may be able to remove it.
-class CachedGoAspector(GoAspector):
-
-    def __init__(self, cache_filepath=None, go_ontology=None):
-        GoAspector.__init__(self, go_ontology)
-        if cache_filepath is None:
-            # TODO: Where to store cache?
-            cache_filepath = "resources/aspect_lookup.json"
-        self.cache_filepath = cache_filepath
-        self.aspect_lookup = {
-            "F": [],
-            "P": [],
-            "C": []
-        }
-        aspect_file = Path(self.cache_filepath)
-        if not aspect_file.is_file():
-            logger.debug("Creating aspect_lookup cache file: {}".format(self.cache_filepath))
-            self.write_cache()
-        else:
-            with open(cache_filepath) as af:
-                try:
-                    self.aspect_lookup = json.load(af)
-                except json.decoder.JSONDecodeError:
-                    logger.warning("Corrupt aspect_lookup cache file: {} - Recreating...".format(self.cache_filepath))
-
-    def write_cache(self):
-        with open(self.cache_filepath, "w+") as af:
-            json.dump(self.aspect_lookup, af)
-
-    def _is_biological_process(self, go_term):
-        bp_root = "GO:0008150"
-        return go_term in self.aspect_lookup["P"] or go_term == bp_root
-
-    def _is_molecular_function(self, go_term):
-        mf_root = "GO:0003674"
-        return go_term in self.aspect_lookup["F"] or go_term == mf_root
-
-    def _is_cellular_component(self, go_term):
-        cc_root = "GO:0005575"
-        return go_term in self.aspect_lookup["C"] or go_term == cc_root
-
-    def go_aspect(self, term):
-        if self._is_molecular_function(term):
-            return 'F'
-        elif self._is_biological_process(term):
-            return 'P'
-        elif self._is_cellular_component(term):
-            return 'C'
-        else:
-            aspect = super(CachedGoAspector, self).go_aspect(term)
-            if aspect:
-                self.aspect_lookup[aspect].append(term)
-                return aspect
-            return None
-
-
-
-def format_extensions(ext_json):
-    # print(ext_json)
-    ej = json.loads(ext_json)
-    ret = []
-    # print(ej["union_of"][0]["intersection_of"])
-    for i in ej["union_of"][0]["intersection_of"]:
-        ret.append("{}({})".format(i["property"], i["filler"]))
-    # print(ret)
-    return ",".join(ret)
-
-def get_relation_and_term(ext):
-    ext_parts = ext.split("(")
-    relation = ext_parts[0]  # i["property"]
-    try:
-        ext_term = ext_parts[1].split(")")[0]  # i["filler"]
-    except:
-        logger.warning(ext_parts)
-        # [' dppa4']
-        # $ grep dppa4 resources/mgi.gpa.test.gpa
-        # MGI	MGI:2141165	acts_upstream_of_or_within	GO:0010628	MGI:MGI:5913752|PMID:20699224	ECO:0000315	MGI:MGI:6108355		20190124	MGI	has_regulation_target(Dppa2, dppa4, piwil2),occurs_in(EMAPA:16036)
-        ext_term = ext_parts[1].split(")")[0]  # i["filler"]
-    return relation, ext_term
-
-def filter_evi_codes(annots):
-    filtered = []
-    for a in annots:
-        if ecomap.ecoclass_to_coderef(a[5])[0] in acceptable_evidence_codes:
-            filtered.append(a)
-    return filtered
-
-def filter_has_extension(annots):
-    filtered = []
-    for a in annots:
-        if a[10] != "":
-            filtered.append(a)
-    return filtered
-
-def filter_rule_validate_lines(annots, assoc_filter):
-    filtered = []
-    # Converts split GPAD line into ontobio assoc obj for passing into standard FilterRule validation
-    gpad_parser = GpadParser()
-    for a in annots:
-        parse_result = gpad_parser.parse_line("\t".join(a))
-        if len(parse_result.associations) > 0:
-            # Right now, GpadParser only returns 0 or 1 associations
-            assoc = parse_result.associations[0]
-            assoc = extract_properties(assoc)
-            if "annotation_properties" in assoc:
-                a.append(assoc["annotation_properties"])
-            if assoc_filter.validate_line(assoc):
-                filtered.append(a)
-    return filtered
-
-def sum_combos(extension_counts, combo_list):
-    cur_sum = 0
-    for c in combo_list:
-        if c in extension_counts:
-            cur_sum += extension_counts[c]
-    return cur_sum
-
-def violates_combo_rule(extension_counts, list_of_combo_lists, max_allowed):
-    for combo_list in list_of_combo_lists:
-        if sum_combos(extension_counts, combo_list) > max_allowed:
-            return True
-    return False
-
-def is_nested_occurs_in_grouping(extensions_list):
-    # When annotation extensions are comma delimited and are from GO_CC, CL and an anatomy ontology
-    # then the model should indicate GO_CC -> part_of->CL->part_of->anatomy.
-    # Filter occurs_in extensions
-    # Gather terms, arrange
-    # When would multiple occurs_in's cause this to fail? Multiple occurs_in(GO:CC), unless CC's are related? Examples?
-    #
-    do_stuff = 1
-
-class ExtensionValidityCheckResult:
-    def __init__(self, is_valid, reason=None, offending_extension=None):
-        self.is_valid = is_valid
-        self.reason = reason
-        self.offending_extension = offending_extension
-
-    def __nonzero__(self):
-        return self.is_valid
-
-
-class ExtensionsMapper:
-    def __init__(self, constraints_yaml, ontology=None):
-        self.ontology = self.setup_ontologies(ontology)
-        self.go_aspector = CachedGoAspector("resources/aspect_lookup.json", go_ontology=go_ontology)
-
-        self.ext_relation_valid_patterns = []
-        # Parsing formatted version of David's spreadsheet extensions_list_w_provided_bys_updated_20190328 into more rules
-        with open(constraints_yaml) as rf:
-            extension_constraints = yaml.load(rf)
-            # rel_reader = csv.reader(rf, delimiter="\t")
-            # next(rel_reader)  # Skip header row
-            for ec in extension_constraints:
-                relation = ec["relation"]
-                namespaces = ec["namespaces"]
-                root_terms = ec["primary_root_terms"]
-                max_allowed = ec.get("cardinality")
-                # if len(rl[3]) > 0:
-                #     max_allowed = int(rl[3])
-                ext_pattern = ExtRelationValidPattern(relation, namespaces, root_terms, max_allowed=max_allowed)
-                self.ext_relation_valid_patterns.append(ext_pattern)
-
-    @staticmethod
-    def setup_ontologies(ontology=None):
-        if ontology is None:
-            return OntologyFactory().create("go")
-        return ontology
-
-    def is_valid_ext_pattern(self, primary_term, ext_relation, ext_namespace, occurence_count):
-        for p in self.ext_relation_valid_patterns:
-            if p.ext_relation == ext_relation and ext_namespace in p.ext_namespaces and \
-                    not (p.max_allowed and occurence_count > p.max_allowed):
-                # Check ancestors of query term - might take a while
-                all_ancestors = GO_ONTOLOGY.ancestors(primary_term, reflexive=True)
-                is_a_ancestors = GO_ONTOLOGY.subontology(all_ancestors).ancestors(primary_term,
-                                                                                  relations=["subClassOf"],
-                                                                                  reflexive=True)
-                # Cache IS_A_ANCESTORS[primary_term] = is_a_ancestors ?
-                # Valid pattern root term is an is_a ancestor of GPAD primary term
-                for t in p.primary_term_roots:
-                    if t in is_a_ancestors:
-                        return True
-        return False
-
-    def following_rules(self, extension_list, aspect, term):
-        ext_counts = {}
-        for e in extension_list:
-            if e in ext_counts:
-                ext_counts[e] += 1
-            else:
-                ext_counts[e] = 1
-
-        for ek in ext_counts.keys():
-            if not self.is_valid_ext_pattern(term, *get_relation_and_term(ek), ext_counts[ek]):
-                # return False
-                # Pack this into Error class
-                return ExtensionValidityCheckResult(is_valid=False, offending_extension=ek)
-
-        combos_to_check_for = [
-            ["occurs_in(UBERON)", "occurs_in(EMAPA)"]
-        ]
-        # Only allow 1 occurrence each of combos_to_check_for
-        if violates_combo_rule(ext_counts, combos_to_check_for, 1):
-            # return False
-            return ExtensionValidityCheckResult(is_valid=False, reason="violates_combo_rule")
-
-        # return True
-        return ExtensionValidityCheckResult(is_valid=True)
-
-    def filter_no_rules_broken(self, annots):
-        filtered = []
-        for a in annots:
-            if not self.following_rules(a):
-                filtered.append(a)
-        return filtered
-
-    def translate_relation_to_ro(self, relation_label):
-        for n in self.ontology.nodes():
-            node_label = self.ontology.label(n)
-            if node_label == relation_label.replace("_", " "):
-                return n
-
-    def extensions_list(self, intersection_extensions, row_cols=[]):
-        ext_list = []
-        # Assuming these extensions are already separated by comma
-        intersection_extensions = self.dedupe_extensions(intersection_extensions)
-        for i in intersection_extensions:
-            relation, ext_term = i['property'], i['filler']
-            term_prefix = ext_term.split(":")[0]
-            # is prefix for ontology or mod?
-            if term_prefix in ontology_prefixes:
-                if term_prefix == "GO":
-                    # need to find aspect of GO term
-                    go_aspect = self.go_aspector.go_aspect(ext_term)
-                    if go_aspect is not None:
-                        ont = "GO:" + go_aspect
-                    else:
-                        logger.warning("No aspect found for term: {}".format(ext_term))
-                        ont = "GO"
-                else:
-                    ont = term_prefix
-                term_prefix = ont
-            else:
-                term_prefix = "geneID"
-            ext_list.append("{}({})".format(relation, term_prefix))
-            # Track stats - make sure to get GO aspect from ont var
-            if relation not in DISTINCT_EXTENSIONS:
-                DISTINCT_EXTENSIONS[relation] = {}
-            if term_prefix not in DISTINCT_EXTENSIONS[relation]:
-                # should be checking for duplicate rows
-                DISTINCT_EXTENSIONS[relation][term_prefix] = []
-            # else:
-            #     DISTINCT_EXTENSIONS[relation][term_prefix] += 1
-            # if row_cols not in DISTINCT_EXTENSIONS[relation][term_prefix]:
-            DISTINCT_EXTENSIONS[relation][term_prefix].append(row_cols)
-        # ordering of extension relations may be inconsistent so sort
-        return sorted(ext_list, key=str.lower)
-
-    # Do we need this? Does this work? WE TOTALLY NEED THIS!!! At least maybe as an entry point to find what bucket an
-    # annotation falls in.
-    # HANDLE ASSOCIATION OBJECT_EXTENSIONS STRUCTURE
-    def annot_following_rules(self, annot, aspect, term):
-        ext_list = []
-        #TODO for a in annot.split("|"):
-        # extensions = annot.split(",")
-        # annot["object_extensions"]
-        # ext_list = self.extensions_list(extensions)
-        ext_list = self.extensions_list(annot)
-        # Standardize key - ex:
-        #   ["part_of(GO:aspect),part_of(UBERON:)"]
-        return self.following_rules(ext_list, aspect, term)
-
-
-d = [
-    "GeneDB_tsetse",
-    "gonuts",
-    "reactome",
-    "isoform",
-    "goa_pdb",
-    "goa_uniprot_all.gaf",
-]
-
-
-if __name__ == "__main__":
-    args = parser.parse_args()
-
-    def get_filter_name(gpad_fname):
-        if args.mod:
-            filter_name = args.mod
-        else:
-            # Try matching filename to filter rule (e.g. wb.gpad to 'WB' to WBFilterRule)
-            filter_name = os.path.basename(os.path.splitext(gpad_fname)[0]).upper()
-        return filter_name
-
-    bad_extensions = []
-
-    # filenames = []
-    # Use dict for holding different FilterRules for each file
-    filenames = {}
-    # data = []
-    # fname = "/Users/ebertdu/go/go-pombase/gene_association.pombase-06-2018"
-    # data = GafParser().parse(fname, skipheader=True)
-    if args.filename is not None:
-        # filenames.append(args.filename)
-        filter_name = get_filter_name(args.filename)
-        filter_rule = get_filter_rule(filter_name)
-        filenames[args.filename] = filter_rule
-        # data = GafParser().parse(args.filename, skipheader=True)
-    elif args.dir is not None:
-        for fname in os.listdir(args.dir):
-            # print("Loading file:", fname)
-            nono_in_fname = False
-            for nono in d:
-                if nono in fname:
-                    nono_in_fname = True
-            if fname.endswith(".tsv") or nono_in_fname:
-                continue
-            # filenames.append(args.dir + fname)
-            filter_name = get_filter_name(fname)
-            filenames[args.dir + fname] = get_filter_rule(filter_name)
-            # data = data + GafParser().parse(fname, skipheader=True)
-
-    # all_dict = {}
-    extensions_mapper = ExtensionsMapper(constraints_yaml=args.constraints_yaml)
-    gpad_parser = GpadParser()
-    print("Creating extension dictionary...")
-    ext_dict = {}
-    ext_dict['F'] = {}
-    ext_dict['P'] = {}
-    ext_dict['C'] = {}
-    for fname in filenames:
-        with open(fname) as f:
-            data = []
-            print("Loading file:", fname)
-            for l in f.readlines():
-                if not l.startswith("!"):
-                    parts = l.split("\t")
-                    # if parts[15] != "" and parts[6] in acceptable_evidence_codes:
-                    data.append(parts)
-            print("# of GPAD lines in file:", len(data))
-            data = filter_has_extension(data)
-            print("# of GPAD lines having extensions:", len(data))
-            # filter_rule = get_filter_rule(args.mod)
-            filter_rule = filenames[fname]
-            assoc_filter = AssocFilter(filter_rule)
-            data = filter_rule_validate_lines(data, assoc_filter)
-            print("Total GPAD count after applying {}: {}".format(filter_rule.__class__.__name__, len(data)))
-
-            for g in data:
-                go_term = g[3]
-                aspect = extensions_mapper.go_aspector.go_aspect(go_term)
-                split_line = SplitLine(line="\t".join(g), values=g, taxon="")  # Needed for ontobio error handling
-                ontobio_extensions = gpad_parser._parse_full_extension_expression(g[10], split_line)
-                ontobio_extensions = extensions_mapper.dedupe_extensions(ontobio_extensions)
-                # ontobio_pattern = {
-                #     'union_of': [
-                #         {
-                #             'intersection_of': [
-                #                 {'property': 'part_of', 'filler': 'CL:0000678'},
-                #                 {'property': 'part_of', 'filler': 'EMAPA:16525'}
-                #             ]
-                #         },
-                #         {
-                #               'intersection_of': [
-                #                   {'property': 'part_of', 'filler': 'CL:0000678'},
-                #                   {'property': 'part_of', 'filler': 'EMAPA:16525'}
-                #               ]
-                #         }
-                #     ]
-                # }
-                for onto_ext in ontobio_extensions:
-                    ext_list = extensions_mapper.extensions_list(onto_ext['intersection_of'], g)
-                    check_ext_result = extensions_mapper.following_rules(ext_list, aspect, go_term)
-                    if not check_ext_result.is_valid:
-                        ext_key = ",".join(ext_list)
-                        offending_extension = check_ext_result.offending_extension
-                        if offending_extension is None:
-                            offending_extension = check_ext_result.reason
-                        bad_extensions.append([":".join(g[0:2]), go_term, GO_ONTOLOGY.label(go_term), offending_extension, g[10]])
-                        if ext_key not in ext_dict[aspect]:
-                            ext_dict[aspect][ext_key] = [g]
-                        elif g not in ext_dict[aspect][ext_key]:
-                            ext_dict[aspect][ext_key].append(g)
-
-            for aspect in ['F','P','C']:
-                max_count = 0
-                top_k = None
-                example_v = None
-                for k, v in ext_dict[aspect].items():
-                    if len(v) > max_count:
-                        max_count = len(v)
-                        # print(max_count)
-                        top_k = k
-                        example_v = v[0]
-
-    def assigner_count(annots, assign):
-        count = 0
-        for a in annots:
-            if a[9] == assign:
-                count += 1
-        return count
-
-    cols = ['Aspect', 'Total count', 'Extension']
-    all_assigners = []
-    for aspect in ['F', 'P', 'C']:
-        for k, v in ext_dict[aspect].items():
-            for a in v:
-                if a[9] not in all_assigners:
-                    all_assigners.append(a[9])
-                    cols.append(a[9])
-
-    def parse_pattern_sourcefile(sourcefile):
-        parsed_patterns = []
-        with open(sourcefile) as sf:
-            for pl in sf.readlines():
-                pl = pl.rstrip()
-                parsed_patterns.append(pl)
-        return parsed_patterns
-
-    class GpadWriter:
-        def __init__(self, gpad_file):
-            self.gpad_file = gpad_file
-            self.writer = csv.writer(pattern_gpad, delimiter="\t")
-
-        def writerow(self, row):
-            self.writer.writerow(row)
-
-    class WriterCollection:
-        def __init__(self):
-            self.writers = {}
-            self.rows = {}
-
-        def set_writer(self, writer, writer_name):
-            self.writers[writer_name] = writer
-            self.rows[writer_name] = []
-
-
-    patterns = []
-    writers = WriterCollection()
-    if args.pattern or args.pattern_sourcefile:
-        if args.pattern_sourcefile:
-            patterns = parse_pattern_sourcefile(args.pattern_sourcefile)
-            # writers = {}
-            for patt in patterns:
-                pattern_outfile = "{}.gpad".format(patt)
-                pattern_gpad = open(pattern_outfile, "w+")
-                pattern_gpad_writer = GpadWriter(pattern_gpad)
-                writers.set_writer(pattern_gpad_writer, patt)
-        else:
-            patterns.append(args.pattern)
-            pattern_outfile = "{}.gpad".format(args.pattern)
-            if args.pattern_outfile:
-                pattern_outfile = args.pattern_outfile
-            pattern_gpad = open(pattern_outfile, "w+")
-            pattern_gpad_writer = GpadWriter(pattern_gpad)
-            writers.set_writer(pattern_gpad_writer, args.pattern)
-    out_file = "all.tsv"
-    if args.out_file:
-        out_file = args.out_file
-    with open(out_file, 'w') as f:
-        writer = csv.writer(f, delimiter='\t')
-        writer.writerow(cols)
-        for aspect in ['F', 'P', 'C']:
-            max_count = 0
-            top_k = None
-            example_v = None
-            for k, v in ext_dict[aspect].items():
-                row_to_write = [aspect, len(v), k]
-                if len(patterns) > 0:
-                    for patt in patterns:
-                        # if k == patt:
-                        if patt in k:
-                            for a in v:
-                                writers.rows[patt].append(a[0:len(a)-1])
-                                # writers.writers[patt].writerow(a[0:len(a)-1])
-                for assigner in all_assigners:
-                    a_count = assigner_count(v, assigner)
-                    row_to_write.append(a_count)
-                writer.writerow(row_to_write)
-    for writer_name in writers.writers:
-        for row in writers.rows[writer_name]:
-            writers.writers[writer_name].writerow(row)
-        gpad_file = writers.writers[writer_name].gpad_file
-        print("Offending GPAD lines written to:", gpad_file.name)
-        gpad_file.close()
-
-    wanted_gafs = []
-    leftovers_out_file = "leftovers.gpad"
-    if args.leftovers_out_file:
-        leftovers_out_file = args.leftovers_out_file
-    with open(leftovers_out_file, 'w') as wf:
-        for k in ext_dict['P'].keys():
-            if 'has_regulation_target' in k:
-                wanted_gafs = wanted_gafs + ext_dict['P'][k]
-                for g in ext_dict['P'][k]:
-                    wf.write("\t".join(g))
-
-    if args.extensions_list:
-        with open("distinct_extensions.txt", "w+") as de_f:
-            de_writer = csv.writer(de_f, delimiter="\t")
-            de_writer.writerow(["pattern", "namespaces", "usage count", "line count", "RO code"])
-            for rel in DISTINCT_EXTENSIONS:
-                distinct_lines = []
-                usage_count = 0
-                ro_rel = extensions_mapper.translate_relation_to_ro(rel)
-                for prefix in DISTINCT_EXTENSIONS[rel]:
-                    usage_count += len(DISTINCT_EXTENSIONS[rel][prefix])
-                    # distinct_lines = []
-                    for line in DISTINCT_EXTENSIONS[rel][prefix]:
-                        if line not in distinct_lines:
-                            distinct_lines.append(line)
-                    # line_count = len(distinct_lines)
-                    # de_writer.writerow(["{}({})".format(rel, prefix), len(DISTINCT_EXTENSIONS[rel][prefix]), line_count])
-                line_count = len(distinct_lines)
-                de_writer.writerow([rel, ",".join(DISTINCT_EXTENSIONS[rel].keys()), usage_count, line_count, ro_rel])
-
-    bad_extensions_fname = date_fname("bad_extensions.tsv")
-    with open(bad_extensions_fname, "w+") as bef:
-        be_writer = csv.writer(bef, delimiter="\t")
-        be_writer.writerow(["DB Object", "Primary term", "Label", "Offending extension", "Full extensions"])
-        for be in bad_extensions:
-            be_writer.writerow(be)
\ No newline at end of file

From d30d7ea61dd88aad3ca35f8b4383f0d7c4a7afd6 Mon Sep 17 00:00:00 2001
From: dustine32 <dustine32@gmail.com>
Date: Tue, 23 Mar 2021 12:03:26 -0700
Subject: [PATCH 3/3] Reverting validate cmd changes for parser report

---
 bin/validate.py | 11 +++--------
 1 file changed, 3 insertions(+), 8 deletions(-)

diff --git a/bin/validate.py b/bin/validate.py
index fabbdeee..1c51541d 100644
--- a/bin/validate.py
+++ b/bin/validate.py
@@ -566,11 +566,9 @@ def gpad2gocams(ctx, gpad_path, gpi_path, target, ontology, ttl):
     gpad_basename = os.path.basename(gpad_path)
     gpad_basename_root, gpad_ext = os.path.splitext(gpad_basename)
     output_basename = "{}.nq".format(gpad_basename_root)
-    parse_report_basename = "{}.parser.report".format(gpad_basename_root)
-    model_report_basename = "{}.gocamgen.report".format(gpad_basename_root)
+    report_basename = "{}.gocamgen.report".format(gpad_basename_root)
     output_path = os.path.join(absolute_target, output_basename)
-    parse_report_path = os.path.join(absolute_target, parse_report_basename)
-    model_report_path = os.path.join(absolute_target, model_report_basename)
+    report_path = os.path.join(absolute_target, report_basename)
 
     builder = GoCamBuilder(parser_config=parser_config)
 
@@ -582,10 +580,7 @@ def gpad2gocams(ctx, gpad_path, gpi_path, target, ontology, ttl):
     if not ttl:
         builder.write_out_store_to_nquads(filepath=output_path)
 
-    # Reports
-    builder.write_report(report_filepath=model_report_path)
-    with open(parse_report_path, 'w') as prp:
-        prp.write(extractor.gpad_parser.report.to_markdown())
+    builder.write_report(report_filepath=report_path)
 
 
 @cli.command()