From ec26ec7f53233ca0a6d19a840780ec6a66decf4d Mon Sep 17 00:00:00 2001 From: Rob Court Date: Thu, 28 May 2026 13:51:35 +0100 Subject: [PATCH 1/5] extend the Cypher + preview_columns for three queries # Extends the v1.10.1 channel/template/technique pattern. Adds: # - type = pipe-joined parent class labels (n2 -[:INSTANCEOF]-> Class) # matches v2 prod's `Type` column from SOLR's `types` collection # - template = `[symbol](short_form)` markdown of the alignment template # - technique = imaging technique label (channel -[:is_specified_output_of]-> Class) --- src/vfbquery/vfb_queries.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/src/vfbquery/vfb_queries.py b/src/vfbquery/vfb_queries.py index 9806a33..dfdccef 100644 --- a/src/vfbquery/vfb_queries.py +++ b/src/vfbquery/vfb_queries.py @@ -2453,19 +2453,32 @@ def get_similar_neurons(neuron, similarity_score='NBLAST_score', return_datafram count_df = pd.DataFrame.from_records(get_dict_cursor()(count_results)) total_count = count_df['total_count'][0] if not count_df.empty else 0 - main_query = f"""MATCH (c1:Class)<-[:INSTANCEOF]-(n1)-[r:has_similar_morphology_to]-(n2)-[:INSTANCEOF]->(c2:Class) + # Extends the v1.10.1 channel/template/technique pattern. Adds: + # - type = pipe-joined parent class labels (n2 -[:INSTANCEOF]-> Class) + # matches v2 prod's `Type` column from SOLR's `types` collection + # - template = `[symbol](short_form)` markdown of the alignment template + # - technique = imaging technique label (channel -[:is_specified_output_of]-> Class) + main_query = f"""MATCH (c1:Class)<-[:INSTANCEOF]-(n1)-[r:has_similar_morphology_to]-(n2)-[:INSTANCEOF]->(c2:Class) WHERE n1.short_form = '{neuron}' and exists(r.{similarity_score}) WITH c1, n1, r, n2, c2 OPTIONAL MATCH (n2)-[rx:database_cross_reference]->(site:Site) WHERE site.is_data_source WITH n2, r, c2, rx, site - OPTIONAL MATCH (n2)<-[:depicts]-(:Individual)-[ri:in_register_with]->(:Template)-[:depicts]->(templ:Template) + OPTIONAL MATCH (n2)<-[:depicts]-(channel:Individual)-[ri:in_register_with]->(:Template)-[:depicts]->(templ:Template) + OPTIONAL MATCH (channel)-[:is_specified_output_of]->(technique:Class) + WITH n2, r, c2, rx, site, channel, ri, templ, technique + OPTIONAL MATCH (n2)-[:INSTANCEOF]->(typ:Class) + WITH n2, r, rx, site, channel, ri, templ, technique, + apoc.text.join(collect(DISTINCT coalesce(typ.label, '')), '; ') AS type RETURN DISTINCT n2.short_form as id, - apoc.text.format("[%s](%s)", [n2.label, n2.short_form]) AS name, + apoc.text.format("[%s](%s)", [n2.label, n2.short_form]) AS name, r.{similarity_score}[0] AS score, - apoc.text.join(n2.uniqueFacets, '|') AS tags, + apoc.text.join(coalesce(n2.uniqueFacets, []), '|') AS tags, + type, REPLACE(apoc.text.format("[%s](%s)",[COALESCE(site.symbol[0],site.label),site.short_form]), '[null](null)', '') AS source, REPLACE(apoc.text.format("[%s](%s)",[rx.accession[0], (site.link_base[0] + rx.accession[0])]), '[null](null)', '') AS source_id, + REPLACE(apoc.text.format("[%s](%s)",[COALESCE(templ.symbol[0],templ.label),templ.short_form]), '[null](null)', '') AS template, + coalesce(technique.label, '') AS technique, REPLACE(apoc.text.format("[![%s](%s '%s')](%s)",[COALESCE(n2.symbol[0],n2.label) + " aligned to " + COALESCE(templ.symbol[0],templ.label), REPLACE(COALESCE(ri.thumbnail[0],""),"thumbnailT.png","thumbnail.png"), COALESCE(n2.symbol[0],n2.label) + " aligned to " + COALESCE(templ.symbol[0],templ.label), templ.short_form + "," + n2.short_form]), "[![null]( 'null')](null)", "") as thumbnail ORDER BY score DESC""" From c59b642252b8b384fc50a224c0b23be28f5cde71 Mon Sep 17 00:00:00 2001 From: Robbie Court Date: Thu, 28 May 2026 13:16:28 +0000 Subject: [PATCH 2/5] Address Copilot review on PR #42 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two issues raised: 1. `type` delimiter mismatch — doc says "pipe-joined" but Cypher joined with "; ". Switch to "|" and filter empties so an INSTANCEOF-less row doesn't produce `; ` artefacts. Uses `[l IN collect(DISTINCT typ.label) WHERE l IS NOT NULL AND l <> '']` to drop nulls/empties before the join. 2. Downstream wiring missed new columns. `template` is a markdown link `[symbol](short_form)` so it needs encode_markdown_links the same as name/source/source_id/thumbnail. The non-DataFrame response builder only listed the old keys, silently dropping type/template/technique when return_dataframe=False. Added the three columns to both the headers dict (Type/Template/Imaging Technique, plain text + markdown) and the row key extraction so the API surface matches the Cypher. --- src/vfbquery/vfb_queries.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/src/vfbquery/vfb_queries.py b/src/vfbquery/vfb_queries.py index dfdccef..f152657 100644 --- a/src/vfbquery/vfb_queries.py +++ b/src/vfbquery/vfb_queries.py @@ -2469,7 +2469,7 @@ def get_similar_neurons(neuron, similarity_score='NBLAST_score', return_datafram WITH n2, r, c2, rx, site, channel, ri, templ, technique OPTIONAL MATCH (n2)-[:INSTANCEOF]->(typ:Class) WITH n2, r, rx, site, channel, ri, templ, technique, - apoc.text.join(collect(DISTINCT coalesce(typ.label, '')), '; ') AS type + apoc.text.join([l IN collect(DISTINCT typ.label) WHERE l IS NOT NULL AND l <> ''], '|') AS type RETURN DISTINCT n2.short_form as id, apoc.text.format("[%s](%s)", [n2.label, n2.short_form]) AS name, r.{similarity_score}[0] AS score, @@ -2491,9 +2491,13 @@ def get_similar_neurons(neuron, similarity_score='NBLAST_score', return_datafram # Convert the results to a DataFrame df = pd.DataFrame.from_records(get_dict_cursor()(results)) - columns_to_encode = ['name', 'source', 'source_id', 'thumbnail'] + # template is a `[symbol](short_form)` markdown link — must be encoded the + # same way as name/source/source_id/thumbnail so the V2 frontend's link + # parser renders it consistently. type/technique are plain text and + # don't need encoding. + columns_to_encode = ['name', 'source', 'source_id', 'template', 'thumbnail'] df = encode_markdown_links(df, columns_to_encode) - + if return_dataframe: return df else: @@ -2503,8 +2507,11 @@ def get_similar_neurons(neuron, similarity_score='NBLAST_score', return_datafram "score": {"title": "Score", "type": "numeric", "order": 1, "sort": {0: "Desc"}}, "name": {"title": "Name", "type": "markdown", "order": 1, "sort": {1: "Asc"}}, "tags": {"title": "Tags", "type": "tags", "order": 2}, - "source": {"title": "Source", "type": "metadata", "order": 3}, - "source_id": {"title": "Source ID", "type": "metadata", "order": 4}, + "type": {"title": "Type", "type": "text", "order": 3}, + "source": {"title": "Source", "type": "metadata", "order": 4}, + "source_id": {"title": "Source ID", "type": "metadata", "order": 5}, + "template": {"title": "Template", "type": "markdown", "order": 6}, + "technique": {"title": "Imaging Technique", "type": "text", "order": 7}, "thumbnail": {"title": "Thumbnail", "type": "markdown", "order": 9} }, "rows": [ @@ -2515,8 +2522,11 @@ def get_similar_neurons(neuron, similarity_score='NBLAST_score', return_datafram "name", "score", "tags", + "type", "source", "source_id", + "template", + "technique", "thumbnail" ] } From 66b8dc3638623535702e442b7beb831dfd54b955 Mon Sep 17 00:00:00 2001 From: Rob Court Date: Thu, 28 May 2026 14:50:12 +0100 Subject: [PATCH 3/5] Bump SimilarMorphologyTo preview_columns to include new fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewer flagged that SimilarMorphologyTo_to_schema's preview_columns still listed only ["id","score","name","tags","thumbnail"] — so the term-info preview wouldn't surface the new type/template/technique columns even though /run_query now returns them. Updated to ["id","name","score","tags","type","template","technique", "thumbnail"], matching the v1.10.1 SimilarMorphologyToPartOf* preview shape plus the new type column this PR adds. source/source_id are deliberately omitted from the preview — they're noisy and only meaningful in the full table. --- src/vfbquery/vfb_queries.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/vfbquery/vfb_queries.py b/src/vfbquery/vfb_queries.py index f152657..af09286 100644 --- a/src/vfbquery/vfb_queries.py +++ b/src/vfbquery/vfb_queries.py @@ -1262,7 +1262,12 @@ def SimilarMorphologyTo_to_schema(name, take_default): "default": take_default, } preview = 5 - preview_columns = ["id","score","name","tags","thumbnail"] + # Match the v1.10.1 SimilarMorphologyTo* preview shape and add the new + # type column this PR exposes — keeps term-info previews in sync with + # the full /run_query response. source/source_id are intentionally + # omitted; they're noisy in compact previews and only meaningful when + # the user opens the full table. + preview_columns = ["id", "name", "score", "tags", "type", "template", "technique", "thumbnail"] return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns) From 2ac1787f3fd50a113dd6c669ff644b5057502b6d Mon Sep 17 00:00:00 2001 From: Robbie Court Date: Thu, 28 May 2026 13:56:40 +0000 Subject: [PATCH 4/5] Restructure get_similar_neurons with CALL subqueries MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewer flagged a cartesian-product risk in the previous version: chained OPTIONAL MATCHes for (xref, site), (channel, ri, templ), (technique), and (typ) compounded into N × M × K rows per n2 before RETURN DISTINCT collapsed them. On densely-typed or multi-aligned neurons this is wasteful, and `type` was being aggregated within the (channel, technique) grouping rather than once per n2. Refactored each optional branch into a CALL subquery scoped to n2: CALL { WITH n2 OPTIONAL MATCH (n2)-[:INSTANCEOF]->(typ:Class) RETURN apoc.text.join([l IN collect(...) WHERE ...], '|') AS type } CALL { WITH n2 OPTIONAL MATCH (n2)-[rx:database_cross_reference]->(site:Site) WHERE site.is_data_source WITH rx, site LIMIT 1 RETURN rx, site } CALL { WITH n2 OPTIONAL MATCH (n2)<-[:depicts]-(channel)-[ri:in_register_with]->... OPTIONAL MATCH (channel)-[:is_specified_output_of]->(technique:Class) WITH ri, templ, technique LIMIT 1 RETURN ri, templ, technique } `type` aggregates internally (always 1 row, no LIMIT needed). Cross-ref and alignment pick a single representative — matches what the v2 row needs and what prod's `apoc.cypher.run('... LIMIT 5'/'10')` pattern already does inside the XMI. `WITH DISTINCT r, n2` upfront drops the c1/c2 cartesian from the INSTANCEOF anchors. RETURN no longer needs DISTINCT — the row key is guaranteed unique by construction. --- src/vfbquery/vfb_queries.py | 42 ++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 12 deletions(-) diff --git a/src/vfbquery/vfb_queries.py b/src/vfbquery/vfb_queries.py index af09286..0e8eba8 100644 --- a/src/vfbquery/vfb_queries.py +++ b/src/vfbquery/vfb_queries.py @@ -2463,19 +2463,37 @@ def get_similar_neurons(neuron, similarity_score='NBLAST_score', return_datafram # matches v2 prod's `Type` column from SOLR's `types` collection # - template = `[symbol](short_form)` markdown of the alignment template # - technique = imaging technique label (channel -[:is_specified_output_of]-> Class) - main_query = f"""MATCH (c1:Class)<-[:INSTANCEOF]-(n1)-[r:has_similar_morphology_to]-(n2)-[:INSTANCEOF]->(c2:Class) + # + # Each OPTIONAL branch is wrapped in a CALL subquery so the outer query + # carries one row per n2 throughout. Without this, an n2 with N + # cross-references × M alignments × K types would produce N×M×K rows + # that DISTINCT then collapses at the end — wasteful, especially on + # densely-typed neurons. Each subquery either aggregates (for `type`) + # or LIMIT 1s (for the single representative cross-ref / alignment + # the V2 row needs), so n2 stays the row key end-to-end. + main_query = f"""MATCH (c1:Class)<-[:INSTANCEOF]-(n1:Individual)-[r:has_similar_morphology_to]-(n2:Individual)-[:INSTANCEOF]->(c2:Class) WHERE n1.short_form = '{neuron}' and exists(r.{similarity_score}) - WITH c1, n1, r, n2, c2 - OPTIONAL MATCH (n2)-[rx:database_cross_reference]->(site:Site) - WHERE site.is_data_source - WITH n2, r, c2, rx, site - OPTIONAL MATCH (n2)<-[:depicts]-(channel:Individual)-[ri:in_register_with]->(:Template)-[:depicts]->(templ:Template) - OPTIONAL MATCH (channel)-[:is_specified_output_of]->(technique:Class) - WITH n2, r, c2, rx, site, channel, ri, templ, technique - OPTIONAL MATCH (n2)-[:INSTANCEOF]->(typ:Class) - WITH n2, r, rx, site, channel, ri, templ, technique, - apoc.text.join([l IN collect(DISTINCT typ.label) WHERE l IS NOT NULL AND l <> ''], '|') AS type - RETURN DISTINCT n2.short_form as id, + WITH DISTINCT r, n2 + CALL {{ + WITH n2 + OPTIONAL MATCH (n2)-[:INSTANCEOF]->(typ:Class) + RETURN apoc.text.join([l IN collect(DISTINCT typ.label) WHERE l IS NOT NULL AND l <> ''], '|') AS type + }} + CALL {{ + WITH n2 + OPTIONAL MATCH (n2)-[rx:database_cross_reference]->(site:Site) + WHERE site.is_data_source + WITH rx, site LIMIT 1 + RETURN rx, site + }} + CALL {{ + WITH n2 + OPTIONAL MATCH (n2)<-[:depicts]-(channel:Individual)-[ri:in_register_with]->(:Template)-[:depicts]->(templ:Template) + OPTIONAL MATCH (channel)-[:is_specified_output_of]->(technique:Class) + WITH ri, templ, technique LIMIT 1 + RETURN ri, templ, technique + }} + RETURN n2.short_form as id, apoc.text.format("[%s](%s)", [n2.label, n2.short_form]) AS name, r.{similarity_score}[0] AS score, apoc.text.join(coalesce(n2.uniqueFacets, []), '|') AS tags, From bc44dcd6cd0c01540c9383c1a3ab4a075bb808f4 Mon Sep 17 00:00:00 2001 From: Rob Court Date: Thu, 28 May 2026 17:44:24 +0100 Subject: [PATCH 5/5] Potential fix for pull request finding Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com> --- src/vfbquery/vfb_queries.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/vfbquery/vfb_queries.py b/src/vfbquery/vfb_queries.py index 0e8eba8..4d28762 100644 --- a/src/vfbquery/vfb_queries.py +++ b/src/vfbquery/vfb_queries.py @@ -1266,8 +1266,10 @@ def SimilarMorphologyTo_to_schema(name, take_default): # type column this PR exposes — keeps term-info previews in sync with # the full /run_query response. source/source_id are intentionally # omitted; they're noisy in compact previews and only meaningful when - # the user opens the full table. - preview_columns = ["id", "name", "score", "tags", "type", "template", "technique", "thumbnail"] + # the user opens the full table. Keep score before name so preview + # sorting continues to default to score-descending under the current + # header-order-based preview sort selection. + preview_columns = ["id", "score", "name", "tags", "type", "template", "technique", "thumbnail"] return Query(query=query, label=label, function=function, takes=takes, preview=preview, preview_columns=preview_columns)