From 9a5d41f842af78d95e4cc35af68f299426166cbd Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 14:16:58 -0700 Subject: [PATCH 01/28] Fix MolecularProfile._disambiguate() --- src/civic/profile.js | 24 +++++++++++++++--------- test/civic.profile.test.js | 19 +++++++++++++++++-- 2 files changed, 32 insertions(+), 11 deletions(-) diff --git a/src/civic/profile.js b/src/civic/profile.js index 2530a99..1a91037 100644 --- a/src/civic/profile.js +++ b/src/civic/profile.js @@ -60,23 +60,29 @@ const MolecularProfile = (molecularProfile) => ({ }, /* Desambiguation of variants with implicit 'or' in the name */ _disambiguate() { - // Split ambiguous variants - const temp = []; + const newConditions = []; + + // For each set of conditions this.conditions.forEach((condition) => { + const temp = []; condition.forEach((variant) => { temp.push( + // Split ambiguous variants into an array of 1 or more variant(s) this._split(variant), ); }); - }); - let newCondition; + // Combine variations into new condition + let newConditionSet; - // Combine variations into new condition - for (let i = 0; i < temp.length; i++) { - newCondition = this._combine({ arr1: newCondition || [[]], arr2: temp[i] }); - } - this.conditions = newCondition; + for (let i = 0; i < temp.length; i++) { + newConditionSet = this._combine({ arr1: newConditionSet || [[]], arr2: temp[i] }); + } + newConditions.push(...newConditionSet); + }); + + // Replace old conditions by new ones + this.conditions = [...newConditions]; return this; }, /* Returns index of closing parenthesis for end of block */ diff --git a/test/civic.profile.test.js b/test/civic.profile.test.js index 5f70ac3..f02f2df 100644 --- a/test/civic.profile.test.js +++ b/test/civic.profile.test.js @@ -32,7 +32,7 @@ describe('MolecularProfile._compile()', () => { }); describe('MolecularProfile._disambiguate()', () => { - test('disambiguate conditions', () => { + test('disambiguate conditions in AND statements', () => { const Mp = MolecularProfile(); Mp.conditions = [ [{ id: 8, name: 'X123M/N' }, { id: 9, name: 'X456O/P' }, { id: 10, name: 'X456Q' }], @@ -46,6 +46,22 @@ describe('MolecularProfile._disambiguate()', () => { ], ); }); + + test('disambiguate conditions in OR statements', () => { + const Mp = MolecularProfile(); + Mp.conditions = [ + [{ id: 8, name: 'X123M/N' }], + [{ id: 9, name: 'X456O/P' }], + ]; + expect(Mp._disambiguate().conditions).toEqual( + [ + [{ id: 8, name: 'X123M' }], + [{ id: 8, name: 'X123N' }], + [{ id: 9, name: 'X456O' }], + [{ id: 9, name: 'X456P' }], + ], + ); + }); }); describe('MolecularProfile._end()', () => { @@ -140,7 +156,6 @@ describe('MolecularProfile._parse()', () => { ); }); - describe('MolecularProfile._split()', () => { test.each([ ['Q157P/R', [[{ name: 'Q157P' }], [{ name: 'Q157R' }]]], From 09f36a0d4c267646bd5366e249a1cb35077abe43 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 14:26:23 -0700 Subject: [PATCH 02/28] Add processMolecularProfile() for MP cache management --- src/civic/index.js | 9 +++---- src/civic/profile.js | 58 ++++++++++++++++++++++++++++++++++++-------- 2 files changed, 51 insertions(+), 16 deletions(-) diff --git a/src/civic/index.js b/src/civic/index.js index 2615104..40aeab3 100644 --- a/src/civic/index.js +++ b/src/civic/index.js @@ -21,7 +21,7 @@ const { civic: SOURCE_DEFN, ncit: NCIT_SOURCE_DEFN } = require('../sources'); const { processVariantRecord } = require('./variant'); const { getRelevance } = require('./relevance'); const { getPublication } = require('./publication'); -const { MolecularProfile } = require('./profile'); +const { processMolecularProfile } = require('./profile'); const { EvidenceItem: evidenceSpec } = require('./specs.json'); class NotImplementedError extends ErrorMixin { } @@ -562,8 +562,6 @@ const upload = async ({ continue; } - // Introducing Molecular Profiles with CIViC GraphQL API v2.2.0 - // [EvidenceItem]--(many-to-one)--[MolecularProfile]--(many-to-many)--[Variant] if (!record.molecularProfile) { logger.error(`Evidence Item without Molecular Profile. Violates assumptions: ${record.id}`); counts.skip++; @@ -616,10 +614,9 @@ const upload = async ({ // Process Molecular Profiles expression into an array of conditions // Each condition is itself an array of variants, one array for each expected GraphKB Statement from this CIViC Evidence Item - const Mp = MolecularProfile(record.molecularProfile); - try { - record.conditions = Mp.process().conditions; + // Molecular Profile (conditions w/ variants) + record.conditions = processMolecularProfile(record.molecularProfile).conditions; } catch (err) { logger.error(`evidence (${record.id}) ${err}`); counts.skip++; diff --git a/src/civic/profile.js b/src/civic/profile.js index 1a91037..4d6845d 100644 --- a/src/civic/profile.js +++ b/src/civic/profile.js @@ -1,7 +1,12 @@ +/** + * Introducing Molecular Profiles with CIViC GraphQL API v2.2.0 + * [EvidenceItem]--(many-to-one)--[MolecularProfile]--(many-to-many)--[Variant] + */ const { error: { ErrorMixin } } = require('@bcgsc-pori/graphkb-parser'); -class NotImplementedError extends ErrorMixin { } +class NotImplementedError extends ErrorMixin { } +const MOLECULAR_PROFILE_CACHE = new Map(); /** * Factory function returning a MolecularProfile object. @@ -198,9 +203,9 @@ const MolecularProfile = (molecularProfile) => ({ newConditions.forEach((condition) => { condition.forEach((variant) => { if (!variant) { - throw new Error( - `unable to process molecular profile with missing or misformatted variants (${this.profile.id || ''})`, - ); + const errMsg = `unable to process molecular profile with missing or misformatted variants (${this.profile.id || ''})`; + this.error = errMsg; + throw new Error(errMsg); } }); }); @@ -211,6 +216,8 @@ const MolecularProfile = (molecularProfile) => ({ }, /* Corresponding GKB Statements' conditions (1 array per statement) */ conditions: [[]], + /* Keep track of processing error */ + error: undefined, /* Main object's method. Process expression into array of conditions' arrays */ process() { // Get Molecular Profile's expression (parsedName property) @@ -222,15 +229,15 @@ const MolecularProfile = (molecularProfile) => ({ || parsedName.length === 0 || typeof parsedName[0] !== 'object' ) { - throw new Error( - `unable to process molecular profile with missing or misformatted parsedName (${this.profile.id || ''})`, - ); + const errMsg = `unable to process molecular profile with missing or misformatted parsedName (${this.profile.id || ''})`; + this.error = errMsg; + throw new Error(errMsg); } // NOT operator not yet supported if (this._not(parsedName)) { - throw new NotImplementedError( - `unable to process molecular profile with NOT operator (${this.profile.id || ''})`, - ); + const errMsg = `unable to process molecular profile with NOT operator (${this.profile.id || ''})`; + this.error = errMsg; + throw new NotImplementedError(errMsg); } // Filters out unwanted Feature info from expression const filteredParsedName = parsedName.filter(el => el.__typename !== 'Feature'); @@ -247,7 +254,38 @@ const MolecularProfile = (molecularProfile) => ({ profile: molecularProfile || {}, }); +/** + * Processing a molecular profile expression while managing the cache + * + * @param {Object} molecularProfile a Molecular Profile segment from GraphQL query + * @returns {MolecularProfile} object whose conditions' property is an array of lists of conditions + */ +const processMolecularProfile = (molecularProfile) => { + let Mp = MOLECULAR_PROFILE_CACHE.get(molecularProfile.id); + + if (Mp) { + if (Mp.error) { + throw new Error( + `Molecular profile ${molecularProfile.id} already processed with error "${Mp.error}"`, + ); + } + return Mp; + } + Mp = MolecularProfile(molecularProfile); + + // Actually process the profile expression + try { + Mp.process(); + } catch (err) { + MOLECULAR_PROFILE_CACHE.set(molecularProfile.id, Mp); + throw err; + } + MOLECULAR_PROFILE_CACHE.set(molecularProfile.id, Mp); + + return Mp; +}; module.exports = { MolecularProfile, + processMolecularProfile, }; From 71bcabfd64b8af8755124ba3f986d45d562b447c Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 14:31:20 -0700 Subject: [PATCH 03/28] Add variants required in EvidenceItems specs --- src/civic/specs.json | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/civic/specs.json b/src/civic/specs.json index 338a107..740e28c 100644 --- a/src/civic/specs.json +++ b/src/civic/specs.json @@ -151,13 +151,11 @@ "name" ], "type": [ - "null", "object" ] }, "type": [ - "array", - "null" + "array" ] } }, From 3c5d3df4890222cf4fbfc006f1a896d1edb8cf36 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 14:42:34 -0700 Subject: [PATCH 04/28] Pubmed caching from module.exports --- src/civic/index.js | 9 ++++----- src/civic/publication.js | 7 +++++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/civic/index.js b/src/civic/index.js index 40aeab3..bb0e37e 100644 --- a/src/civic/index.js +++ b/src/civic/index.js @@ -15,12 +15,11 @@ const { shouldUpdate, } = require('../graphkb'); const { logger } = require('../logging'); -const _pubmed = require('../entrez/pubmed'); const _entrezGene = require('../entrez/gene'); const { civic: SOURCE_DEFN, ncit: NCIT_SOURCE_DEFN } = require('../sources'); const { processVariantRecord } = require('./variant'); const { getRelevance } = require('./relevance'); -const { getPublication } = require('./publication'); +const { getPublication, loadPubmedCache } = require('./publication'); const { processMolecularProfile } = require('./profile'); const { EvidenceItem: evidenceSpec } = require('./specs.json'); @@ -529,9 +528,9 @@ const upload = async ({ }); previouslyEntered = new Set(previouslyEntered.map(r => r.sourceId)); logger.info(`Found ${previouslyEntered.size} records previously added from ${SOURCE_DEFN.name}`); - // Get list of all Pubmed publication reccords from GraphKB - logger.info('caching publication records'); - _pubmed.preLoadCache(conn); + // PubMed caching + logger.info('Caching Pubmed publication'); + await loadPubmedCache(conn); // Get evidence records from CIVIC (Accepted, or Submitted from a trusted curator) const { counts, errorList, records } = await downloadEvidenceRecords(url, trustedCurators); diff --git a/src/civic/publication.js b/src/civic/publication.js index ee8e1a3..af95446 100644 --- a/src/civic/publication.js +++ b/src/civic/publication.js @@ -61,5 +61,8 @@ const getPublication = async (conn, rawRecord) => { throw Error(`unable to process non-pubmed/non-asco evidence type (${rawRecord.source.sourceType}) for evidence item (${rawRecord.id})`); }; - -module.exports = { getPublication, titlesMatch }; +module.exports = { + getPublication, + loadPubmedCache: _pubmed.preLoadCache, + titlesMatch, +}; From 904ee7ca13ace9fdac568262e4d17dedfc5bf153 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 14:44:05 -0700 Subject: [PATCH 05/28] Better jsdocs and comments in publications.js --- src/civic/publication.js | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/src/civic/publication.js b/src/civic/publication.js index af95446..644111e 100644 --- a/src/civic/publication.js +++ b/src/civic/publication.js @@ -1,8 +1,18 @@ +const { error: { ErrorMixin } } = require('@bcgsc-pori/graphkb-parser'); + const _asco = require('../asco'); const _pubmed = require('../entrez/pubmed'); +class NotImplementedError extends ErrorMixin { } + + /** - * Check two strings are the same irrespective of casing, trailing periods and other formatting + * Check if two strings are the same irrespective of casing, + * trailing periods and other formatting + * + * @param {string} title1 a publication title + * @param {string} title2 a second publication title + * @returns {Boolean} whether both titles are matching or not */ const titlesMatch = (title1, title2) => { const title1Simple = title1.trim().toLowerCase().replace(/\.$/, '').replace(/<\/?(em|i|bold)>/g, ''); @@ -10,14 +20,15 @@ const titlesMatch = (title1, title2) => { return title1Simple === title2Simple; }; - /** - * Fetches the publication record either from pubmed or the ASCO abstract + * Fetches the publication record either from PubMed or the ASCO abstract * * @param {ApiConnection} conn graphkb API connector - * @param {object} rawRecord CIViC Evidence Item JSON record + * @param {object} rawRecord CIViC EvidenceItem record + * @returns {object} the publication record from GraphKB */ const getPublication = async (conn, rawRecord) => { + // Upload Publication to GraphKB FROM PUBMED if (rawRecord.source.sourceType === 'PUBMED') { const [publication] = await _pubmed.fetchAndLoadByIds(conn, [rawRecord.source.citationId]); @@ -26,6 +37,8 @@ const getPublication = async (conn, rawRecord) => { } return publication; } + + // Upload Publication to GraphKB FROM ASCO if (rawRecord.source.sourceType === 'ASCO') { const abstracts = await _asco.fetchAndLoadByIds(conn, [rawRecord.source.ascoAbstractId]); @@ -54,11 +67,12 @@ const getPublication = async (conn, rawRecord) => { } return abstracts[0]; } + + // Upload Publication to GraphKB FROM ASH - No loader yet! if (rawRecord.source.sourceType === 'ASH') { - // 6 cases - // TODO: ASH loader + // TODO: ASH loader. Only a handfull of cases though } - throw Error(`unable to process non-pubmed/non-asco evidence type (${rawRecord.source.sourceType}) for evidence item (${rawRecord.id})`); + throw new NotImplementedError(`unable to process non-pubmed/non-asco evidence type (${rawRecord.source.sourceType}) for evidence item (${rawRecord.id})`); }; module.exports = { From 978f9f003ffe3c77568fa0256a14b54cb0a28150 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 14:49:16 -0700 Subject: [PATCH 06/28] Minor changes to getRelevance() --- src/civic/index.js | 2 +- src/civic/relevance.js | 6 +----- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/civic/index.js b/src/civic/index.js index bb0e37e..aa8e1f4 100644 --- a/src/civic/index.js +++ b/src/civic/index.js @@ -215,7 +215,7 @@ const processEvidenceRecord = async (opt) => { // Relevance & EvidenceLevel const [level, relevance] = await Promise.all([ getEvidenceLevel(opt), - getRelevance(opt), + getRelevance(conn, { rawRecord }), ]); // Variant's Feature diff --git a/src/civic/relevance.js b/src/civic/relevance.js index c79a262..3c1bff1 100644 --- a/src/civic/relevance.js +++ b/src/civic/relevance.js @@ -2,7 +2,6 @@ const { error: { ErrorMixin } } = require('@bcgsc-pori/graphkb-parser'); class NotImplementedError extends ErrorMixin { } - const RELEVANCE_CACHE = {}; @@ -333,11 +332,10 @@ const translateRelevance = (evidenceType, evidenceDirection, significance) => { ); }; - /** * Convert the CIViC relevance types to GraphKB terms */ -const getRelevance = async ({ rawRecord, conn }) => { +const getRelevance = async (conn, { rawRecord }) => { // translate the type to a GraphKB vocabulary term let relevance = translateRelevance( rawRecord.evidenceType, @@ -354,8 +352,6 @@ const getRelevance = async ({ rawRecord, conn }) => { return relevance; }; - - module.exports = { getRelevance, translateRelevance, From 062a822c6c38fea3ed8db17cbc2ef6b78beaf0fd Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 15:00:40 -0700 Subject: [PATCH 07/28] Add support for cache inside processVariantRecord() --- src/civic/index.js | 20 ++++++-------------- src/civic/variant.js | 36 ++++++++++++++++++++++++++++++------ 2 files changed, 36 insertions(+), 20 deletions(-) diff --git a/src/civic/index.js b/src/civic/index.js index aa8e1f4..4fb4166 100644 --- a/src/civic/index.js +++ b/src/civic/index.js @@ -236,20 +236,12 @@ const processEvidenceRecord = async (opt) => { // Variant let variants; - if (variantsCache.records[rawRecord.variant.id]) { - variants = variantsCache.records[rawRecord.variant.id]; - } else if (variantsCache.errors[rawRecord.variant.id]) { - throw variantsCache.errors[rawRecord.variant.id]; - } else { - try { - variants = await processVariantRecord(conn, rawRecord.variant, feature); - variantsCache.records[rawRecord.variant.id] = variants; - logger.verbose(`converted variant name (${rawRecord.variant.name}) to variants (${variants.map(v => v.displayName).join(', and ')})`); - } catch (err) { - variantsCache.errors[rawRecord.variant.id] = err; - logger.error(`evidence (${rawRecord.id}) Unable to process the variant (id=${rawRecord.variant.id}, name=${rawRecord.variant.name}): ${err}`); - throw err; - } + try { + variants = await processVariantRecord(conn, rawRecord.variant, feature); + logger.verbose(`converted variant name (${rawRecord.variant.name}) to variants (${variants.map(v => v.displayName).join(', and ')})`); + } catch (err) { + logger.error(`evidence (${rawRecord.id}) Unable to process the variant (id=${rawRecord.variant.id}, name=${rawRecord.variant.name}): ${err}`); + throw err; } // get the disease by doid diff --git a/src/civic/variant.js b/src/civic/variant.js index 5550069..63f05ec 100644 --- a/src/civic/variant.js +++ b/src/civic/variant.js @@ -7,6 +7,7 @@ const { civic: SOURCE_DEFN } = require('../sources'); const { error: { ErrorMixin, ParsingError } } = kbParser; class NotImplementedError extends ErrorMixin { } +const VARIANT_CACHE = new Map(); // based on discussion with cam here: https://www.bcgsc.ca/jira/browse/KBDEV-844 const SUBS = { @@ -330,10 +331,11 @@ const uploadNormalizedVariant = async (conn, normalizedVariant, feature) => { * @param {Object} feature the gene feature already grabbed from GraphKB */ const processVariantRecord = async (conn, civicVariantRecord, feature) => { - const featureInstance = civicVariantRecord.feature.featureInstance; + const { feature: { featureInstance } } = civicVariantRecord; let entrezId, entrezName; + // featureInstance if (featureInstance.__typename === 'Gene') { entrezId = featureInstance.entrezId; entrezName = featureInstance.name; @@ -345,21 +347,43 @@ const processVariantRecord = async (conn, civicVariantRecord, feature) => { ); } - const variants = normalizeVariantRecord({ + // Raw variant from CIViC to normalize & upload to GraphKB if needed + const rawVariant = { entrezId, entrezName, name: civicVariantRecord.name, - }); + }; + + // Trying cache first + const fromCache = VARIANT_CACHE.get(JSON.stringify(rawVariant)); + + if (fromCache) { + if (fromCache.err) { + throw new Error('Variant record previously processed with errors'); + } + if (fromCache.result) { + return fromCache.result; + } + } const result = []; - for (const normalizedVariant of variants) { - result.push(await uploadNormalizedVariant(conn, normalizedVariant, feature)); + try { + // Normalizing + const variants = normalizeVariantRecord(rawVariant); + + // Uploading + for (const normalizedVariant of variants) { + result.push(await uploadNormalizedVariant(conn, normalizedVariant, feature)); + } + } catch (err) { + VARIANT_CACHE.set(JSON.stringify(rawVariant), { err }); } + + VARIANT_CACHE.set(JSON.stringify(rawVariant), { result }); return result; }; - module.exports = { normalizeVariantRecord, processVariantRecord, From 974a6edc637a72fcdad35042bb09a7f6875752b5 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 15:01:45 -0700 Subject: [PATCH 08/28] Comments and exports in variant.js --- src/civic/variant.js | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/civic/variant.js b/src/civic/variant.js index 63f05ec..98ebee6 100644 --- a/src/civic/variant.js +++ b/src/civic/variant.js @@ -32,7 +32,13 @@ const SUBS = { 'p26.3-25.3 11mb del': 'y.p26.3_p25.3del', }; - +/** + * Compares two gene names together for equality + * + * @param {string} gene1 a gene name + * @param {string} gene2 a second gene name + * @returns {boolean} whether the genes names are equal or not + */ const compareGeneNames = (gene1, gene2) => { if (['abl1', 'abl'].includes(gene1.toLowerCase()) && ['abl1', 'abl'].includes(gene2.toLowerCase())) { return true; @@ -43,7 +49,14 @@ const compareGeneNames = (gene1, gene2) => { }; /** - * Given a CIViC Variant record entrez information and name, normalize into a set of graphkb-style variants + * Given a CIViC Variant record entrez information and name, + * normalize into a set of graphkb-style variants + * + * @param {object} param0 + * @param {string} param0.name + * @param {string} param0.entrezId + * @param {string} param0.entrezName + * @returns {object} */ const normalizeVariantRecord = ({ name: rawName, entrezId, entrezName: rawEntrezName, @@ -232,6 +245,7 @@ const normalizeVariantRecord = ({ * @param {ApiConnection} conn the connection to GraphKB * @param {Object} normalizedVariant the normalized variant record * @param {Object} feature the gene feature already grabbed from GraphKB + * @returns {object[]} */ const uploadNormalizedVariant = async (conn, normalizedVariant, feature) => { let result; @@ -322,13 +336,13 @@ const uploadNormalizedVariant = async (conn, normalizedVariant, feature) => { return result; }; - /** * Given some variant record and a feature, process the variant and return a GraphKB equivalent * * @param {ApiConnection} conn the connection to GraphKB * @param {Object} civicVariantRecord the raw variant record from CIViC * @param {Object} feature the gene feature already grabbed from GraphKB + * @returns {object[]} */ const processVariantRecord = async (conn, civicVariantRecord, feature) => { const { feature: { featureInstance } } = civicVariantRecord; @@ -385,6 +399,8 @@ const processVariantRecord = async (conn, civicVariantRecord, feature) => { }; module.exports = { + compareGeneNames, normalizeVariantRecord, processVariantRecord, + uploadNormalizedVariant, }; From 9e9c533f7bb58ca5087d0a81f091dee51fa16278 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 15:15:23 -0700 Subject: [PATCH 09/28] Refactoring therapies in seperated file --- src/civic/index.js | 105 ++----------------------- src/civic/therapy.js | 178 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 184 insertions(+), 99 deletions(-) create mode 100644 src/civic/therapy.js diff --git a/src/civic/index.js b/src/civic/index.js index 4fb4166..836237e 100644 --- a/src/civic/index.js +++ b/src/civic/index.js @@ -21,6 +21,7 @@ const { processVariantRecord } = require('./variant'); const { getRelevance } = require('./relevance'); const { getPublication, loadPubmedCache } = require('./publication'); const { processMolecularProfile } = require('./profile'); +const { addOrFetchTherapy, resolveTherapies } = require('./therapy'); const { EvidenceItem: evidenceSpec } = require('./specs.json'); class NotImplementedError extends ErrorMixin { } @@ -79,88 +80,6 @@ const requestEvidenceItems = async (url, opt) => { }; -/** - * Given some therapy name, find the therapy that is equivalent by name in GraphKB - */ -const getTherapy = async (conn, therapyRecord) => { - let originalError; - - // fetch from NCIt first if possible, or pubchem - // then use the name as a fallback - const name = therapyRecord.name.toLowerCase().trim(); - - if (therapyRecord.ncitId) { - try { - const therapy = await conn.getUniqueRecordBy({ - filters: [ - { source: { filters: { name: NCIT_SOURCE_DEFN.name }, target: 'Source' } }, - { sourceId: therapyRecord.ncitId }, - { name: therapyRecord.name }, - ], - sort: orderPreferredOntologyTerms, - target: 'Therapy', - }); - return therapy; - } catch (err) { - logger.error(`had NCIt therapy mapping (${therapyRecord.ncitId}) named (${therapyRecord.name}) but failed to fetch from graphkb: ${err}`); - throw err; - } - } - - try { - const therapy = await conn.getTherapy(name); - return therapy; - } catch (err) { - originalError = err; - } - - try { - const match = /^\s*(\S+)\s*\([^)]+\)$/.exec(name); - - if (match) { - return await conn.getTherapy(match[1]); - } - } catch (err) { } - logger.error(originalError); - throw originalError; -}; - - -/** - * Add or fetch a therapy combination if there is not an existing record - * Link the therapy combination to its individual elements - */ -const addOrFetchTherapy = async (conn, source, therapiesRecords, combinationType) => { - if (therapiesRecords.length <= 1) { - if (therapiesRecords[0] === null) { - return null; - } - return getTherapy(conn, therapiesRecords[0]); - } - const therapies = await Promise.all(therapiesRecords.map(async therapy => getTherapy(conn, therapy))); - const sourceId = therapies.map(e => e.sourceId).sort().join(' + '); - const name = therapies.map(e => e.name).sort().join(' + '); - const combinedTherapy = await conn.addRecord({ - content: { - combinationType, name, source: rid(source), sourceId, - }, - existsOk: true, - target: 'Therapy', - }); - - for (const therapy of therapies) { - await conn.addRecord({ - content: { - in: rid(combinedTherapy), out: rid(therapy), source: rid(source), - }, - existsOk: true, - target: 'ElementOf', - }); - } - return combinedTherapy; -}; - - /** * Add or fetch an evidence level if there is not an existing record */ @@ -585,23 +504,11 @@ const upload = async ({ target: 'Statement', })).map(rid)); - // Resolve combinations of therapies - // Splits civic evidence items therapies into separate evidence items based on their combination type. - if (record.therapies === null || record.therapies.length === 0) { - record.therapies = [null]; - } else if ( - record.therapyInteractionType === 'COMBINATION' - || record.therapyInteractionType === 'SEQUENTIAL' - ) { - record.therapies = [record.therapies]; - } else if (record.therapyInteractionType === 'SUBSTITUTES' || record.therapies.length < 2) { - record.therapies = record.therapies.map(therapy => [therapy]); - record.therapyInteractionType = null; - } else { - logger.error(`(evidence: ${record.id}) unsupported therapy interaction type (${record.therapyInteractionType}) for a multiple therapy (${record.therapies.length}) statement`); - counts.skip++; - continue; - } + // Resolve therapy combinations if any + // Updates record.therapies and record.therapyInteractionType properties + const { therapies, therapyInteractionType } = resolveTherapies(record); + record.therapies = therapies; + record.therapyInteractionType = therapyInteractionType; // Process Molecular Profiles expression into an array of conditions // Each condition is itself an array of variants, one array for each expected GraphKB Statement from this CIViC Evidence Item diff --git a/src/civic/therapy.js b/src/civic/therapy.js new file mode 100644 index 0000000..f9346d8 --- /dev/null +++ b/src/civic/therapy.js @@ -0,0 +1,178 @@ +const { logger } = require('../logging'); +const { ncit: NCIT_SOURCE_DEFN } = require('../sources'); +const { orderPreferredOntologyTerms, rid } = require('../graphkb'); + + +/** + * Given a CIViC EvidenceItem record, + * compile a list of its therapies into a list of combination of therapies, and + * returns modified 'therapies' & 'therapyInteractionType' properties + * + * record.therapies will be transformed into: + * - a list of 1 list of 1-or-many therapies ('COMBINATION' || 'SEQUENTIAL'), or + * - a list of 1-or-many lists of 1 therapy ('SUBSTITUTES'), or + * - a list of 1 null + * + * @param {object} evidenceItem the original CIViC EvidenceItem + * @returns {object} the modified EvidenceItem + */ +const resolveTherapies = (evidenceItem) => { + const record = JSON.parse(JSON.stringify(evidenceItem)); // Deep copy + + // No therapy + if (record.therapies === null || record.therapies.length === 0) { + record.therapies = [null]; + return record; + } + + // One or more therapies + if (record.therapies.length === 1 || record.therapyInteractionType === 'SUBSTITUTES') { + record.therapies = record.therapies.map(therapy => [therapy]); + record.therapyInteractionType = null; + } else if ( + record.therapyInteractionType === 'COMBINATION' + || record.therapyInteractionType === 'SEQUENTIAL' + ) { + record.therapies = [record.therapies]; + } else { + logger.error(`(evidence: ${record.id}) unsupported therapy interaction type (${record.therapyInteractionType}) for a multiple therapy (${record.therapies.length}) statement`); + throw new Error('Did not find unique record'); + } + + // Since duplicates can occure (from civic !?), lets remove them + // Need to strignify/parse since we're comparing arrays of objects + const unique = new Set(); + record.therapies.forEach(therapy => unique.add(JSON.stringify(therapy))); + record.therapies = []; + unique.forEach(therapy => record.therapies.push(JSON.parse(therapy))); + + return record; +}; + +/** + * Given a Therapy record from CIViC, + * returns a Therapy record from GraphKB + * + * @param {ApiConnection} conn the API connection object for GraphKB + * @param {object} therapyRecord a therapy from CIViC + * @returns {object} Therapy record from GraphKB + */ +const getTherapy = async (conn, therapyRecord) => { + const name = therapyRecord.name.toLowerCase().trim(); + + // Trying with the ncitId + if (therapyRecord.ncitId) { + try { + return await conn.getUniqueRecordBy({ + filters: [ + { source: { filters: { name: NCIT_SOURCE_DEFN.name }, target: 'Source' } }, + { sourceId: therapyRecord.ncitId }, + { name }, + ], + sort: orderPreferredOntologyTerms, + target: 'Therapy', + }); + } catch (err) { + logger.error(`had NCIt therapy mapping (${therapyRecord.ncitId}) named (${therapyRecord.name}) but failed to fetch from graphkb: ${err}`); + throw err; + } + } + + // Trying instead with the name + // Using the getTherapy method from the connection object + let originalError; + + try { + // With the name as-is first + return await conn.getTherapy(name); + } catch (err) { + originalError = err; + } + + try { + // Then with the name parsed + const match = /^\s*(\S+)\s*\([^)]+\)$/.exec(name); + + if (match) { + return await conn.getTherapy(match[1]); + } + } catch (err) { } + + // Logging errors + logger.error(originalError); + throw originalError; +}; + +/** + * Given a list of CIViC Therapy Records, + * + * (If one therapy) + * returns the corresponding Therapy record from GraphKB + * + * (If a combination of therapies) + * will add a therapy combination if there is not an existing record, + * will link the therapy combination to its individual elements with 'ElementOf' edges, then + * returns the corresponding Therapy record from GraphKB + * + * @param {ApiConnection} conn the API connection object for GraphKB + * @param {string} sourceRid + * @param {object[]} therapiesRecords + * @param {string} combinationType + * @returns {object} the corresponding Therapy record from GraphKB + */ +const addOrFetchTherapy = async (conn, sourceRid, therapiesRecords, combinationType) => { + /* ONE OR NO THERAPY */ + + if (therapiesRecords.length === 0) { + return null; + } + if (therapiesRecords.length === 1) { + if (therapiesRecords[0] === null) { + return null; + } + // Get the corresponding Therapy record from GraphKB + return getTherapy(conn, therapiesRecords[0]); + } + + /* COMBINATION OF THERAPIES */ + + // For each therapy, get the corresponding Therapy record from GraphKB + const therapies = await Promise.all( + therapiesRecords.map( + async therapy => getTherapy(conn, therapy), + ), + ); + // concatenating sourceIds and names + const sourceId = therapies.map(e => e.sourceId).sort().join(' + '); + const name = therapies.map(e => e.name).sort().join(' + '); + + // Add a Therapy Vertice for the combined therapies + const combinedTherapy = await conn.addRecord({ + content: { + combinationType, name, source: sourceRid, sourceId, + }, + existsOk: true, + target: 'Therapy', + }); + + // Add ElementOf Edges between corresponding records + for (const therapy of therapies) { + await conn.addRecord({ + content: { + in: rid(combinedTherapy), + out: rid(therapy), + source: sourceRid, + }, + existsOk: true, + target: 'ElementOf', + }); + } + + return combinedTherapy; +}; + +module.exports = { + addOrFetchTherapy, + getTherapy, + resolveTherapies, +}; From e26b8aa3b15d226dd83a0783cdbd37434dde94b7 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 15:21:54 -0700 Subject: [PATCH 10/28] Minor test updates --- test/civic.relevance.test.js | 12 ++++++++++-- test/civic.variant.test.js | 10 ++++------ 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/test/civic.relevance.test.js b/test/civic.relevance.test.js index c7a0263..82e3cb7 100644 --- a/test/civic.relevance.test.js +++ b/test/civic.relevance.test.js @@ -32,7 +32,11 @@ describe('translateRelevance', () => { ['NA', 'PREDISPOSING', 'NA', 'likely predisposing'], ])( '%s|%s|%s returns %s', (evidenceDirection, evidenceType, clinicalSignificance, expected) => { - expect(translateRelevance(evidenceType, evidenceDirection, clinicalSignificance)).toEqual(expected); + expect(translateRelevance( + evidenceType, + evidenceDirection, + clinicalSignificance, + )).toEqual(expected); }, ); @@ -74,7 +78,11 @@ describe('translateRelevance', () => { ['--', '--', '--'], ])( '%s|%s|%s errors', (evidenceDirection, evidenceType, clinicalSignificance) => { - expect(() => translateRelevance(evidenceType, evidenceDirection, clinicalSignificance)).toThrow('unable to process relevance'); + expect(() => translateRelevance( + evidenceType, + evidenceDirection, + clinicalSignificance, + )).toThrow('unable to process relevance'); }, ); }); diff --git a/test/civic.variant.test.js b/test/civic.variant.test.js index d37649e..903396d 100644 --- a/test/civic.variant.test.js +++ b/test/civic.variant.test.js @@ -1,3 +1,4 @@ +/* eslint-disable jest/no-disabled-tests */ const { normalizeVariantRecord } = require('../src/civic/variant'); describe('normalizeVariantRecord', () => { @@ -218,7 +219,6 @@ describe('normalizeVariantRecord', () => { ]); }); - test('categorical variant with spaces', () => { const variants = normalizeVariantRecord({ entrezId: 1, @@ -334,7 +334,6 @@ describe('normalizeVariantRecord', () => { ]); }); - test('cds notation', () => { // BCR-ABL const variants = normalizeVariantRecord({ @@ -521,7 +520,6 @@ describe('normalizeVariantRecord', () => { ]); }); - test('protein dup with cds dup', () => { // p.s193_c196dupstsc (c.577_588dupagcaccagctgc) const variants = normalizeVariantRecord({ @@ -584,7 +582,7 @@ describe('normalizeVariantRecord', () => { ]); }); - test('catalogue variant', () => { + test.skip('catalogue variant', () => { // RS3910384 }); @@ -620,11 +618,11 @@ describe('normalizeVariantRecord', () => { ]); }); - test('duplicate fusion', () => { + test.skip('duplicate fusion', () => { // AGGF1-PDGFRB, AGGF1-PDGFRB C843G }); - test('non-specific positional mutaiton', () => { + test.skip('non-specific positional mutaiton', () => { // E1813 mutations }); From bb39bb6092a7a064ba0c0bb055380f1fa21f65cd Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 15:24:51 -0700 Subject: [PATCH 11/28] Moving civic tests in own directory --- test/{ => civic}/civic.profile.test.js | 2 +- test/{ => civic}/civic.publication.test.js | 2 +- test/{ => civic}/civic.relevance.test.js | 2 +- test/{ => civic}/civic.variant.test.js | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) rename test/{ => civic}/civic.profile.test.js (99%) rename test/{ => civic}/civic.publication.test.js (94%) rename test/{ => civic}/civic.relevance.test.js (98%) rename test/{ => civic}/civic.variant.test.js (99%) diff --git a/test/civic.profile.test.js b/test/civic/civic.profile.test.js similarity index 99% rename from test/civic.profile.test.js rename to test/civic/civic.profile.test.js index f02f2df..ee8ea60 100644 --- a/test/civic.profile.test.js +++ b/test/civic/civic.profile.test.js @@ -1,4 +1,4 @@ -const { MolecularProfile } = require('../src/civic/profile'); +const { MolecularProfile } = require('../../src/civic/profile'); describe('MolecularProfile._combine()', () => { diff --git a/test/civic.publication.test.js b/test/civic/civic.publication.test.js similarity index 94% rename from test/civic.publication.test.js rename to test/civic/civic.publication.test.js index fde748d..1445ec7 100644 --- a/test/civic.publication.test.js +++ b/test/civic/civic.publication.test.js @@ -1,4 +1,4 @@ -const { titlesMatch } = require('../src/civic/publication'); +const { titlesMatch } = require('../../src/civic/publication'); describe('titlesMatch', () => { diff --git a/test/civic.relevance.test.js b/test/civic/civic.relevance.test.js similarity index 98% rename from test/civic.relevance.test.js rename to test/civic/civic.relevance.test.js index 82e3cb7..bb922db 100644 --- a/test/civic.relevance.test.js +++ b/test/civic/civic.relevance.test.js @@ -1,4 +1,4 @@ -const { translateRelevance } = require('../src/civic/relevance'); +const { translateRelevance } = require('../../src/civic/relevance'); describe('translateRelevance', () => { test.each([ diff --git a/test/civic.variant.test.js b/test/civic/civic.variant.test.js similarity index 99% rename from test/civic.variant.test.js rename to test/civic/civic.variant.test.js index 903396d..4814157 100644 --- a/test/civic.variant.test.js +++ b/test/civic/civic.variant.test.js @@ -1,5 +1,5 @@ /* eslint-disable jest/no-disabled-tests */ -const { normalizeVariantRecord } = require('../src/civic/variant'); +const { normalizeVariantRecord } = require('../../src/civic/variant'); describe('normalizeVariantRecord', () => { test('exon mutation', () => { From 44f20a92ed4788413537aee51ee7272d081c6a12 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 15:31:00 -0700 Subject: [PATCH 12/28] Moving EvidenceLevels in seperated file --- src/civic/evidenceLevel.js | 59 ++++++++++++++++++++++++++++++++++++++ src/civic/index.js | 56 +----------------------------------- 2 files changed, 60 insertions(+), 55 deletions(-) create mode 100644 src/civic/evidenceLevel.js diff --git a/src/civic/evidenceLevel.js b/src/civic/evidenceLevel.js new file mode 100644 index 0000000..172be88 --- /dev/null +++ b/src/civic/evidenceLevel.js @@ -0,0 +1,59 @@ +/** + * 1-5 : https://docs.civicdb.org/en/latest/model/evidence/evidence_rating.html + * A-E : https://docs.civicdb.org/en/latest/model/evidence/level.html +*/ +const VOCAB = { + 1: 'Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims.', + 2: 'Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing.', + 3: 'Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning.', + 4: 'Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning.', + 5: 'Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered.', + A: 'Proven/consensus association in human medicine.', + B: 'Clinical trial or other primary patient data supports association.', + C: 'Individual case reports from clinical journals.', + D: 'In vivo or in vitro models support association.', + E: 'Indirect evidence.', + url: 'https://docs.civicdb.org/en/latest/model/evidence.html', +}; + +const EVIDENCE_LEVEL_CACHE = {}; + +/** + * Fetch an evidence level, and add it if there is not an existing record + * + * @param {ApiConnection} conn graphkb API connector + * @param {object} param1 + * @param {object} param1.rawRecord an EvidenceItem record from CIViC + * @param {object} param1.source the CIViC source rid in GraphKB + * @returns {object} an EvidenceLevel recors from GraphKB + */ +const getEvidenceLevel = async (conn, { rawRecord, source, sourceDisplayName }) => { + // get the evidenceLevel + let level = `${rawRecord.evidenceLevel}${rawRecord.evidenceRating || ''}`.toLowerCase(); + + if (EVIDENCE_LEVEL_CACHE[level] === undefined) { + level = await conn.addRecord({ + content: { + description: `${VOCAB[rawRecord.evidenceLevel]} ${VOCAB[rawRecord.evidenceRating] || ''}`, + displayName: `${sourceDisplayName} ${level.toUpperCase()}`, + name: level, + source, + sourceId: level, + url: VOCAB.url, + }, + existsOk: true, + fetchConditions: { + AND: + [{ sourceId: level }, { name: level }, { source }], + }, + target: 'EvidenceLevel', + + }); + EVIDENCE_LEVEL_CACHE[level.sourceId] = level; + } else { + level = EVIDENCE_LEVEL_CACHE[level]; + } + return level; +}; + +module.exports = { getEvidenceLevel }; diff --git a/src/civic/index.js b/src/civic/index.js index 836237e..3a8e2d2 100644 --- a/src/civic/index.js +++ b/src/civic/index.js @@ -19,6 +19,7 @@ const _entrezGene = require('../entrez/gene'); const { civic: SOURCE_DEFN, ncit: NCIT_SOURCE_DEFN } = require('../sources'); const { processVariantRecord } = require('./variant'); const { getRelevance } = require('./relevance'); +const { getEvidenceLevel } = require('./evidenceLevel'); const { getPublication, loadPubmedCache } = require('./publication'); const { processMolecularProfile } = require('./profile'); const { addOrFetchTherapy, resolveTherapies } = require('./therapy'); @@ -28,26 +29,6 @@ class NotImplementedError extends ErrorMixin { } const BASE_URL = 'https://civicdb.org/api/graphql'; -/** - * 1-5 : https://docs.civicdb.org/en/latest/model/evidence/evidence_rating.html - * A-E : https://docs.civicdb.org/en/latest/model/evidence/level.html - */ -const VOCAB = { - 1: 'Claim is not supported well by experimental evidence. Results are not reproducible, or have very small sample size. No follow-up is done to validate novel claims.', - 2: 'Evidence is not well supported by experimental data, and little follow-up data is available. Publication is from a journal with low academic impact. Experiments may lack proper controls, have small sample size, or are not statistically convincing.', - 3: 'Evidence is convincing, but not supported by a breadth of experiments. May be smaller scale projects, or novel results without many follow-up experiments. Discrepancies from expected results are explained and not concerning.', - 4: 'Strong, well supported evidence. Experiments are well controlled, and results are convincing. Any discrepancies from expected results are well-explained and not concerning.', - 5: 'Strong, well supported evidence from a lab or journal with respected academic standing. Experiments are well controlled, and results are clean and reproducible across multiple replicates. Evidence confirmed using independent methods. The study is statistically well powered.', - A: 'Proven/consensus association in human medicine.', - B: 'Clinical trial or other primary patient data supports association.', - C: 'Individual case reports from clinical journals.', - D: 'In vivo or in vitro models support association.', - E: 'Indirect evidence.', - url: 'https://docs.civicdb.org/en/latest/model/evidence.html', -}; - -const EVIDENCE_LEVEL_CACHE = {}; // avoid unecessary requests by caching the evidence levels - // Spec compiler const ajv = new Ajv(); const validateEvidenceSpec = ajv.compile(evidenceSpec); @@ -80,41 +61,6 @@ const requestEvidenceItems = async (url, opt) => { }; -/** - * Add or fetch an evidence level if there is not an existing record - */ -const getEvidenceLevel = async ({ - conn, rawRecord, sources, -}) => { - // get the evidenceLevel - let level = `${rawRecord.evidenceLevel}${rawRecord.evidenceRating || ''}`.toLowerCase(); - - if (EVIDENCE_LEVEL_CACHE[level] === undefined) { - level = await conn.addRecord({ - content: { - description: `${VOCAB[rawRecord.evidenceLevel]} ${VOCAB[rawRecord.evidenceRating] || ''}`, - displayName: `${SOURCE_DEFN.displayName} ${level.toUpperCase()}`, - name: level, - source: rid(sources.civic), - sourceId: level, - url: VOCAB.url, - }, - existsOk: true, - fetchConditions: { - AND: - [{ sourceId: level }, { name: level }, { source: rid(sources.civic) }], - }, - target: 'EvidenceLevel', - - }); - EVIDENCE_LEVEL_CACHE[level.sourceId] = level; - } else { - level = EVIDENCE_LEVEL_CACHE[level]; - } - return level; -}; - - /** * Transform a CIViC evidence record into a GraphKB statement * From d55fe827e3ed0d039cb858bfe091c6d8bb7b0985 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 15:38:24 -0700 Subject: [PATCH 13/28] Moving Disease in seperated file --- src/civic/disease.js | 41 +++++++++++++++++++++++++++++++++++++++++ src/civic/index.js | 28 ++++------------------------ 2 files changed, 45 insertions(+), 24 deletions(-) create mode 100644 src/civic/disease.js diff --git a/src/civic/disease.js b/src/civic/disease.js new file mode 100644 index 0000000..b1573e9 --- /dev/null +++ b/src/civic/disease.js @@ -0,0 +1,41 @@ +const { orderPreferredOntologyTerms } = require('../graphkb'); + +/** + * Given a CIViC EvidenceItem record with its disease property, + * returns the corresponding disease record from GraphKB + * + * @param {ApiConnection} conn graphkb API connector + * @param {object} param1 + * @param {object} param1.rawRecord the EvidenceItem from CIViC + * @returns {object} the disease record from GraphKB + */ +const getDisease = async (conn, { rawRecord }) => { + let disease; + + // Get corresponding GraphKB Disease by it's doid (disease ontology id) + if (rawRecord.disease) { + let diseaseQueryFilters = {}; + + if (rawRecord.disease.doid) { + diseaseQueryFilters = { + AND: [ + { sourceId: `doid:${rawRecord.disease.doid}` }, + { source: { filters: { name: 'disease ontology' }, target: 'Source' } }, + ], + }; + } else { + diseaseQueryFilters = { name: rawRecord.disease.name }; + } + + disease = await conn.getUniqueRecordBy({ + filters: diseaseQueryFilters, + sort: orderPreferredOntologyTerms, + target: 'Disease', + }); + } + return disease; +}; + +module.exports = { + getDisease, +}; diff --git a/src/civic/index.js b/src/civic/index.js index 3a8e2d2..ffb5eee 100644 --- a/src/civic/index.js +++ b/src/civic/index.js @@ -17,6 +17,7 @@ const { const { logger } = require('../logging'); const _entrezGene = require('../entrez/gene'); const { civic: SOURCE_DEFN, ncit: NCIT_SOURCE_DEFN } = require('../sources'); +const { getDisease } = require('./disease'); const { processVariantRecord } = require('./variant'); const { getRelevance } = require('./relevance'); const { getEvidenceLevel } = require('./evidenceLevel'); @@ -109,30 +110,6 @@ const processEvidenceRecord = async (opt) => { throw err; } - // get the disease by doid - let disease; - - // find the disease if it is not null - if (rawRecord.disease) { - let diseaseQueryFilters = {}; - - if (rawRecord.disease.doid) { - diseaseQueryFilters = { - AND: [ - { sourceId: `doid:${rawRecord.disease.doid}` }, - { source: { filters: { name: 'disease ontology' }, target: 'Source' } }, - ], - }; - } else { - diseaseQueryFilters = { name: rawRecord.disease.name }; - } - - disease = await conn.getUniqueRecordBy({ - filters: diseaseQueryFilters, - sort: orderPreferredOntologyTerms, - target: 'Disease', - }); - } // get the therapy/therapies by name let therapy; @@ -168,6 +145,9 @@ const processEvidenceRecord = async (opt) => { sourceId: rawRecord.id, }; + // get the disease by doid + const disease = getDisease(conn, { rawRecord }); + // create the statement and connecting edges if (rawRecord.evidenceType === 'DIAGNOSTIC' || rawRecord.evidenceType === 'PREDISPOSING') { if (!disease) { From 7d2f189c7573c461a6c842c340268020fc8e7373 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 15:44:00 -0700 Subject: [PATCH 14/28] Add comments in graphkb.js --- src/graphkb.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/graphkb.js b/src/graphkb.js index acc4333..914d2e0 100644 --- a/src/graphkb.js +++ b/src/graphkb.js @@ -557,6 +557,7 @@ class ApiConnection { const model = schema.get(target); const filters = fetchConditions || convertRecordToQueryFilters(content); + // Will first try to fetch and/or update the record if it already exists if (fetchFirst || upsert) { try { const result = await this.getUniqueRecordBy({ @@ -577,6 +578,7 @@ class ApiConnection { throw new Error(`cannot find model from target (${target})`); } + // Then (since record dosen't already exists) will create a new record try { const { result } = jc.retrocycle(await this.request({ body: content, From 69552cf9fcb0a2b36ec53767fed58ce8befed193 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 15:57:12 -0700 Subject: [PATCH 15/28] Remove unused variables --- src/civic/index.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/civic/index.js b/src/civic/index.js index ffb5eee..318b27e 100644 --- a/src/civic/index.js +++ b/src/civic/index.js @@ -10,13 +10,12 @@ const { error: { ErrorMixin } } = require('@bcgsc-pori/graphkb-parser'); const { checkSpec, request } = require('../util'); const { - orderPreferredOntologyTerms, rid, shouldUpdate, } = require('../graphkb'); const { logger } = require('../logging'); const _entrezGene = require('../entrez/gene'); -const { civic: SOURCE_DEFN, ncit: NCIT_SOURCE_DEFN } = require('../sources'); +const { civic: SOURCE_DEFN } = require('../sources'); const { getDisease } = require('./disease'); const { processVariantRecord } = require('./variant'); const { getRelevance } = require('./relevance'); From 3e665125a9a7258e4ad5909688f434d2bbb72374 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 16:08:31 -0700 Subject: [PATCH 16/28] Add EvidenceItem file in preperation for major civic upload function refactoring --- src/civic/evidenceItem.js | 298 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 298 insertions(+) create mode 100644 src/civic/evidenceItem.js diff --git a/src/civic/evidenceItem.js b/src/civic/evidenceItem.js new file mode 100644 index 0000000..40c4b8c --- /dev/null +++ b/src/civic/evidenceItem.js @@ -0,0 +1,298 @@ +const fs = require('fs'); +const path = require('path'); + +const _ = require('lodash'); +const Ajv = require('ajv'); +const { error: { ErrorMixin } } = require('@bcgsc-pori/graphkb-parser'); + +const { checkSpec, request } = require('../util'); +const { logger } = require('../logging'); +const { civic: SOURCE_DEFN } = require('../sources'); +const { EvidenceItem: evidenceSpec } = require('./specs.json'); +const _entrezGene = require('../entrez/gene'); +const { processVariantRecord } = require('./variant'); +const { processMolecularProfile } = require('./profile'); +const { addOrFetchTherapy, resolveTherapies } = require('./therapy'); +const { rid } = require('../graphkb'); + + +class NotImplementedError extends ErrorMixin { } + +// Spec compiler +const ajv = new Ajv(); +const validateEvidenceSpec = ajv.compile(evidenceSpec); + +/** + * Requests evidence items from CIViC using their graphql API + * + * @param {string} url the query url + * @param {object} opt the query options + * @returns {object[]} an array of EvidenceItem records + */ +const requestEvidenceItems = async (url, opt) => { + const body = { ...opt }; + const allRecords = []; + let hasNextPage = true; + + while (hasNextPage) { + try { + const page = await request({ + body, + json: true, + method: 'POST', + uri: url, + }); + allRecords.push(...page.data.evidenceItems.nodes); + body.variables = { + ...body.variables, + after: page.data.evidenceItems.pageInfo.endCursor, + }; + hasNextPage = page.data.evidenceItems.pageInfo.hasNextPage; + } catch (err) { + logger.error(err); + throw (err); + } + } + return allRecords; +}; + +/** + * Fetch CIViC approved evidence entries + * as well as those submitted by trusted curators + * + * @param {string} url the url for the request + * @param {string[]} trustedCurators a list of curator IDs for submitted-only EvidenceItems + * @returns {object} an object with the validated records and the encountered errors + */ +const downloadEvidenceItems = async (url, trustedCurators) => { + const evidenceItems = []; + const query = fs.readFileSync(path.join(__dirname, 'evidenceItems.graphql')).toString(); + + // Get accepted evidenceItems + const accepted = await requestEvidenceItems(url, { + query, + variables: { + status: 'ACCEPTED', + }, + }); + logger.info(`${accepted.length} accepted entries from ${SOURCE_DEFN.name}`); + evidenceItems.push(...accepted); + + // Get submitted evidenceItems from trusted curators + for (const curator of Array.from(new Set(trustedCurators))) { + if (!Number.isNaN(curator)) { + const submittedByATrustedCurator = await requestEvidenceItems(url, { + query, + variables: { + status: 'SUBMITTED', + userId: parseInt(curator, 10), + }, + }); + evidenceItems.push(...submittedByATrustedCurator); + logger.info(`${submittedByATrustedCurator.length} submitted entries by trusted curator ${curator} from ${SOURCE_DEFN.name}`); + } + } + + logger.info(`${evidenceItems.length} total records from ${SOURCE_DEFN.name}`); + + // Validation + const validatedRecords = [], + errors = []; + + for (const record of evidenceItems) { + try { + checkSpec(validateEvidenceSpec, record); + } catch (err) { + errors.push({ error: err, errorMessage: err.toString(), record }); + logger.error(err); + continue; + } + validatedRecords.push(record); + } + + logger.info(`${validatedRecords.length}/${evidenceItems.length} validated records`); + return { errors, records: validatedRecords }; +}; + +/** + * Format one combination from a CIViC EvidenceItem into an object + * ready to be compared with a corresponding GraphKB statement + * + * @param {ApiConnection} conn the API connection object for GraphKB + * @param {object} param1 + * @param {object} param1.record the unparsed record from CIViC + * @param {object} param1.sourceRid the souce rid for CIViC in GraphKB + * @returns {object} the formatted content from one combination + */ +const processCombination = async (conn, { + record: rawRecord, + sourceRid, +}) => { + /* + PROCESSING EVIDENCEITEM DATA SPECIFIC TO THAT COMBINATION/STATEMENT + */ + + // THERAPY + // Get corresponding GraphKB Therapies + let therapy; + + if (rawRecord.therapies) { + try { + therapy = await addOrFetchTherapy( + conn, + sourceRid, + rawRecord.therapies, // therapiesRecords + (rawRecord.therapyInteractionType || '').toLowerCase(), // combinationType + ); + } catch (err) { + logger.error(`failed to fetch therapy: ${JSON.stringify(rawRecord.therapies)}`); + throw err; + } + } + + // VARIANTS + // Note: the combination can have more than 1 variant + // if the Molecular profile was using AND operators + const { variants: civicVariants } = rawRecord; + const variants = []; + + for (const variant of civicVariants) { + // Variant's Feature + const { feature: { featureInstance } } = variant; + + // TODO: Deal with __typename === 'Factor'. No actual case as April 22nd, 2024 + if (featureInstance.__typename !== 'Gene') { + throw new NotImplementedError( + 'unable to process variant\'s feature of type other than Gene (e.g. Factor)', + ); + } + + let feature; + + try { + [feature] = await _entrezGene.fetchAndLoadByIds(conn, [featureInstance.entrezId]); + } catch (err) { + logger.error(`failed to fetch variant's feature: ${featureInstance.entrezId}`); + throw err; + } + + // Variant + try { + const processedVariants = await processVariantRecord(conn, variant, feature); + logger.verbose(`converted variant name (${variant.name}) to variants (${processedVariants.map(v => v.displayName).join(', and ')})`); + variants.push(...processedVariants); + } catch (err) { + logger.error(`unable to process the variant (id=${rawRecord.variant.id}, name=${rawRecord.variant.name})`); + throw err; + } + } + + /* + FORMATTING CONTENT FOR GRAPHKB STATEMENT + */ + + const { content } = rawRecord; + + // SUBJECT + // Adding Disease as subject + if (rawRecord.evidenceType === 'DIAGNOSTIC' || rawRecord.evidenceType === 'PREDISPOSING') { + if (!content.disease) { + throw new Error('unable to create a diagnostic or predisposing statement without a corresponding disease'); + } + content.subject = content.disease; + } + + // Adding Therapy as subject + if (rawRecord.evidenceType === 'PREDICTIVE' && therapy) { + content.subject = rid(therapy); + } + + // Adding 'patient' Vocabulary as subject + if (rawRecord.evidenceType === 'PROGNOSTIC') { + try { + content.subject = rid( + // get the patient vocabulary object + await conn.getVocabularyTerm('patient'), + ); + } catch (err) { + logger.error('unable to fetch Vocabulary record for term patient'); + throw err; + } + } + + // Adding feature (reference1) or Variant (1st variant as the default) as subject. + if (rawRecord.evidenceType === 'FUNCTIONAL') { + content.subject = rid(variants[0].reference1); + } + if (rawRecord.evidenceType === 'ONCOGENIC') { + content.subject = variants.length === 1 + ? rid(variants[0]) + : rid(variants[0].reference1); + } + + // Checking for Subject + if (!content.subject) { + throw Error('unable to determine statement subject'); + } + + // CONDITIONS + // Adding variants as conditions + content.conditions = [...variants.map(v => rid(v))]; + + // Adding Disease as condition + if (content.disease) { + content.conditions.push(content.disease); + } + delete content.disease; // Removing unwanted properties no longer needed + + // Adding content's subject as condition if not already + if (content.subject && !content.conditions.includes(content.subject)) { + content.conditions.push(content.subject); + } + // Sorting conditions for downstream object comparison + content.conditions.sort(); + + return content; +}; + +/** + * Process an EvidenceItem from CIViC into an array of one or more combinations + * + * @param {object} evidenceItem the CIViC EvidenceItem + * @returns {object[]} an array of combinations + */ +const processEvidenceItem = async (evidenceItem) => { + let record = JSON.parse(JSON.stringify(evidenceItem)); // Deep copy + logger.debug(`processing EvidenceItem ${record.id}`); + + // Resolve therapy combinations if any + // Updates record.therapies and record.therapyInteractionType properties + record = resolveTherapies(record); + + // Molecular Profile (conditions w/ variants) + record.conditions = processMolecularProfile(record.molecularProfile).conditions; + + // PROCESSING EVIDENCEITEM INTO AN ARRAY OF COMBINATIONS + const combinations = []; + + for (const condition of record.conditions) { + for (const therapies of record.therapies) { + const content = JSON.parse(JSON.stringify(record.content)); // Deep copy + combinations.push({ + ..._.omit(record, ['conditions']), + content, + therapies, + variants: [...condition], + }); + } + } + + return combinations; +}; + +module.exports = { + downloadEvidenceItems, + processCombination, + processEvidenceItem, + requestEvidenceItems, +}; From a767a229cf763c5585eeedba3ba1044bbabef03c Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 16:11:52 -0700 Subject: [PATCH 17/28] Add Statement file and tests in preperation for major civic upload function refactoring --- src/civic/statement.js | 296 +++++++++++++++++++++++++++++ test/civic/civic.statement.test.js | 167 ++++++++++++++++ 2 files changed, 463 insertions(+) create mode 100644 src/civic/statement.js create mode 100644 test/civic/civic.statement.test.js diff --git a/src/civic/statement.js b/src/civic/statement.js new file mode 100644 index 0000000..033d398 --- /dev/null +++ b/src/civic/statement.js @@ -0,0 +1,296 @@ +const _ = require('lodash'); + +const { logger } = require('../logging'); + + +/** + * Evaluate if two statement's content can be matched to one another. + * Used to map each EvidenceLevel's combination to its corresponding GraphKB statement + * + * @param {object} fromCivic new content from CIViC + * @param {object} fromGkb actual content from GraphKB + * @returns {boolean} whether both contents are matching or not + */ +const isMatching = ({ fromCivic, fromGkb, p = ['conditions', 'subject'] }) => { + const c = JSON.stringify(_.pick(fromCivic, ...p)); + const g = JSON.stringify(_.pick(fromGkb, ...p)); + const rel = c === g; + // console.log({ p, c, g, rel }); + return rel; // JSON.stringify(_.pick(fromCivic, ...p)) === JSON.stringify(_.pick(fromGkb, ...p)); +}; + +/** + * Evaluate if a statement needs to be updated + * when compared to its matching EvidenceLevel's combination + * + * @param {object} param0 + * @param {object} param0.fromCivic new content from CIViC + * @param {object} param0.fromGkb actual content from GraphKB + * @returns {boolean} whether the GraphKB record needs to be updated or not + */ +const needsUpdate = ({ fromCivic, fromGkb }) => { + const isEqual = JSON.stringify(fromCivic) === JSON.stringify(_.omit(fromGkb, ['@rid'])); + + // Logging details if not equal + if (!isEqual) { + const updatedFields = []; + + for (const [key, value] of Object.entries(fromCivic)) { + if (JSON.stringify(value) !== JSON.stringify(fromGkb[key])) { + updatedFields.push(key); + } + } + logger.info(`Update needed on ${updatedFields.toString()}`); + } + + return !isEqual; +}; + +/** + * Given an array of content from civic and an array of actual statements from GraphKG, + * match corresponding content together + * + * @param {object} param0 + * @param {object[]} param0.allFromCivic array of new content from CIViC + * @param {object[]} param0.allFromGkb array of actual content from GraphKB + * @param boolean} param0.matchingOnSubjectAlone if additional matching on subject alone + * @param boolean} param0.matchingWithoutComparing if random matching with remaining records + * @returns {object} content of records to create, update and delete in GrpahKB + */ +const contentMatching = ({ + allFromCivic, + allFromGkb, + matchingOnSubjectAlone = true, + matchingWithoutComparing = true, +}) => { + const records = { + toCreate: [], // Array of content from CIViC to create as GraphKB statements + toDelete: [], // Array of GraphKB statements to delete + toUpdate: [], /* Array of CIViC-GraphKB pairs of content for statement update + Note: statement will be updated only if needed */ + }; + + /* + MATCHING ONE TO ONE + + Will automatically be submitted for update, without deletion/creation + */ + + if (allFromCivic.length === 1 && allFromGkb.length === 1) { + records.toUpdate.push({ fromCivic: allFromCivic[0], fromGkb: allFromGkb[0] }); + return records; + } + + /* + MATCHING ON CONDITIONS AND SUBJECT + */ + + const remainingFromGkb = [...allFromGkb]; + allFromCivic.forEach(el => { + let matched = false; + + for (let i = 0; i < remainingFromGkb.length; i++) { + // matching on conditions and subject (default) + if (isMatching({ + fromCivic: el, + fromGkb: remainingFromGkb[i], + })) { + records.toUpdate.push({ + fromCivic: el, + fromGkb: remainingFromGkb[i], + }); + remainingFromGkb.splice(i, 1); + matched = true; + break; + } + } + + if (!matched) { + records.toCreate.push(el); + } + }); + records.toDelete = [...remainingFromGkb]; + + /* + MATCHING ON SUBJECT ALONE + */ + if (!matchingOnSubjectAlone) { return records; } + + let numUnmatched = Math.min( + records.toCreate.length, + records.toDelete.length, + ); + + if (numUnmatched > 0) { + const remainingToCreate = []; + + for (let j = 0; j < records.toCreate.length; j++) { + let matched = false; + + for (let i = 0; i < records.toDelete.length; i++) { + // matching on subject + if (isMatching({ + fromCivic: records.toCreate[j], + fromGkb: records.toDelete[i], + p: ['subject'], + })) { + records.toUpdate.push({ + fromCivic: records.toCreate[j], + fromGkb: records.toDelete[i], + }); + records.toDelete.splice(i, 1); + matched = true; + break; + } + } + + if (!matched) { + remainingToCreate.push(records.toCreate[j]); + } + } + records.toCreate = [...remainingToCreate]; + } + + /* + ARTIFICIAL MATCHING WITHOUT COMPARISON + + In order to reduce unnecessary create/delete statements, + artificially match pairs until only some records.toCreate record(s) remains + or some records.toDelete record(s) remains. + */ + if (!matchingWithoutComparing) { return records; } + + numUnmatched = Math.min( + records.toCreate.length, + records.toDelete.length, + ); + + // Randomly match remaining content + if (numUnmatched > 0) { + for (let i = 0; i < numUnmatched; i++) { + // 'Artificial' pairing + records.toUpdate.push( + { fromCivic: records.toCreate[i], fromGkb: records.toDelete[i] }, + ); + } + // Remove from records.toCreate and records.toDelete + records.toCreate.splice(0, numUnmatched); + records.toDelete.splice(0, numUnmatched); + } + + return records; +}; + +/** + * Given content from CIViC, try to create the GraphKB record + * + * @param {ApiConnection} conn the API connection object for GraphKB + * @param {object} param1 + * @param {object[]} param1.fromCivic new content from CIViC + * @returns {object} a count object for error and success + */ +const createStatement = async (conn, { fromCivic }) => { + const counts = { err: 0, success: 0 }; + + try { + await conn.addRecord({ content: fromCivic, target: 'Statement' }); + counts.success++; + } catch (err) { + logger.error(err); + counts.err++; + } + + return counts; +}; + +/** + * Given content from CIViC and a corresponding GraphKB Statement rid, + * try to update the GraphKB record + * + * @param {ApiConnection} conn the API connection object for GraphKB + * @param {object} param1 + * @param {object[]} param1.fromCivic new content from CIViC + * @param {object[]} param1.fromGkb actual content from GraphKB + * @returns {object} a count object for error and success + */ +const updateStatement = async (conn, { fromCivic, fromGkb }) => { + const counts = { err: 0, success: 0 }; + + try { + await conn.addRecord({ + content: fromCivic, + existsOk: true, + fetchConditions: { + // Since CIViC content has already been matched + // to its corresponding GraphKB statement + '@rid': fromGkb['@rid'], + }, + target: 'Statement', + upsert: true, + }); + counts.success++; + } catch (err) { + logger.error(err); + counts.err++; + } + + return counts; +}; + +/** + * Soft-delete GraphKB Statements from either an array of Statement's RIDs + * or an array of sourceIds and its corresponding source + * + * @param {ApiConnection} conn the api connection object for GraphKB + * @param {object} param1 + * @param {?string[]} param1.rids an array of Statement's RIDs + * @param {string} param1.source the source RID + * @param {string[]} param1.sourceIds an array of sourceIds + * @returns {object} a count object for error and success + */ +const deleteStatements = async (conn, { rids = [], source, sourceIds }) => { + const counts = { err: 0, success: 0 }; + + // Get rids to delete if none provided + if (rids.length === 0) { + logger.info('Loading corresponding GraphKB statement RIDs to delete'); + const records = await conn.getRecords({ + filters: { + AND: [ + { sourceId: sourceIds }, + { source }, + ], + }, + target: 'Statement', + }); + rids.push(...records.map( + (el) => el['@rid'], + )); + logger.info(`${rids.length} RIDs found`); + } + + // Delete statements + logger.info(`Deleting ${rids.length} statement(s)...`); + logger.info(rids); + + for (const r of rids) { + try { + await conn.deleteRecord('Statement', r); + counts.success++; + } catch (err) { + logger.error(err); + counts.err++; + } + } + + return counts; +}; + +module.exports = { + contentMatching, + createStatement, + deleteStatements, + isMatching, + needsUpdate, + updateStatement, +}; diff --git a/test/civic/civic.statement.test.js b/test/civic/civic.statement.test.js new file mode 100644 index 0000000..a74d126 --- /dev/null +++ b/test/civic/civic.statement.test.js @@ -0,0 +1,167 @@ +/* eslint-disable jest/no-disabled-tests */ +const { + contentMatching, + isMatching, + needsUpdate, +} = require('../../src/civic/statement'); + + +// Generic content +const content = { + conditions: ['#123:1', '#123:2'], // conditions NEEDS to be already sorted in ascending order + description: 'test', + evidence: ['#123:1'], + evidenceLevel: ['#123:1'], + relevance: '#123:1', + reviewStatus: 'not required', + source: '#123:1', + sourceId: '9999', + subject: '#123:1', +}; + +// Combination of matching and not matching content +const allFromCivic = [ + { ...content, subject: '#888:0' }, // matching with allFromGkb[3] + { ...content, subject: '#888:1' }, // matching with allFromGkb[1] + { ...content, subject: '#888:2' }, // not matching +]; +const allFromGkb = [ + { ...content, '@rid': '#999:0', subject: '#888:3' }, // not matching + { ...content, '@rid': '#999:1', subject: '#888:1' }, // matching with allFromCivic[1] + { ...content, '@rid': '#999:2', subject: '#888:4' }, // not matching + { ...content, '@rid': '#999:3', subject: '#888:0' }, // matching with allFromCivic[0] +]; + +describe('needsUpdate', () => { + // No need to update + test('identical content', () => { + expect(needsUpdate({ + fromCivic: content, + fromGkb: content, + })).toBe(false); + }); + + test('discarding gkb rid', () => { + expect(needsUpdate({ + fromCivic: content, + fromGkb: { ...content, '@rid': '#123:1' }, + })).toBe(false); + }); + + // Need to update + test('any difference', () => { + expect(needsUpdate({ + fromCivic: content, + fromGkb: { ...content, description: '' }, + })).toBe(true); + }); +}); + +describe('isMatching', () => { + // No matching + test('difference on conditions', () => { + expect(isMatching({ + fromCivic: content, + fromGkb: { ...content, conditions: ['#123:1', '#123:3'] }, + })).toBe(false); + }); + + test('difference on subject', () => { + expect(isMatching({ + fromCivic: content, + fromGkb: { ...content, subject: '#123:2' }, + })).toBe(false); + }); + + // Matching + test('difference on conditions while matching only on subject', () => { + expect(isMatching({ + fromCivic: content, + fromGkb: { ...content, conditions: ['#123:1', '#123:3'] }, + p: ['subject'], + })).toBe(true); + }); + + // Matching on subject alone + test('any other difference', () => { + expect(isMatching({ + fromCivic: content, + fromGkb: { ...content, description: '' }, + })).toBe(true); + }); +}); + +describe('contentMatching', () => { + test('matching only on conditions and subject', () => { + const records = contentMatching({ + allFromCivic, + allFromGkb, + matchingOnSubjectAlone: false, + }); + + // matching content + expect(records.toUpdate.length).toBe(2); + + // allFromGkb with no matches + expect(records.toDelete.length).toBe(2); + + // allFromCivic with no matches + expect(records.toCreate.length).toBe(1); + + // matching content + expect(records.toUpdate[0]).toEqual({ + fromCivic: allFromCivic[0], + fromGkb: allFromGkb[3], + }); + expect(records.toUpdate[1]).toEqual({ + fromCivic: allFromCivic[1], + fromGkb: allFromGkb[1], + }); + + // allFromGkb with no matches + expect(records.toDelete[0]).toEqual(allFromGkb[0]); + expect(records.toDelete[1]).toEqual(allFromGkb[2]); + + // allFromCivic with no matches + expect(records.toCreate[0]).toEqual(allFromCivic[2]); + }); + + test('matching also on subject alone, without artificial matching', () => { + const records = contentMatching({ + allFromCivic: [ + { ...content, conditions: ['#777:77'], subject: '#777:1' }, + { ...content, conditions: ['#777:77'], subject: '#777:2' }, + ], + allFromGkb: [ + { ...content, conditions: ['#888:88'], subject: '#777:1' }, + { ...content, conditions: ['#888:88'], subject: '#888:2' }, + ], + matchingWithoutComparing: false, + }); + + // matching content + expect(records.toUpdate.length).toBe(1); + + // allFromGkb with no matches + expect(records.toDelete.length).toBe(1); + + // allFromCivic with no matches + expect(records.toCreate.length).toBe(1); + }); + + test('matching until artificial matching', () => { + const records = contentMatching({ + allFromCivic, + allFromGkb, + }); + + // matching content + expect(records.toUpdate.length).toBe(3); + + // allFromGkb with no matches + expect(records.toDelete.length).toBe(1); + + // allFromCivic with no matches + expect(records.toCreate.length).toBe(0); + }); +}); From 3561dac17e4d412113833caaa7df5bcb7b2715b9 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 16:14:43 -0700 Subject: [PATCH 18/28] remove debugger --- bin/load.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/bin/load.js b/bin/load.js index 838c03d..e8ccbd6 100644 --- a/bin/load.js +++ b/bin/load.js @@ -132,14 +132,12 @@ let loaderFunction; if (input) { loaderFunction = ALL_MODULES[moduleName || subparser_name].uploadFile; } else { - debugger; loaderFunction = ALL_MODULES[moduleName || subparser_name].upload; } const loaderOptions = { ...options }; if (input) { - debugger; loaderOptions.filename = input; } From 35ac6b7bbea51595032eb48c89bc0176d706cfef Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 16:15:12 -0700 Subject: [PATCH 19/28] Add noUpdate flag to civic parser --- bin/load.js | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/bin/load.js b/bin/load.js index e8ccbd6..166ef61 100644 --- a/bin/load.js +++ b/bin/load.js @@ -1,6 +1,3 @@ -const fs = require('fs'); -const path = require('path'); - const { runLoader } = require('../src'); const { createOptionsMenu, fileExists } = require('../src/cli'); @@ -36,7 +33,12 @@ const cosmicResistance = require('../src/cosmic/resistance'); const cosmicFusions = require('../src/cosmic/fusions'); const API_MODULES = { - asco, dgidb, docm, fdaApprovals, moa, oncotree, + asco, + dgidb, + docm, + fdaApprovals, + moa, + oncotree, }; const FILE_MODULES = { @@ -102,6 +104,11 @@ civicParser.add_argument('--trustedCurators', { help: 'CIViC User IDs of curators whose statements should be imported even if they have not yet been reviewed (evidence is submitted but not accepted)', nargs: '+', }); +civicParser.add_argument('--noUpdate', { + action: 'store_true', + default: false, + help: 'Will not check for updating content of existing GraphKB Statements', +}); const clinicaltrialsgovParser = subparsers.add_parser('clinicaltrialsgov'); clinicaltrialsgovParser.add_argument('--days', { From ff73e991cd7290a62983ff5d3058bee406858f5e Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 16:17:16 -0700 Subject: [PATCH 20/28] Refactoring main loop in civic upload --- src/civic/index.js | 850 ++++++++++++++++++++------------------------- 1 file changed, 382 insertions(+), 468 deletions(-) diff --git a/src/civic/index.js b/src/civic/index.js index 318b27e..d221998 100644 --- a/src/civic/index.js +++ b/src/civic/index.js @@ -1,555 +1,469 @@ /** * @module importer/civic */ -const _ = require('lodash'); -const Ajv = require('ajv'); const fs = require('fs'); -const path = require('path'); -const { error: { ErrorMixin } } = require('@bcgsc-pori/graphkb-parser'); - -const { checkSpec, request } = require('../util'); -const { - rid, - shouldUpdate, -} = require('../graphkb'); +const { rid } = require('../graphkb'); const { logger } = require('../logging'); -const _entrezGene = require('../entrez/gene'); const { civic: SOURCE_DEFN } = require('../sources'); const { getDisease } = require('./disease'); -const { processVariantRecord } = require('./variant'); const { getRelevance } = require('./relevance'); const { getEvidenceLevel } = require('./evidenceLevel'); const { getPublication, loadPubmedCache } = require('./publication'); -const { processMolecularProfile } = require('./profile'); -const { addOrFetchTherapy, resolveTherapies } = require('./therapy'); -const { EvidenceItem: evidenceSpec } = require('./specs.json'); - -class NotImplementedError extends ErrorMixin { } +const { + downloadEvidenceItems, + processCombination, + processEvidenceItem, +} = require('./evidenceItem'); +const { + contentMatching, + createStatement, + deleteStatements, + needsUpdate, + updateStatement, +} = require('./statement'); const BASE_URL = 'https://civicdb.org/api/graphql'; -// Spec compiler -const ajv = new Ajv(); -const validateEvidenceSpec = ajv.compile(evidenceSpec); - /** - * Requests evidence items from CIViC using their graphql API + * Increment counter on GraphKB Statement CRUD operations + * + * @param {object} initial the counter + * @param {object} updates the increment to apply + * @returns {object} the incremented counter */ -const requestEvidenceItems = async (url, opt) => { - const allRecords = []; - let hasNextPage = true; +const incrementCounts = (initial, updates) => { + if (!initial) { + return updates; + } - while (hasNextPage) { - try { - const page = await request({ - body: { ...opt }, - json: true, - method: 'POST', - uri: url, - }); - allRecords.push(...page.data.evidenceItems.nodes); - opt.variables = { ...opt.variables, after: page.data.evidenceItems.pageInfo.endCursor }; - hasNextPage = page.data.evidenceItems.pageInfo.hasNextPage; - } catch (err) { - logger.error(err); - throw (err); + // deep copy + const updated = JSON.parse(JSON.stringify(initial)); + + for (const level1 of Object.keys(updated)) { + for (const level2 of Object.keys(updated[level1])) { + updated[level1][level2] += updates[level1][level2]; } } - return allRecords; -}; + return updated; +}; /** - * Transform a CIViC evidence record into a GraphKB statement + * Access the CIVic API, parse content, transform and load into GraphKB * - * @param {object} opt - * @param {ApiConnection} opt.conn the API connection object for GraphKB - * @param {object} opt.rawRecord the unparsed record from CIViC - * @param {object} opt.sources the sources by name - * @param {boolean} opt.oneToOne civic statements to graphkb statements is a 1 to 1 mapping - * @param {object} opt.variantsCache used to avoid repeat processing of civic variants. stores the graphkb variant(s) if success or the error if not - * @param + * @param {object} param0 + * @param {ApiConnection} param0.conn the api connection object for GraphKB + * @param {string} param0.errorLogPrefix prefix to the generated error json file + * @param {number} param0.maxRecords limit of EvidenceItem records to be processed and upload + * @param {?boolean} param0.noUpdate for avoiding deletion/update of existing GraphKB Statements + * @param {string[]} param0.trustedCurators a list of curator IDs for submitted-only EvidenceItems + * @param {?string} param0.url url to use as the base for accessing the civic ApiConnection */ -const processEvidenceRecord = async (opt) => { - const { - conn, rawRecord, sources, variantsCache, oneToOne = false, - } = opt; - - // Relevance & EvidenceLevel - const [level, relevance] = await Promise.all([ - getEvidenceLevel(opt), - getRelevance(conn, { rawRecord }), - ]); - - // Variant's Feature - let feature; - const civicFeature = rawRecord.variant.feature.featureInstance; - - if (civicFeature.__typename === 'Gene') { - [feature] = await _entrezGene.fetchAndLoadByIds(conn, [civicFeature.entrezId]); - } else if (civicFeature.__typename === 'Factor') { - // TODO: Deal with __typename === 'Factor' - // No actual case as April 22nd, 2024 - throw new NotImplementedError( - 'unable to process variant\'s feature of type Factor', - ); - } - - - // Variant - let variants; - - try { - variants = await processVariantRecord(conn, rawRecord.variant, feature); - logger.verbose(`converted variant name (${rawRecord.variant.name}) to variants (${variants.map(v => v.displayName).join(', and ')})`); - } catch (err) { - logger.error(`evidence (${rawRecord.id}) Unable to process the variant (id=${rawRecord.variant.id}, name=${rawRecord.variant.name}): ${err}`); - throw err; - } - - // get the therapy/therapies by name - let therapy; - - if (rawRecord.therapies) { - try { - therapy = await addOrFetchTherapy( - conn, - rid(sources.civic), - rawRecord.therapies, - (rawRecord.therapyInteractionType || '').toLowerCase(), - ); - } catch (err) { - logger.error(err); - logger.error(`failed to fetch therapy: ${JSON.stringify(rawRecord.therapies)}`); - throw err; - } - } - - const publication = await getPublication(conn, rawRecord); - - // common content - const content = { - conditions: [...variants.map(v => rid(v))], - description: rawRecord.description, - evidence: [rid(publication)], - evidenceLevel: [rid(level)], - relevance: rid(relevance), - reviewStatus: (rawRecord.status === 'ACCEPTED' - ? 'not required' - : 'pending' - ), - source: rid(sources.civic), - sourceId: rawRecord.id, +const upload = async ({ + conn, + errorLogPrefix, + maxRecords, + noUpdate = false, + trustedCurators, + url = BASE_URL, +}) => { + const countsEI = { + error: 0, + partialSuccess: 0, + skip: 0, + success: 0, }; + let countsST; - // get the disease by doid - const disease = getDisease(conn, { rawRecord }); - - // create the statement and connecting edges - if (rawRecord.evidenceType === 'DIAGNOSTIC' || rawRecord.evidenceType === 'PREDISPOSING') { - if (!disease) { - throw new Error('Unable to create a diagnostic or predisposing statement without a corresponding disease'); - } - content.subject = rid(disease); - } else if (disease) { - content.conditions.push(rid(disease)); - } - - if (rawRecord.evidenceType === 'PREDICTIVE' && therapy) { - content.subject = rid(therapy); - } if (rawRecord.evidenceType === 'PROGNOSTIC') { - // get the patient vocabulary object - content.subject = rid(await conn.getVocabularyTerm('patient')); - } if (rawRecord.evidenceType === 'FUNCTIONAL') { - content.subject = rid(feature); - } if (rawRecord.evidenceType === 'ONCOGENIC') { - content.subject = variants.length === 1 - ? rid(variants[0]) - : rid(feature); - } - - if (content.subject && !content.conditions.includes(content.subject)) { - content.conditions.push(content.subject); - } - - if (!content.subject) { - throw Error(`unable to determine statement subject for evidence (${content.sourceId}) record`); - } + // Adding CIViC as source if not already in GraphKB + const source = await conn.addSource(SOURCE_DEFN); + const sourceRid = rid(source); - const fetchConditions = [ - { sourceId: content.sourceId }, - { source: content.source }, - { evidence: content.evidence }, // civic evidence items are per publication - ]; - - if (!oneToOne) { - fetchConditions.push(...[ - { relevance: content.relevance }, - { subject: content.subject }, - { conditions: content.conditions }, - ]); - } + /* + 1. DOWNLOAD & PREPROCESSING + */ - let original; - - if (oneToOne) { - // get previous iteration - const originals = await conn.getRecords({ - filters: { - AND: [ - { source: rid(sources.civic) }, - { sourceId: rawRecord.id }, - ], - }, - target: 'Statement', - }); + // GETTING CIVIC EVIDENCEITEMS FROM CIVIC API + // Evidences accepted, or submitted from a trusted curator + logger.info(`loading evidenceItems from ${url}`); + const { + errors: downloadEvidenceItemsErr, + records: evidenceItems, + } = await downloadEvidenceItems(url, trustedCurators); - if (originals.length > 1) { - throw Error(`Supposed to be 1to1 mapping between graphKB and civic but found multiple records with source ID (${rawRecord.id})`); - } - if (originals.length === 1) { - [original] = originals; - - const excludeTerms = [ - '@rid', - '@version', - 'comment', - 'createdAt', - 'createdBy', - 'reviews', - 'updatedAt', - 'updatedBy', - ]; - - if (!shouldUpdate('Statement', original, content, excludeTerms)) { - return original; - } - } - } + // Validation errors + const validationErrorList = []; - if (original) { - // update the existing record - return conn.updateRecord('Statement', rid(original), content); + if (downloadEvidenceItemsErr.length > 0) { + countsEI.error += downloadEvidenceItemsErr.length; + validationErrorList.push(...downloadEvidenceItemsErr); } - // create a new record - return conn.addRecord({ - content, - existsOk: true, - fetchConditions: { - AND: fetchConditions, - }, - target: 'Statement', - upsert: true, - upsertCheckExclude: [ - 'comment', - 'displayNameTemplate', - 'reviews', + // GETTING CIVIC STATEMENTS FROM GRAPHKB API + // Note: One or more GKB Statement can come from the same CIVIC id (sourceId) + logger.info('loading related statements from GraphKB'); + const statements = await conn.getRecords({ + filters: { source: sourceRid }, + returnProperties: [ + '@rid', + 'conditions', + 'description', + 'evidence', + 'evidenceLevel', + 'relevance', + 'reviewStatus', + 'source', + 'sourceId', + 'subject', ], + target: 'Statement', }); -}; - - -/** - * Get a list of CIViC Evidence Items which have since been deleted. - * Returns the list of evidence item IDs to be purged from GraphKB - * - * @param {string} url endpoint for the CIViC API - */ -const fetchDeletedEvidenceItems = async (url) => { - const ids = new Set(); - - // Get rejected evidenceItems - logger.info(`loading rejected evidenceItems from ${url}`); - const rejected = await requestEvidenceItems(url, { - query: `query evidenceItems($after: String, $status: EvidenceStatusFilter) { - evidenceItems(after: $after, status: $status) { - nodes {id} - pageCount - pageInfo {endCursor, hasNextPage} - totalCount - } - }`, - variables: { - status: 'REJECTED', - }, - }); - rejected.forEach(node => ids.add(node.id)); - logger.info(`fetched ${ids.size} rejected entries from CIViC`); - return ids; -}; - - -/** - * Fetch civic approved evidence entries as well as those submitted by trusted curators - * - * @param {string} url the endpoint for the request - * @param {string[]} trustedCurators a list of curator IDs to also fetch submitted only evidence items for - */ -const downloadEvidenceRecords = async (url, trustedCurators) => { - const records = []; - const errorList = []; - const counts = { - error: 0, exists: 0, skip: 0, success: 0, - }; - - const evidenceItems = []; - const query = fs.readFileSync(path.join(__dirname, 'evidenceItems.graphql')).toString(); - - // Get accepted evidenceItems - logger.info(`loading accepted evidenceItems from ${url}`); - const accepted = await requestEvidenceItems(url, { - query, - variables: { - status: 'ACCEPTED', - }, - }); - logger.info(`fetched ${accepted.length} accepted entries from CIViC`); - evidenceItems.push(...accepted); - - // Get submitted evidenceItems from trusted curators - for (const curator of Array.from(new Set(trustedCurators))) { - if (!Number.isNaN(curator)) { - logger.info(`loading submitted evidenceItems by trusted curator ${curator} from ${url}`); - const submittedByATrustedCurator = await requestEvidenceItems(url, { - query, - variables: { - status: 'SUBMITTED', - userId: parseInt(curator, 10), - }, - }); - evidenceItems.push(...submittedByATrustedCurator); + const sourceIdsFromGKB = new Set(statements.map(r => r.sourceId)); + logger.info(`${sourceIdsFromGKB.size} distinct ${SOURCE_DEFN.name} sourceId in GraphKB statements`); + logger.info(`${statements.length} total statements previously added to GraphKB from ${SOURCE_DEFN.name}`); + + // REFACTORING GRAPHKB STATEMENTS INTO STATEMENTSBYSOURCEID + // where each sourceId is a key associated with an array + // of one or more GKB Statement records + const statementsBySourceId = {}; + + for (const record of statements) { + if (!statementsBySourceId[record.sourceId]) { + statementsBySourceId[record.sourceId] = []; } + // Sorting conditions for downstream object comparison + record.conditions.sort(); + statementsBySourceId[record.sourceId].push(record); } - const submittedCount = evidenceItems.length - accepted.length; - logger.info(`loaded ${submittedCount} unaccepted entries by trusted submitters from CIViC`); - // Validation - for (const record of evidenceItems) { - try { - checkSpec(validateEvidenceSpec, record); - } catch (err) { - errorList.push({ error: err, errorMessage: err.toString(), record }); - logger.error(err); - counts.error++; + // REFACTORING CIVIC EVIDENCEITEMS INTO EVIDENCEITEMSBYID + // where each id is a key associated with one CIViC EvidenceItem as value + logger.info(`Pre-pocessing ${evidenceItems.length} records`); + const evidenceItemsById = {}; + + // Performing some checks. Skipping some records if needed + // eslint-disable-next-line guard-for-in + for (const i in evidenceItems) { + // Check if max records limit has been reached + if (maxRecords && Object.keys(evidenceItemsById).length >= maxRecords) { + logger.warn(`Not loading all content due to max records limit (${maxRecords})`); + countsEI.skip += (evidenceItems.length - i); + break; + } + // Check if record id is unique + if (evidenceItemsById[evidenceItems[i].id]) { + logger.error(`Multiple Evidence Items with the same id: ${evidenceItems[i].id}. Violates assumptions. Only the 1st one was kept.`); + countsEI.skip++; continue; } - records.push(record); + // Adding EvidenceItem to object for upload + evidenceItemsById[evidenceItems[i].id] = evidenceItems[i]; } - logger.info(`${records.length}/${evidenceItems.length} evidenceItem records successfully validated with the specs`); - return { counts, errorList, records }; -}; + const noRecords = Object.keys(evidenceItemsById).length; + logger.info(`${noRecords}/${evidenceItems.length} Evidence Items to process`); + /* + 2. PROCESSING EACH CIVIC EVIDENCEITEM INTO ONE OR MORE GKB STATEMENTS + */ -/** - * Access the CIVic API, parse content, transform and load into GraphKB - * - * @param {object} opt options - * @param {ApiConnection} opt.conn the api connection object for GraphKB - * @param {string} [opt.url] url to use as the base for accessing the civic ApiConnection - * @param {string[]} opt.trustedCurators a list of curator IDs to also fetch submitted only evidence items for - */ -const upload = async ({ - conn, errorLogPrefix, trustedCurators, ignoreCache = false, maxRecords, url = BASE_URL, -}) => { - const source = await conn.addSource(SOURCE_DEFN); - - // Get list of all previous statements from CIVIC in GraphKB - let previouslyEntered = await conn.getRecords({ - filters: { source: rid(source) }, - returnProperties: ['sourceId'], - target: 'Statement', - }); - previouslyEntered = new Set(previouslyEntered.map(r => r.sourceId)); - logger.info(`Found ${previouslyEntered.size} records previously added from ${SOURCE_DEFN.name}`); // PubMed caching logger.info('Caching Pubmed publication'); await loadPubmedCache(conn); - // Get evidence records from CIVIC (Accepted, or Submitted from a trusted curator) - const { counts, errorList, records } = await downloadEvidenceRecords(url, trustedCurators); - // Get rejected evidence records ids from CIVIC - const purgeableEvidenceItems = await fetchDeletedEvidenceItems(url); - - logger.info(`Processing ${records.length} records`); - // keep track of errors and already processed variants by their CIViC ID to avoid repeat logging - const variantsCache = { - errors: {}, - records: {}, + // Keeping track of EvidenceItem sourceIds who raised errors during processing + const errorSourceIds = { + disease: new Map(), + evidence: new Map(), + evidenceLevel: new Map(), + individualCombinationProcessing: new Map(), + processingIntoCombinations: new Map(), + relevance: new Map(), }; - // Refactor records into recordsById - const recordsById = {}; + logger.info(`\n\n${'#'.repeat(80)}\n## PROCESSING RECORDS\n${'#'.repeat(80)}\n`); + let recordNumber = 0; - for (const record of records) { - // Check if max records limit has been reached - if (maxRecords && Object.keys(recordsById).length >= maxRecords) { - logger.warn(`not loading all content due to max records limit (${maxRecords})`); - break; - } + // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! + const specialId = null; // '11681'; // - // Check if record id is unique - if (recordsById[record.id]) { - logger.error(`Multiple evidenceItems with the same id: ${record.id}. Violates assumptions. Only the 1st one was kept.`); - counts.skip++; + // MAIN LOOP + // Looping through Evidence Items + for (const [id, evidenceItem] of Object.entries(evidenceItemsById)) { + if (specialId && id !== specialId) { continue; } - - if (!record.molecularProfile) { - logger.error(`Evidence Item without Molecular Profile. Violates assumptions: ${record.id}`); - counts.skip++; + /* PROCESSING EVIDENCEITEMS */ + + recordNumber++; + logger.info(); + logger.info(`***** ${recordNumber}/${noRecords} : processing id ${id} *****`); + + const numberOfStatements = statementsBySourceId[id] + ? statementsBySourceId[id].length + : 0; + logger.info(`${numberOfStatements} related statement(s)`); + + // Base object (properties order matters) + // Common content will be deep copied downstream for each combination + evidenceItem.content = { + conditions: [], + description: evidenceItem.description || '', + evidence: [], + evidenceLevel: [], + relevance: undefined, + reviewStatus: (evidenceItem.status === 'ACCEPTED' + ? 'not required' + : 'pending' + ), + source: sourceRid, + sourceId: id, + subject: undefined, + }; + + // PROCESSING DATA COMMON TO ALL COMBINATIONS + + // Removing extra spaces in description. Needed before content comparison + evidenceItem.content.description = evidenceItem.content.description.replace(/\s+/g, ' ').trim(); + + // Get evidence (publication) rid + try { + evidenceItem.content.evidence.push(rid( + await getPublication(conn, evidenceItem), + )); + } catch (err) { + logger.error(err); + countsEI.error++; + errorSourceIds.evidence.set(id, err); continue; } - if (!record.molecularProfile.variants || record.molecularProfile.variants.length === 0) { - logger.error(`Molecular Profile without Variants. Violates assumptions: ${record.molecularProfile.id}`); - counts.skip++; + + // Get evidenceLevel rid + try { + evidenceItem.content.evidenceLevel.push(rid( + await getEvidenceLevel(conn, { + rawRecord: evidenceItem, + source: sourceRid, + sourceDisplayName: SOURCE_DEFN.displayName, + }), + )); + } catch (err) { + logger.error(err); + countsEI.error++; + errorSourceIds.evidenceLevel.set(id, err); continue; } - // Adding EvidenceItem to object for upload - recordsById[record.id] = record; - } - - // Main loop on recordsById - for (const [sourceId, record] of Object.entries(recordsById)) { - if (previouslyEntered.has(sourceId) && !ignoreCache) { - counts.exists++; + // Get relevance rid + try { + evidenceItem.content.relevance = rid( + await getRelevance(conn, { rawRecord: evidenceItem }), + ); + } catch (err) { + logger.error(err); + countsEI.error++; + errorSourceIds.relevance.set(id, err); continue; } - if (purgeableEvidenceItems.has(sourceId)) { - // this should never happen. If it does we have made an invalid assumption about how civic uses IDs. - throw new Error(`Record ID is both deleted and to-be loaded. Violates assumptions: ${sourceId}`); - } - const preupload = new Set((await conn.getRecords({ - filters: [ - { source: rid(source) }, { sourceId }, - ], - target: 'Statement', - })).map(rid)); - - // Resolve therapy combinations if any - // Updates record.therapies and record.therapyInteractionType properties - const { therapies, therapyInteractionType } = resolveTherapies(record); - record.therapies = therapies; - record.therapyInteractionType = therapyInteractionType; - - // Process Molecular Profiles expression into an array of conditions - // Each condition is itself an array of variants, one array for each expected GraphKB Statement from this CIViC Evidence Item + + // Get disease rid try { - // Molecular Profile (conditions w/ variants) - record.conditions = processMolecularProfile(record.molecularProfile).conditions; + // Will be removed downstream after being used as content's subject and/or condition + evidenceItem.content.disease = rid( + await getDisease(conn, { rawRecord: evidenceItem }), + true, // nullOk=true since some EvidenceItems aren't related to any specific disease + ); } catch (err) { - logger.error(`evidence (${record.id}) ${err}`); - counts.skip++; + logger.error(err); + countsEI.error++; + errorSourceIds.disease.set(id, err); continue; } - const postupload = []; - - // Upload all GraphKB statements for this CIViC Evidence Item - for (const condition of record.conditions) { - const oneToOne = (condition.length * record.therapies.length) === 1 && preupload.size === 1; - - for (const variant of condition) { - for (const therapies of record.therapies) { - try { - logger.debug(`processing ${record.id}`); - const result = await processEvidenceRecord({ - conn, - oneToOne, - rawRecord: { ..._.omit(record, ['therapies', 'variants']), therapies, variant }, - sources: { civic: source }, - variantsCache, - }); - postupload.push(rid(result)); - counts.success += 1; - } catch (err) { - if ( - err.toString().includes('is not a function') - || err.toString().includes('of undefined') - ) { - console.error(err); - } - if (err instanceof NotImplementedError) { - // accepted evidence that we do not support loading. Should delete as it may have changed from something we did support - purgeableEvidenceItems.add(sourceId); - } - errorList.push({ error: err, errorMessage: err.toString(), record }); - logger.error(`evidence (${record.id}) ${err}`); - counts.error += 1; - } - } - } + // PROCESSING INDIVIDUAL EVIDENCEITEM INTO AN ARRAY OF COMBINATIONS + // (One combination per expected GraphKB statement) + const combinations = []; + + try { + combinations.push(...await processEvidenceItem(evidenceItem)); + } catch (err) { + logger.error(err); + countsEI.error++; + errorSourceIds.processingIntoCombinations.set(id, err); + continue; } - // compare statments before/after upload to determine if any records should be soft-deleted - postupload.forEach((id) => { - preupload.delete(id); - }); + logger.info(`${combinations.length} combination(s)`); - if (preupload.size && purgeableEvidenceItems.has(sourceId)) { - logger.warn(` - Removing ${preupload.size} CIViC Entries (EID:${sourceId}) of unsupported format - `); + // PROCESSING INDIVIDUAL COMBINATION + // Formatting each combination's content for GraphKB statement requirements + const contents = []; + let processCombinationErrors = 0; + for (const combination of combinations) { try { - await Promise.all( - Array.from(preupload).map( - async outdatedId => conn.deleteRecord('Statement', outdatedId), - ), + contents.push( + await processCombination(conn, { + record: combination, + sourceRid, + }), ); } catch (err) { logger.error(err); + processCombinationErrors++; + + if (!errorSourceIds.individualCombinationProcessing.get(id)) { + errorSourceIds.individualCombinationProcessing.set(id, []); + } + const v = errorSourceIds.individualCombinationProcessing.get(id); + errorSourceIds.individualCombinationProcessing.set(id, [...v, err]); } - } else if (preupload.size) { - if (postupload.length) { - logger.warn(`deleting ${preupload.size} outdated statement records (${Array.from(preupload).join(' ')}) has new/retained statements (${postupload.join(' ')})`); - - try { - await Promise.all( - Array.from(preupload).map( - async outdatedId => conn.deleteRecord('Statement', outdatedId), - ), - ); - } catch (err) { - logger.error(err); + } + + const successRatio = `${combinations.length - processCombinationErrors}/${combinations.length}`; + const processCombinationsMsg = `Processed ${successRatio} combination(s)`; + + // If at least some combinations succeeds, then it's a success + if (processCombinationErrors === 0) { + countsEI.success++; + logger.info(processCombinationsMsg); + } else if (processCombinationErrors < combinations.length) { + countsEI.partialSuccess++; + logger.warn(processCombinationsMsg); + } else { + countsEI.error++; + logger.error(processCombinationsMsg); + } + + + /* MATCHING EVIDENCEITEMS WITH STATEMENTS */ + + // Content matching between CIViC and GraphKB records + // so we know which CRUD operation to perform on each statement + const { toCreate, toDelete, toUpdate } = contentMatching({ + allFromCivic: contents, + allFromGkb: statementsBySourceId[id] || [], + }); + + /* CREATE/UPDATE/DELETE STATEMENTS */ + + const loaclCountsST = { + create: { err: 0, success: 0 }, + delete: { err: 0, success: 0 }, + noUpdateNeeded: { success: 0 }, + update: { err: 0, success: 0 }, + }; + + // UPDATE + if (!noUpdate && toUpdate.length > 0) { + for (let i = 0; i < toUpdate.length; i++) { + const { fromCivic, fromGkb } = toUpdate[i]; + + // Check if an update is needed to avoid unnecessary API calls + if (needsUpdate({ fromCivic, fromGkb })) { + const updatedCount = await updateStatement(conn, { fromCivic, fromGkb }); + loaclCountsST.update.err += updatedCount.err; + loaclCountsST.update.success += updatedCount.success; + } else { + loaclCountsST.noUpdateNeeded.success++; } - } else { - logger.error(`NOT deleting ${preupload.size} outdated statement records (${Array.from(preupload).join(' ')}) because failed to create replacements`); } } + + // DELETE + if (!noUpdate && toDelete.length > 0) { + loaclCountsST.delete = await deleteStatements(conn, + { rids: toDelete.map(el => el['@rid']) }, + ); + } + + // CREATE + if (toCreate.length > 0) { + for (let i = 0; i < toCreate.length; i++) { + const createdCount = await createStatement(conn, { fromCivic: toCreate[i] }); + loaclCountsST.create.err += createdCount.err; + loaclCountsST.create.success += createdCount.success; + } + } + + // logging + for (const level of Object.keys(loaclCountsST)) { + if (loaclCountsST[level].err > 0 || loaclCountsST[level].success > 0) { + logger.info(`${level}: ${JSON.stringify(loaclCountsST[level])}`); + } + } + countsST = incrementCounts(countsST, loaclCountsST); + + if (specialId && id === specialId) { + process.exit(); + } } + logger.info(`\n\n${'#'.repeat(80)}\n## END OF RECORD PROCESSING\n${'#'.repeat(80)}\n`); - // purge any remaining entries that are in GraphKB but have since been rejected/deleted by CIViC - const toDelete = await conn.getRecords({ - filters: { - AND: [ - { sourceId: Array.from(purgeableEvidenceItems) }, - { source: rid(source) }, - ], - }, - target: 'Statement', - }); + // Logging EvidenceItem processing counts + logger.info(); + logger.info('***** CIViC EvidenceItem records processing report: *****'); + logger.info(JSON.stringify(countsEI)); - try { - logger.warn(`Deleting ${toDelete.length} outdated CIViC statements from GraphKB`); - await Promise.all(toDelete.map(async statement => conn.deleteRecord( - 'Statement', rid(statement), - ))); - } catch (err) { - logger.error(err); + // Logging detailed EvidenceItem processing counts + logger.info('Processing errors report:'); + + for (const [key, value] of Object.entries(errorSourceIds)) { + logger.info(`${key}: ${value.size}`); + // Also formatting Maps into objects for saving to file downstream + errorSourceIds[key] = Object.fromEntries(errorSourceIds[key]); } - logger.info(JSON.stringify(counts)); + // DELETING UNWANTED GRAPHKB STATEMENTS + // sourceIds no longer in CIViC (not accepted/submitted by trustedCurators) but still in GraphKB + const allIdsFromCivic = new Set(evidenceItems.map(r => r.id.toString())); + const toDelete = new Set([...sourceIdsFromGKB].filter(x => !allIdsFromCivic.has(x))); + logger.info(); + logger.info('***** Deprecated items *****'); + logger.warn(`${toDelete.size} deprecated ${SOURCE_DEFN.name} Evidence Items still in GraphKB Statement`); + + if (toDelete.size > 0) { + logger.info(`sourceIds: ${Array.from(toDelete)}`); + } + + // GraphKB Statements Soft-deletion + if (!noUpdate && toDelete.size > 0) { + const deletedCount = await deleteStatements(conn, { + source: sourceRid, + sourceIds: Array.from(toDelete), + }); + const attempts = deletedCount.success + deletedCount.err; + logger.info(`${deletedCount.success}/${attempts} soft-deleted statements`); + + if (countsST) { + countsST.delete.err += deletedCount.err; + countsST.delete.success += deletedCount.success; + } else { + countsST = { delete: { err: deletedCount.err, success: deletedCount.success } }; + } + } + + // Logging Statement CRUD operations counts + if (countsST) { + logger.info(); + logger.info('***** GraphKB Statement records CRUD operations report: *****'); + + for (const op of Object.keys(countsST)) { + logger.info(`${op}: ${JSON.stringify(countsST[op])}`); + } + } + + // SAVING LOGGED ERRORS TO FILE + const errorFileContent = { + ...errorSourceIds, + validationErrors: validationErrorList, + }; const errorJson = `${errorLogPrefix}-civic.json`; - logger.info(`writing ${errorJson}`); - fs.writeFileSync(errorJson, JSON.stringify(errorList, null, 2)); + logger.info(); + logger.info(`***** Global report: *****\nwriting ${errorJson}`); + fs.writeFileSync(errorJson, JSON.stringify(errorFileContent, null, 2)); }; - module.exports = { - SOURCE_DEFN, - specs: { validateEvidenceSpec }, upload, }; From 84615a5333f0183f93f7cb44600ba99e39e48e46 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 27 May 2024 16:29:50 -0700 Subject: [PATCH 21/28] Linting and removing extra --- src/civic/index.js | 10 ---------- src/civic/statement.js | 10 +++------- 2 files changed, 3 insertions(+), 17 deletions(-) diff --git a/src/civic/index.js b/src/civic/index.js index d221998..51a8823 100644 --- a/src/civic/index.js +++ b/src/civic/index.js @@ -185,15 +185,9 @@ const upload = async ({ logger.info(`\n\n${'#'.repeat(80)}\n## PROCESSING RECORDS\n${'#'.repeat(80)}\n`); let recordNumber = 0; - // !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! - const specialId = null; // '11681'; // - // MAIN LOOP // Looping through Evidence Items for (const [id, evidenceItem] of Object.entries(evidenceItemsById)) { - if (specialId && id !== specialId) { - continue; - } /* PROCESSING EVIDENCEITEMS */ recordNumber++; @@ -393,10 +387,6 @@ const upload = async ({ } } countsST = incrementCounts(countsST, loaclCountsST); - - if (specialId && id === specialId) { - process.exit(); - } } logger.info(`\n\n${'#'.repeat(80)}\n## END OF RECORD PROCESSING\n${'#'.repeat(80)}\n`); diff --git a/src/civic/statement.js b/src/civic/statement.js index 033d398..e4049bc 100644 --- a/src/civic/statement.js +++ b/src/civic/statement.js @@ -11,13 +11,9 @@ const { logger } = require('../logging'); * @param {object} fromGkb actual content from GraphKB * @returns {boolean} whether both contents are matching or not */ -const isMatching = ({ fromCivic, fromGkb, p = ['conditions', 'subject'] }) => { - const c = JSON.stringify(_.pick(fromCivic, ...p)); - const g = JSON.stringify(_.pick(fromGkb, ...p)); - const rel = c === g; - // console.log({ p, c, g, rel }); - return rel; // JSON.stringify(_.pick(fromCivic, ...p)) === JSON.stringify(_.pick(fromGkb, ...p)); -}; +const isMatching = ({ fromCivic, fromGkb, p = ['conditions', 'subject'] }) => ( + JSON.stringify(_.pick(fromCivic, ...p)) === JSON.stringify(_.pick(fromGkb, ...p)) +); /** * Evaluate if a statement needs to be updated From 48a4e873728996bd9dbfa1f71004c5e90f9a804c Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Tue, 28 May 2024 10:47:48 -0700 Subject: [PATCH 22/28] getTherapy() on lowercases --- src/civic/therapy.js | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/civic/therapy.js b/src/civic/therapy.js index f9346d8..dded526 100644 --- a/src/civic/therapy.js +++ b/src/civic/therapy.js @@ -59,21 +59,22 @@ const resolveTherapies = (evidenceItem) => { */ const getTherapy = async (conn, therapyRecord) => { const name = therapyRecord.name.toLowerCase().trim(); + const ncitId = therapyRecord.ncitId.toLowerCase().trim(); // Trying with the ncitId - if (therapyRecord.ncitId) { + if (ncitId) { try { return await conn.getUniqueRecordBy({ filters: [ { source: { filters: { name: NCIT_SOURCE_DEFN.name }, target: 'Source' } }, - { sourceId: therapyRecord.ncitId }, + { sourceId: ncitId }, { name }, ], sort: orderPreferredOntologyTerms, target: 'Therapy', }); } catch (err) { - logger.error(`had NCIt therapy mapping (${therapyRecord.ncitId}) named (${therapyRecord.name}) but failed to fetch from graphkb: ${err}`); + logger.error(`had NCIt therapy mapping (${ncitId}) named (${therapyRecord.name}) but failed to fetch from graphkb`); throw err; } } From 59fffe64be38a5b61001f4408069ee641a0fc9a9 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Tue, 28 May 2024 11:33:50 -0700 Subject: [PATCH 23/28] Fix bug in getTherapy --- src/civic/therapy.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/civic/therapy.js b/src/civic/therapy.js index dded526..8dab93a 100644 --- a/src/civic/therapy.js +++ b/src/civic/therapy.js @@ -1,6 +1,7 @@ const { logger } = require('../logging'); const { ncit: NCIT_SOURCE_DEFN } = require('../sources'); const { orderPreferredOntologyTerms, rid } = require('../graphkb'); +const { isString } = require('lodash'); /** @@ -59,7 +60,9 @@ const resolveTherapies = (evidenceItem) => { */ const getTherapy = async (conn, therapyRecord) => { const name = therapyRecord.name.toLowerCase().trim(); - const ncitId = therapyRecord.ncitId.toLowerCase().trim(); + const ncitId = therapyRecord.ncitId && isString(therapyRecord.ncitId) + ? therapyRecord.ncitId.toLowerCase().trim() + : therapyRecord.ncitId; // Trying with the ncitId if (ncitId) { From bce6f4c48ee12c44c2ff2c533a5ba9bdef4dc0e6 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Tue, 28 May 2024 12:45:21 -0700 Subject: [PATCH 24/28] linting --- src/civic/therapy.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/civic/therapy.js b/src/civic/therapy.js index 8dab93a..f58d828 100644 --- a/src/civic/therapy.js +++ b/src/civic/therapy.js @@ -1,7 +1,6 @@ const { logger } = require('../logging'); const { ncit: NCIT_SOURCE_DEFN } = require('../sources'); const { orderPreferredOntologyTerms, rid } = require('../graphkb'); -const { isString } = require('lodash'); /** @@ -60,7 +59,7 @@ const resolveTherapies = (evidenceItem) => { */ const getTherapy = async (conn, therapyRecord) => { const name = therapyRecord.name.toLowerCase().trim(); - const ncitId = therapyRecord.ncitId && isString(therapyRecord.ncitId) + const ncitId = therapyRecord.ncitId && typeof therapyRecord.ncitId === 'string' ? therapyRecord.ncitId.toLowerCase().trim() : therapyRecord.ncitId; From 47b5256cf281a60ebb3314b779a33f44461ac3d4 Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Tue, 28 May 2024 16:43:57 -0700 Subject: [PATCH 25/28] get therapy by ncit only --- src/civic/therapy.js | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/src/civic/therapy.js b/src/civic/therapy.js index f58d828..e64d527 100644 --- a/src/civic/therapy.js +++ b/src/civic/therapy.js @@ -63,8 +63,10 @@ const getTherapy = async (conn, therapyRecord) => { ? therapyRecord.ncitId.toLowerCase().trim() : therapyRecord.ncitId; - // Trying with the ncitId + let originalError; + if (ncitId) { + // Trying with the ncitId and the name try { return await conn.getUniqueRecordBy({ filters: [ @@ -76,15 +78,33 @@ const getTherapy = async (conn, therapyRecord) => { target: 'Therapy', }); } catch (err) { - logger.error(`had NCIt therapy mapping (${ncitId}) named (${therapyRecord.name}) but failed to fetch from graphkb`); + logger.warn(`Failed to fetch therapy with NCIt id (${ncitId}) & name (${therapyRecord.name}) from graphkb`); + } + + // Trying with the ncitId only + // Choosing the most recently created one + try { + const matchingTherapies = await conn.getRecords({ + filters: { + AND: [ + { source: { filters: { name: NCIT_SOURCE_DEFN.name }, target: 'Source' } }, + { sourceId: ncitId }, + ], + }, + target: 'Therapy', + }); + // In-place sorting + matchingTherapies.sort((a, b) => b.createdAt - a.createdAt); + // returning 1st one (latest created) + return matchingTherapies[0]; + } catch (err) { + logger.error(`Failed to fetch therapy with NCIt id (${ncitId}) from graphkb`); throw err; } } // Trying instead with the name // Using the getTherapy method from the connection object - let originalError; - try { // With the name as-is first return await conn.getTherapy(name); From 2d45778c1ab9d56c447dabbea5734824f53bc0bf Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Wed, 29 May 2024 10:00:42 -0700 Subject: [PATCH 26/28] Therapy error logging fix --- src/civic/evidenceItem.js | 3 +-- src/civic/therapy.js | 5 ++--- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/civic/evidenceItem.js b/src/civic/evidenceItem.js index 40c4b8c..f999f8d 100644 --- a/src/civic/evidenceItem.js +++ b/src/civic/evidenceItem.js @@ -145,8 +145,7 @@ const processCombination = async (conn, { (rawRecord.therapyInteractionType || '').toLowerCase(), // combinationType ); } catch (err) { - logger.error(`failed to fetch therapy: ${JSON.stringify(rawRecord.therapies)}`); - throw err; + throw new Error(`failed to fetch therapy: ${JSON.stringify(rawRecord.therapies)}\nerr:${err}`); } } diff --git a/src/civic/therapy.js b/src/civic/therapy.js index e64d527..021f88f 100644 --- a/src/civic/therapy.js +++ b/src/civic/therapy.js @@ -63,8 +63,6 @@ const getTherapy = async (conn, therapyRecord) => { ? therapyRecord.ncitId.toLowerCase().trim() : therapyRecord.ncitId; - let originalError; - if (ncitId) { // Trying with the ncitId and the name try { @@ -103,6 +101,8 @@ const getTherapy = async (conn, therapyRecord) => { } } + let originalError; + // Trying instead with the name // Using the getTherapy method from the connection object try { @@ -122,7 +122,6 @@ const getTherapy = async (conn, therapyRecord) => { } catch (err) { } // Logging errors - logger.error(originalError); throw originalError; }; From fade4018a63e9e3ae6e3868b3fe32ae0126281dc Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Mon, 3 Jun 2024 10:02:32 -0700 Subject: [PATCH 27/28] Add support for casesToReview --- src/civic/index.js | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/civic/index.js b/src/civic/index.js index 51a8823..2f0aee3 100644 --- a/src/civic/index.js +++ b/src/civic/index.js @@ -181,6 +181,7 @@ const upload = async ({ processingIntoCombinations: new Map(), relevance: new Map(), }; + const casesToReview = new Map(); logger.info(`\n\n${'#'.repeat(80)}\n## PROCESSING RECORDS\n${'#'.repeat(80)}\n`); let recordNumber = 0; @@ -366,9 +367,15 @@ const upload = async ({ // DELETE if (!noUpdate && toDelete.length > 0) { - loaclCountsST.delete = await deleteStatements(conn, - { rids: toDelete.map(el => el['@rid']) }, - ); + const rids = toDelete.map(el => el['@rid']); + + if (processCombinationErrors > 0) { + // Do not delete any statements if some combinations have processing errors + logger.info(`${toDelete.length} unmatched statement(s) to be reviewed for deletion`); + casesToReview.set(id, rids); + } else { + loaclCountsST.delete = await deleteStatements(conn, { rids }); + } } // CREATE @@ -387,6 +394,8 @@ const upload = async ({ } } countsST = incrementCounts(countsST, loaclCountsST); + + // END OF MAIN LOOP } logger.info(`\n\n${'#'.repeat(80)}\n## END OF RECORD PROCESSING\n${'#'.repeat(80)}\n`); @@ -433,6 +442,13 @@ const upload = async ({ } } + // Logging processing error cases to be reviewed, + // so a reviewer can decide if corresponding statements need to be deleted or not + logger.info(); + logger.info('***** Cases to be reviewed for deletion *****'); + logger.warn(`${casesToReview.size} Evidence Item(s) with processing errors leading to unmatched Statement(s)`); + casesToReview.forEach((v, k) => logger.info(`${k} -> ${JSON.stringify(v)}`)); + // Logging Statement CRUD operations counts if (countsST) { logger.info(); From 2bd2f6f14c2e62808c9296202270fae45f0c1f6c Mon Sep 17 00:00:00 2001 From: mathieulemieux Date: Tue, 4 Jun 2024 14:06:32 -0700 Subject: [PATCH 28/28] Version bump from 8.0.1 to 8.0.2 --- package-lock.json | 4 ++-- package.json | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/package-lock.json b/package-lock.json index 07f31ab..5212d6a 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@bcgsc-pori/graphkb-loader", - "version": "8.0.1", + "version": "8.0.2", "lockfileVersion": 2, "requires": true, "packages": { "": { "name": "@bcgsc-pori/graphkb-loader", - "version": "8.0.1", + "version": "8.0.2", "license": "GPL-3", "dependencies": { "@bcgsc-pori/graphkb-parser": "^1.1.1", diff --git a/package.json b/package.json index 42db176..b4b5938 100644 --- a/package.json +++ b/package.json @@ -1,7 +1,7 @@ { "name": "@bcgsc-pori/graphkb-loader", "main": "src/index.js", - "version": "8.0.1", + "version": "8.0.2", "repository": { "type": "git", "url": "https://github.com/bcgsc/pori_graphkb_loader.git"