Skip to content

Commit

Permalink
Fix bug in upstream data with letter case in gene name
Browse files Browse the repository at this point in the history
  • Loading branch information
eweitz committed Nov 30, 2021
1 parent e74b67b commit 89bbd93
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 4 deletions.
11 changes: 9 additions & 2 deletions src/js/gene-cache.js
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ function getEnsemblId(ensemblPrefix, slimEnsemblId) {
/** Parse a gene cache TSV file, return array of useful transforms */
function parseCache(rawTsv, orgName) {
const names = [];
const nameCaseMap = {};
const namesById = {};
const idsByName = {};
const lociByName = {};
Expand Down Expand Up @@ -113,6 +114,7 @@ function parseCache(rawTsv, orgName) {
const locus = [chromosome, start, stop];

names.push(gene);
nameCaseMap[gene.toLowerCase()] = gene;
namesById[ensemblId] = gene;
idsByName[gene] = ensemblId;
lociByName[gene] = locus;
Expand All @@ -124,7 +126,10 @@ function parseCache(rawTsv, orgName) {
const sortedAnnots = parseAnnots(preAnnots);
perfTimes.parseAnnots = Math.round(performance.now() - t1);

return [names, namesById, idsByName, lociByName, lociById, sortedAnnots];
return [
names, nameCaseMap, namesById, idsByName, lociByName, lociById,
sortedAnnots
];
}

/** Get organism's metadata fields */
Expand Down Expand Up @@ -181,12 +186,14 @@ export default async function initGeneCache(orgName, ideo, cacheDir=null) {
perfTimes.fetch = Math.round(fetchEndTime - fetchStartTime);

const [
interestingNames, namesById, idsByName, lociByName, lociById, sortedAnnots
interestingNames, nameCaseMap, namesById, idsByName,
lociByName, lociById, sortedAnnots
] = parseCache(data, orgName);
perfTimes.parseCache = Math.round(performance.now() - fetchEndTime);

ideo.geneCache = {
interestingNames, // Array ordered by general or scholarly interest
nameCaseMap, // Map of lowercase gene names to properly cased gene names
namesById,
idsByName,
lociByName, // Object of gene positions, keyed by gene name
Expand Down
14 changes: 12 additions & 2 deletions src/js/kit/related-genes.js
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ function maybeGeneSymbol(ixn, gene) {
function isInteractionRelevant(rawIxn, gene, nameId, seenNameIds, ideo) {
let isGeneSymbol;
if ('geneCache' in ideo && gene.name) {
isGeneSymbol = rawIxn in ideo.geneCache.lociByName;
isGeneSymbol = rawIxn.toLowerCase() in ideo.geneCache.nameCaseMap;
} else {
isGeneSymbol = maybeGeneSymbol(rawIxn, gene);
}
Expand Down Expand Up @@ -263,14 +263,24 @@ function fetchGenesFromCache(names, type, ideo) {

const hits = names.map(name => {

if (!locusMap[name]) {
const nameLc = name.toLowerCase();

if (!locusMap[name] && !cache.nameCaseMap[nameLc]) {
if (isSymbol) {
throwGeneNotFound(name, ideo);
} else {
return;
}
}

// Canonicalize the name's letter case if it is wrong in the upstream
// data source. This can sometimes happen in WikiPathways; e.g. searching
// interactions for rat Pten includes a result for "PIK3CA".
// In that case, this corrects PIK3CA to Pik3ca.
if (isSymbol && !locusMap[name] && cache.nameCaseMap[nameLc]) {
name = cache.nameCaseMap[nameLc];
}

const locus = locusMap[name];
const symbol = isSymbol ? name : nameMap[name];
const ensemblId = isSymbol ? nameMap[name] : name;
Expand Down

0 comments on commit 89bbd93

Please sign in to comment.