Skip to content

Commit

Permalink
Update the Ensembl loader to handle files without proteins
Browse files: browse the repository at this point in the history
  • Loading branch information
sshugsc committed Feb 21, 2024
1 parent c942558 commit 8230d59
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 54 deletions.
118 changes: 65 additions & 53 deletions src/ensembl/index.js
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,10 @@ const uploadFile = async (opt) => {
for (const row of rows) {
[row.geneId, row.geneIdVersion] = row.geneIdVersion.toLowerCase().split('.');
[row.transcriptId, row.transcriptIdVersion] = row.transcriptIdVersion.toLowerCase().split('.');
[row.proteinId, row.proteinIdVersion] = row.proteinIdVersion.toLowerCase().split('.');

if (row.proteinIdVersion !== '') {
[row.proteinId, row.proteinIdVersion] = row.proteinIdVersion.toLowerCase().split('.');
}
}

const source = await conn.addSource(SOURCE_DEFN);
Expand Down Expand Up @@ -292,10 +295,15 @@ const uploadFile = async (opt) => {
sourceId: transcriptId,
sourceIdVersion: transcriptIdVersion,
});
const proteinVersion = generateCacheKey({
sourceId: proteinId,
sourceIdVersion: proteinIdVersion,
});

let proteinVersion;

if (proteinIdVersion !== '') {
proteinVersion = generateCacheKey({
sourceId: proteinId,
sourceIdVersion: proteinIdVersion,
});
}

logger.info(`processing ${geneId}.${geneIdVersion || ''} (${index} / ${rows.length})`);

Expand Down Expand Up @@ -416,70 +424,71 @@ const uploadFile = async (opt) => {


// protein
if (preLoadedProtein.has(proteinVersion)) {
visited[proteinVersion] = proteinList.find((protein) => `${protein.sourceId}-${protein.sourceIdVersion}` === proteinVersion);
visited[proteinId] = proteinList.find((protein) => `${protein.sourceId}` === proteinId && protein.sourceIdVersion === null);
skip++;
} else {
if (visited[proteinVersion] === undefined) {
visited[proteinVersion] = await conn.addRecord({
content: {
biotype: 'protein',
source: rid(source),
sourceId: record.proteinId,
sourceIdVersion: record.proteinIdVersion,
},
existsOk: true,
target: 'Feature',
});
}
if (visited[proteinId] === undefined) {
visited[proteinId] = await conn.addRecord({
content: {
biotype: 'protein',
source: rid(source),
sourceId: record.proteinId,
sourceIdVersion: null,
},
existsOk: true,
target: 'Feature',
});
// protein -> elementof -> transcript
if (proteinVersion) {
if (preLoadedProtein.has(proteinVersion)) {
visited[proteinVersion] = proteinList.find((protein) => `${protein.sourceId}-${protein.sourceIdVersion}` === proteinVersion);
visited[proteinId] = proteinList.find((protein) => `${protein.sourceId}` === proteinId && protein.sourceIdVersion === null);
skip++;
} else {
if (visited[proteinVersion] === undefined) {
visited[proteinVersion] = await conn.addRecord({
content: {
biotype: 'protein',
source: rid(source),
sourceId: record.proteinId,
sourceIdVersion: record.proteinIdVersion,
},
existsOk: true,
target: 'Feature',
});
}
if (visited[proteinId] === undefined) {
visited[proteinId] = await conn.addRecord({
content: {
biotype: 'protein',
source: rid(source),
sourceId: record.proteinId,
sourceIdVersion: null,
},
existsOk: true,
target: 'Feature',
});
// protein -> elementof -> transcript
await conn.addRecord({
content: {
in: rid(transcript), out: rid(visited[proteinId]), source: rid(source),
},
existsOk: true,
fetchExisting: false,
target: 'elementof',
});
}

await conn.addRecord({
content: {
in: rid(transcript), out: rid(visited[proteinId]), source: rid(source),
in: rid(visited[proteinVersion]),
out: rid(visited[proteinId]),
source: rid(source),
},
existsOk: true,
fetchExisting: false,
target: 'elementof',
target: 'generalizationof',
});
}

// versioned: protein -> elementof -> transcript
await conn.addRecord({
content: {
in: rid(visited[proteinVersion]),
out: rid(visited[proteinId]),
in: rid(versionedTranscript),
out: rid(visited[proteinVersion]),
source: rid(source),
},
existsOk: true,
fetchExisting: false,
target: 'generalizationof',
target: 'elementof',
});
}

// versioned: protein -> elementof -> transcript
await conn.addRecord({
content: {
in: rid(versionedTranscript),
out: rid(visited[proteinVersion]),
source: rid(source),
},
existsOk: true,
fetchExisting: false,
target: 'elementof',
});



// transcript -> crossreferenceof -> refseq
if (record.refseqId) {
Expand Down Expand Up @@ -529,7 +538,10 @@ const uploadFile = async (opt) => {
logger.log('error', `failed cross-linking from ${gene.sourceid} to ${record.hgncId}`);
}
}
if (skip === 3) {
if (proteinVersion && skip === 3) {
counts.skip++;
continue;
} else if (proteinVersion === undefined && skip === 2) {
counts.skip++;
continue;
}
Expand Down
2 changes: 1 addition & 1 deletion src/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,7 @@ const convertRowFields = (header, row) => {
const result = {};

for (const [name, col] of Object.entries(header)) {
result[name] = row[col];
result[name] = row[col] || '';
}
return result;
};
Expand Down

0 comments on commit 8230d59

Please sign in to comment.