Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

task/KBDEV-1194 get updated studies for last two weeks in clinicaltrials #143

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 15 additions & 22 deletions bin/load.js
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ const cosmicResistance = require('../src/cosmic/resistance');
const cosmicFusions = require('../src/cosmic/fusions');

const API_MODULES = {
asco, clinicaltrialsgov, dgidb, docm, fdaApprovals, moa, oncotree
mathieulemieux marked this conversation as resolved.
Show resolved Hide resolved
asco, dgidb, docm, fdaApprovals, moa, oncotree,
};

const FILE_MODULES = {
Expand All @@ -45,7 +45,6 @@ const FILE_MODULES = {
cancerhotspots,
cgi,
cgl,
clinicaltrialsgov,
mathieulemieux marked this conversation as resolved.
Show resolved Hide resolved
diseaseOntology,
drugbank,
ensembl,
Expand All @@ -54,8 +53,8 @@ const FILE_MODULES = {
ncit,
ncitFdaXref,
ontology,
sources,
refseq,
sources,
uberon,
variants,
};
Expand All @@ -70,11 +69,12 @@ const ALL_MODULES = {
...FILE_MODULES,
...COSMIC_MODULES,
civic,
clinicaltrialsgov,
};

const parser = createOptionsMenu();

const subparsers = parser.add_subparsers({ help: 'Sub-command help', required: true });
const subparsers = parser.add_subparsers({ dest: 'subparser_name', help: 'Sub-command help', required: true });
const apiParser = subparsers.add_parser('api');
apiParser.add_argument('module', {
choices: Object.keys(API_MODULES),
Expand Down Expand Up @@ -103,6 +103,11 @@ civicParser.add_argument('--trustedCurators', {
nargs: '+',
});

// Register the dedicated `clinicaltrialsgov` sub-command. Its `--days` flag is converted
// with Number and is parsed together with the other command-line options.
const clinicaltrialsgovParser = subparsers.add_parser('clinicaltrialsgov');
clinicaltrialsgovParser.add_argument('--days', {
help: 'Load new and existing studies added or modified (last update posted) in the last # of days',
type: Number,
});

const cosmicParser = subparsers.add_parser('cosmic');
cosmicParser.add_argument('module', {
Expand All @@ -118,36 +123,24 @@ cosmicParser.add_argument('classification', {
type: fileExists,
});

const { module: moduleName, input, ...options } = parser.parse_args();
const {
subparser_name, module: moduleName, input, ...options
} = parser.parse_args();

let loaderFunction;

if (input) {
loaderFunction = ALL_MODULES[moduleName || 'civic'].uploadFile;
loaderFunction = ALL_MODULES[moduleName || subparser_name].uploadFile;
} else {
debugger;
loaderFunction = ALL_MODULES[moduleName || 'civic'].upload;
loaderFunction = ALL_MODULES[moduleName || subparser_name].upload;
}

const loaderOptions = { ...options };

if (input) {
debugger;
if (moduleName === 'clinicaltrialsgov') {
if (fs.lstatSync(input).isDirectory()) {
const files = fs.readdirSync(input)
.map(filename => path.join(input, filename));
loaderOptions.files = files;
} else {
loaderOptions.files = [input];
}
} else {
loaderOptions.filename = input;

if (options.module === 'cosmic') {
loaderOptions.classification = options.classification;
}
}
loaderOptions.filename = input;
mathieulemieux marked this conversation as resolved.
Show resolved Hide resolved
}

runLoader(options, loaderFunction, loaderOptions)
Expand Down
10 changes: 8 additions & 2 deletions src/clinicaltrialsgov/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,14 @@ This module loads clinical trials data into GraphKB from [https://www.clinicaltr
> :warning: Since this loader produces statements, ontology and vocabulary data should be loaded first


Uses REST API to load clinical trials data
Uses REST API to load clinical trials data.
```bash
node bin/load.js clinicaltrialsgov
```

By default, this loader loads all studies related to cancer, which is a very large number of records.
Use `--days` to load only the new and existing studies that were added or modified (last update posted) in the last # of days.
```bash
node bin/load.js api clinicaltrialsgov
node bin/load.js clinicaltrialsgov --days 7
```
The command above loads the studies added or modified in the last week.
169 changes: 39 additions & 130 deletions src/clinicaltrialsgov/index.js
Original file line number Diff line number Diff line change
@@ -1,21 +1,10 @@
/**
* Module to import clinical trials data exported from clinicaltrials.gov
*
* 1. Perform a search on their site, for example https://clinicaltrials.gov/ct2/results?cond=Cancer&cntry=CA&Search=Apply&recrs=b&recrs=a&age_v=&gndr=&type=Intr&rslt=
* 2. Click their Download link/Button
* 3. Adjust the settings in the Pop up dialog (Include all studies, all columns, and export as XML)
* 4. Download and save the file
* 5. Upload the file to GraphKB using this module
*
* @module importer/clinicaltrialsgov
*/
const path = require('path');
const Ajv = require('ajv');
const fs = require('fs');

const {
loadXmlToJson,
parseXmlToJson,
checkSpec,
requestWithRetry,
} = require('../util');
Expand All @@ -25,15 +14,13 @@ const {
} = require('../graphkb');
const { logger } = require('../logging');
const { clinicalTrialsGov: SOURCE_DEFN } = require('../sources');
const { api: apiSpec, rss: rssSpec } = require('./specs.json');
const { studies: studiesSpecs } = require('./specs.json');

const BASE_URL = 'https://clinicaltrials.gov/api/v2/studies';
const RSS_URL = 'https://clinicaltrials.gov/ct2/results/rss.xml';
const CACHE = {};

const ajv = new Ajv();
const validateAPITrialRecord = ajv.compile(apiSpec);
const validateRssFeed = ajv.compile(rssSpec);
const validateAPITrialRecord = ajv.compile(studiesSpecs);


/**
Expand Down Expand Up @@ -124,11 +111,11 @@ const processPhases = (phaseList) => {


/**
* Process the XML trial record. Attempt to link the drug and/or disease information
* Process the record. Attempt to link the drug and/or disease information
*
* @param {object} opt
* @param {ApiConnection} opt.conn the GraphKB connection object
* @param {object} opt.record the XML record (pre-parsed into JSON)
* @param {object} opt.record the record (pre-parsed into JSON)
* @param {object|string} opt.source the 'source' record for clinicaltrials.gov
*
* @todo: handle updates to existing clinical trial records
Expand Down Expand Up @@ -306,98 +293,66 @@ const fetchAndLoadById = async (conn, nctID, { upsert = false } = {}) => {
return trial;
};

/**
 * Format a date as a zero-padded `YYYY-MM-DD` string (local time).
 *
 * Month and day are padded to two digits so the result is a well-formed
 * ISO-style calendar date (e.g. `2024-01-05` rather than `2024-1-5`).
 *
 * @param {Date} date the date to format
 * @returns {string} the formatted date
 */
const formatDate = (date) => {
    const year = date.getFullYear();
    // getMonth() is zero-based, so shift to 1-12 before padding
    const month = String(date.getMonth() + 1).padStart(2, '0');
    const day = String(date.getDate()).padStart(2, '0');
    return `${year}-${month}-${day}`;
};

/**
* Uploads a file exported from clinicaltrials.gov as XML
* @param {object} opt
* @param {ApiConnection} opt.conn the GraphKB connection object
* @param {string} opt.filename the path to the XML export
* Loading all clinical trials related to cancer
*/
const uploadFiles = async ({ conn, files }) => {
const upload = async ({ conn, maxRecords, days }) => {
const source = await conn.addSource(SOURCE_DEFN);

logger.info(`loading ${files.length} records`);
const counts = {
error: 0, success: 0,
};

for (const filepath of files) {
const filename = path.basename(filepath);

if (!filename.endsWith('.xml')) {
logger.warn(`ignoring non-xml file: ${filename}`);
continue;
}
let options = {};

try {
const xml = await loadXmlToJson(filepath);
const record = convertAPIRecord(xml);
await processRecord({
conn, record, source, upsert: true,
});
counts.success++;
} catch (err) {
logger.error(`[${filename}] ${err}`);
counts.error++;
}
if (days) {
const startDate = new Date(Date.now() - days * 24 * 60 * 60 * 1000);
options = { 'query.term': `AREA[LastUpdatePostDate]RANGE[${formatDate(startDate)},MAX]` };
logger.info(`loading records updated from ${formatDate(startDate)} to ${formatDate(new Date())}`);
}
logger.info(JSON.stringify(counts));
};

const upload = async ({ conn }) => {
const source = await conn.addSource(SOURCE_DEFN);


mathieulemieux marked this conversation as resolved.
Show resolved Hide resolved

let trials = await requestWithRetry({
json: true,
method: 'GET',
qs: {
aggFilters: 'studyType:int',
countTotal: true,
pageSize: 1000,
'query.cond': 'cancer',
},
uri: BASE_URL,
});


logger.info(`loading ${trials.totalCount} records`);
const counts = {
error: 0, success: 0,
};

for (const trial of trials.studies) {
try {
const record = convertAPIRecord(trial);
await processRecord({
conn, record, source, upsert: true,
});
counts.success++;
} catch (err) {
counts.error++;
logger.error(`[${trial}] ${err}`);
}
}

let next = trials.nextPageToken;
let processCount = 1,
next = true,
nextToken,
total = maxRecords;

while (next) {
trials = await requestWithRetry({
// Later keys win in an object spread, so the fresh token must come AFTER the
// existing options. Otherwise the pageToken stored on the previous iteration
// overrides it and the same page is requested again.
if (nextToken) {
    options = { ...options, pageToken: nextToken };
}
const trials = await requestWithRetry({
json: true,
method: 'GET',
qs: {
aggFilters: 'studyType:int',
countTotal: true,
pageSize: 1000,
pageToken: next,
'query.cond': 'cancer',
sort: 'LastUpdatePostDate',
...options,
},
uri: BASE_URL,
});

if (!total) {
total = trials.totalCount;
}

if (processCount > total) {
break;
}

for (const trial of trials.studies) {
if (processCount > total) {
break;
}

try {
const record = convertAPIRecord(trial);
logger.info(`processing (${processCount}/${total}) record: ${record.sourceId}`);
processCount++;
await processRecord({
conn, record, source, upsert: true,
});
Expand All @@ -408,52 +363,8 @@ const upload = async ({ conn }) => {
}
}

next = trials.nextPageToken;
}
logger.info(JSON.stringify(counts));
};



/**
* Parses clinical trial RSS Feed results for clinical trials in Canada and the US
* which were updated in the last 2 weeks
*/
const loadNewTrials = async ({ conn }) => {
// ping them both to get the list of recently updated trials
const recentlyUpdatedTrials = [];

const resp = await requestWithRetry({
method: 'GET',
qs: {
cond: 'cancer', // cancer related trials
count: 10000,
lup_d: 14,
rcv_d: '',
recrs: 'abdef',
sel_rss: 'mod14', // mod14 for last 2 weeks updated
type: 'Intr', // interventional only
},
uri: RSS_URL,
});
const xml = await parseXmlToJson(resp);
fs.writeFileSync('output.json', JSON.stringify(xml, null, 2));
checkSpec(validateRssFeed, xml);
recentlyUpdatedTrials.push(
...xml.rss.channel[0].item.map(item => item.guid[0]._),
);

logger.info(`loading ${recentlyUpdatedTrials.length} recently updated trials`);
const counts = { error: 0, success: 0 };

for (const trialId of recentlyUpdatedTrials) {
try {
await fetchAndLoadById(conn, trialId, { upsert: true });
counts.success++;
} catch (err) {
counts.error++;
logger.error(`[${trialId}] ${err}`);
}
nextToken = trials.nextPageToken;
next = nextToken !== undefined;
}
logger.info(JSON.stringify(counts));
};
Expand All @@ -463,7 +374,5 @@ module.exports = {
convertAPIRecord,
fetchAndLoadById,
kb: true,
loadNewTrials,
upload,
uploadFiles,
};
Loading
Loading