From ff674cc52aff9eead74263527e5a6869aaf86ff5 Mon Sep 17 00:00:00 2001
From: Davi Ortega
Date: Tue, 4 Dec 2018 01:58:46 -0800
Subject: [PATCH] taxonomy getParents and getParentsMany

---
 src/Taxonomy.js       | 131 +++++
 src/Taxonomy.tests.js | 993 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1124 insertions(+)
 create mode 100644 src/Taxonomy.js
 create mode 100644 src/Taxonomy.tests.js

diff --git a/src/Taxonomy.js b/src/Taxonomy.js
new file mode 100644
index 0000000..071db8e
--- /dev/null
+++ b/src/Taxonomy.js
@@ -0,0 +1,131 @@
+'use strict'
+
+const https = require('https')
+const bunyan = require('bunyan')
+
+const NodeMist3 = require('./NodeMist3Abstract')
+
+const kDefaults = {
+  // Number of getParents requests that getParentsMany keeps in flight per batch.
+  maxRequests: 30
+}
+
+/**
+ * Client for the taxonomy "parents" endpoint (/v1/taxonomy/<taxid>/parents).
+ *
+ * NOTE(review): this.httpsOptions is not defined in this file; presumably the
+ * NodeMist3 base class provides it - confirm.
+ */
+module.exports =
+class Taxonomy extends NodeMist3 {
+  /**
+   * @param {Object} [options] - handed to the NodeMist3 constructor unchanged.
+   * @param {string} [logLevel='info'] - level of the bunyan logger.
+   */
+  constructor(options, logLevel = 'info') {
+    super(options)
+    this.log = bunyan.createLogger(
+      {
+        name: 'node-mist3-taxonomy',
+        level: logLevel
+      }
+    )
+  }
+
+  /**
+   * Request the parents of one taxid.
+   *
+   * @param {number|string} taxid - interpolated into the request path.
+   * @param {Object} [options]
+   * @param {boolean} [options.skipFailed=false] - when truthy, a 504 response or
+   *   an unparsable body is logged as a warning and resolves to [] instead of
+   *   rejecting.
+   * @returns {Promise<*>} the JSON-parsed response body, or [] for a skipped
+   *   failure. Without skipFailed a 504 rejects with the status code and an
+   *   unparsable body with the parse error; request or response stream errors
+   *   always reject with the emitted error.
+   */
+  getParents(taxid, options = {skipFailed: false}) {
+    this.log.info(`Getting parents of taxid: ${taxid} with options ${JSON.stringify(options)}`)
+    const skipFailed = Boolean(options && options.skipFailed)
+    // Per-call copy so the shared this.httpsOptions is not rewritten by every request.
+    const requestOptions = Object.assign({}, this.httpsOptions, {
+      method: 'GET',
+      path: `/v1/taxonomy/${taxid}/parents`
+    })
+    return new Promise((resolve, reject) => {
+      // Single place for the skipFailed policy: warn and yield [], or log and reject.
+      const giveUp = (message, reason) => {
+        if (skipFailed) {
+          this.log.warn(message)
+          resolve([])
+          return
+        }
+        this.log.error(message)
+        reject(reason)
+      }
+      const request = https.request(requestOptions, (response) => {
+        if (response.statusCode === 504) {
+          // The body is never read on this path; drain it so the socket is released.
+          response.resume()
+          giveUp(`taxid ${taxid} ::: ${response.statusCode} - ${response.statusMessage}`, response.statusCode)
+          return
+        }
+        const chunks = []
+        response.on('data', (chunk) => chunks.push(chunk))
+        response.on('end', () => {
+          const body = Buffer.concat(chunks).toString()
+          let taxonomy = null
+          try {
+            taxonomy = JSON.parse(body)
+          }
+          catch (err) {
+            giveUp(body, err)
+            return
+          }
+          resolve(taxonomy)
+        })
+        response.on('error', (err) => {
+          this.log.fatal(err)
+          reject(err)
+        })
+      })
+      // Without this listener a connection-level failure (DNS, refused, reset) is an
+      // unhandled 'error' event and the promise never settles.
+      request.on('error', (err) => {
+        this.log.fatal(err)
+        reject(err)
+      })
+      request.end()
+    })
+  }
+
+  /**
+   * Request the parents of many taxids, kDefaults.maxRequests at a time.
+   *
+   * @param {Array} taxids - anything exposing forEach; it is copied, not modified.
+   * @param {Object} [options] - passed to getParents for every taxid.
+   * @param {boolean} [options.skipFailed=true]
+   * @returns {Promise<Array>} one getParents result per taxid, in input order.
+   *   Rejects as soon as any request of the current batch rejects.
+   */
+  getParentsMany(taxids, options = {skipFailed: true}) {
+    // splice() below consumes the queue, so never operate on the caller's list.
+    const pending = []
+    taxids.forEach((taxid) => pending.push(taxid))
+    this.log.info(`getting taxonomy from ${pending.length} taxids`)
+    const fetchInBatches = async () => {
+      const allResponses = []
+      this.log.debug(`Full list: ${pending}`)
+      while (pending.length !== 0) {
+        const batch = pending.splice(0, kDefaults.maxRequests)
+        this.log.debug(`new Batch: ${batch}`)
+        // Promise.all keeps the order of the batch and rejects on the first failure.
+        const responses = await Promise.all(batch.map((taxid) => this.getParents(taxid, options)))
+        responses.forEach((response) => allResponses.push(response))
+      }
+      return allResponses
+    }
+    return fetchInBatches()
+  }
+}
diff --git a/src/Taxonomy.tests.js b/src/Taxonomy.tests.js
new file mode 100644
index 0000000..e48df80
--- /dev/null
+++ b/src/Taxonomy.tests.js
@@ -0,0 +1,993 @@
+/* eslint-disable no-magic-numbers */
+'use strict'
+
+const chai = require('chai')
+const chaiAsPromised = require('chai-as-promised')
+
+chai.use(chaiAsPromised)
+
+const expect = chai.expect
+const should = chai.should()
+
+const Taxonomy = require('./Taxonomy')
+
+describe('Taxonomy', function() {
+  describe('getParents', function() {
+    it('should work with one', function() {
+      const taxid = 562
+      const expected = [
+        {
+          id: 131567,
+          parent_taxonomy_id: 1,
+          name: 'cellular organisms',
+          rank: 'no rank'
+        },
+        {
+          id: 2,
+          parent_taxonomy_id: 131567,
+          name: 'Bacteria',
+          rank: 'superkingdom'
+        },
+        {
+          id: 1224,
+          parent_taxonomy_id: 2,
+          name: 'Proteobacteria',
+          rank: 'phylum'
+        },
+        {
+          id: 1236,
+          parent_taxonomy_id: 1224,
+          name: 'Gammaproteobacteria',
+          rank: 'class'
+        },
+        {
+          id: 91347,
+          parent_taxonomy_id: 1236,
+          name:
'Enterobacterales', + rank: 'order' + }, + { + id: 543, + parent_taxonomy_id: 91347, + name: 'Enterobacteriaceae', + rank: 'family' + }, + { + id: 561, + parent_taxonomy_id: 543, + name: 'Escherichia', + rank: 'genus' + }, + { + id: 562, + parent_taxonomy_id: 561, + name: 'Escherichia coli', + rank: 'species' + } + ] + const taxonomy = new Taxonomy() + return taxonomy.getParents(taxid).then((results) => { + expect(results).eql(expected) + }) + }) + it('should reject with invalid taxid', function() { + this.timeout(60000) + const taxid = 11676 + const taxonomy = new Taxonomy() + return taxonomy.getParents(taxid).should.be.rejected + }) + }) + describe('getParentsMany', function() { + it('should work with valid list of ids', function () { + const taxids = [ + 2285, + 2287, + 2289, + 2293, + 235, + 235279, + 2374, + 24 + ] + const expected = [ + [ + { + id: 131567, + name: 'cellular organisms', + parent_taxonomy_id: 1, + rank: 'no rank' + }, + { + id: 2157, + name: 'Archaea', + parent_taxonomy_id: 131567, + rank: 'superkingdom' + }, + { + id: 1783275, + name: 'TACK group', + parent_taxonomy_id: 2157, + rank: 'no rank' + }, + { + id: 28889, + name: 'Crenarchaeota', + parent_taxonomy_id: 1783275, + rank: 'phylum' + }, + { + id: 183924, + name: 'Thermoprotei', + parent_taxonomy_id: 28889, + rank: 'class' + }, + { + id: 2281, + name: 'Sulfolobales', + parent_taxonomy_id: 183924, + rank: 'order' + }, + { + id: 118883, + name: 'Sulfolobaceae', + parent_taxonomy_id: 2281, + rank: 'family' + }, + { + id: 2284, + name: 'Sulfolobus', + parent_taxonomy_id: 118883, + rank: 'genus' + }, + { + id: 2285, + name: 'Sulfolobus acidocaldarius', + parent_taxonomy_id: 2284, + rank: 'species' + } + ], + [ + { + id: 131567, + name: 'cellular organisms', + parent_taxonomy_id: 1, + rank: 'no rank' + }, + { + id: 2157, + name: 'Archaea', + parent_taxonomy_id: 131567, + rank: 'superkingdom' + }, + { + id: 1783275, + name: 'TACK group', + parent_taxonomy_id: 2157, + rank: 'no rank' + }, + { + 
id: 28889, + name: 'Crenarchaeota', + parent_taxonomy_id: 1783275, + rank: 'phylum' + }, + { + id: 183924, + name: 'Thermoprotei', + parent_taxonomy_id: 28889, + rank: 'class' + }, + { + id: 2281, + name: 'Sulfolobales', + parent_taxonomy_id: 183924, + rank: 'order' + }, + { + id: 118883, + name: 'Sulfolobaceae', + parent_taxonomy_id: 2281, + rank: 'family' + }, + { + id: 2284, + name: 'Sulfolobus', + parent_taxonomy_id: 118883, + rank: 'genus' + }, + { + id: 2287, + name: 'Sulfolobus solfataricus', + parent_taxonomy_id: 2284, + rank: 'species' + } + ], + [ + { + id: 131567, + name: 'cellular organisms', + parent_taxonomy_id: 1, + rank: 'no rank' + }, + { + id: 2, + name: 'Bacteria', + parent_taxonomy_id: 131567, + rank: 'superkingdom' + }, + { + id: 1224, + name: 'Proteobacteria', + parent_taxonomy_id: 2, + rank: 'phylum' + }, + { + id: 68525, + name: 'delta/epsilon subdivisions', + parent_taxonomy_id: 1224, + rank: 'subphylum' + }, + { + id: 28221, + name: 'Deltaproteobacteria', + parent_taxonomy_id: 68525, + rank: 'class' + }, + { + id: 213118, + name: 'Desulfobacterales', + parent_taxonomy_id: 28221, + rank: 'order' + }, + { + id: 213119, + name: 'Desulfobacteraceae', + parent_taxonomy_id: 213118, + rank: 'family' + }, + { + id: 2289, + name: 'Desulfobacter', + parent_taxonomy_id: 213119, + rank: 'genus' + } + ], + [ + { + id: 131567, + name: 'cellular organisms', + parent_taxonomy_id: 1, + rank: 'no rank' + }, + { + id: 2, + name: 'Bacteria', + parent_taxonomy_id: 131567, + rank: 'superkingdom' + }, + { + id: 1224, + name: 'Proteobacteria', + parent_taxonomy_id: 2, + rank: 'phylum' + }, + { + id: 68525, + name: 'delta/epsilon subdivisions', + parent_taxonomy_id: 1224, + rank: 'subphylum' + }, + { + id: 28221, + name: 'Deltaproteobacteria', + parent_taxonomy_id: 68525, + rank: 'class' + }, + { + id: 213118, + name: 'Desulfobacterales', + parent_taxonomy_id: 28221, + rank: 'order' + }, + { + id: 213119, + name: 'Desulfobacteraceae', + parent_taxonomy_id: 213118, 
+ rank: 'family' + }, + { + id: 2289, + name: 'Desulfobacter', + parent_taxonomy_id: 213119, + rank: 'genus' + }, + { + id: 2293, + name: 'Desulfobacter postgatei', + parent_taxonomy_id: 2289, + rank: 'species' + } + ], + [ + { + id: 131567, + name: 'cellular organisms', + parent_taxonomy_id: 1, + rank: 'no rank' + }, + { + id: 2, + name: 'Bacteria', + parent_taxonomy_id: 131567, + rank: 'superkingdom' + }, + { + id: 1224, + name: 'Proteobacteria', + parent_taxonomy_id: 2, + rank: 'phylum' + }, + { + id: 28211, + name: 'Alphaproteobacteria', + parent_taxonomy_id: 1224, + rank: 'class' + }, + { + id: 356, + name: 'Rhizobiales', + parent_taxonomy_id: 28211, + rank: 'order' + }, + { + id: 118882, + name: 'Brucellaceae', + parent_taxonomy_id: 356, + rank: 'family' + }, + { + id: 234, + name: 'Brucella', + parent_taxonomy_id: 118882, + rank: 'genus' + }, + { + id: 235, + name: 'Brucella abortus', + parent_taxonomy_id: 234, + rank: 'species' + } + ], + [ + { + id: 131567, + name: 'cellular organisms', + parent_taxonomy_id: 1, + rank: 'no rank' + }, + { + id: 2, + name: 'Bacteria', + parent_taxonomy_id: 131567, + rank: 'superkingdom' + }, + { + id: 1224, + name: 'Proteobacteria', + parent_taxonomy_id: 2, + rank: 'phylum' + }, + { + id: 68525, + name: 'delta/epsilon subdivisions', + parent_taxonomy_id: 1224, + rank: 'subphylum' + }, + { + id: 29547, + name: 'Epsilonproteobacteria', + parent_taxonomy_id: 68525, + rank: 'class' + }, + { + id: 213849, + name: 'Campylobacterales', + parent_taxonomy_id: 29547, + rank: 'order' + }, + { + id: 72293, + name: 'Helicobacteraceae', + parent_taxonomy_id: 213849, + rank: 'family' + }, + { + id: 209, + name: 'Helicobacter', + parent_taxonomy_id: 72293, + rank: 'genus' + }, + { + id: 32025, + name: 'Helicobacter hepaticus', + parent_taxonomy_id: 209, + rank: 'species' + }, + { + id: 235279, + name: 'Helicobacter hepaticus ATCC 51449', + parent_taxonomy_id: 32025, + rank: 'no rank' + } + ], + [ + { + id: 131567, + name: 'cellular 
organisms', + parent_taxonomy_id: 1, + rank: 'no rank' + }, + { + id: 2, + name: 'Bacteria', + parent_taxonomy_id: 131567, + rank: 'superkingdom' + }, + { + id: 1783272, + name: 'Terrabacteria group', + parent_taxonomy_id: 2, + rank: 'no rank' + }, + { + id: 1239, + name: 'Firmicutes', + parent_taxonomy_id: 1783272, + rank: 'phylum' + }, + { + id: 909932, + name: 'Negativicutes', + parent_taxonomy_id: 1239, + rank: 'class' + }, + { + id: 909929, + name: 'Selenomonadales', + parent_taxonomy_id: 909932, + rank: 'order' + }, + { + id: 1843490, + name: 'Sporomusaceae', + parent_taxonomy_id: 909929, + rank: 'family' + }, + { + id: 2373, + name: 'Acetonema', + parent_taxonomy_id: 1843490, + rank: 'genus' + }, + { + id: 2374, + name: 'Acetonema longum', + parent_taxonomy_id: 2373, + rank: 'species' + } + ], + [ + { + id: 131567, + name: 'cellular organisms', + parent_taxonomy_id: 1, + rank: 'no rank' + }, + { + id: 2, + name: 'Bacteria', + parent_taxonomy_id: 131567, + rank: 'superkingdom' + }, + { + id: 1224, + name: 'Proteobacteria', + parent_taxonomy_id: 2, + rank: 'phylum' + }, + { + id: 1236, + name: 'Gammaproteobacteria', + parent_taxonomy_id: 1224, + rank: 'class' + }, + { + id: 135622, + name: 'Alteromonadales', + parent_taxonomy_id: 1236, + rank: 'order' + }, + { + id: 267890, + name: 'Shewanellaceae', + parent_taxonomy_id: 135622, + rank: 'family' + }, + { + id: 22, + name: 'Shewanella', + parent_taxonomy_id: 267890, + rank: 'genus' + }, + { + id: 24, + name: 'Shewanella putrefaciens', + parent_taxonomy_id: 22, + rank: 'species' + } + ] + ] + const taxonomy = new Taxonomy() + return taxonomy.getParentsMany(taxids).then((results) => { + expect(results).eql(expected) + }) + }) + it('should work with valid list of ids', function () { + this.timeout(60000) + const taxids = [ + 2285, + 2287, + 2289, + 2293, + 235, + 235279, + 2374, + 24, + 11676 + ] + const expected = [ + [ + { + id: 131567, + name: 'cellular organisms', + parent_taxonomy_id: 1, + rank: 'no rank' + 
}, + { + id: 2157, + name: 'Archaea', + parent_taxonomy_id: 131567, + rank: 'superkingdom' + }, + { + id: 1783275, + name: 'TACK group', + parent_taxonomy_id: 2157, + rank: 'no rank' + }, + { + id: 28889, + name: 'Crenarchaeota', + parent_taxonomy_id: 1783275, + rank: 'phylum' + }, + { + id: 183924, + name: 'Thermoprotei', + parent_taxonomy_id: 28889, + rank: 'class' + }, + { + id: 2281, + name: 'Sulfolobales', + parent_taxonomy_id: 183924, + rank: 'order' + }, + { + id: 118883, + name: 'Sulfolobaceae', + parent_taxonomy_id: 2281, + rank: 'family' + }, + { + id: 2284, + name: 'Sulfolobus', + parent_taxonomy_id: 118883, + rank: 'genus' + }, + { + id: 2285, + name: 'Sulfolobus acidocaldarius', + parent_taxonomy_id: 2284, + rank: 'species' + } + ], + [ + { + id: 131567, + name: 'cellular organisms', + parent_taxonomy_id: 1, + rank: 'no rank' + }, + { + id: 2157, + name: 'Archaea', + parent_taxonomy_id: 131567, + rank: 'superkingdom' + }, + { + id: 1783275, + name: 'TACK group', + parent_taxonomy_id: 2157, + rank: 'no rank' + }, + { + id: 28889, + name: 'Crenarchaeota', + parent_taxonomy_id: 1783275, + rank: 'phylum' + }, + { + id: 183924, + name: 'Thermoprotei', + parent_taxonomy_id: 28889, + rank: 'class' + }, + { + id: 2281, + name: 'Sulfolobales', + parent_taxonomy_id: 183924, + rank: 'order' + }, + { + id: 118883, + name: 'Sulfolobaceae', + parent_taxonomy_id: 2281, + rank: 'family' + }, + { + id: 2284, + name: 'Sulfolobus', + parent_taxonomy_id: 118883, + rank: 'genus' + }, + { + id: 2287, + name: 'Sulfolobus solfataricus', + parent_taxonomy_id: 2284, + rank: 'species' + } + ], + [ + { + id: 131567, + name: 'cellular organisms', + parent_taxonomy_id: 1, + rank: 'no rank' + }, + { + id: 2, + name: 'Bacteria', + parent_taxonomy_id: 131567, + rank: 'superkingdom' + }, + { + id: 1224, + name: 'Proteobacteria', + parent_taxonomy_id: 2, + rank: 'phylum' + }, + { + id: 68525, + name: 'delta/epsilon subdivisions', + parent_taxonomy_id: 1224, + rank: 'subphylum' + }, + { 
+ id: 28221, + name: 'Deltaproteobacteria', + parent_taxonomy_id: 68525, + rank: 'class' + }, + { + id: 213118, + name: 'Desulfobacterales', + parent_taxonomy_id: 28221, + rank: 'order' + }, + { + id: 213119, + name: 'Desulfobacteraceae', + parent_taxonomy_id: 213118, + rank: 'family' + }, + { + id: 2289, + name: 'Desulfobacter', + parent_taxonomy_id: 213119, + rank: 'genus' + } + ], + [ + { + id: 131567, + name: 'cellular organisms', + parent_taxonomy_id: 1, + rank: 'no rank' + }, + { + id: 2, + name: 'Bacteria', + parent_taxonomy_id: 131567, + rank: 'superkingdom' + }, + { + id: 1224, + name: 'Proteobacteria', + parent_taxonomy_id: 2, + rank: 'phylum' + }, + { + id: 68525, + name: 'delta/epsilon subdivisions', + parent_taxonomy_id: 1224, + rank: 'subphylum' + }, + { + id: 28221, + name: 'Deltaproteobacteria', + parent_taxonomy_id: 68525, + rank: 'class' + }, + { + id: 213118, + name: 'Desulfobacterales', + parent_taxonomy_id: 28221, + rank: 'order' + }, + { + id: 213119, + name: 'Desulfobacteraceae', + parent_taxonomy_id: 213118, + rank: 'family' + }, + { + id: 2289, + name: 'Desulfobacter', + parent_taxonomy_id: 213119, + rank: 'genus' + }, + { + id: 2293, + name: 'Desulfobacter postgatei', + parent_taxonomy_id: 2289, + rank: 'species' + } + ], + [ + { + id: 131567, + name: 'cellular organisms', + parent_taxonomy_id: 1, + rank: 'no rank' + }, + { + id: 2, + name: 'Bacteria', + parent_taxonomy_id: 131567, + rank: 'superkingdom' + }, + { + id: 1224, + name: 'Proteobacteria', + parent_taxonomy_id: 2, + rank: 'phylum' + }, + { + id: 28211, + name: 'Alphaproteobacteria', + parent_taxonomy_id: 1224, + rank: 'class' + }, + { + id: 356, + name: 'Rhizobiales', + parent_taxonomy_id: 28211, + rank: 'order' + }, + { + id: 118882, + name: 'Brucellaceae', + parent_taxonomy_id: 356, + rank: 'family' + }, + { + id: 234, + name: 'Brucella', + parent_taxonomy_id: 118882, + rank: 'genus' + }, + { + id: 235, + name: 'Brucella abortus', + parent_taxonomy_id: 234, + rank: 'species' + 
} + ], + [ + { + id: 131567, + name: 'cellular organisms', + parent_taxonomy_id: 1, + rank: 'no rank' + }, + { + id: 2, + name: 'Bacteria', + parent_taxonomy_id: 131567, + rank: 'superkingdom' + }, + { + id: 1224, + name: 'Proteobacteria', + parent_taxonomy_id: 2, + rank: 'phylum' + }, + { + id: 68525, + name: 'delta/epsilon subdivisions', + parent_taxonomy_id: 1224, + rank: 'subphylum' + }, + { + id: 29547, + name: 'Epsilonproteobacteria', + parent_taxonomy_id: 68525, + rank: 'class' + }, + { + id: 213849, + name: 'Campylobacterales', + parent_taxonomy_id: 29547, + rank: 'order' + }, + { + id: 72293, + name: 'Helicobacteraceae', + parent_taxonomy_id: 213849, + rank: 'family' + }, + { + id: 209, + name: 'Helicobacter', + parent_taxonomy_id: 72293, + rank: 'genus' + }, + { + id: 32025, + name: 'Helicobacter hepaticus', + parent_taxonomy_id: 209, + rank: 'species' + }, + { + id: 235279, + name: 'Helicobacter hepaticus ATCC 51449', + parent_taxonomy_id: 32025, + rank: 'no rank' + } + ], + [ + { + id: 131567, + name: 'cellular organisms', + parent_taxonomy_id: 1, + rank: 'no rank' + }, + { + id: 2, + name: 'Bacteria', + parent_taxonomy_id: 131567, + rank: 'superkingdom' + }, + { + id: 1783272, + name: 'Terrabacteria group', + parent_taxonomy_id: 2, + rank: 'no rank' + }, + { + id: 1239, + name: 'Firmicutes', + parent_taxonomy_id: 1783272, + rank: 'phylum' + }, + { + id: 909932, + name: 'Negativicutes', + parent_taxonomy_id: 1239, + rank: 'class' + }, + { + id: 909929, + name: 'Selenomonadales', + parent_taxonomy_id: 909932, + rank: 'order' + }, + { + id: 1843490, + name: 'Sporomusaceae', + parent_taxonomy_id: 909929, + rank: 'family' + }, + { + id: 2373, + name: 'Acetonema', + parent_taxonomy_id: 1843490, + rank: 'genus' + }, + { + id: 2374, + name: 'Acetonema longum', + parent_taxonomy_id: 2373, + rank: 'species' + } + ], + [ + { + id: 131567, + name: 'cellular organisms', + parent_taxonomy_id: 1, + rank: 'no rank' + }, + { + id: 2, + name: 'Bacteria', + 
parent_taxonomy_id: 131567, + rank: 'superkingdom' + }, + { + id: 1224, + name: 'Proteobacteria', + parent_taxonomy_id: 2, + rank: 'phylum' + }, + { + id: 1236, + name: 'Gammaproteobacteria', + parent_taxonomy_id: 1224, + rank: 'class' + }, + { + id: 135622, + name: 'Alteromonadales', + parent_taxonomy_id: 1236, + rank: 'order' + }, + { + id: 267890, + name: 'Shewanellaceae', + parent_taxonomy_id: 135622, + rank: 'family' + }, + { + id: 22, + name: 'Shewanella', + parent_taxonomy_id: 267890, + rank: 'genus' + }, + { + id: 24, + name: 'Shewanella putrefaciens', + parent_taxonomy_id: 22, + rank: 'species' + } + ], + [] + ] + const taxonomy = new Taxonomy() + return taxonomy.getParentsMany(taxids).then((results) => { + expect(results).eql(expected) + }) + }) + }) +}) \ No newline at end of file