Skip to content

Commit

Permalink
Merge pull request #77 from biothings/sri-resolver-2
Browse files Browse the repository at this point in the history
ID resolution using SRI-ID-resolver
  • Loading branch information
newgene committed Sep 10, 2021
2 parents f7d64c4 + ba6ace9 commit 814338f
Show file tree
Hide file tree
Showing 6 changed files with 249 additions and 75 deletions.
8 changes: 7 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -108,21 +108,27 @@ let input = {
const resolver = require('biomedical_id_resolver');

// input must be an object, with semantic type as the key, and array of CURIEs as value

let input = {
"Gene": ["NCBIGene:1017", "NCBIGene:1018", "HGNC:1177"],
"SmallMolecule": ["CHEBI:15377"],
"Disease": ["MONDO:0004976"],
"Cell": ["CL:0002372"]
};

// SRI resolver will figure out the semantic type if the input type is 'unknown', 'undefined', or 'NamedThing'
let input = {
"unknown": ["NCBIGene:1017", "MONDO:0004976"],
};

(async () => {
let res = await resolver.resolveSRI(input);
console.log(res);
})();
```
### Example Output
The output contains `id` and `equivalent_identifiers` straight from SRI as well as the same fields as the base resolver to make it backwards compatible with it.
The output contains `id` and `equivalent_identifiers` straight from SRI as well as the same fields as the base resolver to make it backwards compatible with it. If the SRI resolved semantic type doesn't agree with the input semantic type, there will be 2 entries in the array for the curie.
```json
{
"NCBIGene:1017": [
Expand Down
3 changes: 2 additions & 1 deletion __tests__/integration/biolink_based_resolver.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,8 @@ describe("Test ID Resolver", () => {
expect(res['kkk:123'][0].label).toEqual('kkk:123')
})

test("test large batch of inputs should be correctly resolved", async () => {
// uses old ID resolver and has issues right now...
test.skip("test large batch of inputs should be correctly resolved", async () => {
const fakeNCBIGeneInputs = [...Array(1990).keys()].map(item => 'NCBIGene:' + item.toString());
const fakeOMIMGeneInputs = [...Array(2300).keys()].map(item => "OMIM:" + item.toString());
const fakeDrugbankInputs = [...Array(3500).keys()].map(item => "DRUGBANK:DB00" + item.toString());
Expand Down
8 changes: 5 additions & 3 deletions __tests__/integration/default_resolver.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,9 @@ describe("Test ID Resolver", () => {
expect(res['kkk:123'][0].primaryID).toEqual('kkk:123');
expect(res['kkk:123'][0].label).toEqual('kkk:123')
})

test("test large batch of inputs should be correctly resolved", async () => {

// uses old ID resolver and has issues right now...
test.skip("test large batch of inputs should be correctly resolved", async () => {
const fakeNCBIGeneInputs = [...Array(1990).keys()].map(item => 'NCBIGene:' + item.toString());
const fakeOMIMGeneInputs = [...Array(2300).keys()].map(item => "OMIM:" + item.toString());
const fakeDrugbankInputs = [...Array(3500).keys()].map(item => "DRUGBANK:DB00" + item.toString());
Expand Down Expand Up @@ -170,7 +171,8 @@ describe("Test ID Resolver", () => {
expect(res['NCBIGene:1017'][1]).toBeInstanceOf(IrresolvableBioEntity);
})

test("Test chemical attributes are correctly retrieved", async () => {
// skip because new SRI-based ID resolver doesn't fetch node attributes
test.skip("Test chemical attributes are correctly retrieved", async () => {
const resolver = new DefaultIDResolver();
const res = await resolver.resolve({ "SmallMolecule": ["CHEMBL.COMPOUND:CHEMBL744"] });
expect(res["CHEMBL.COMPOUND:CHEMBL744"][0].attributes.drugbank_taxonomy_class).toContain("Benzothiazoles");
Expand Down
93 changes: 91 additions & 2 deletions __tests__/integration/sri_resolver.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -9,21 +9,110 @@ describe("Test SRI Resolver", () => {
"Cell": ["CL:0002372"]
};
const res = await resolveSRI(input);

expect(res["NCBIGene:1017"]).toEqual(expect.any(Array));
expect(res["NCBIGene:1017"][0].primaryID).toEqual("NCBIGene:1017");
expect(res["NCBIGene:1017"][0].label).toEqual("CDK2");
expect(res["NCBIGene:1017"][0].semanticType).toEqual("Gene");
expect(res["NCBIGene:1017"][0].semanticTypes).toEqual(expect.any(Array));
expect(res["NCBIGene:1017"][0].dbIDs).toEqual(expect.any(Object));
expect(res["NCBIGene:1017"][0].dbIDs.NCBIGene).toEqual(expect.any(Array));
expect(res["NCBIGene:1017"][0].dbIDs.name).toEqual(expect.any(Array));
expect(res["NCBIGene:1017"][0].curies).toEqual(expect.any(Array));
});

test("Test unresolvable curie", async () => {
test("Test unresolvable curie/bad input", async () => {
let input = {
"Gene": ["NCBIGene:ABCD"],
"Gene": ["NCBIGene:ABCD", "NCBIGene:GENE:1017"],
};
const res = await resolveSRI(input);

expect(res["NCBIGene:ABCD"]).toEqual(expect.any(Array));
expect(res["NCBIGene:ABCD"][0].semanticType).toEqual("Gene");
expect(res["NCBIGene:ABCD"][0].primaryID).toEqual("NCBIGene:ABCD");
expect(res["NCBIGene:ABCD"][0].label).toEqual("NCBIGene:ABCD");
expect(res["NCBIGene:ABCD"][0].dbIDs.name).toEqual(expect.any(Array));
expect(res["NCBIGene:ABCD"][0].dbIDs.NCBIGene).toEqual(expect.any(Array));

expect(res["NCBIGene:GENE:1017"]).toEqual(expect.any(Array));
expect(res["NCBIGene:GENE:1017"][0].semanticType).toEqual("Gene");
expect(res["NCBIGene:GENE:1017"][0].primaryID).toEqual("NCBIGene:GENE:1017");
expect(res["NCBIGene:GENE:1017"][0].label).toEqual("NCBIGene:GENE:1017");
expect(res["NCBIGene:GENE:1017"][0].dbIDs.name).toEqual(expect.any(Array));
expect(res["NCBIGene:GENE:1017"][0].dbIDs.NCBIGene).toEqual(expect.any(Array));
});

test("Test SRI Semantic type resolver with unknown", async () => {
let input = {
unknown: ["NCBIGene:3778"],
};
const res = await resolveSRI(input);

expect(res["NCBIGene:3778"]).toEqual(expect.any(Array));
expect(res["NCBIGene:3778"][0].semanticType).toEqual("Gene");
})

test("Test SRI Semantic type resolver with undefined", async () => {
let input = {
undefined: ["NCBIGene:3778"],
};
const res = await resolveSRI(input);

expect(res["NCBIGene:3778"]).toEqual(expect.any(Array));
expect(res["NCBIGene:3778"][0].semanticType).toEqual("Gene");
})

test("Test SRI Semantic type resolver with NamedThing", async () => {
let input = {
NamedThing: ["NCBIGene:3778"],
};
const res = await resolveSRI(input);

expect(res["NCBIGene:3778"]).toEqual(expect.any(Array));
expect(res["NCBIGene:3778"][0].semanticType).toEqual("Gene");
})

test("Test Same ID different semantic types", async () => {
let input = {
"Gene": ["NCBIGene:1017"],
"Disease": ["NCBIGene:1017"]
};
const res = await resolveSRI(input);

expect(res["NCBIGene:1017"].length).toBe(2);
expect(res["NCBIGene:1017"][0].semanticType).toEqual("Gene");
expect(res["NCBIGene:1017"][1].semanticType).toEqual("Disease");
});

test("Test using SRI to get semantic types", async () => {
let input = {
unknown: ["NCBIGene:1017"]
};
const res = await resolveSRI(input);

expect(res["NCBIGene:1017"].length).toBe(1);
expect(res["NCBIGene:1017"][0].semanticType).toEqual("Gene");
});

test("Test handling semantic type conflicts", async () => {
let input = {
"SmallMolecule": ["PUBCHEM.COMPOUND:23680530"]
};
const res = await resolveSRI(input);

expect(res["PUBCHEM.COMPOUND:23680530"].length).toBe(2);
expect(res["PUBCHEM.COMPOUND:23680530"][0].semanticType).toEqual("MolecularMixture");
expect(res["PUBCHEM.COMPOUND:23680530"][1].semanticType).toEqual("SmallMolecule");
});

test("Test large batch of inputs should be correctly resolved and should not give an error", async () => {
const fakeNCBIGeneInputs = [...Array(5000).keys()].map(item => 'NCBIGene:' + item.toString());
let input = {
Gene: fakeNCBIGeneInputs,
};
const res = await resolveSRI(input);

expect(Object.keys(res)).toHaveLength(fakeNCBIGeneInputs.length);
})

});
3 changes: 3 additions & 0 deletions src/common/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -154,8 +154,11 @@ export interface IDOBject {
label: string;
}
export interface SRIBioEntity extends IBioEntity {
_leafSemanticType: string; // @deprecated use semanticType instead
_dbIDs: DBIdsObject; // @deprecated use dbIDs instead
id: IDOBject;
equivalent_identifiers: IDOBject[];
type: string[];
}

export interface SRIResolverOutput {
Expand Down

0 comments on commit 814338f

Please sign in to comment.