From 001fa593c7110970fa287caab89c2038b8461554 Mon Sep 17 00:00:00 2001 From: Sierra Taylor Moxon Date: Mon, 8 May 2023 13:01:50 -0700 Subject: [PATCH 1/2] deprecating sources via conversation in slack about biggim, bigclam, and clinical-profiles --- infores_catalog_nodes.tsv | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/infores_catalog_nodes.tsv b/infores_catalog_nodes.tsv index 186ea1ab2d..87ed806dae 100644 --- a/infores_catalog_nodes.tsv +++ b/infores_catalog_nodes.tsv @@ -44,9 +44,9 @@ released Automat Ubergraph Nonredundant infores:automat-ubergraph-nonredundant h released Automat Viral Proteome infores:automat-viral-protome https://github.com/NCATSTranslator/Translator-All/wiki/Automat#viral-proteome "A graph consisting of viral proteins from UniProt, connected by similarity edges from UniRef." released Basic Formal Ontology infores:bfo http://www.obofoundry.org/ontology/bfo.html BFO released Bgee infores:bgee https://bgee.org/ BGEE -released Big Cell Line Association Miner infores:bigclam https://github.com/PriceLab/translator-bigquery-api BigClam +deprecated Big Cell Line Association Miner infores:bigclam https://github.com/PriceLab/translator-bigquery-api BigClam released BiGG Models infores:bigg-models http://bigg.ucsd.edu/ BIGG a knowledgebase of genome-scale metabolic network reconstructions -released Big Gene Interaction Miner infores:biggim https://github.com/PriceLab/translator-bigquery-api/ BigGIM +deprecated Big Gene Interaction Miner infores:biggim https://github.com/PriceLab/translator-bigquery-api/ BigGIM released BindingDB infores:bindingdb https://www.bindingdb.org The Binding Database "BindingDB is a public, web-accessible database of measured binding affinities, focusing chiefly on the interactions of protein considered to be drug-targets with small, drug-like molecules. As of July 31, 2021, BindingDB contains 41,300 Entries, each with a DOI, containing 2,303,972 binding data for 8,561 protein targets and 995,797 small molecules." released Bio2RDF infores:bio2rdf https://bio2rdf.org released BioCatalogue infores:biocatalogue @@ -102,7 +102,7 @@ released Chemotext infores:chemotext http://chemotext.mml.unc.edu/ released Clinital Interpretation of Variants in Cancer infores:civic https://civicdb.org/home CIViC released Cell Ontology infores:cl https://obofoundry.org/ontology/cl.html CL released Clinical Genome Resource (ClinGen) infores:clingen https://clinicalgenome.org/ ClinGen is a NIH-funded resource dedicated to building a central resource that defines the clinical relevance of genes and variants for use in precision medicine and research -released Clinical Profiles infores:clinical-profiles +deprecated Clinical Profiles infores:clinical-profiles released ClinicalTrials.gov infores:clinicaltrials https://clinicaltrials.gov released ClinVar infores:clinvar https://www.ncbi.nlm.nih.gov/clinvar/ "ClinVar is a freely accessible, public archive of reports of the relationships among human variations and phenotypes, with supporting evidence." released Connectivity Map infores:cmap https://clue.io/cmap CMAP "genome-scale library of cellular signatures that catalogs transcriptional responses to chemical, genetic, and disease perturbation" From f958f181132142e7e07de80eda2e66d4acbae95f Mon Sep 17 00:00:00 2001 From: Sierra Taylor Moxon Date: Mon, 8 May 2023 13:50:59 -0700 Subject: [PATCH 2/2] reformat --- ...date_biolink_yaml.yml => validate-biolink-yaml.yml} | 0 ...verify_pull_request.yml => verify-pull-request.yml} | 0 infores_catalog_nodes.tsv | 5 +++-- scripts/verify_infores.py | 10 ++++++---- 4 files changed, 9 insertions(+), 6 deletions(-) rename .github/workflows/{validate_biolink_yaml.yml => validate-biolink-yaml.yml} (100%) rename .github/workflows/{verify_pull_request.yml => verify-pull-request.yml} (100%) diff --git a/.github/workflows/validate_biolink_yaml.yml b/.github/workflows/validate-biolink-yaml.yml similarity index 100% rename from .github/workflows/validate_biolink_yaml.yml rename to .github/workflows/validate-biolink-yaml.yml diff --git a/.github/workflows/verify_pull_request.yml b/.github/workflows/verify-pull-request.yml similarity index 100% rename from .github/workflows/verify_pull_request.yml rename to .github/workflows/verify-pull-request.yml diff --git a/infores_catalog_nodes.tsv b/infores_catalog_nodes.tsv index 87ed806dae..2b23c3407d 100644 --- a/infores_catalog_nodes.tsv +++ b/infores_catalog_nodes.tsv @@ -44,9 +44,9 @@ released Automat Ubergraph Nonredundant infores:automat-ubergraph-nonredundant h released Automat Viral Proteome infores:automat-viral-protome https://github.com/NCATSTranslator/Translator-All/wiki/Automat#viral-proteome "A graph consisting of viral proteins from UniProt, connected by similarity edges from UniRef." released Basic Formal Ontology infores:bfo http://www.obofoundry.org/ontology/bfo.html BFO released Bgee infores:bgee https://bgee.org/ BGEE -deprecated Big Cell Line Association Miner infores:bigclam https://github.com/PriceLab/translator-bigquery-api BigClam +deprecated Big Cell Line Association Miner infores:bigclam https://github.com/PriceLab/translator-bigquery-api BigClam released BiGG Models infores:bigg-models http://bigg.ucsd.edu/ BIGG a knowledgebase of genome-scale metabolic network reconstructions -deprecated Big Gene Interaction Miner infores:biggim https://github.com/PriceLab/translator-bigquery-api/ BigGIM +deprecated Big Gene Interaction Miner infores:biggim https://github.com/PriceLab/translator-bigquery-api/ BigGIM released BindingDB infores:bindingdb https://www.bindingdb.org The Binding Database "BindingDB is a public, web-accessible database of measured binding affinities, focusing chiefly on the interactions of protein considered to be drug-targets with small, drug-like molecules. As of July 31, 2021, BindingDB contains 41,300 Entries, each with a DOI, containing 2,303,972 binding data for 8,561 protein targets and 995,797 small molecules." released Bio2RDF infores:bio2rdf https://bio2rdf.org released BioCatalogue infores:biocatalogue @@ -273,6 +273,7 @@ released PathWhiz infores:pathwhiz https://smpdb.ca/pathwhiz released Phenotype and Trait Ontology infores:pato http://www.obofoundry.org/ontology/pato.html PATO released Phenomics Integrated Ontology infores:phenio https://github.com/monarch-initiative/phenio PHENIO An ontology for accessing and comparing knowledge concerning phenotypes across species and genetic backgrounds. released Pathosystems Resource Integration Center infores:patric https://ngdc.cncb.ac.cn/databasecommons/database/id/230 PATRIC +released Text Mined pharmacogenomic polymorphisms infores:pgxmine https://pgxmine.pharmgkb.org/ PGxMine Text mined pharmacogenomic polymorphisms to assist curation of PharmGKB. released Physician Data Query (PDQ) (from UMLS) infores:pdq-umls https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/PDQ/index.html released Pfam infores:pfam https://www.ebi.ac.uk/interpro The Pfam database is a large collection of protein families, each represented by multiple sequence alignments and hidden Markov models (HMMs). Proteins are generally composed of one or more functional regions, commonly termed domains. Different combinations of domains give rise to the diverse range of proteins found in nature. The identification of domains that occur within proteins can therefore provide insights into their function. Pfam also generates higher-level groupings of related entries, known as clans. A clan is a collection of Pfam entries which are related by similarity of sequence, structure or profile-HMM. The data presented for each entry is based on the UniProt Reference Proteomes but information on individual UniProtKB sequences can still be found by entering the protein accession. Pfam full alignments are available from searching a variety of databases, either to provide different accessions (e.g. all UniProt and NCBI GI) or different levels of redundancy. released Pharmacotherapy Database infores:pharmacotherapydb https://github.com/dhimmel/indications PharmacotherapyDB diff --git a/scripts/verify_infores.py b/scripts/verify_infores.py index d761399cd9..f4a260afb1 100644 --- a/scripts/verify_infores.py +++ b/scripts/verify_infores.py @@ -3,7 +3,6 @@ import requests import csv import time -from typing import List INFORES_TSV = os.path.join('infores_catalog_nodes.tsv') @@ -37,11 +36,14 @@ def validate(self): with open(INFORES_TSV, 'r') as tsv_file: reader = csv.reader(tsv_file, delimiter='\t') for line in reader: - if line[2] == 'id' or line[3] == '': + if len(line) < 5: + raise ValueError("Invalid infores TSV: too few items in a line") + if line[2] == 'id' or line[3] == '' or line[0] == 'deprecated': continue + # exceptions for resolvable URLs that don't return 200 response for some reason (e.g. require + # user to accept a popup before resolving): if line[2] == 'infores:athena' \ or line[2] == 'infores:isb-wellness' \ - or line[0] == 'deprecated' \ or line[2] == 'infores:isb-incov' \ or line[2] == 'infores:preppi' \ or line[2] == 'infores:ttd' \ @@ -57,7 +59,7 @@ def validate(self): else: print(line) print("Invalid infores URL:" + line[3] + " for " + line[2]) - raise ValueError("Invalid infores URL") + raise ValueError("invalid return code for URL" + line[3] + " for " + line[2]) if __name__ == "__main__":