In [203]:
from src.utils.json_utils import load_tools_from_json, save_tools_to_json
from src.utils.epmc_utils import search_europe_pmc
from src.utils.tool_utils import update_publication_with_match

import requests

# Get tools from bio.tools

In [212]:
base_url = "https://bio.tools/api/tool/?topic=%22proteomics%22&format=json&page="
page = 1
tools = []

while(page < 5):
    response = requests.get(base_url + str(page))

    if response.status_code == 200:
        result = response.json()['list']
        tools.extend(result)
        page += 1
    else:
        break

print("Got {} tools from bio.tools".format(len(tools)))

Got 40 tools from bio.tools


## Separate tools into no DOI, no publication, and with publication

In [213]:
tools_no_doi, tools_no_publication = [], []

for tool in tools:
    if tool['publication']:
        if tool['publication'][0]['doi'] is None:
            tools_no_doi.append(tool)

    else:
        tools_no_publication.append(tool)

if tools_no_doi:
    print("Got {} tools without DOI".format(len(tools_no_doi)))

print("Got {} tools without publication".format(len(tools_no_publication)))

tools_subset = [item for item in tools if item not in tools_no_publication]

Got 5 tools without DOI
Got 4 tools without publication


In [208]:
tools_no_id = [tool for tool in tools_subset if tool['publication'][0]['pmid'] is None]
tools_no_pmcid = [tool for tool in tools_subset if tool['publication'][0]['pmcid'] is None]

print("Got {} tools without PMID".format(len(tools_no_id)))
print("Got {} tools without PMCID".format(len(tools_no_pmcid)))

Got 9 tools without PMID
Got 20 tools without PMCID


For tools with doi - query EuropePMC and update publication fields 

In [197]:
updated_tools =  []

for tool in tools_subset:
    if tool['publication'][0]['doi'] is None:
        continue

    doi = tool['publication'][0]['doi']
    response = search_europe_pmc(doi)

    if response and response.get('hitCount', 0) > 0:
        match = response['resultList']['result'][0]
        
        original_publication = {
            'doi': tool['publication'][0].get('doi'),
            'pmid': tool['publication'][0].get('pmid'),
            'pmcid': tool['publication'][0].get('pmcid')
        }
        updated_publication = update_publication_with_match(match)
        
        if updated_publication and updated_publication != original_publication:
            if 'pmid' not in updated_publication:
                updated_publication['pmid'] = None
            if 'pmcid' not in updated_publication:
                updated_publication['pmcid'] = None
            
            tool['publication'][0] = updated_publication
            updated_tools.append(tool)


print(f"Updated {len(updated_tools)} tools with publication information.")

save_tools_to_json(updated_tools, "updated_tools.json")



{'doi': '10.1002/pmic.200401244', 'pmid': None, 'pmcid': None}
{'doi': '10.1002/pmic.200401244', 'pmid': '15880814', 'pmcid': None}
msight
{'doi': '10.1021/ACS.JPROTEOME.9B00330', 'pmid': '31378069', 'pmcid': 'PMC6733628'}
{'doi': '10.1021/acs.jproteome.9b00330', 'pmid': '31378069', 'pmcid': 'PMC6733628'}
MetaMorpheus
{'doi': '10.1093/bioinformatics/btw548', 'pmid': None, 'pmcid': None}
{'doi': '10.1093/bioinformatics/btw548', 'pmid': '27542771', 'pmcid': None}
MSIdV
{'doi': '10.1093/bioinformatics/btn603', 'pmid': None, 'pmcid': None}
{'doi': '10.1093/bioinformatics/btn603', 'pmid': '19015140', 'pmcid': 'PMC2639009'}
sirius
{'doi': '10.1021/jasms.4c00178', 'pmid': None, 'pmcid': None}
{'doi': '10.1021/jasms.4c00178', 'pmid': '39221961', 'pmcid': None}
msigen
{'doi': '10.1021/JASMS.1C00013', 'pmid': '33822609', 'pmcid': 'PMC8102432'}
{'doi': '10.1021/jasms.1c00013', 'pmid': '33822609', 'pmcid': 'PMC8102432'}
macroms
Updated 6 tools with publication information.


In [200]:
tools_no_id = [tool for tool in tools2 if tool['publication'][0].get('pmid') is None]
tools_no_pmcid = [tool for tool in tools2 if tool['publication'][0].get('pmcid') is None]

print("Got {} tools without PMID".format(len(tools_no_id)))
print("Got {} tools without PMCID".format(len(tools_no_pmcid)))

Got 0 tools without PMID
Got 5 tools without PMCID


# Tools without DOI

In [214]:
updated_tools_doi = []

for tool in tools_no_doi:
    if tool['publication'][0]['pmid'] is not None:
        response = search_europe_pmc(tool['publication'][0]['pmid'])

        if response and response.get('hitCount', 0) > 0:
            match = response['resultList']['result'][0]
        
            original_publication = {
              'doi': tool['publication'][0].get('doi'),
              'pmid': tool['publication'][0].get('pmid'),
              'pmcid': tool['publication'][0].get('pmcid')
            }
            updated_publication = update_publication_with_match(match)

            if updated_publication and updated_publication != original_publication:
                if 'doi' not in updated_publication:
                    updated_publication['doi'] = None
                if 'pmid' not in updated_publication:
                    updated_publication['pmid'] = None
                if 'pmcid' not in updated_publication:
                    updated_publication['pmcid'] = None

                tool['publication'][0] = updated_publication
                updated_tools_doi.append(tool)

print(f"Updated {len(updated_tools_doi)} tools with publication information.")

# Replace original publication instead of the updated publication, replacing only the fields to keep all the other metadata

MZmine
[{'doi': None, 'pmid': '16026613', 'pmcid': None, 'type': [], 'version': None, 'note': None, 'metadata': {'title': 'Processing methods for differential analysis of LC/MS profile data', 'abstract': 'Background: Liquid chromatography coupled to mass spectrometry (LC/MS) has been widely used in proteomics and metabolomics research. In this context, the technology has been increasingly used for differential profiling, i.e. broad screening of biomolecular components across multiple samples in order to elucidate the observed phenotypes and discover biomarkers. One of the major challenges in this domain remains development of better solutions for processing of LC/MS data. Results: We present a software package MZmine that enables differential LC/MS analysis of metabolomics data. This software is a toolbox containing methods for all data processing stages preceding differential analysis: spectral filtering, peak detection, alignment and normalization. Specifically, we developed and impl