# Bio.tools | Update Publication Information using EuropePMC

This is a notebook for completing missing publication information in bio.tools using EuropePMC.
It also includes:

* saving open access publication information with corresponding tool name and biotoolsID;
* creating subsets of different topics and saving to json.

Up-to-date bio.tools dumps can be found in this repo under `biotoolspub/biotoolsdump.zip`.


### Load bio.tools tool information

In [None]:
from bh24_literature_mining.utils import load_biotools_from_zip, save_to_json
from europe_pmc import EuropePMC

### Get missing information from EuropePMC and save open access publications

Running this step takes ~4 hours. (The dumps in `biotoolspub/biotoolsdump.zip` are already up to date.)

In [None]:
# Read the bio.tools records from the "biotools.json" member of the zipped dump.
dump_archive = "../data/biotools/biotoolsdump.zip"
dump_member = "biotools.json"
tools = load_biotools_from_zip(dump_archive, dump_member)

In [None]:
# Refresh each tool's publication identifiers from EuropePMC and collect the
# open access publications together with the owning tool's ID and name.
open_access_publications = []

pmc_client = EuropePMC()

# Identifier fields copied from both the bio.tools record and the EuropePMC hit.
identifier_keys = ("doi", "pmid", "pmcid")

for tool in tools:

    # Publications of this tool for which EuropePMC returned data.
    # NOTE(review): publications without an identifier or without a EuropePMC
    # hit are not carried over into the updated tool record - confirm that
    # dropping them is intended.
    pmc_publications = []

    for pub in tool["publication"]:
        # Prefer the DOI and fall back to the PMID. The previous
        # ``pub.get("doi", "pmid")`` used the literal string "pmid" as the
        # default value instead of looking up the publication's PMID.
        pub_id = pub.get("doi") or pub.get("pmid")
        if not pub_id:
            continue

        response = pmc_client.fetch(pub_id)
        if not response.data:
            continue

        biotools_publication = {key: pub.get(key) for key in identifier_keys}
        pmc_publication = {key: response.data.get(key) for key in identifier_keys}

        # Select final publication to be added to tool. When both records
        # compare equal they hold the same identifiers, so this effectively
        # takes the EuropePMC values.
        final_publication = (
            pmc_publication
            if biotools_publication != pmc_publication
            else biotools_publication
        )

        # Add 'type' field to publication from pub
        final_publication["type"] = pub.get("type")

        pmc_publications.append(final_publication)

        if response.data.get("isOpenAccess") == "Y":
            open_access_publications.append(
                [tool["biotoolsID"], tool["name"], final_publication]
            )

    # Store the whole list of updated publications. The previous code assigned
    # ``pmc_publication`` (the last single dict), which could be a stale entry
    # from an earlier tool or an undefined name.
    tool["publication"] = pmc_publications


save_to_json(tools, "biotools.json")
save_to_json(open_access_publications, "open_access_publications.json")

### Create subsets of tools by topic (Proteomics, Metabolomics and RNA-Seq)

Filter tools and save to json files.

In [None]:
def filter_tools_by_topic(tools, topic_name):
    """Select the tools annotated with ``topic_name`` and save them to JSON.

    A tool is selected when at least one entry of its ``"topic"`` list has a
    ``"term"`` equal to ``topic_name``. The selection is passed to
    ``save_to_json`` with the file name ``<lowercased topic_name>_tools.json``
    and is also returned to the caller.
    """
    tools_subset = []

    for candidate in tools:
        for annotation in candidate["topic"]:
            if annotation["term"] == topic_name:
                tools_subset.append(candidate)
                # One matching topic is enough; skip the remaining ones.
                break

    file_name = f"{topic_name.lower()}_tools.json"
    save_to_json(tools_subset, file_name)

    return tools_subset

In [None]:
# Save one JSON subset per topic; each call also returns the filtered list.
# NOTE(review): as the last expression of a notebook cell, the RNA-Seq subset
# is presumably echoed as the cell output - confirm before restructuring.
filter_tools_by_topic(tools, "Proteomics")
filter_tools_by_topic(tools, "Metabolomics")
filter_tools_by_topic(tools, "RNA-Seq")