Set up functions for Europe PMC API:

In [1]:
import matplotlib.pyplot as plt
import requests
from matplotlib_venn import venn2, venn3

def fetch_hits(querystring, page_size=1000, timeout=60):
    """Collect the unique record IDs that Europe PMC returns for a search query.

    Pages through the REST ``search`` endpoint using cursor-based pagination
    (``resultType=idlist``) until a page yields no IDs, the response carries no
    ``nextCursorMark``, or the cursor stops advancing.

    Failure handling is best-effort: on a transport error, a non-200 status, or
    a body that is not valid JSON, a message is printed and the IDs gathered so
    far are returned. The result may therefore be incomplete.

    Args:
        querystring: Search expression sent as the ``query`` parameter.
        page_size: Number of results requested per page (``pageSize``).
        timeout: Seconds to wait on each HTTP request before giving up.

    Returns:
        A sorted list of the unique ``id`` values found across all pages.
    """
    base_url = "https://www.ebi.ac.uk/europepmc/webservices/rest/search"
    all_hits = set()  # A set de-duplicates IDs that appear on more than one page
    cursor_mark = "*"  # Initial cursor value for the first page

    while True:
        params = {
            "query": querystring,
            "resultType": "idlist",
            "format": "json",
            "cursorMark": cursor_mark,
            "pageSize": page_size,
        }

        # A timeout keeps a stalled connection from hanging the notebook forever.
        try:
            response = requests.get(base_url, params=params, timeout=timeout)
        except requests.RequestException as exc:
            print(f"Error fetching citations for query '{querystring}': {exc}")
            break  # Keep whatever was collected so far

        if response.status_code != 200:
            print(f"Error fetching citations for query '{querystring}': {response.status_code}")
            break  # Stop on request failure

        try:
            data = response.json()
        except ValueError as exc:
            # JSON decoding errors are ValueError subclasses.
            print(f"Error decoding response for query '{querystring}': {exc}")
            break

        results = data.get("resultList", {}).get("result", [])
        ids = {item["id"] for item in results if "id" in item}

        if not ids:
            break  # An empty page means there is nothing further to collect

        all_hits.update(ids)

        next_cursor = data.get("nextCursorMark")

        # A missing cursor ends pagination. A cursor identical to the one just
        # sent would make the next request identical too, so stop instead of
        # looping forever.
        if not next_cursor or next_cursor == cursor_mark:
            break

        cursor_mark = next_cursor

    # Sorted so that repeated runs produce the same ordering.
    return sorted(all_hits)

In [2]:
# Search Europe PMC for "comet" and report how many unique IDs came back.
comet_hits = fetch_hits("comet")
print(f"\nFetched {len(comet_hits)} IDs.")

# Persist the IDs as plain text, one per line (no trailing newline).
output_file = "comet.txt"
with open(output_file, "w") as handle:
    handle.write("\n".join(comet_hits))

print(f"IDs saved to {output_file}.")


Fetched 44143 IDs.
IDs saved to comet.txt.


In [None]:
# Search Europe PMC for "mascot" and report how many unique IDs came back.
mascot_hits = fetch_hits("mascot")
print(f"\nFetched {len(mascot_hits)} IDs.")

# Persist the IDs as plain text, one per line (no trailing newline).
output_file = "mascot.txt"
with open(output_file, "w") as handle:
    handle.write("\n".join(mascot_hits))

print(f"IDs saved to {output_file}.")

In [None]:
# Search Europe PMC for "mascot server". The double quotes are part of the
# query string itself - presumably to request an exact-phrase match; confirm
# against the Europe PMC search syntax.
mascot_server_hits = fetch_hits("\"mascot server\"")
print(f"\nFetched {len(mascot_server_hits)} IDs.")

# Persist the IDs as plain text, one per line (no trailing newline).
output_file = "mascot_server.txt"
with open(output_file, "w") as handle:
    handle.write("\n".join(mascot_server_hits))

print(f"IDs saved to {output_file}.")

In [None]:
# Search Europe PMC for "maxquant" and report how many unique IDs came back.
maxquant_hits = fetch_hits("maxquant")
print(f"\nFetched {len(maxquant_hits)} IDs.")

# Persist the IDs as plain text, one per line (no trailing newline).
output_file = "maxquant.txt"
with open(output_file, "w") as handle:
    handle.write("\n".join(maxquant_hits))

print(f"IDs saved to {output_file}.")

In [None]:
# Search Europe PMC for "proteomics" and report how many unique IDs came back.
proteomics_hits = fetch_hits("proteomics")
print(f"\nFetched {len(proteomics_hits)} IDs.")

# Persist the IDs as plain text, one per line (no trailing newline).
output_file = "proteomics.txt"
with open(output_file, "w") as handle:
    handle.write("\n".join(proteomics_hits))

print(f"IDs saved to {output_file}.")

In [None]:
# Search Europe PMC for "metagenomics" and report how many unique IDs came back.
metagenomics_hits = fetch_hits("metagenomics")
print(f"\nFetched {len(metagenomics_hits)} IDs.")

# Persist the IDs as plain text, one per line (no trailing newline).
output_file = "metagenomics.txt"
with open(output_file, "w") as handle:
    handle.write("\n".join(metagenomics_hits))

print(f"IDs saved to {output_file}.")

In [None]:
# Compare the IDs returned for "maxquant" with those returned for "proteomics".
# Descriptive, cell-specific names avoid sharing generic variables with the
# other Venn cells, so running cells out of order cannot plot the wrong data.
maxquant_ids = set(maxquant_hits)
proteomics_ids = set(proteomics_hits)

# Region sizes in the order passed to venn2: (first only, second only, both).
only_maxquant = len(maxquant_ids - proteomics_ids)
only_proteomics = len(proteomics_ids - maxquant_ids)
in_both = len(maxquant_ids & proteomics_ids)

# Draw the two-set Venn diagram.
plt.figure(figsize=(5, 5))
venn2(subsets=(only_maxquant, only_proteomics, in_both), set_labels=('MaxQuant', 'Proteomics'))
plt.title("Europe PMC hits: MaxQuant vs. Proteomics")

plt.show()

In [None]:
from matplotlib_venn import venn3

# Compare the IDs returned for "comet", "proteomics" and "metagenomics".
# Descriptive, cell-specific names avoid sharing generic variables with the
# other Venn cells, so running cells out of order cannot plot the wrong data.
comet_ids = set(comet_hits)
proteomics_ids = set(proteomics_hits)
metagenomics_ids = set(metagenomics_hits)

# Draw the three-set Venn diagram; venn3 derives the region sizes from the sets.
plt.figure(figsize=(5, 5))
venn3([comet_ids, proteomics_ids, metagenomics_ids], set_labels=('Comet', 'Proteomics', 'Metagenomics'))
plt.title("Europe PMC hits: Comet vs. Proteomics vs. Metagenomics")

plt.show()

In [None]:
# Compare the IDs returned for "mascot", "proteomics" and "mascot server".
# Descriptive, cell-specific names avoid sharing generic variables with the
# other Venn cells, so running cells out of order cannot plot the wrong data.
mascot_ids = set(mascot_hits)
proteomics_ids = set(proteomics_hits)
mascot_server_ids = set(mascot_server_hits)

# Draw the three-set Venn diagram; venn3 derives the region sizes from the sets.
plt.figure(figsize=(5, 5))
venn3([mascot_ids, proteomics_ids, mascot_server_ids], set_labels=('Mascot', 'Proteomics', 'Mascot Server'))
plt.title("Europe PMC hits: Mascot vs. Proteomics vs. Mascot Server")

plt.show()