### Own attempt with requests (via `jsonapi_client`)

In [428]:
from jsonapi_client import Session
import pandas as pd
from time import sleep

In [429]:
# API root handed to jsonapi_client.Session; the two suffixes below are the
# study-listing queries iterated within that session.
base_url = "https://www.ebi.ac.uk/metagenomics/api/v1/"

# Both queries filter studies by lineage and share the same human-host prefix.
_human_lineage = "root:Host-associated:Human"
lineage_gut_suffix = f"studies?lineage={_human_lineage}:Digestive system"
lineage_vagina_suffix = f"studies?lineage={_human_lineage}:Reproductive system:Vagina"

In [430]:
# Relative file names of the per-biome study tables (one CSV per biome).
_csv_suffix = "_biome_studies.csv"
gut_csv_path = "gut" + _csv_suffix
vaginal_csv_path = "vaginal" + _csv_suffix

In [446]:
def get_microbiome_studies(base_url:str, gut_url_suffix:str, vaginal_url_suffix:str) -> tuple[list, list]:
    """Fetch the gut and vaginal study listings through one API session.

    A single ``Session`` is opened on *base_url*; each suffix is iterated in
    turn and the ``.json`` of every yielded resource is collected.

    Args:
        base_url: API root passed to ``Session``.
        gut_url_suffix: Query iterated for the gut-biome studies.
        vaginal_url_suffix: Query iterated for the vaginal-biome studies.

    Returns:
        ``(gut_biome_studies, vaginal_biome_studies)`` — two lists holding the
        ``.json`` value of each resource, gut first.
    """
    with Session(base_url) as mgnify:
        # Each listing is fully materialised while the session is still open;
        # the gut query runs before the vaginal one.
        gut_biome_studies = [resource.json for resource in mgnify.iterate(gut_url_suffix)]
        vaginal_biome_studies = [resource.json for resource in mgnify.iterate(vaginal_url_suffix)]

    return gut_biome_studies, vaginal_biome_studies

def response_to_csv(biome_studies:list,csv_path:str) -> None:
    """Write the ``attributes`` of each study record to a CSV file.

    Args:
        biome_studies: Study records; each must be a mapping with an
            ``"attributes"`` entry (itself a mapping of column name to value).
        csv_path: Destination file, written with pandas' default ``to_csv``
            settings (the row index is included as the first column).

    Raises:
        KeyError: if a record has no ``"attributes"`` entry.
    """
    studies_attributes = [study["attributes"] for study in biome_studies]

    df = pd.DataFrame(studies_attributes)
    # The abstract is left out of the table. errors="ignore" keeps the drop
    # from raising KeyError when the column is absent, e.g. for an empty
    # study list or when no record carries an abstract.
    df = df.drop(columns=["study-abstract"], errors="ignore")
    df.to_csv(csv_path)

def get_dois(accessions:list, base_url:str) -> list:
    """Fetch the sample records of the first study in *accessions*.

    NOTE(review): despite its name, this function currently returns sample
    records rather than DOIs, and it only queries the first accession. A
    caller that needs one value per accession cannot use the result as-is.

    Args:
        accessions: Study accession IDs; only the first one is queried.
        base_url: API root passed to ``Session``.

    Returns:
        The ``.json`` of every resource under ``studies/<accession>/samples``
        for the first accession, or ``[]`` when *accessions* is empty.
    """
    if not accessions:
        # Nothing to query. Without this guard the result variable would be
        # unbound at the return statement.
        return []

    samples = []
    with Session(base_url) as mgnify:
        for accession_id in accessions[0:1]:
            print(accession_id, end=" ")
            pub_url = f"studies/{accession_id}/samples"
            # Materialise inside the ``with`` block so the resources are
            # fetched while the session is still open, not after it closes.
            samples = [resource.json for resource in mgnify.iterate(pub_url)]
            # TODO: collect one DOI per accession (resource["attributes"]["doi"],
            # or None when the study has none) once the right endpoint is settled.
    return samples

def add_dois_to_csv(path:str, base_url:str) -> None:
    """Add a ``doi`` column to the CSV at *path*, rewriting the file in place.

    The table is read, its ``accession`` column is passed to ``get_dois`` and
    the returned list is stored as the ``doi`` column.

    Args:
        path: CSV file to read and overwrite.
        base_url: API root forwarded to ``get_dois``.

    Raises:
        KeyError: if the CSV has no ``accession`` column.
        ValueError: raised by pandas if ``get_dois`` returns a different
            number of items than the table has rows.
    """
    df = pd.read_csv(path)
    accessions = df["accession"].tolist()
    df["doi"] = get_dois(accessions, base_url)
    # index=False: the row index is not data. Writing it would add one more
    # unnamed column every time the file is read back and rewritten.
    df.to_csv(path, index=False)

In [447]:
# gut_biome_studies, vaginal_biome_studies = get_microbiome_studies(base_url, lineage_gut_suffix, lineage_vagina_suffix)

In [448]:
# response_to_csv(gut_biome_studies, gut_csv_path)
# response_to_csv(vaginal_biome_studies, vaginal_csv_path)

In [449]:
# print("Adding dois to vaginal microbiome csv")
# add_dois_to_csv(vaginal_csv_path, base_url) 
# print("Adding dois to gut microbiome csv")
# add_dois_to_csv(gut_csv_path, base_url)

In [None]:
# Read the vaginal-biome table back and query the API for its accessions.
df = pd.read_csv(vaginal_csv_path)
accessions = df.loc[:, "accession"].tolist()
samples_list = get_dois(accessions=accessions, base_url=base_url)

In [None]:
# Show which fields the first returned record carries. The method must be
# called: without the parentheses the cell only displays the bound method.
samples_list[0].keys()

In [423]:
# Collect the distinct DOIs recorded for each biome. Rows without a DOI are
# read back as NaN; they are dropped so that a missing value is neither
# counted as a study nor reported as shared between the two biomes.
df_gut = pd.read_csv(gut_csv_path)
gut_studies = set(df_gut["doi"].dropna().tolist())

df_vag = pd.read_csv(vaginal_csv_path)
vaginal_studies = set(df_vag["doi"].dropna().tolist())


In [None]:
# Sizes of the two DOI sets: gut first, then vaginal, separated by a space.
print(f"{len(gut_studies)} {len(vaginal_studies)}")


In [None]:
# DOIs that occur in both the gut and the vaginal table.
gut_studies & vaginal_studies