In [5]:
from pathlib import Path
from time import sleep
import os
import requests
import json

# Local output layout: everything is written beneath DOWNLOAD_DIR, with the
# raw per-study info JSON kept in its "info" subdirectory.
DOWNLOAD_DIR = "maf_files"
INFO_DOWNLOAD_DIR = os.path.join(DOWNLOAD_DIR, "info")
# os.makedirs creates missing parent directories as well, so creating the
# nested info directory also creates DOWNLOAD_DIR itself.
os.makedirs(INFO_DOWNLOAD_DIR, exist_ok=True)

# MetaboLights web-service endpoints. The two templates expect a `study_id`
# field to be filled in with str.format().
STUDIES_API = "https://www.ebi.ac.uk/metabolights/ws/studies"
INFO_API = "https://www.ebi.ac.uk/metabolights/ws/studies/{study_id}"
FILES_API = "https://www.ebi.ac.uk/metabolights/ws/studies/{study_id}/files"

def fetch_all_studies(timeout=60):
    """Fetch all study IDs from the MetaboLights API.

    Args:
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without a timeout a stalled
            connection would block this call indefinitely.

    Returns:
        The value stored under the ``"content"`` key of the JSON response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within *timeout*.
        KeyError: If the response JSON has no ``"content"`` key.
    """
    response = requests.get(STUDIES_API, timeout=timeout)
    response.raise_for_status()
    return response.json()["content"]

def fetch_study_info(study_id, timeout=60):
    """Fetch the study-level information document for one study.

    Args:
        study_id: Study identifier substituted into ``INFO_API``.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without a timeout a stalled
            connection would block this call indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    url = INFO_API.format(study_id=study_id)
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()

def fetch_study_metadata(study_id, timeout=60):
    """Fetch the file listing for one study from the ``/files`` endpoint.

    Args:
        study_id: Study identifier substituted into ``FILES_API``.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without a timeout a stalled
            connection would block this call indefinitely.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    url = FILES_API.format(study_id=study_id)
    response = requests.get(url, timeout=timeout)
    response.raise_for_status()
    return response.json()

def download_file(file_url, save_path, timeout=60):
    """Fetch a JSON table from *file_url* and save its rows to *save_path*.

    The response body must be JSON shaped like ``{"data": {"rows": [...]}}``;
    only the ``rows`` value is written, as JSON. Nothing is fetched when
    *save_path* already exists.

    Args:
        file_url: URL to request.
        save_path: Destination path of the JSON file.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without a timeout a stalled
            connection would block this call indefinitely.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within *timeout*.
        KeyError: If the response JSON lacks ``data`` or ``rows``.
        ValueError: If ``rows`` is empty.
    """
    if os.path.exists(save_path):
        print(f"File already exists, skipping: {save_path}")
        return
    print(f"Downloading {file_url}")
    response = requests.get(file_url, timeout=timeout)
    response.raise_for_status()
    rows = response.json()["data"]["rows"]
    print(f"Found {len(rows)} rows in the file.")
    if not rows:
        raise ValueError(f"No data found in the response for {file_url} // {response.text}")
    # The existence of save_path is what marks this download as done (see the
    # check above), so it must never hold a half-written file. Write to a
    # sibling temporary file and move it into place in one step.
    tmp_path = f"{save_path}.part"
    try:
        with open(tmp_path, "w", encoding="utf-8") as file:
            json.dump(rows, file)
        os.replace(tmp_path, save_path)
    finally:
        # After a successful replace the temporary file is gone; this only
        # cleans up when the write or the move was interrupted.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
    print(f"Saved: {save_path}")

def _write_json_atomically(path, payload, **dump_kwargs):
    """Write *payload* as JSON to *path* via a temporary file and os.replace."""
    tmp_path = f"{path}.part"
    try:
        with open(tmp_path, "w", encoding="utf-8") as handle:
            json.dump(payload, handle, **dump_kwargs)
        os.replace(tmp_path, path)
    finally:
        # Only present if the write or the move was interrupted.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)


def download_all():
    """Download the info document and MAF tables for every listed study.

    For each study ID returned by ``fetch_all_studies``:

    * skip the study when its info JSON already exists in
      ``INFO_DOWNLOAD_DIR``;
    * otherwise fetch and save the info JSON, list the study's files, and
      download every non-directory entry of type ``metadata_maf`` into
      ``DOWNLOAD_DIR/<study_id>/<file name>.json``.

    Errors while processing a study are printed and do not stop the run.
    A failure on one MAF file is printed and the remaining MAF files of that
    study are still attempted.
    """
    print("Fetching all studies...")
    study_ids = fetch_all_studies()
    print(f"Found {len(study_ids)} studies.")

    for study_id in study_ids:
        info_save_path = os.path.join(INFO_DOWNLOAD_DIR, study_id) + ".json"
        print(f"Processing study: {study_id} ({info_save_path})")
        # NOTE(review): the info file doubles as the "already processed"
        # marker and is written before the MAF downloads, so a study whose
        # MAF downloads failed is not retried on later runs - confirm that
        # this is intended.
        if os.path.exists(info_save_path):
            print("Info already exists, skipping")
            continue
        # Short pause before each study's requests (presumably to go easy on
        # the remote service).
        sleep(0.2)
        try:
            info = fetch_study_info(study_id)
            # Tolerate a missing or null investigation / study list instead
            # of failing with an opaque IndexError.
            isa = info.get("isaInvestigation") or {}
            isa_studies = isa.get("studies") or []
            if not isa_studies:
                print(f"Warning: No study entries found for {study_id}")
                study_title = None
            else:
                if len(isa_studies) > 1:
                    print(f"Warning: More than one study found for {study_id}")
                study_title = isa_studies[0].get("title")
            print(f"Study title: {study_title}")

            # Written atomically: a partial file here would make every later
            # run skip this study.
            _write_json_atomically(info_save_path, info, indent=4)

            metadata = fetch_study_metadata(study_id)
            maf_files = [
                entry for entry in metadata.get("study") or []
                if entry.get("type") == "metadata_maf" and not entry.get("directory")
            ]

            print(f"Found {len(maf_files)} MAF files for study: {study_title}")

            study_dir = Path(DOWNLOAD_DIR) / study_id
            if maf_files:
                study_dir.mkdir(exist_ok=True, parents=True)

            for maf in maf_files:
                file_name = maf["file"]
                file_url = f"https://www.ebi.ac.uk/metabolights/ws/studies/{study_id}/{file_name}"
                save_path = study_dir / (file_name + ".json")
                try:
                    download_file(file_url, str(save_path))
                except Exception as e:
                    # Keep going: the study will not be revisited once its
                    # info file exists, so one bad MAF must not cost the rest.
                    print(f"Error downloading {file_name} for study {study_id}: {e}")

        except Exception as e:
            print(f"Error processing study {study_id}: {e}")

# Run the full download when this cell executes (performs network requests
# and writes files under DOWNLOAD_DIR).
download_all()


Fetching all studies...
Found 1730 studies.
Processing study: MTBLS10044 (maf_files/info/MTBLS10044.json)
Info already exists, skipping
Processing study: MTBLS1039 (maf_files/info/MTBLS1039.json)
Info already exists, skipping
Processing study: MTBLS1041 (maf_files/info/MTBLS1041.json)
Info already exists, skipping
Processing study: MTBLS1044 (maf_files/info/MTBLS1044.json)
Info already exists, skipping
Processing study: MTBLS105 (maf_files/info/MTBLS105.json)
Info already exists, skipping
Processing study: MTBLS106 (maf_files/info/MTBLS106.json)
Info already exists, skipping
Processing study: MTBLS405 (maf_files/info/MTBLS405.json)
Info already exists, skipping
Processing study: MTBLS418 (maf_files/info/MTBLS418.json)
Info already exists, skipping
Processing study: MTBLS10321 (maf_files/info/MTBLS10321.json)
Info already exists, skipping
Processing study: MTBLS1073 (maf_files/info/MTBLS1073.json)
Info already exists, skipping
Processing study: MTBLS422 (maf_files/info/MTBLS422.json)
In

In [6]:
# Print a simple completion marker.
print("Done")

Done


In [None]:
from c3p.maftools import enrich_maf_row
