# Download Images

In [None]:
import requests
import pandas as pd
import json
import os
from pathlib import Path

# Load the case list and collect its submitter IDs in ascending order; the
# IDs restrict the GDC file query to exactly these cases.
df_submitters = pd.read_csv(
    "/gpfs/gibbs/project/cpsc452/cpsc452_jl4286/tcga_gbm_rna_seq_cases.csv"
)
submitter_ids = df_submitters["submitter_id"].tolist()
submitter_ids.sort()

# Query the GDC files endpoint for SVS files that belong to the TCGA-GBM
# project and to one of the cases listed in `submitter_ids`.
url = "https://api.gdc.cancer.gov/files"
filters = {
    "op": "and",
    "content": [
        {"op": "in", "content": {"field": "cases.project.project_id", "value": ["TCGA-GBM"]}},
        {"op": "in", "content": {"field": "cases.submitter_id", "value": submitter_ids}},
        {"op": "in", "content": {"field": "data_format", "value": ["SVS"]}},
    ],
}
params = {
    # The filter tree is sent as a JSON-encoded query-string parameter.
    "filters": json.dumps(filters),
    "fields": "file_id,file_name,cases.submitter_id,data_format,access",
    "format": "JSON",
    # Single request, no paging: at most this many records are asked for.
    "size": 10000,
}

# A timeout keeps the script from hanging forever on a stalled connection,
# and raise_for_status() surfaces an HTTP error right here instead of as a
# confusing JSON-decoding or KeyError failure on the line that follows.
response = requests.get(url, params=params, timeout=60)
response.raise_for_status()
results = response.json()["data"]["hits"]

def _case_submitter_id(case):
    """Return the submitter ID of one exploded `cases` entry, or None."""
    if not isinstance(case, dict):
        return None
    return case["submitter_id"]


# One row per (file, case) pair: `cases` holds a list per file, so explode it
# and lift the submitter ID out of each case record into its own column.
file_df = pd.DataFrame(results).explode("cases")
file_df["submitter_id"] = file_df["cases"].apply(_case_submitter_id)

# Keep only files named *.svs ...
has_svs_name = file_df["file_name"].str.endswith(".svs")
file_df = file_df[has_svs_name]

# ... and, of those, only the ones marked as open access.
is_open_access = file_df["access"] == "open"
file_df = file_df[is_open_access]

# Order by submitter ID and renumber rows from 0 so that the positional
# start/end indices entered below address a stable ordering.
file_df = file_df.sort_values("submitter_id")
file_df = file_df.reset_index(drop=True)

# Report how many files are available, then ask which positional slice of
# `file_df` to download (start inclusive, end exclusive).
print(f"\nTotal SVS files found: {len(file_df)}")
start_index = int(input("Enter start index (e.g., 0): "))
end_index = int(input("Enter end index (exclusive, e.g., 250): "))

selected_df = file_df.iloc[slice(start_index, end_index)]
selected_df = selected_df.reset_index(drop=True)

# Downloads go to a per-range folder on the user's Desktop; the folder (and
# any missing parents) is created if it does not exist yet.
desktop = Path.home() / "Desktop"
download_dir = desktop / f"image_svs_manual_{start_index}_{end_index}"
download_dir.mkdir(parents=True, exist_ok=True)

# Download every selected slide into `download_dir`. A failure on one file is
# reported and the loop moves on to the next file.
for _, row in selected_df.iterrows():
    raw_name = row["file_name"]
    # Keep only the text before the first "." of the GDC file name.
    truncated_name = raw_name.split(".")[0] + ".svs"
    out_path = download_dir / truncated_name
    # Stream into a sibling ".part" file first so that an interrupted or
    # rejected download never leaves a truncated file under the final name.
    part_path = download_dir / (truncated_name + ".part")
    download_url = f"https://api.gdc.cancer.gov/data/{row['file_id']}"

    print(f"Downloading {row['submitter_id']} → {truncated_name}...")
    try:
        # The context manager releases the connection even when writing fails.
        with requests.get(download_url, stream=True, timeout=60) as r:
            # Without this check an HTTP error body would be saved as a slide.
            r.raise_for_status()
            with open(part_path, "wb") as f:
                # 1 MiB chunks: far fewer write calls than 8 KiB for large
                # slide images, with negligible memory cost.
                for chunk in r.iter_content(chunk_size=1024 * 1024):
                    f.write(chunk)
        # Only a fully written file is moved to its final name.
        os.replace(part_path, out_path)
    except (requests.RequestException, OSError) as e:
        # Best-effort cleanup of the partial file; the original error is the
        # one worth reporting, so a cleanup failure is deliberately ignored.
        try:
            part_path.unlink()
        except OSError:
            pass
        print(f"Failed: {row['submitter_id']} — {str(e)}")