In [0]:
# --- CONFIGURATION ---
# The standard LUNA16 dataset is hosted on Zenodo (Record 3723295).
# Each download URL is built by appending a name from FILES_TO_DOWNLOAD to
# BASE_URL, so BASE_URL must keep its trailing slash.
BASE_URL = "https://zenodo.org/records/3723295/files/"

# Files to fetch from BASE_URL. Entries ending in ".zip" are also extracted
# into OUTPUT_DIR after download.
# WARNING: Downloading all of these will take ~60GB of disk space.
FILES_TO_DOWNLOAD = [
    "annotations.csv",
    "candidates.csv",
    "sampleSubmission.csv",
    "subset0.zip", 
    # Uncomment lines below to download the FULL dataset (Warning: Huge)
    # NOTE(review): subset7-9 may be published under a different Zenodo record
    # than BASE_URL points to -- confirm each file exists there before
    # uncommenting, otherwise those downloads will fail.
    # "subset1.zip",
    # "subset2.zip",
    # "subset3.zip",
    # "subset4.zip",
    # "subset5.zip",
    # "subset6.zip",
    # "subset7.zip",
    # "subset8.zip",
    # "subset9.zip",
]

# Directory that receives both the downloaded files and the extracted archives.
# NOTE(review): the /Volumes/... prefix looks like a Databricks Unity Catalog
# volume path -- confirm it exists and is writable in the target workspace.
OUTPUT_DIR = "/Volumes/ema_rina/pixels_solacc_tcia/pixels_volume/LUNA16/"

In [0]:
import os
import requests
import zipfile
from tqdm import tqdm

def download_file(url, dest_path, timeout=30, block_size=1024 * 1024):
    """
    Stream a file from ``url`` to ``dest_path`` with a progress bar.

    The payload is first written to ``dest_path + ".part"`` and only renamed
    to ``dest_path`` once the transfer has finished. A failed or interrupted
    download therefore never leaves a truncated file at ``dest_path`` that a
    later run would mistake for a complete one.

    Args:
        url: HTTP(S) URL of the file to fetch.
        dest_path: Final location of the downloaded file. If it already
            exists, nothing is downloaded.
        timeout: Seconds to wait for the connection and for each read before
            ``requests`` gives up (it is not a limit on the total transfer).
        block_size: Number of bytes requested per streamed chunk.

    Returns:
        None.

    Raises:
        Any exception raised by ``requests`` (including the HTTP error raised
        by ``raise_for_status`` for 4xx/5xx responses) or by file I/O. The
        partial ``.part`` file is removed before the exception propagates.
    """
    if os.path.exists(dest_path):
        print(f"‚úÖ File already exists: {dest_path}")
        return

    print(f"‚¨áÔ∏è  Downloading: {url}")
    part_path = dest_path + ".part"

    try:
        # The context manager guarantees the connection is released.
        with requests.get(url, stream=True, timeout=timeout) as response:
            # Fail loudly on 4xx/5xx instead of saving the error page to disk.
            response.raise_for_status()
            total_size = int(response.headers.get('content-length', 0))

            with open(part_path, 'wb') as file, tqdm(
                desc=os.path.basename(dest_path),
                # None tells tqdm the total is unknown (no Content-Length).
                total=total_size or None,
                unit='iB',
                unit_scale=True,
                unit_divisor=1024,
            ) as bar:
                for data in response.iter_content(block_size):
                    size = file.write(data)
                    bar.update(size)

        # Publish the finished file only after every byte has been written.
        os.replace(part_path, dest_path)
    except BaseException:
        # BaseException so that a KeyboardInterrupt also cleans up the
        # partial file; the original exception is always re-raised.
        try:
            os.remove(part_path)
        except OSError:
            pass
        raise

def extract_zip(zip_path, extract_to):
    """
    Unpack every member of a zip archive into a target directory.

    Args:
        zip_path: Path of the ``.zip`` file to unpack.
        extract_to: Directory that receives the archive contents.

    Returns:
        None. A corrupted archive (``zipfile.BadZipFile``) is reported on
        stdout instead of being raised.

    The archive is left on disk after extraction; deleting it to save space
    is an optional step that is currently disabled (see the note at the end).
    """
    print(f"üì¶ Extracting: {zip_path}...")

    try:
        with zipfile.ZipFile(zip_path, mode='r') as archive:
            archive.extractall(path=extract_to)
    except zipfile.BadZipFile:
        # Report the bad archive and stop; nothing further to do for it.
        print(f"‚ùå Error: {zip_path} is corrupted. Please re-download.")
        return

    print(f"‚úÖ Extracted to {extract_to}")

    # Optional: remove the archive once it has been unpacked to save space.
    # os.remove(zip_path)
    # print(f"üóëÔ∏è  Deleted zip file: {zip_path}")


In [0]:
# Create the destination directory. exist_ok=True replaces the separate
# exists() check, which could race with another process creating the directory.
os.makedirs(OUTPUT_DIR, exist_ok=True)

print(f"üöÄ Starting LUNA16 Download to: {OUTPUT_DIR}")
print("Note: By default, this script only downloads 'subset0' (~6GB).")
print("Edit the 'FILES_TO_DOWNLOAD' list in the script to get the full dataset.\n")

failed_files = []    # names whose download raised an error
interrupted = False  # set when the user aborts the run

for filename in FILES_TO_DOWNLOAD:
    # 1. Build URL and Path
    file_url = BASE_URL + filename
    dest_path = os.path.join(OUTPUT_DIR, filename)
    # 2. Download
    try:
        download_file(file_url, dest_path)
    except KeyboardInterrupt:
        print("\nüõë Download stopped by user.")
        interrupted = True
        break
    except Exception as e:
        # Keep going with the remaining files, but remember the failure so the
        # final summary does not claim success.
        print(f"\n‚ùå Error downloading {filename}: {e}")
        failed_files.append(filename)
        continue
    # 3. Extract (only for zips)
    if filename.endswith(".zip"):
        extract_zip(dest_path, OUTPUT_DIR)

# Only announce completion when every file was processed without error.
if interrupted or failed_files:
    if failed_files:
        print(f"\nFailed to download: {', '.join(failed_files)}")
    print("\nProcess incomplete: some files were not downloaded.")
else:
    print("\nüéâ Process Complete.")
print(f"Data is located in: {os.path.abspath(OUTPUT_DIR)}")

In [0]:
%sh
# List the contents of the subset0 directory as a quick check of the download/extract step.
# NOTE(review): this path is hard-coded and presumably mirrors OUTPUT_DIR from the
# configuration cell plus the folder produced by subset0.zip -- keep them in sync.
ls -alh /Volumes/ema_rina/pixels_solacc_tcia/pixels_volume/LUNA16/subset0/