# Data Preparation Notebook
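This notebook automatically downloads, extracts (when the data is packaged as a `.tar.gz`/`.tgz` archive), and preprocesses (z-score normalization followed by a center crop) T1 MRI volumes from the IXI dataset.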

In [None]:
%%bash
pip install requests nibabel tqdm

In [None]:
import os
import requests
import tarfile
from tqdm import tqdm
import nibabel as nib
import numpy as np

def download_file(url, dest_path, chunk_size=1 << 20, timeout=60):
    """Download *url* to *dest_path*, showing a progress bar.

    Nothing is downloaded when *dest_path* already exists. The payload is
    first written to ``dest_path + '.part'`` and only renamed into place once
    the transfer has finished, so an interrupted or failed download never
    leaves a truncated file that a later run would mistake for a complete one.

    Args:
        url: Address of the file to fetch.
        dest_path: Where to store the file; missing parent directories are
            created.
        chunk_size: Number of bytes requested per read from the response.
        timeout: Seconds to wait for the server to connect / send data before
            giving up (passed straight to ``requests.get``).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
        requests.RequestException: On connection problems or timeouts.
    """
    if os.path.exists(dest_path):
        print(f"Skipping download, {dest_path} already exists.")
        return

    # os.makedirs('') raises, so only create a directory when there is one.
    dest_dir = os.path.dirname(dest_path)
    if dest_dir:
        os.makedirs(dest_dir, exist_ok=True)

    tmp_path = dest_path + '.part'
    try:
        with requests.get(url, stream=True, timeout=timeout) as resp:
            # Without this an HTML error page would be saved as the data file.
            resp.raise_for_status()
            # A missing content-length header yields 0; give tqdm None instead
            # so it shows a running byte counter rather than a "0 of 0" bar.
            total = int(resp.headers.get('content-length', 0)) or None
            with open(tmp_path, 'wb') as f, \
                    tqdm(total=total, unit='B', unit_scale=True) as pbar:
                for chunk in resp.iter_content(chunk_size=chunk_size):
                    if chunk:
                        f.write(chunk)
                        pbar.update(len(chunk))
        # Publish the finished file in a single rename.
        os.replace(tmp_path, dest_path)
    except BaseException:
        # Includes KeyboardInterrupt: never leave a partial download behind.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise

def _assert_inside(root, candidate, member_name):
    """Raise tarfile.TarError unless *candidate* resolves to a path under *root*."""
    resolved = os.path.realpath(candidate)
    try:
        inside = os.path.commonpath([root, resolved]) == root
    except ValueError:
        # Raised e.g. for paths on different drives: certainly not inside root.
        inside = False
    if not inside:
        raise tarfile.TarError(
            f"Refusing to extract {member_name!r}: it would be placed outside {root}"
        )


def extract_archive(archive_path, out_dir):
    """Extract a gzip-compressed tar archive into *out_dir*.

    Only file names ending in ``.tar.gz`` or ``.tgz`` are handled; for any
    other name a notice is printed and nothing is extracted. Archive members
    that would be written outside *out_dir* (absolute paths, ``..`` components,
    links pointing elsewhere) are rejected instead of being extracted.

    Args:
        archive_path: Path of the archive to unpack.
        out_dir: Destination directory; created if it does not exist.

    Raises:
        tarfile.TarError: If the archive is unreadable or contains a member
            that would escape *out_dir*.
    """
    if not archive_path.endswith(('.tar.gz', '.tgz')):
        print(f"Skipping extraction, {archive_path} is not a .tar.gz/.tgz archive.")
        return

    os.makedirs(out_dir, exist_ok=True)
    with tarfile.open(archive_path, 'r:gz') as tar:
        if hasattr(tarfile, 'data_filter'):
            # Interpreters with extraction filters: let tarfile do the vetting.
            tar.extractall(path=out_dir, filter='data')
        else:
            # Older interpreters: vet every member before anything is written.
            root = os.path.realpath(out_dir)
            for member in tar.getmembers():
                target = os.path.join(root, member.name)
                _assert_inside(root, target, member.name)
                if member.issym():
                    # Symlink targets are relative to the link's own directory.
                    link = os.path.join(os.path.dirname(target), member.linkname)
                    _assert_inside(root, link, member.name)
                elif member.islnk():
                    # Hard-link targets are relative to the archive root.
                    link = os.path.join(root, member.linkname)
                    _assert_inside(root, link, member.name)
            tar.extractall(path=out_dir)
    print(f"Extracted {archive_path} to {out_dir}")

def center_crop_3d(image, size=128):
    """Return the central ``size`` x ``size`` x ``size`` region of a 3D array.

    Along any axis that is shorter than *size* the whole axis is kept, so the
    result can be smaller than requested along that axis. (A negative start
    index would otherwise wrap around and select an off-centre sliver.)
    The result is produced by basic slicing, without copying the data.

    Args:
        image: Array with exactly three dimensions.
        size: Edge length of the crop in voxels; odd values are supported.

    Returns:
        The cropped array; each axis has length ``min(size, axis_length)``.

    Raises:
        ValueError: If *size* is negative or *image* is not three-dimensional.
    """
    if size < 0:
        raise ValueError(f"size must be non-negative, got {size}")
    # Unpacking enforces the 3D assumption (ValueError for any other rank).
    z, y, x = image.shape
    window = []
    for dim in (z, y, x):
        # Clamp at 0 so a too-short axis is kept whole instead of wrapping.
        start = max((dim - size) // 2, 0)
        window.append(slice(start, start + min(size, dim)))
    return image[tuple(window)]

def preprocess_nifti(input_dir, output_dir, size=128):
    """Normalize and center-crop every ``.nii.gz`` file found in *input_dir*.

    Each volume is loaded as float32, shifted to zero mean, scaled to unit
    standard deviation, cropped with ``center_crop_3d`` and written to
    *output_dir* as ``<name>_proc.nii.gz``. Files are handled in sorted name
    order; entries without the ``.nii.gz`` suffix are ignored.

    Args:
        input_dir: Directory that is scanned (non-recursively) for volumes.
        output_dir: Directory for the results; created if missing.
        size: Crop edge length handed to ``center_crop_3d``.
    """
    suffix = '.nii.gz'
    os.makedirs(output_dir, exist_ok=True)
    # Sorted so runs are reproducible; os.listdir order is arbitrary.
    for fname in sorted(os.listdir(input_dir)):
        if not fname.endswith(suffix):
            continue
        path = os.path.join(input_dir, fname)
        img = nib.load(path).get_fdata().astype(np.float32)

        std = img.std()
        img = img - img.mean()
        # A constant volume has std == 0; dividing would fill it with NaN,
        # so such a volume is only mean-centred (i.e. becomes all zeros).
        if std > 0:
            img = img / std

        img_crop = center_crop_3d(img, size)
        # Swap only the trailing suffix; str.replace would also rewrite an
        # earlier '.nii.gz' occurring inside the file name.
        out_name = fname[:-len(suffix)] + '_proc' + suffix
        out_path = os.path.join(output_dir, out_name)
        # NOTE: the result is saved with an identity affine, so the voxel
        # spacing/orientation stored in the source file is not carried over.
        nib.save(nib.Nifti1Image(img_crop, np.eye(4)), out_path)
        print(f"Preprocessed and saved {out_path}")


In [None]:
# Example: download and preprocess a single IXI T1 volume.
URL = 'https://brain-development.org/ixi-datasets/T1/IXI001-T1.nii.gz'
# Single source of truth for the raw-data directory, so the download target
# and the directory that gets preprocessed cannot drift apart.
RAW_DIR = 'data/raw'
RAW_PATH = f'{RAW_DIR}/IXI001-T1.nii.gz'
PROCESSED_DIR = 'data/processed/IXI'

# Make sure the raw directory exists before anything is written into it.
os.makedirs(RAW_DIR, exist_ok=True)
download_file(URL, RAW_PATH)
# The URL points at a bare .nii.gz volume rather than a tar archive, so
# extract_archive is not needed here.
# Note: every .nii.gz file present in RAW_DIR is preprocessed, not only the
# one downloaded above.
preprocess_nifti(RAW_DIR, PROCESSED_DIR)
