# Setup CC359 Dataset
Download the dataset from https://drive.google.com/drive/folders/0BxLb0NB2MjVZNm9JY1pWNFp6WTA?resourcekey=0-2sXMr8q-n2Nn6iY3PbBAdA&usp=sharing and place its contents in the directory that `data_path` points to (set in the first code cell below).

The `data_path` directory should have the following structure:
- Hippocampus-masks (not required)
- Skull-stripping-masks
- WM-GM-CSF (not required)
- Original.zip

In [1]:
import pandas as pd
import glob
import shutil
import zipfile
from pathlib import Path

# Input: directory holding the raw download. Output: root of the re-organized dataset.
data_path = Path("/tmp/data/CC359-RAW")
output_path = Path("/tmp/data/CC359")
output_path.mkdir(exist_ok=True, parents=True)

# Maps the lower-case vendor tag used in the glob patterns below to the name of
# the per-vendor sub-directory the matching files are moved into.
vendors = {
    directory.lower(): directory
    for directory in (
        "Philips_3",
        "Philips_15",
        "Siemens_3",
        "Siemens_15",
        "GE_3",
        "GE_15",
    )
}

In [2]:
# ----- Extract Original.zip and re-organize -----
# Unpacks the archive into `output_path` and then sorts the files found in
# `output_path / "Original"` into one sub-directory per vendor, based on the
# vendor tag embedded in each file name.

with zipfile.ZipFile(data_path / "Original.zip", "r") as zip_file:
    zip_file.extractall(output_path)

original_path = output_path / "Original"

for sub_vendor, vendor in vendors.items():
    vendor_path = original_path / vendor
    vendor_path.mkdir(parents=True, exist_ok=True)

    # Collect the matches up front so the directory is not modified while it is
    # still being scanned.
    for file_path in sorted(original_path.glob(f"CC*_{sub_vendor}_*.nii.gz")):
        # Move to an explicit file destination: moving into a directory raises
        # shutil.Error if the file is already there, which made this cell fail
        # on a second run. An explicit file target is overwritten instead.
        shutil.move(str(file_path), str(vendor_path / file_path.name))

In [3]:
# ----- Extract Silver-standard-machine-learning.zip and re-organize -----
# Unpacks the mask archive into `output_path` and then sorts the files found in
# `output_path / "Silver-standard"` into one sub-directory per vendor, based on
# the vendor tag embedded in each file name.
silver_standard = "Silver-standard-machine-learning"

with zipfile.ZipFile(data_path / "Skull-stripping-masks" / f"{silver_standard}.zip", "r") as zip_file:
    zip_file.extractall(output_path)

silver_path = output_path / "Silver-standard"

for sub_vendor, vendor in vendors.items():
    vendor_path = silver_path / vendor
    vendor_path.mkdir(parents=True, exist_ok=True)

    # Collect the matches up front so the directory is not modified while it is
    # still being scanned.
    for file_path in sorted(silver_path.glob(f"CC*_{sub_vendor}_*.nii.gz")):
        # Move to an explicit file destination: moving into a directory raises
        # shutil.Error if the file is already there, which made this cell fail
        # on a second run. An explicit file target is overwritten instead.
        shutil.move(str(file_path), str(vendor_path / file_path.name))

In [4]:
# Final cleanup: remove the macOS metadata directory if extraction created one.
# The existence check keeps this cell re-runnable and tolerant of archives that
# carry no "__MACOSX" entry; any other removal error still surfaces.
macosx_path = output_path / "__MACOSX"
if macosx_path.is_dir():
    shutil.rmtree(macosx_path)

# Upload to W&B

In [5]:
import wandb
from pathlib import Path

In [6]:
# Log the re-organized dataset directory as a W&B artifact.
# The run is used as a context manager so it is finished on exit even when
# adding the directory or logging the artifact raises, instead of being left open.
with wandb.init(project="UDA-Datasets") as run:
    artifact = wandb.Artifact("CC359-Skull-stripping", type="raw_data")
    artifact.add_dir(output_path)
    run.log_artifact(artifact)

[34m[1mwandb[0m: Currently logged in as: [33miserh[0m. Use [1m`wandb login --relogin`[0m to force relogin


[34m[1mwandb[0m: Adding directory to artifact (/tmp/data/CC359)... Done. 1.0s


VBox(children=(Label(value='4019.320 MB of 4019.320 MB uploaded (4019.313 MB deduped)\r'), FloatProgress(value…