In [2]:
# DRIVE MOUNT AND ENVIRONMENT VERIFICATION

import os
import sys

# --- Mount Google Drive ---
from google.colab import drive
drive.mount('/content/drive')
print("Google Drive mounted.")

# --- Verify project folder exists on Drive ---
PROJECT_ROOT = '/content/drive/MyDrive/kidney-tumour-detection'

required_folders = [
    'dataset/raw',
    'dataset/processed',
    'checkpoints',
    'logs',
    'outputs'
]

print("Verifying folder structure...")
all_good = True
for folder in required_folders:
  full_path = os.path.join(PROJECT_ROOT, folder)
  if os.path.exists(full_path):
    print(f" {folder}")
  else:
    print(f" {folder} missing, creating it now...")
    os.mkdirs(full_path, exist_ok = True)
    print(f" {folder} created!")
    all_good = False

if all_good:
  print("All folders have been verified")
else:
  print("Missing folders have been created")

# --- Check GPU availability ---
import subprocess
print("Checking GPU availability...")
try:
  gpu_info = subprocess.run(
      ['nvidia-smi'],
      capture_output=True,
      text=True
      )
  if gpu_info.returncode == 0:
          for line in gpu_info.stdout.split('\n'):
              if any(x in line for x in ['Tesla', 'A100', 'T4', 'V100', 'L4']):
                  print(f" GPU detected: {line.strip()}")
  else:
    print("nvidia-smi returned an error")
except FileNotFoundError:
    print("No GPU detected - please change runtime type")
    print("Go to: Runtime → Change runtime type → T4 GPU")
except Exception as e:
    print(f" GPU check failed: {e}")

# --- Check RAM ---
import psutil
ram = psutil.virtual_memory()
print(f"RAM available: {ram.available / (1024**3):.1f} gb"
      f"/ {ram.total / (1024**3):.1f} GB total")

# --- Check Runtime Disk Usage ---
disk = psutil.disk_usage('/')
print(f"Runtime disk: {disk.free / (1024**3):.1f} GB free "
      f"/ {disk.total / (1024**3):.1f} GB total")
print("Drive storage: 2TB (psutil cannot read network drives accurately)")
print("Verify manually at drive.google.com")

# --- Check Drive storage ---
drive_disk = psutil.disk_usage(PROJECT_ROOT)
print(f"Drive storage: {drive_disk.free / (1024**3):.1f} GB free "
      f"/ {drive_disk.total / (1024**3):.1f} GB total")

print("\n" + "="*50)
print("Session ready. Project root:", PROJECT_ROOT)



Mounted at /content/drive
Google Drive mounted.
Verifying folder structure...
 dataset/raw
 dataset/processed
 checkpoints
 logs
 outputs
All folders have been verified
Checking GPU availability...
 GPU detected: |   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
RAM available: 11.4 gb/ 12.7 GB total
Runtime disk: 194.1 GB free / 235.7 GB total
Drive storage: 2TB (psutil cannot read network drives accurately)
Verify manually at drive.google.com
Drive storage: 184.4 GB free / 235.7 GB total

Session ready. Project root: /content/drive/MyDrive/kidney-tumour-detection


In [3]:
# GITHUB REPOSITORY SYNC

import os

GITHUB_REPO = "https://github.com/danokundaye/kidney-tumour-detection.git"
REPO_NAME = "kidney-tumour-detection"
CLONE_PATH = f"/content/{REPO_NAME}"

# --- Clone or update repository ---
if os.path.exists(CLONE_PATH):
  print("Repository already exists, pulling latest changes...")
  os.chdir(CLONE_PATH)
  os.system("git pull origin main")
  print("Repository updated")
else:
  print("Cloning repository...")
  os.system(f" git clone {GITHUB_REPO} {CLONE_PATH}")
  print("Repository cloned")

# --- Add repo to Python path so we can import our modules ---
import sys
if CLONE_PATH not in sys.path:
    sys.path.insert(0, CLONE_PATH)
    print(f" Added {CLONE_PATH} to Python path")

print(f"\nWorking directory: {CLONE_PATH}")
print("GitHub sync complete")

Cloning repository...
Repository cloned
 Added /content/kidney-tumour-detection to Python path

Working directory: /content/kidney-tumour-detection
GitHub sync complete


In [4]:
# INSTALL REQUIRED LIBRARIES

print("Installing required libraries...")
print("This will take 3-5 minutes. Stay calm.")

import subprocess
import sys

def install(package):
  subprocess.check_call(
      [sys.executable, "-m", "pip", "install", package, "-q"],
      stdout=subprocess.DEVNULL,
      stderr=subprocess.DEVNULL
  )

# Verify PyTorch has already been installed
print(" Checking PyTorch...", end=" ")
try:
    import torch
    if torch.cuda.is_available():
        print(f"already installed (v{torch.__version__})")
    else:
        print("Installed but no CUDA - check runtime type")
except ImportError:
    print("Not found, installing...")
    subprocess.check_call(
        [sys.executable, "-m", "pip", "install",
         "torch", "torchvision", "-q"]
    )
    print("Installed")

libraries = [
    ("ultralytics",                               "YOLOv8"),
    ("segmentation-models-pytorch",               "U-Net with ResNet50"),
    ("monai",                                     "Medical imaging utilities"),
    ("nibabel",                                   "NIfTI file reading"),
    ("albumentations",                            "Data augmentation"),
    ("shap",                                      "Explainability"),
    ("opencv-python-headless",                    "Image processing"),
    ("scikit-learn",                              "Metrics"),
    ("matplotlib",                                "Matplotlib"),
    ("seaborn",                                   "Seaborn"),
    ("tqdm",                                      "Progress bars"),
]

for package, name in libraries:
  print(f" Installing {name}...", end=" ")
  try:
    install(package)
    print("installed")
  except Exception as e:
    print(f" Failed: {e}")

print("\nVerifying critical imports...")
verification = {
    "torch":                      "PyTorch",
    "torchvision":                "TorchVision",
    "ultralytics":                "YOLOv8",
    "segmentation_models_pytorch":"U-Net",
    "monai":                      "MONAI",
    "nibabel":                    "NiBabel",
    "albumentations":             "Albumentations",
    "shap":                       "SHAP",
    "cv2":                        "OpenCV",
    "sklearn":                    "Scikit-learn",
}

all_imported = True
for module, name in verification.items():
  try:
    __import__(module)
    print(f" {name}")
  except ImportError:
    print(f" {name} - FAILED TO IMPORT")
    all_imported = False

if all_imported:
    print("\nAll libraries installed and verified")
else:
    print("\nSome libraries failed - rerun this cell")

# Verify PyTorch access to GPU
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"VRAM: {torch.cuda.get_device_properties(0).total_memory / (1024**3):.1f} GB")
else:
    print("CUDA not available - check runtime type")

Installing required libraries...
This will take 3-5 minutes. Stay calm.
 Checking PyTorch... already installed (v2.9.0+cu128)
 Installing YOLOv8... installed
 Installing U-Net with ResNet50... installed
 Installing Medical imaging utilities... installed
 Installing NIfTI file reading... installed
 Installing Data augmentation... installed
 Installing Explainability... installed
 Installing Image processing... installed
 Installing Metrics... installed
 Installing Matplotlib... installed
 Installing Seaborn... installed
 Installing Progress bars... installed

Verifying critical imports...
 PyTorch
 TorchVision
Creating new Ultralytics Settings v0.0.6 file ✅ 
View Ultralytics Settings with 'yolo settings' or at '/root/.config/Ultralytics/settings.json'
Update Settings with 'yolo settings key=value', i.e. 'yolo settings runs_dir=path/to/dir'. For help see https://docs.ultralytics.com/quickstart/#ultralytics-settings.
 YOLOv8
 U-Net




 MONAI
 NiBabel
 Albumentations
 SHAP
 OpenCV
 Scikit-learn

All libraries installed and verified
GPU: Tesla T4
VRAM: 14.6 GB


In [5]:
# Clone official KiTS21 Repository

import os

# Define paths for download
DRIVE_PROJECT = "/content/drive/MyDrive/kidney-tumour-detection"
DATASET_RAW = os.path.join(DRIVE_PROJECT, "dataset", "raw")
KITS_REPO = "/content/kits21" # Temporary code storage

# Clone the KiTS21 repository into KITS_REPO
if not os.path.exists(KITS_REPO):
  !git clone https://github.com/neheller/kits21.git /content/kits21
  print("KiTS21 repository successfully cloned!")
else:
  print("KiTS21 repository already exists")

# Install package
%cd /content/kits21
!pip install -e . -q
print("KiTS21 package installed")

Cloning into '/content/kits21'...
remote: Enumerating objects: 87173, done.[K
remote: Counting objects: 100% (1155/1155), done.[K
remote: Compressing objects: 100% (570/570), done.[K
remote: Total 87173 (delta 556), reused 1109 (delta 545), pack-reused 86018 (from 1)[K
Receiving objects: 100% (87173/87173), 1.89 GiB | 19.18 MiB/s, done.
Resolving deltas: 100% (33701/33701), done.
Updating files: 100% (18187/18187), done.
KiTS21 repository successfully cloned!
/content/kits21
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.0/77.0 kB[0m [31m4.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m156.3/156.3 kB[0m [31m13.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.6/52.6 MB[0m [31m18.4 MB/s[0m eta [

In [6]:
import os

# Confirm number of cases
cases = [item for item in os.listdir("/content/kits21/kits21/data") if item.startswith("case_")]
print(f"Total cases found: {len(cases)} \n")

# Confirm case contents
sample_case = "/content/kits21/kits21/data/case_00000"
for item in os.listdir(sample_case):
    print(f"{item}")

# Confirm raw folder contents
print("\n--- raw folder ---")
raw_path = os.path.join(sample_case, "raw")
for item in os.listdir(raw_path):
    print(item)

# Confirm segmentation folder contents
print("\n--- segmentations folder ---")
seg_path = os.path.join(sample_case, "segmentations")
for item in os.listdir(seg_path):
    print(item)

download_path = "/content/kits21/kits21/data"
# Check if there's a download script
for root, dirs, files in os.walk("/content/kits21/kits21"):
    for file in files:
        if "download" in file.lower():
            print(os.path.join(root, file))

Total cases found: 300 

aggregated_OR_seg.nii.gz
aggregated_AND_seg.nii.gz
raw
aggregated_MAJ_seg.nii.gz
segmentations

--- raw folder ---
tumor
ureter
kidney
artery
vein
meta.json
full

--- segmentations folder ---
kidney_instance-1_annotation-2.nii.gz
kidney_instance-1_annotation-3.nii.gz
kidney_instance-2_annotation-2.nii.gz
kidney_instance-2_annotation-1.nii.gz
tumor_instance-1_annotation-3.nii.gz
tumor_instance-1_annotation-1.nii.gz
kidney_instance-1_annotation-1.nii.gz
tumor_instance-1_annotation-2.nii.gz
kidney_instance-2_annotation-3.nii.gz


In [7]:
# Redirect TRAINING_DIR to Google Drive to save downloads permanently
paths_file = "/content/kits21/kits21/configuration/paths.py"

new_content = '''from pathlib import Path
import os

# Redirected to Google Drive for permanent storage
TRAINING_DIR = Path("/content/drive/MyDrive/kidney-tumour-detection/dataset/raw")
TESTING_DIR = Path(os.environ["KITS21_TEST_DIR"]).resolve(strict=True) if "KITS21_TEST_DIR" in os.environ.keys() else None
SRC_DIR = Path(os.environ["KITS21_SERVER_DATA"]).resolve(strict=True) if "KITS21_SERVER_DATA" in os.environ.keys() else None
CACHE_FILE = Path(__file__).parent.parent / "annotation" / "cache.json"
'''

with open(paths_file, "w") as f:
    f.write(new_content)

print("TRAINING_DIR redirected to Drive")

# Verify the change
with open(paths_file, "r") as f:
    print(f.read())

TRAINING_DIR redirected to Drive
from pathlib import Path
import os

# Redirected to Google Drive for permanent storage
TRAINING_DIR = Path("/content/drive/MyDrive/kidney-tumour-detection/dataset/raw")
TESTING_DIR = Path(os.environ["KITS21_TEST_DIR"]).resolve(strict=True) if "KITS21_TEST_DIR" in os.environ.keys() else None
SRC_DIR = Path(os.environ["KITS21_SERVER_DATA"]).resolve(strict=True) if "KITS21_SERVER_DATA" in os.environ.keys() else None
CACHE_FILE = Path(__file__).parent.parent / "annotation" / "cache.json"



In [8]:
# Copy existing case folders in Temporary Storage to Google Drive

import shutil
import os
from tqdm import tqdm

SOURCE_DIR = "/content/kits21/kits21/data"
DEST_DIR = "/content/drive/MyDrive/kidney-tumour-detection/dataset/raw"

# Get all case folders
cases = sorted([c for c in os.listdir(SOURCE_DIR) if c.startswith("case_")])
print(f"Cases to copy: {len(cases)}")

for case in tqdm(cases, desc="Copying cases to Drive"):
    src = os.path.join(SOURCE_DIR, case)
    dst = os.path.join(DEST_DIR, case)

    # Only copy if not already in Drive
    if not os.path.exists(dst):
      shutil.copytree(src, dst)

print(f"\n All cases copied to Drive")
print(f"Contents of Drive raw folder:")
print(len(os.listdir(DEST_DIR)), "items")

Cases to copy: 300


Copying cases to Drive: 100%|██████████| 300/300 [00:00<00:00, 3031.68it/s]


 All cases copied to Drive
Contents of Drive raw folder:
300 items





In [9]:
# Verify all 300 cases and their contents are on Drive

import os

DEST_DIR = "/content/drive/MyDrive/kidney-tumour-detection/dataset/raw"

# Count case folders
cases_on_drive = sorted([
    item for item in os.listdir(DEST_DIR)
    if item.startswith("case_")
])

print(f"Total case folders on Drive: {len(cases_on_drive)}")
print(f"First case: {cases_on_drive[0]}")
print(f"Last case: {cases_on_drive[-1]}")

# Also verify case_00000 has its contents
sample = os.path.join(DEST_DIR, "case_00000")
print(f"\nContents of case_00000 on Drive:")
for item in os.listdir(sample):
    print(f"  {item}")

Total case folders on Drive: 300
First case: case_00000
Last case: case_00299

Contents of case_00000 on Drive:
  aggregated_OR_seg.nii.gz
  aggregated_MAJ_seg.nii.gz
  segmentations
  raw
  aggregated_AND_seg.nii.gz


In [11]:
# Download CT Scans to Drive

import os
import sys
sys.path.insert(0, "/content/kits21")

from pathlib import Path
from kits21.configuration.paths import TRAINING_DIR
import requests
import shutil
from tqdm import tqdm

# Verify TRAINING_DIR destination in Drive
print(f"Download destination: {TRAINING_DIR}")
assert "drive" in str(TRAINING_DIR), "TRAINING_DIR is not pointing to Drive! Stop and fix this."

imaging_url = "https://kits19.sfo2.digitaloceanspaces.com/"
imaging_name_tmplt =  "master_{:05d}.nii.gz"
temp_f = Path("/content/temp.tmp")

def get_destination(i):
    return TRAINING_DIR / "case_{:05d}".format(i) / "imaging.nii.gz"

def download_case(cid):
   remote_name = imaging_name_tmplt.format(cid)
   url = imaging_url + remote_name
   dst = get_destination(cid)
   try:
    with requests.get(url, stream = True) as r:
      r.raise_for_status()
      with temp_f.open('wb') as f:
        shutil.copyfileobj(r.raw, f)
    shutil.move(str(temp_f), str(dst))
    return True
   except Exception as e:
    if temp_f.exists():
      temp_f.unlink()
    print(f"\n Case {cid:05d} failed: {e}")
    return False

# Find cases still needing download
left_to_download = []
for i in range(300):
  dst = get_destination(i)
  if not dst.exists():
    left_to_download.append(i)

print(f"Cases already downloaded: {300 - len(left_to_download)}")
print(f"Cases remaining: {len(left_to_download)}")
print(f"Starting download...\n")

failed = []
for i, cid in enumerate(tqdm(left_to_download, desc="Downloading CT scans")):
    success = download_case(cid)
    if not success:
        failed.append(cid)

print(f"\n Download complete")
print(f"Successful: {len(left_to_download) - len(failed)}")
print(f"Failed: {len(failed)}")
if failed:
    print(f"Failed cases: {failed}")



Download destination: /content/drive/MyDrive/kidney-tumour-detection/dataset/raw
Cases already downloaded: 0
Cases remaining: 300
Starting download...



Downloading CT scans: 100%|██████████| 300/300 [10:46<00:00,  2.15s/it]


 Download complete
Successful: 300
Failed: 0



