In [None]:
import gdown
import os
from tqdm import tqdm
import pandas as pd

from src.wsi_utils import load_wsi, convert_to_hsv, segment_tissue, extract_patches, save_patches

## Segmentation and Patching

In [None]:
# Inputs/outputs for the patching run: a table listing one slide per row, and
# the root directory under which patches are written.
data_csv = "mesothelioma_project/data/raw-data.csv"
output_dir = "mesothelioma_project/data/patches"

# Diagnosis codes found in the table -> name of the per-class sub-directory.
diagnosis_map = {"E": "epithelioid", "S": "sarcomatoid", "B": "biphasic"}

# The table is whitespace-delimited. `delim_whitespace=True` is deprecated
# since pandas 2.2; `sep=r"\s+"` is the documented equivalent.
df = pd.read_csv(data_csv, sep=r"\s+")

for _, row in tqdm(df.iterrows(), total=len(df)):
    filename = row['filename']
    gdrive_id = row['id']
    diagnosis_code = row['diagnosis']
    # Raises KeyError for a code that is not in diagnosis_map.
    diagnosis_name = diagnosis_map[diagnosis_code]

    # Save the download under the name listed in the table. Without `output`,
    # gdown chooses the saved name itself, which need not match the `filename`
    # that load_wsi opens below.
    wsi_url = f"https://drive.google.com/uc?id={gdrive_id}"
    gdown.download(wsi_url, output=filename, quiet=False)

    # Patches land in <output_dir>/<diagnosis>/<slide file name without extension>.
    slide_id = os.path.splitext(filename)[0]
    slide_output_dir = os.path.join(output_dir, diagnosis_name, slide_id)

    # Load and process the WSI
    slide = load_wsi(filename)
    hsv_img = convert_to_hsv(slide)
    tissue_mask = segment_tissue(hsv_img)
    # NOTE(review): tissue_mask is computed but not passed to extract_patches,
    # so as written the mask cannot influence which patches are extracted --
    # confirm against extract_patches' signature whether it should be an argument.
    patches = extract_patches(slide)
    save_patches(patches, slide_output_dir)

In [None]:
from google.colab import files
import shutil

# Path of the folder to download
folder_path = "mesothelioma_project/data/patches"

# Name of the zip file to create
zip_filename = "patches.zip"

# Compress the folder. make_archive expects the archive name without its
# extension, so the ".zip" suffix is stripped before the call.
archive_stem = zip_filename.replace(".zip", "")
shutil.make_archive(archive_stem, format='zip', root_dir=folder_path)

# Download the zip file
files.download(zip_filename)