In [None]:
# Fetch the project repository; later cells import helper code from its
# `mesothelioma_project/src` folder and read the slide list from its data folder.
!git clone https://github.com/andreazenotto/tempProjectRepo.git

In [None]:
%%capture
!apt update && apt install -y openslide-tools
!pip install openslide-python

In [None]:
import gdown
import os
from tqdm import tqdm
import pandas as pd
import sys

# Make the cloned repository's source folder importable so that the
# `wsi_utils` helpers below can be resolved. The path is relative, so it
# relies on the notebook's working directory being the one that contains
# the `tempProjectRepo` checkout.
sys.path.append('tempProjectRepo/mesothelioma_project/src')
from wsi_utils import load_wsi, extract_patches

## Show the dimensions of WSI M-1 at each pyramid level

In [None]:
import openslide

# Download the sample slide from Google Drive into the working directory.
wsi_url = "https://drive.google.com/uc?id=18HQP1e3IhTMaAyonylWxyaZiRaH5-M9e"
gdown.download(wsi_url)

# Open the WSI inside a context manager so the underlying file handle is
# released as soon as the level dimensions have been read.
# NOTE(review): assumes the Drive file is saved as "M-1.ndpi" — confirm.
with openslide.OpenSlide("M-1.ndpi") as slide:
    # (width, height) of every pyramid level, level 0 first
    levels = slide.level_dimensions

# Print the dimensions of each level
for i, (width, height) in enumerate(levels):
    print(f"Level {i} dimensions: {width} x {height}")

In [None]:
import os
import numpy as np
import openslide
from skimage.color import rgb2hsv
from PIL import Image

# Open the slide and pull one square patch out of level 0 as an RGB array.
patch_side = 224
patch_origin = (3808, 23296)

slide = load_wsi("M-1.ndpi")
# extract_patches(slide, "test1", level=0)
region = np.array(
    slide.read_region(patch_origin, 0, (patch_side, patch_side)).convert("RGB")
)

# Convert to HSV and keep only the saturation channel.
patch_hsv = rgb2hsv(region)
saturation = patch_hsv[:, :, 1]
print(saturation)

# Pixels whose saturation exceeds the threshold are counted as tissue.
saturation_threshold = 0.2
tissue_mask = saturation > saturation_threshold

# Fraction of the patch area covered by tissue pixels.
tissue_coverage = tissue_mask.sum() / (patch_side * patch_side)

# Blank line followed by the coverage value.
print("", tissue_coverage, sep="\n")

## Segmentation and Patching

In [None]:
# Input table of slides and root folder where the extracted patches are written.
data_csv = "tempProjectRepo/mesothelioma_project/data/raw-data.csv"
output_dir = "tempProjectRepo/mesothelioma_project/data/patches"

# Maps the diagnosis code stored in the CSV to the sub-folder name used for the patches.
diagnosis_map = {"E": "epithelioid", "S": "sarcomatoid", "B": "biphasic"}

# The CSV is whitespace-delimited.
df = pd.read_csv(data_csv, delimiter=r"\s+")

# Half-open row range to process; narrow it to resume or split a long run.
start_idx = 0
end_idx = len(df)

# Slice once and size the progress bar from the slice itself: using len(df)
# as the total gives a bar that never completes (and a wrong ETA) whenever
# only a sub-range of the rows is processed.
slides_to_process = df.iloc[start_idx:end_idx]

for _, row in tqdm(slides_to_process.iterrows(), total=len(slides_to_process)):
    filename = row['filename']
    gdrive_id = row['id']
    diagnosis_code = row['diagnosis']
    # Raises KeyError for a code that is not in diagnosis_map.
    diagnosis_name = diagnosis_map[diagnosis_code]

    # Download the slide from Google Drive.
    # NOTE(review): assumes the downloaded file is saved under the same name
    # as the CSV's `filename` column — confirm.
    wsi_url = f"https://drive.google.com/uc?id={gdrive_id}"
    gdown.download(wsi_url, quiet=False)

    # Patches go to <output_dir>/<diagnosis>/<slide name without extension>.
    slide_id = os.path.splitext(filename)[0]
    slide_output_dir = os.path.join(output_dir, diagnosis_name, slide_id)

    # Load and process the WSI
    # NOTE(review): the downloaded slide file is neither closed nor removed
    # after patch extraction — check disk usage when processing many slides.
    slide = load_wsi(filename)
    extract_patches(slide, slide_output_dir, level=0)

In [None]:
from google.colab import files
import shutil

# Folder whose contents will be packed into the archive
folder_path = "tempProjectRepo/mesothelioma_project/data/patches"

# Name of the zip file to create
zip_filename = "patches.zip"

# make_archive appends the ".zip" extension itself, so pass the name without it
archive_base = zip_filename.replace(".zip", "")
shutil.make_archive(archive_base, format='zip', root_dir=folder_path)

# Send the zip file to the browser as a download
files.download(zip_filename)