In [None]:
from instanseg.utils.data_download import create_raw_datasets_dir, create_processed_datasets_dir, download_and_extract
from pathlib import Path
import numpy as np
import fastremap
import os
import xml.etree.ElementTree as ET
from tqdm import tqdm
import torch

import matplotlib.pyplot as plt

from PIL import Image, ImageDraw

from instanseg.utils.utils import show_images, _move_channel_axis
# The raw challenge data can be mirrored locally with:
#aws s3 cp --no-sign-request s3://monkey-training/ ./ --recursive
import shutil

from tiffslide import TiffSlide

monkey_dir = Path("../Raw_Datasets/Monkey")

# Folders of the local evaluation harness that receive the validation slides.
destination_img = "/home/cdt/Documents/Projects/monkey-challenge-instanseg/evaluation/validation_set/images/kidney-transplant-biopsy-wsi-pas/"
destination_mask = "/home/cdt/Documents/Projects/monkey-challenge-instanseg/evaluation/validation_set/images/tissue-mask/"
destination_ground_truth = '/home/cdt/Documents/Projects/monkey-challenge-instanseg/evaluation/ground_truth'

files = sorted(os.listdir(os.path.join(monkey_dir ,"annotations","xml")))

label_ids = []
means_list = []
# Maps each XML file name to a list with one record per polygon ROI.
annotations_dict = {}


np.random.seed(0)

for file in tqdm(files):
    stem = file.split(".")[0]

    # One draw per file, in sorted file order, so the split is reproducible.
    split = np.random.choice(["train", "val"], p=[0.8, 0.2])

    img_pascpg_path = monkey_dir / "images" / "pas-cpg" / f"{stem}_PAS_CPG.tif"
    img_pasdiagnostic_path = monkey_dir / "images" / "pas-diagnostic" / f"{stem}_PAS_Diagnostic.tif"
    # img_pasoriginal_path = monkey_dir / "images" / "pas-original" / f"{stem}_PAS_Original.tif"
    ihc_path = monkey_dir / "images" / "ihc" / f"{stem}_IHC_CPG.tif"

    root = ET.parse(monkey_dir / "annotations" / "xml" / file).getroot()
    annotations = root.findall('.//Annotation')

    if split == "val":
        # Create the targets first: shutil.copy onto a missing directory either
        # fails or silently writes a single file with the directory's name.
        for destination in (destination_img, destination_mask, destination_ground_truth):
            os.makedirs(destination, exist_ok=True)

        # Copy the slide, its tissue mask and the ground-truth dot files.
        shutil.copy(img_pascpg_path, destination_img)
        shutil.copy(monkey_dir / "images" / "tissue-masks" / f"{stem}_mask.tif", destination_mask)
        for cell_type in ("inflammatory-cells", "lymphocytes", "monocytes"):
            shutil.copy(monkey_dir / "annotations" / "json" / f"{stem}_{cell_type}.json",
                        destination_ground_truth)

    regions = []
    annotations_dict[file] = regions

    slidepascpg = TiffSlide(img_pascpg_path)
    slideihc = TiffSlide(ihc_path)
    try:
        # First pass: every polygon becomes one region record.
        for annotation in annotations:
            if annotation.get('Type') != "Polygon":
                continue

            coords_ROI = np.array([[float(coordinate.get('X')), float(coordinate.get('Y'))]
                                   for coordinate in annotation.findall('.//Coordinate')])

            x_min, y_min = coords_ROI.min(axis=0)
            x_max, y_max = coords_ROI.max(axis=0)
            bbox_width = int(x_max - x_min)
            bbox_height = int(y_max - y_min)

            # Read the same level-0 bounding box from both stains.
            origin = (int(x_min), int(y_min))
            size = (bbox_width, bbox_height)
            rgb_data = slidepascpg.read_region(origin, 0, size, as_array=True)
            ihc_data = slideihc.read_region(origin, 0, size, as_array=True)

            # Rasterise the polygon into a 0/1 mask in bounding-box coordinates.
            mask = Image.new("L", size, 0)
            polygon = coords_ROI - [x_min, y_min]
            ImageDraw.Draw(mask).polygon(polygon.flatten().tolist(), outline=1, fill=1)
            binary_mask = np.array(mask)

            regions.append({ "split": split,
                             "pas-cpg": rgb_data,
                             "ihc": ihc_data,
                             "polygon": coords_ROI,
                             "mask": binary_mask,
                             "bbox" : [x_min, y_min, x_max, y_max],
                             "dots" : []})

            #show_images(rgb_data)
    finally:
        # The pixel data has been copied into arrays; release the file handles.
        slidepascpg.close()
        slideihc.close()

    # Second pass: attach each dot to every region whose bounding box strictly
    # contains it, stored as [row, column, class] relative to the box origin.
    for annotation in annotations:
        if annotation.get('Type') != "Dot":
            continue

        coordinates = annotation.find('.//Coordinate')
        x = int(float(coordinates.get('X')))
        y = int(float(coordinates.get('Y')))
        # Class 0 for the "lymphocytes" group, class 1 for any other group.
        c = 0 if annotation.get('PartOfGroup') == "lymphocytes" else 1

        for region in regions:
            x_min, y_min, x_max, y_max = region["bbox"]
            if x_min < x < x_max and y_min < y < y_max:
                region["dots"].append([y - y_min, x - x_min, c])

    
                        
                  


In [None]:
# Running totals for the detection summary printed after the export loop:
# annotated dots seen, and distinct annotated nuclei that were segmented.
leukocytes_dots = detected_leukocytes = 0

def normalise_HE(x):
    """Macenko stain-normalise an image with torchstain.

    The input goes through ``_to_tensor_float32`` first, the normaliser's
    reference tensors are moved to the input's device, and the normalised
    image is clamped to [0, 255] and passed through ``_move_channel_axis``.

    Args:
        x: image accepted by ``_to_tensor_float32``.

    Returns:
        The clamped, normalised image as returned by ``_move_channel_axis``.
    """
    import torch
    import torchstain
    from instanseg.utils.utils import _move_channel_axis

    image = _to_tensor_float32(x)

    macenko = torchstain.normalizers.MacenkoNormalizer(backend='torch')
    # The reference tensors must sit on the same device as the image.
    for reference in ("maxCRef", "HERef"):
        setattr(macenko, reference, getattr(macenko, reference).to(image.device))

    # normalize() returns several values; only the first one is used here.
    normalised = macenko.normalize(I=image, stains=False, Io = 240, beta = 0.01)[0]
    return _move_channel_axis(torch.clamp(normalised, 0, 255))

import os
from instanseg.utils.pytorch_utils import get_masked_patches
from instanseg.instanseg import _to_tensor_float32, _rescale_to_pixel_size
import torchstain
from instanseg import InstanSeg

import h5py

os.environ["INSTANSEG_BIOIMAGEIO_PATH"] = '/home/cdt/Documents/Projects/InstanSeg/instanseg_thibaut/instanseg/bioimageio_models/'
os.environ['INSTANSEG_DATASET_PATH'] = "../datasets/"

brightfield_nuclei = InstanSeg("brightfield_v2", verbosity = 0)

patch_size = 128
source_pixel_size = 0.2420  # pixel size passed to the segmenter for the level-0 regions
destination_pixel_size = 0.5 # 2420
rescale_output = destination_pixel_size != 0.5
device = "cpu"

image_types  = ["cpg"]#, "ihc"]


def _class_patches(class_labels, image, class_id):
    """Cut one masked patch per labelled object and pair it with `class_id`.

    Returns a (n, 4, patch_size, patch_size) uint8 tensor (image crop plus mask
    channel) and a length-n long tensor filled with `class_id`. When
    `class_labels` holds a single unique value, both results are empty.
    """
    if len(torch.unique(class_labels)) > 1:
        crops, masks = get_masked_patches(class_labels, image, patch_size=patch_size)
        patches = torch.cat((crops.to(torch.uint8), masks.to(torch.uint8)), dim=1)
    else:
        patches = torch.zeros(0, 4, patch_size, patch_size, dtype=torch.uint8).to(device)
    targets = torch.zeros(len(patches), dtype=torch.long) + class_id
    return patches, targets


for image_type in image_types:

    image_key = "pas-cpg" if image_type == "cpg" else "ihc"

    np.random.seed(0)

    with h5py.File(Path(os.environ['INSTANSEG_DATASET_PATH']) / f"monkey_{image_type}_gold_norm.h5", "w") as f:

        f.attrs['class_names'] = str({"0": "lymphocytes", "1": "monocytes", "2" : "other"})  # Convert to string since HDF5 attributes must be simple types
        f.attrs['pixel_size'] = destination_pixel_size

        # Resizable datasets, grown as patches are produced.
        for split in ['train', 'val']:
            f.create_dataset(f"{split}/data", shape=(0, 4, patch_size, patch_size),
                             dtype=np.uint8, maxshape=(None, 4, patch_size, patch_size),
                             chunks=(1, 4, patch_size, patch_size),)
            f.create_dataset(f"{split}/labels", shape=(0, 1), dtype=np.uint8, maxshape=(None, 1))

        for file in tqdm(annotations_dict.keys()):
            regions = annotations_dict[file]
            if not regions:
                # No polygon ROI in this file, so there is nothing to export.
                continue
            split = regions[0]["split"]

            for annotation in regions:

                # Segment on the stain-normalised image when normalisation
                # succeeds, otherwise fall back to the raw region.
                try:
                    array = normalise_HE(annotation["pas-cpg"]).float()
                except Exception as error:
                    tqdm.write(f"{file}: stain normalisation failed ({error}); segmenting the raw region")
                    array = _to_tensor_float32(annotation["pas-cpg"])

                labels , input_tensor = brightfield_nuclei.eval_medium_image(array,
                    pixel_size = source_pixel_size, rescale_output = rescale_output, seed_threshold = 0.05, tile_size= 1024)

                # Keep only the nuclei that fall inside the annotated polygon.
                mask = _rescale_to_pixel_size(_to_tensor_float32(annotation["mask"]), source_pixel_size, destination_pixel_size).to(device)
                labels = labels.to(device) * mask.bool()

                # Dots are [row, column, class]; an explicit reshape keeps a
                # region without dots two-dimensional.
                dots = torch.tensor(annotation["dots"]).reshape(len(annotation["dots"]), 3).to(device)
                dots[:, :2] = dots[:, :2] * source_pixel_size / destination_pixel_size
                dots = dots.long()
                # Rescaling can push a dot one pixel past the label map's edge.
                dots[:, 0].clamp_(0, labels.shape[-2] - 1)
                dots[:, 1].clamp_(0, labels.shape[-1] - 1)

                # Paint class + 1 at every dot, then keep the label ids that a
                # painted pixel touches.
                canvas = torch.zeros_like(labels)
                canvas[:,:,dots[:,0],dots[:,1]] = dots[:,2].float() + 1
                monocytes = labels * torch.isin(labels,labels * (canvas == 2).float()).float()
                lymphocytes = labels * torch.isin(labels,labels * (canvas == 1).float()).float()
                other_cells = (labels * ~torch.isin(labels,labels * (canvas > 0).float())).float()

                img_tensor = _rescale_to_pixel_size(_to_tensor_float32(annotation[image_key]), source_pixel_size, destination_pixel_size).byte().to(device)
                img_tensor = normalise_HE(img_tensor)

                assert img_tensor.shape[-2:] == labels.shape[-2:]
                detected_leukocytes += len(torch.unique(monocytes + lymphocytes)) - 1
                leukocytes_dots += len(dots)

                # Class ids follow f.attrs['class_names'].
                x_monocytes, y_monocytes = _class_patches(monocytes, img_tensor, 1)
                x_lymphocytes, y_lymphocytes = _class_patches(lymphocytes, img_tensor, 0)
                x_other, y_other = _class_patches(other_cells, img_tensor, 2)

                x = torch.cat((x_monocytes,x_lymphocytes,x_other),dim = 0)
                y = torch.cat((y_monocytes,y_lymphocytes,y_other),dim = 0).numpy()[:,None]

                assert len(x) == len(y), f"{len(x)} patches but {len(y)} labels for {file}"

                data_ds = f[f"{split}/data"]
                labels_ds = f[f"{split}/labels"]

                # Append through an explicit offset: a `-n:` slice selects the
                # whole dataset when n == 0.
                data_start = data_ds.shape[0]
                data_ds.resize((data_start + x.shape[0],) + x.shape[1:])
                data_ds[data_start:, ...] = x.cpu().numpy().astype(np.uint8)
                labels_start = labels_ds.shape[0]
                labels_ds.resize((labels_start + y.shape[0],) + y.shape[1:])
                labels_ds[labels_start:, ...] = y.astype(np.uint8)

    if leukocytes_dots:
        undetected_percent = ( leukocytes_dots - detected_leukocytes) / leukocytes_dots
        print(f"Detected {detected_leukocytes} out of {leukocytes_dots} dots. { 100 - undetected_percent * 100:.2f}% detected")
    else:
        print("No annotated dots were processed; the detection rate is undefined.")
      

In [None]:
# Scratch cell: re-wrap the `array` left in the kernel by the export loop and
# move it to `device`. It overwrites its own input, so it is not idempotent.
array = _to_tensor_float32(array).to(device)

In [None]:
# Scratch cell: show the dtype of the `array` left in the kernel namespace.
array.dtype

In [None]:
from pathlib import Path
import numpy as np
import fastremap
import os
from tqdm import tqdm
import torch
import matplotlib.pyplot as plt
from instanseg.utils.utils import show_images, _move_channel_axis
from tiling import get_random_non_empty_tiles
from tiffslide import TiffSlide

# The environment variables are set before the imports and constructors below
# run; keep this order.
os.environ["INSTANSEG_BIOIMAGEIO_PATH"] = '/home/cdt/Documents/Projects/InstanSeg/instanseg_thibaut/instanseg/bioimageio_models/'
os.environ['INSTANSEG_DATASET_PATH'] = "../datasets/"


#os.environ['INSTANSEG_DATASET_PATH'] = "/run/user/1000/gvfs/smb-share:server=cmvm.datastore.ed.ac.uk,share=igmm/bankhead-lab/thibaut_goldsborough/processed_datasets/"


from instanseg.instanseg import InstanSeg, _rescale_to_pixel_size, _to_tensor_float32, to_ndim
from instanseg.utils.pytorch_utils import get_masked_patches
brightfield_nuclei = InstanSeg("brightfield_v2", verbosity = 0)

import os
os.environ["INSTANSEG_OUTPUT_PATH"] = "../outputs/"
from utils import get_classifier
# Classifier that labels the IHC patches (wrapped with TTA below).
classifier = get_classifier("1922985").to("cuda").eval()

# Second classifier; only referenced by commented-out code in the export loop.
classifier_he = get_classifier("test_0").to("cuda").eval()

import ttach as tta
# Test-time augmentation: predictions are averaged over flips and 0/180 degree rotations.
transforms = tta.Compose(
    [
        tta.HorizontalFlip(),
        tta.Rotate90(angles=[0, 180]),
    ]
)
tta_classifier = tta.ClassificationTTAWrapper(classifier, transforms, merge_mode='mean').eval()


patch_size = 128
destination_pixel_size = 0.5
normalise = True

device = "cpu"

monkey_dir = Path("../Raw_Datasets/Monkey")
# Sorted so the per-file processing order, and therefore the seeded tile
# sampling, does not depend on filesystem listing order.
files = sorted(os.listdir(os.path.join(monkey_dir ,"annotations","xml")))

def normalise_HE(x):
    """Macenko stain-normalise a tensor image with torchstain.

    The normaliser's reference tensors are moved to ``x.device``; the result
    is clamped to [0, 255] and passed through ``_move_channel_axis``.

    Args:
        x: image tensor (its ``.device`` attribute is read).

    Returns:
        The clamped, normalised image as returned by ``_move_channel_axis``.
    """
    import torch
    import torchstain
    from instanseg.utils.utils import _move_channel_axis

    target_device = x.device
    macenko = torchstain.normalizers.MacenkoNormalizer(backend='torch')
    # The reference tensors must sit on the same device as the image.
    for reference in ("maxCRef", "HERef"):
        setattr(macenko, reference, getattr(macenko, reference).to(target_device))

    # normalize() returns several values; only the first one is used here.
    normalised = macenko.normalize(I=x, stains=False, Io = 240, beta = 0.01)[0]
    return _move_channel_axis(torch.clamp(normalised, 0, 255))

np.random.seed(0)
import h5py
with h5py.File(Path(os.environ['INSTANSEG_DATASET_PATH']) / "monkey_cpg_silver.h5", "w") as f:

    f.attrs['class_names'] = str({"0": "lymphocytes", "1": "monocytes", "2" : "other"})  # Convert to string since HDF5 attributes must be simple types
    f.attrs['pixel_size'] = destination_pixel_size

    # Resizable datasets, grown as patches are produced.
    for split in ['train', 'val']:
        f.create_dataset(f"{split}/data", shape=(0, 4, patch_size, patch_size),
                        dtype=np.uint8, maxshape=(None, 4, patch_size, patch_size),
                        chunks=(1, 4, patch_size, patch_size),
                      #  compression = "lzf",
        )

        f.create_dataset(f"{split}/labels", shape=(0, 1), dtype=np.uint8, maxshape=(None, 1))


    for file in tqdm(files):

        # Reuse the train/val assignment made when the annotations were parsed.
        split = annotations_dict[file][0]["split"]

        img_pascpg_path = Path(monkey_dir) / ("images/pas-cpg/" + file.split(".")[0] + "_PAS_CPG.tif")
        ihc_path = Path(monkey_dir) / ("images/ihc/" + file.split(".")[0] + "_IHC_CPG.tif")

        slidepascpg = TiffSlide(img_pascpg_path)
        slideihc = TiffSlide(ihc_path)

        tiles_he,tiles_ihc = get_random_non_empty_tiles(slidepascpg,slideihc, num_images=1000, tile_size=1024) #400

        for tile_he,tile_ihc in zip(tiles_he,tiles_ihc):

            # show_images(tile_he,tile_ihc,labels)

            labels , input_tensor = brightfield_nuclei.eval_small_image(tile_he,
            pixel_size = 0.2420, rescale_output = False, seed_threshold = 0.05)

            # Nothing segmented in this tile: skip it before any further work.
            if labels.sum() == 0:
                continue

            ihc_tensor = _rescale_to_pixel_size(_to_tensor_float32(tile_ihc), 0.2420, destination_pixel_size).byte().to(device)

            he_tensor = _rescale_to_pixel_size(_to_tensor_float32(tile_he), 0.2420, destination_pixel_size).byte().to(device)

            if normalise:
                # No trailing comma here: it would wrap the tensor in a tuple
                # and break the shape checks below.
                he_tensor = normalise_HE(he_tensor)

            assert ihc_tensor.shape[-2:] == he_tensor.shape[-2:]
            assert ihc_tensor.shape[-2:] == labels.shape[-2:]

            # IHC patches (scaled to [0, 1]) feed the classifier that produces the labels.
            crops,masks = get_masked_patches(labels.to(device),ihc_tensor, patch_size=patch_size)
            crops = (crops) / 255
            masks = (masks)
            x_ihc =(torch.cat((crops,masks),dim= 1))

            # The matching patches from the other stain are what gets stored.
            crops,masks = get_masked_patches(labels.to(device),he_tensor, patch_size=patch_size)
            crops = (crops).to(torch.uint8)
            masks = (masks).to(torch.uint8)
            x =(torch.cat((crops,masks),dim= 1)).cpu().numpy().astype(np.uint8)

            with torch.no_grad():
                batch_size = 128
               # y_hat_he = torch.cat([classifier_he.forward(x[i:i+batch_size].float().to("cuda")) for i in range(0,len(x_ihc),batch_size)],dim = 0)
               # y_hat_he = y_hat_he.argmax(dim = 1).cpu()

                y_hat= torch.cat([tta_classifier.forward(x_ihc[i:i+batch_size].float().to("cuda")) for i in range(0,len(x_ihc),batch_size)],dim = 0)
                y_hat = y_hat.argmax(dim = 1).cpu()

            # show_images(*x_ihc[y_hat == 1][:8,:3],n_cols = 8)
            # show_images(*x_ihc[y_hat == 0][:8,:3],n_cols = 8)

            y = y_hat.numpy()[:,None]

            # Rough class balancing: keep at most (rarest present class + 10)
            # patches from each of the three classes.
            unique, counts = np.unique(y, return_counts=True)
            min_count = counts.min()
            y_subset = np.concatenate([y[y == i][:min_count + 10] for i in range(3)])
            x_subset = np.concatenate([x[(y == i).squeeze()][:min_count + 10] for i in range(3)])

            if x_subset.ndim == 5:
                x_subset = x_subset[0]
            x = x_subset
            y = y_subset[:,None]

            data_ds = f[f"{split}/data"]
            labels_ds = f[f"{split}/labels"]

            # Append through an explicit offset: a `-n:` slice selects the
            # whole dataset when n == 0.
            data_start = data_ds.shape[0]
            data_ds.resize((data_start + x.shape[0],) + x.shape[1:])
            data_ds[data_start:, ...] = x
            labels_start = labels_ds.shape[0]
            labels_ds.resize((labels_start + y.shape[0],) + y.shape[1:])
            labels_ds[labels_start:, ...] = y.astype(np.uint8)

            # except Exception as e:
            #     print(e)
            #     continue


            

            # except Exception as e:
            #     print(e)
            #     continue

        





In [None]:
def normalise_HE(x):
    """Macenko stain-normalise a tensor image with torchstain.

    The normaliser's reference tensors are moved to ``x.device`` (previously a
    hard-coded "cuda"), so the function works for a tensor on any device.

    Args:
        x: image tensor (its ``.device`` attribute is read).

    Returns:
        The first value returned by ``normalize``, clamped to [0, 255]. No
        channel axis move is applied here.
    """
    import torch
    import torchstain
    normalizer = torchstain.normalizers.MacenkoNormalizer(backend='torch')
    # The reference tensors must sit on the same device as the image.
    normalizer.maxCRef = normalizer.maxCRef.to(x.device)
    normalizer.HERef = normalizer.HERef.to(x.device)
    norm = normalizer.normalize(I=x, stains=False, Io = 240, beta = 0.01)
    norm = torch.clamp(norm[0], 0, 255)
    return norm

# Quick visual check: normalise the `he_tensor` left in the kernel (moved to
# the GPU first) and display the result.
show_images(normalise_HE(he_tensor.to("cuda")))

In [None]:
from pathlib import Path
import numpy as np
import fastremap
import os
from tqdm import tqdm
import torch
import matplotlib.pyplot as plt
from instanseg.utils.utils import show_images, _move_channel_axis
from tiling import get_random_non_empty_tiles
from tiffslide import TiffSlide
import zarr

def normalise_HE(x):
    """Macenko stain-normalise a tensor image with torchstain's default settings.

    The result is clamped to [0, 255] and passed through ``_move_channel_axis``,
    as the other ``normalise_HE`` variants in this notebook do, so callers can
    compare ``shape[-2:]`` with channel-first tensors.

    Args:
        x: image tensor passed to ``MacenkoNormalizer.normalize``.

    Returns:
        The clamped, normalised image as returned by ``_move_channel_axis``.
    """
    import torch
    import torchstain
    from instanseg.utils.utils import _move_channel_axis

    normalizer = torchstain.normalizers.MacenkoNormalizer(backend='torch')
    norm = normalizer.normalize(I=x, stains=False)
    norm = torch.clamp(norm[0], 0, 255)

    # NOTE(review): torchstain's torch backend is believed to return
    # height x width x channel - confirm against the installed version.
    return _move_channel_axis(norm)


# The environment variables are set before the imports and constructors below
# run; keep this order.
os.environ["INSTANSEG_BIOIMAGEIO_PATH"] = '/home/cdt/Documents/Projects/InstanSeg/instanseg_thibaut/instanseg/bioimageio_models/'
os.environ['INSTANSEG_DATASET_PATH'] = "../datasets/"

from instanseg.instanseg import InstanSeg, _rescale_to_pixel_size, _to_tensor_float32, to_ndim
from instanseg.utils.pytorch_utils import get_masked_patches
brightfield_nuclei = InstanSeg("brightfield_v2", verbosity=0)

os.environ["INSTANSEG_OUTPUT_PATH"] = "../outputs/"
from utils import get_classifier
# Classifier that labels the IHC patches (wrapped with TTA below).
classifier = get_classifier("1922985").to("cuda").eval()

# Second classifier; not used by the export loop of this cell.
classifier_he = get_classifier("test_0").to("cuda").eval()

import ttach as tta
# Test-time augmentation: predictions are averaged over flips and 0/180 degree rotations.
transforms = tta.Compose(
    [
        tta.HorizontalFlip(),
        tta.Rotate90(angles=[0, 180]),
    ]
)
tta_classifier = tta.ClassificationTTAWrapper(classifier, transforms, merge_mode='mean').eval()

patch_size = 128
destination_pixel_size = 0.5

device = "cpu"

monkey_dir = Path("../Raw_Datasets/Monkey")
# Sorted so the per-file processing order, and therefore the seeded tile
# sampling, does not depend on filesystem listing order.
files = sorted(os.listdir(os.path.join(monkey_dir, "annotations", "xml")))

np.random.seed(0)

# Create Zarr store
dataset_path = Path(os.environ['INSTANSEG_DATASET_PATH']) / "monkey_cpg_silver_del.zarr"
root = zarr.open(dataset_path, mode="w")

# Add metadata
root.attrs['class_names'] = {"0": "lymphocytes", "1": "monocytes", "2": "other"}
root.attrs['pixel_size'] = destination_pixel_size

# Create train/val datasets: empty arrays with one patch / one label per chunk
for split in ['train', 'val']:
    root.create_dataset(f"{split}/data",
                        shape=(0, 4, patch_size, patch_size),
                        chunks=(1, 4, patch_size, patch_size),
                        dtype=np.uint8,
                        compressor=zarr.Blosc(cname='zstd', clevel=5, shuffle=1))  # Compression
    root.create_dataset(f"{split}/labels",
                        shape=(0, 1),
                        chunks=(1, 1),
                        dtype=np.uint8)

# Processing images
# For every slide pair: sample tiles, segment nuclei on the first tile of the
# pair, label each nucleus with the TTA classifier applied to the IHC patches,
# and append a class-balanced subset of the matching patches to the Zarr store.
for file in tqdm(files):
    # Reuse the train/val assignment stored in annotations_dict (built in an
    # earlier cell); raises if the file has no entry or no region.
    split = annotations_dict[file][0]["split"]

    img_pascpg_path = Path(monkey_dir) / ("images/pas-cpg/" + file.split(".")[0] + "_PAS_CPG.tif")
    ihc_path = Path(monkey_dir) / ("images/ihc/" + file.split(".")[0] + "_IHC_CPG.tif")

    slidepascpg = TiffSlide(img_pascpg_path)
    slideihc = TiffSlide(ihc_path)

    # num_images=1: a single tile pair per slide.
    tiles_he, tiles_ihc = get_random_non_empty_tiles(slidepascpg, slideihc, num_images=1, tile_size=1024)

    for tile_he, tile_ihc in zip(tiles_he, tiles_ihc):
        labels, input_tensor = brightfield_nuclei.eval_small_image(tile_he,
                                                                   pixel_size=0.2420,
                                                                   rescale_output=False,
                                                                   seed_threshold=0.05)

        # Bring both stains to destination_pixel_size as uint8 tensors on `device`.
        ihc_tensor = _rescale_to_pixel_size(_to_tensor_float32(tile_ihc), 0.2420, destination_pixel_size).byte().to(device)
        he_tensor = _rescale_to_pixel_size(_to_tensor_float32(tile_he), 0.2420, destination_pixel_size).byte().to(device)

        # NOTE(review): the normalise_HE defined in this cell does not move the
        # channel axis - confirm the shape asserts below still hold.
        he_tensor = normalise_HE(he_tensor).byte()

      #  show_images(normalise_HE(he_tensor),he_tensor.byte())

        # Nothing segmented in this tile.
        if labels.sum() == 0:
            continue

        assert ihc_tensor.shape[-2:] == he_tensor.shape[-2:]
        assert ihc_tensor.shape[-2:] == labels.shape[-2:]

        # IHC patches (scaled to [0, 1]) are the classifier input.
        crops, masks = get_masked_patches(labels.to(device), ihc_tensor, patch_size=patch_size)
        crops = (crops) / 255
        masks = (masks)
        x_ihc = (torch.cat((crops, masks), dim=1))

        # The matching patches from the other stain are what gets stored.
        crops, masks = get_masked_patches(labels.to(device), he_tensor, patch_size=patch_size)
        crops = (crops).to(torch.uint8)
        masks = (masks).to(torch.uint8)
        x = (torch.cat((crops, masks), dim=1)).cpu().numpy().astype(np.uint8)

        # Classify in mini-batches on the GPU; argmax gives the class index.
        with torch.no_grad():
            batch_size = 128
            y_hat = torch.cat([tta_classifier.forward(x_ihc[i:i + batch_size].float().to("cuda"))
                               for i in range(0, len(x_ihc), batch_size)], dim=0)
            y_hat = y_hat.argmax(dim=1).cpu()

        y = y_hat.numpy()[:, None]

        # Rough class balancing: keep at most (rarest present class + 10)
        # patches from each of the three classes.
        unique, counts = np.unique(y, return_counts=True)
        min_count = counts.min()
        y_subset = np.concatenate([y[y == i][:min_count + 10] for i in range(3)])
        x_subset = np.concatenate([x[(y == i).squeeze()][:min_count + 10] for i in range(3)])

        # Drop a leading singleton axis if the patches came out five-dimensional.
        if x_subset.ndim == 5:
            x_subset = x_subset[0]
        x = x_subset
        y = y_subset[:, None]

        data_ds = root[f"{split}/data"]
        labels_ds = root[f"{split}/labels"]

        # Resize and append to the Zarr dataset
        data_ds.append(x)
        labels_ds.append(y.astype(np.uint8))


In [None]:
# Scratch cell: display the normalised `he_tensor` next to its uint8 cast.
# Both names must already exist in the kernel from an earlier cell.
show_images(normalise_HE(he_tensor),he_tensor.byte())

In [None]:
# Scratch cell: show the dtype of the `he_tensor` left in the kernel namespace.
he_tensor.dtype

In [None]:

import os
import torch
# Export the classifier's `path_classifier` sub-module as TorchScript
# (torch.jit.script) for the inference docker.
# The output path is set before `utils` is imported; keep this order.
os.environ["INSTANSEG_OUTPUT_PATH"] = "/home/cdt/Documents/Projects/InstanSeg/instanseg_classification/instanseg_classification/outputs"
from utils import get_classifier

model = "1937330"

classifier = get_classifier(model)
classifier = classifier.to("cpu").eval()

scripted_path_classifier = torch.jit.script(classifier.path_classifier.eval())
torch.jit.save(
    scripted_path_classifier,
    f"/home/cdt/Documents/Projects/monkey-challenge-instanseg/inference-docker/example_model/{model}.pt",
)

In [None]:

import os
import torch
# Export the classifier's `path_classifier` sub-module as TorchScript by
# tracing it with a random (2, 4, 128, 128) CPU input. This writes to the same
# file as the torch.jit.script export of the same model id.
# The output path is set before `utils` is imported; keep this order.
os.environ["INSTANSEG_OUTPUT_PATH"] = "/home/cdt/Documents/Projects/InstanSeg/instanseg_classification/instanseg_classification/outputs"
from utils import get_classifier

model = "1937330"

classifier = get_classifier(model)
classifier = classifier.to("cpu").eval()

example_input = torch.randn(2,4,128,128).to("cpu")
traced_path_classifier = torch.jit.trace(classifier.path_classifier.eval(), example_input)
torch.jit.save(
    traced_path_classifier,
    f"/home/cdt/Documents/Projects/monkey-challenge-instanseg/inference-docker/example_model/{model}.pt",
)

In [None]:

import os
import torch
# Export the `path_classifier` sub-module of classifier "1923883" as
# TorchScript (torch.jit.script) under the name classifier_large.pt.
# The output path is set before `utils` is imported; keep this order.
os.environ["INSTANSEG_OUTPUT_PATH"] = "../outputs/"
from utils import get_classifier

classifier = get_classifier("1923883")
classifier = classifier.to("cpu").eval()

scripted_path_classifier = torch.jit.script(classifier.path_classifier.eval())
torch.jit.save(
    scripted_path_classifier,
    "/home/cdt/Documents/Projects/monkey-challenge-instanseg/inference-docker/example_model/classifier_large.pt",
)

In [None]:
def brightfield_to_fluo(rgb_data):
    """Unmix an RGB image into three rescaled stain channels.

    The image is made channel-last, converted with ``skimage.color.rgb2hed``,
    and each of the three resulting channels is rescaled so that the range
    from 0 to its 99th percentile maps onto [0, 1].

    Args:
        rgb_data: RGB image accepted by ``_move_channel_axis``.

    Returns:
        Array of shape (3, height, width) holding the rescaled channels in the
        order produced by ``rgb2hed``.
    """
    import numpy as np
    from skimage.color import rgb2hed
    from skimage.exposure import rescale_intensity

    # rgb2hed expects the colour channels on the last axis.
    rgb_data  = _move_channel_axis(rgb_data, to_back = True)
    ihc_hed = rgb2hed(rgb_data)

    # Rescale every stain channel on its own so that one bright stain does not
    # compress the others.
    channels = []
    for index in range(3):
        stain = ihc_hed[:, :, index]
        channels.append(
            rescale_intensity(
                stain,
                out_range=(0, 1),
                in_range=(0, np.percentile(stain, 99)),
            )
        )

    # Stack channel-first.
    zdh = np.stack(channels)

    return zdh


In [None]:
from instanseg.scripts.train import instanseg_training


In [None]:
from pathlib import Path
import numpy as np
import fastremap
import os
from tqdm import tqdm
import torch
import matplotlib.pyplot as plt
from instanseg.utils.utils import show_images, _move_channel_axis
from tiling import get_random_non_empty_tiles
from tiffslide import TiffSlide

# The environment variables are set before the imports and constructors below
# run; keep this order.
os.environ["INSTANSEG_BIOIMAGEIO_PATH"] = '/home/cdt/Documents/Projects/InstanSeg/instanseg_thibaut/instanseg/bioimageio_models/'
os.environ['INSTANSEG_DATASET_PATH'] = "../datasets/"


from instanseg.instanseg import InstanSeg, _rescale_to_pixel_size, _to_tensor_float32, to_ndim
from instanseg.utils.pytorch_utils import get_masked_patches
brightfield_nuclei = InstanSeg("brightfield_nuclei", verbosity = 0, device = "cpu")

patch_size = 128
destination_pixel_size = 0.5

device = "cpu"

monkey_dir = Path("../Raw_Datasets/Monkey")
# Sorted so the per-file order, and therefore the seeded tile sampling, does
# not depend on filesystem listing order.
files = sorted(os.listdir(os.path.join(monkey_dir ,"annotations","xml")))


np.random.seed(0)


# Visual check: segment one sampled tile per slide and display the labels.
# The train/val split is not needed here, so annotations_dict is not read.
for file in tqdm(files):

    stem = file.split(".")[0]
    img_pascpg_path = Path(monkey_dir) / ("images/pas-cpg/" + stem + "_PAS_CPG.tif")
    ihc_path = Path(monkey_dir) / ("images/ihc/" + stem + "_IHC_CPG.tif")

    slidepascpg = TiffSlide(img_pascpg_path)
    slideihc = TiffSlide(ihc_path)

    tiles_he,tiles_ihc = get_random_non_empty_tiles(slidepascpg,slideihc, num_images=1, tile_size=1024) #400

    for tile_he,tile_ihc in zip(tiles_he,tiles_ihc):

        labels , input_tensor = brightfield_nuclei.eval_small_image(tile_he,
        pixel_size = 0.2420, rescale_output = False, seed_threshold = 0.05)

        show_images(tile_he,labels, labels = [1])



In [None]:
from train import PatchDataset

# Open the 'train' split of monkey_cpg_gold.h5 with in_memory=False.
train_dataset = PatchDataset("../datasets/monkey_cpg_gold.h5", 'train', batch_size= 128, in_memory = False)

In [None]:
from torch.utils.data import Dataset, DataLoader, ConcatDataset

def time_loader(loader, N = 10000):
    """Time how long it takes to draw N consecutive items from `loader`.

    One iterator is created up front and advanced N times, so the measurement
    covers fetching successive batches rather than N iterator start-ups that
    each return the first batch. The iterator is restarted when it runs out.

    Args:
        loader: any iterable (e.g. a DataLoader or a dataset).
        N: number of items to draw.

    Raises:
        StopIteration: if `loader` yields no items at all.
    """
    import time
    start = time.time()
    batches = iter(loader)
    for _ in range(N):
        try:
            next(batches)
        except StopIteration:
            # Exhausted: start a new pass. An empty loader raises again here.
            batches = iter(loader)
            next(batches)
    print(f"{time.time() - start} for {N} trials")


# Benchmark set-up: the gold 'train' split read through a shuffling,
# single-process DataLoader (num_workers=0).
train_dataset = PatchDataset("../datasets/monkey_cpg_gold.h5", 'train', batch_size= 128, in_memory = False)
loader = DataLoader(train_dataset, batch_size= 128, num_workers=0, shuffle = True)


# Time 10 draws from the DataLoader.
time_loader(loader, N = 10)

# Time 128 * 10 draws from the dataset object iterated directly.
time_loader(train_dataset, N = 128 * 10)



In [None]:
def time_loader_dataset(dataset, batch_size, N=10000):
    """Time N hand-built batches read from `dataset` by integer index.

    Trial number t reads indices t * batch_size up to (t + 1) * batch_size - 1,
    one `dataset[index]` call at a time; the elapsed wall-clock time is printed.

    Args:
        dataset: object supporting integer indexing.
        batch_size: number of samples read per trial.
        N: number of trials.
    """
    from time import time as now

    began = now()
    for trial in range(N):
        # Emulate a sequential batch with one indexed read per sample.
        first = trial * batch_size
        batch = []
        for position in range(first, first + batch_size):
            batch.append(dataset[position])
    print(f"{now() - began} for {N} trials of batches")


In [None]:
from torch.utils.data import Dataset, DataLoader, ConcatDataset

def time_loader(loader, N = 10000):
    """Time how long it takes to draw N consecutive items from `loader`.

    One iterator is created up front and advanced N times, so the measurement
    covers fetching successive batches rather than N iterator start-ups that
    each return the first batch. The iterator is restarted when it runs out.

    Args:
        loader: any iterable (e.g. a DataLoader or a dataset).
        N: number of items to draw.

    Raises:
        StopIteration: if `loader` yields no items at all.
    """
    import time
    start = time.time()
    batches = iter(loader)
    for _ in range(N):
        try:
            next(batches)
        except StopIteration:
            # Exhausted: start a new pass. An empty loader raises again here.
            batches = iter(loader)
            next(batches)
    print(f"{time.time() - start} for {N} trials")

def time_loader_dataset(dataset, batch_size, N=10000):
    """Time N hand-built batches read from `dataset` by integer index.

    Trial number t reads indices t * batch_size up to (t + 1) * batch_size - 1,
    one `dataset[index]` call at a time; the elapsed wall-clock time is printed.

    Args:
        dataset: object supporting integer indexing.
        batch_size: number of samples read per trial.
        N: number of trials.
    """
    from time import time as now

    began = now()
    for trial in range(N):
        # Emulate a sequential batch with one indexed read per sample.
        first = trial * batch_size
        batch = []
        for position in range(first, first + batch_size):
            batch.append(dataset[position])
    print(f"{now() - began} for {N} trials of batches")


# Benchmark parameters; later benchmark cells read these two names.
batch_size = 128
N_trials = 1000

# Gold 'train' split through a sequential DataLoader with 4 persistent workers.
train_dataset = PatchDataset("../datasets/monkey_cpg_gold.h5", 'train', batch_size= 128, in_memory = False)
loader = DataLoader(train_dataset, batch_size= batch_size, num_workers=4, persistent_workers = True, shuffle = False)


# Time the DataLoader ...
time_loader(loader, N = N_trials)

# ... and the same number of hand-built batches indexed from the dataset.
time_loader_dataset(train_dataset, batch_size=batch_size, N=N_trials)



train - chunk size (1, 4, 128, 128) num samples 223878
(tensor([0, 1, 2]), tensor([ 47651,  22113, 154114]))
219.12578701972961 for 1000 trials
51.75438141822815 for 1000 trials of batches

In [None]:

# Same benchmark on monkey_cpg_gold_gzip.h5, with 4 persistent workers.
# `batch_size` and `N_trials` must already be defined by an earlier cell.
train_dataset = PatchDataset("../datasets/monkey_cpg_gold_gzip.h5", 'train', batch_size= 128, in_memory = False)
loader = DataLoader(train_dataset, batch_size= batch_size, num_workers=4, persistent_workers = True, shuffle = False)


# Time the DataLoader ...
time_loader(loader, N = N_trials)

# ... and the same number of hand-built batches indexed from the dataset.
time_loader_dataset(train_dataset, batch_size=batch_size, N=N_trials)




In [None]:


# Same benchmark on monkey_cpg_gold_lzf.h5, read in the main process.
# `batch_size` and `N_trials` must already be defined by an earlier cell.
train_dataset = PatchDataset("../datasets/monkey_cpg_gold_lzf.h5", 'train', batch_size= 128, in_memory = False)

# DataLoader rejects persistent_workers=True when num_workers is 0, so the
# flag is derived from the worker count.
num_workers = 0
loader = DataLoader(train_dataset, batch_size= batch_size, num_workers=num_workers,
                    persistent_workers = num_workers > 0, shuffle = False)


# Time the DataLoader ...
time_loader(loader, N = N_trials)

# ... and the same number of hand-built batches indexed from the dataset.
time_loader_dataset(train_dataset, batch_size=batch_size, N=N_trials)




In [None]:


# Same benchmark on monkey_cpg_gold_lzf.h5 with in_memory=True, read in the
# main process (num_workers=0).
# `batch_size` and `N_trials` must already be defined by an earlier cell.
train_dataset = PatchDataset("../datasets/monkey_cpg_gold_lzf.h5", 'train', batch_size= 128, in_memory = True)
loader = DataLoader(train_dataset, batch_size= batch_size, num_workers=0, shuffle = False)


# Time the DataLoader ...
time_loader(loader, N = N_trials)

# ... and the same number of hand-built batches indexed from the dataset.
time_loader_dataset(train_dataset, batch_size=batch_size, N=N_trials)




In [None]:
import h5py

import numpy as np

# Read the first (up to) 128 training patches straight into a preallocated
# buffer. read_direct takes the destination array first; the source selection
# is a separate argument.
with h5py.File("../datasets/monkey_cpg_gold.h5","r") as f:
    d = f["train/data"]
    n_rows = min(128, d.shape[0])
    first_batch = np.empty((n_rows,) + d.shape[1:], dtype=d.dtype)
    if n_rows:
        d.read_direct(first_batch, source_sel=np.s_[0:n_rows])

first_batch.shape

In [None]:
import torch
import h5py
# Batch size used by the DataLoader created at the end of this cell.
batch_size = 128

def time_loader(loader, N = 10000):
    """Time how long it takes to draw N consecutive items from `loader`.

    One iterator is created up front and advanced N times, so the measurement
    covers fetching successive batches rather than N iterator start-ups that
    each return the first batch. The iterator is restarted when it runs out.

    Args:
        loader: any iterable (e.g. a DataLoader or a dataset).
        N: number of items to draw.

    Raises:
        StopIteration: if `loader` yields no items at all.
    """
    import time
    start = time.time()
    batches = iter(loader)
    for _ in range(N):
        try:
            next(batches)
        except StopIteration:
            # Exhausted: start a new pass. An empty loader raises again here.
            batches = iter(loader)
            next(batches)
    print(f"{time.time() - start} for {N} trials")

def time_loader_dataset(dataset, batch_size, N=10000):
    """Time N hand-built batches read from `dataset` by integer index.

    Trial number t reads indices t * batch_size up to (t + 1) * batch_size - 1,
    one `dataset[index]` call at a time; the elapsed wall-clock time is printed.

    Args:
        dataset: object supporting integer indexing.
        batch_size: number of samples read per trial.
        N: number of trials.
    """
    from time import time as now

    began = now()
    for trial in range(N):
        # Emulate a sequential batch with one indexed read per sample.
        first = trial * batch_size
        batch = []
        for position in range(first, first + batch_size):
            batch.append(dataset[position])
    print(f"{now() - began} for {N} trials of batches")



class H5Dataset(torch.utils.data.Dataset):
    """Dataset over the "train/data" array of an HDF5 file.

    The file is opened briefly at construction to record the number of
    samples, and opened again on the first item access; that second handle is
    kept for later accesses and is never closed by this class.
    """

    def __init__(self, path):
        self.file_path = path
        # Opened on first __getitem__ call.
        self.dataset = None
        handle = h5py.File(self.file_path, 'r')
        try:
            self.dataset_len = len(handle["train/data"])
        finally:
            handle.close()

    def __getitem__(self, index):
        source = self.dataset
        if source is None:
            source = self.dataset = h5py.File(self.file_path, 'r')["train/data"]
        return source[index]

    def __len__(self):
        return self.dataset_len

# Benchmark the minimal H5Dataset wrapper on the gold file.
train_dataset = H5Dataset("../datasets/monkey_cpg_gold.h5")


# Sequential, single-process DataLoader (num_workers=0, shuffle=False).
loader = torch.utils.data.DataLoader(train_dataset, batch_size= batch_size, num_workers=0, shuffle = False)

time_loader(loader, N = 1000)


In [None]:
import h5py

# Report the HDF5 C library version h5py uses, then the h5py version itself.
hdf5_library_version = h5py.version.hdf5_version
print(f"HDF5 library version: {hdf5_library_version}")

h5py_package_version = h5py.__version__
print(f"h5py version: {h5py_package_version}")


In [None]:
import h5py
import numpy as np

# Shuffle the training split in place. Patches and labels are permuted with
# the SAME indices so every patch keeps its label, and both datasets are
# overwritten in place so they keep their chunking, compression and maxshape.
file_path = "../datasets/monkey_cpg_gold_gzip.h5"
with h5py.File(file_path, "r+") as f:  # Open in read/write mode to modify
    data_ds = f["train/data"]
    labels_ds = f["train/labels"]  # KeyError here means there is nothing to keep aligned
    if len(data_ds) != len(labels_ds):
        raise ValueError(
            f"train/data has {len(data_ds)} rows but train/labels has {len(labels_ds)}"
        )

    shuffled_indices = np.random.permutation(len(data_ds))  # Shuffle indices

    # Load each array into memory, permute it, and write it back.
    data_ds[...] = data_ds[:][shuffled_indices]
    labels_ds[...] = labels_ds[:][shuffled_indices]
