# Getting images for training U-Net for removing ECM
Usage of wsitools

In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
# Google Drive setup
from google.colab import drive
drive.mount("/content/gdrive")

Mounted at /content/gdrive


## Installing dependencies and import needed modules

### ImageSplitterMerger

In [4]:
# Installations of needed modules
!pip install openslide-python openslide-bin imagecodecs loguru read_roi czifile
!pip install git+https://github.com/mjirik/imma.git

# WSI tools installation
!pip install git+https://github.com/mjirik/wsitools.git

# Openslide installation
# !apt-get install openslide-tools
# !apt-get install python-openslide
# !pip install openslide-python

# Modules import
import matplotlib.pyplot as plt
import numpy as np
import imma
# import openslide
import openslide_bin
import requests

Collecting git+https://github.com/mjirik/imma.git
  Cloning https://github.com/mjirik/imma.git to /tmp/pip-req-build-xe1piaha
  Running command git clone --filter=blob:none --quiet https://github.com/mjirik/imma.git /tmp/pip-req-build-xe1piaha
  Resolved https://github.com/mjirik/imma.git to commit d0d6adad07fb1a298fbe1d4f525b690d9aedd1d1
  Preparing metadata (setup.py) ... [?25l[?25hdone
Collecting git+https://github.com/mjirik/wsitools.git
  Cloning https://github.com/mjirik/wsitools.git to /tmp/pip-req-build-kqrkvsvo
  Running command git clone --filter=blob:none --quiet https://github.com/mjirik/wsitools.git /tmp/pip-req-build-kqrkvsvo
  Resolved https://github.com/mjirik/wsitools.git to commit 1dd20818b0e6006bf9a9e899b67d095f75e71a28
  Preparing metadata (setup.py) ... [?25l[?25hdone


### UNet

In [5]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
from keras.preprocessing.image import load_img, img_to_array
from PIL import Image
from skimage.transform import resize

### Inpainting

In [6]:
from skimage.restoration import inpaint
from skimage.morphology import dilation
import random
import string
import os

## CZI file

In [7]:
# Possible filenames: J7_5_a.czi, J7_25_a_ann0004.czi, J8_8_a.czi
filename = "J7_5_a.czi"

# URL of the file on GitHub
url_path = "https://github.com/janburian/Masters_thesis/raw/main/data_czi/" + filename

# Fetch the file
response = requests.get(url_path)

# Check if the request was successful
if response.status_code == 200:
    # Save the content to a local file
    with open(filename, "wb") as file:
        file.write(response.content)
else:
    print("Failed to fetch the file from GitHub")

## Segmentation setup

### UNet

In [8]:
import tensorflow as tf
from keras import backend as K

def jaccard_coef(y_true, y_pred):
    y_true_f = K.flatten(y_true)
    y_pred_f = K.flatten(y_pred)
    intersection = K.sum(y_true_f * y_pred_f)
    return (intersection + 1.0) / (K.sum(y_true_f) + K.sum(y_pred_f) - intersection + 1.0)

def jaccard_coef_loss(y_true, y_pred):
    return -jaccard_coef(y_true, y_pred)

In [9]:
unet_model_path = '/content/gdrive/MyDrive/cell_nuclei_data/output/unet/20240415-13:06_unet_cell_nuclei_150epochs.keras'
model_unet = load_model(unet_model_path, custom_objects={'jaccard_coef_loss': jaccard_coef_loss,
                                                         'jaccard_coef': jaccard_coef})

## Auxiliary methods (segmentation)

In [10]:
def do_inference_unet(img: np.array, model_unet, orig_tile_shape: tuple) -> np.array:
  # print(model_unet.input_shape[1:3])
  tensor_img = tf.convert_to_tensor(img)
  resized_image = tf.image.resize(tensor_img, model_unet.input_shape[1:3])  # Match model input size
  x = img_to_array(resized_image)
  x = x / 255.0  # Assuming model expects normalized values (0-1)
  x = np.expand_dims(x, axis=0)

  # Get segmentation mask
  mask = model_unet.predict(x)

  mask = np.squeeze(mask, axis=0)
  # plt.imshow(mask, cmap="gray")
  # plt.show()

  # Thresholding output mask
  threshold = 0.4  # You can adjust this value based on your needs
  mask = (mask[..., 0] > threshold).astype(np.uint8)  # Assuming channel 0 and converting to uint8 for binary mask
  # plt.imshow(mask, cmap="gray")
  # plt.show()

  mask_resized_back = resize(mask, orig_tile_shape[0:2], preserve_range=True)
  return mask_resized_back


def apply_dilation(mask, num_steps_dilation) -> np.array:
  for i in range(num_steps_dilation):
      mask = dilation(mask)

  return mask


def do_inpainting_biharmonic(orig_tile: np.array, mask: np.array) -> np.array:
  img_removed_nuclei = inpaint.inpaint_biharmonic(orig_tile, mask, channel_axis=-1)
  # plt.imshow(img_removed_nuclei[:,:,::-1])
  # plt.show()

  return img_removed_nuclei

## Implementing *process_tile()* methods

In [1]:
# import uuid

# def generate_random_string():
#   """Generates a random string using a UUID.

#   Returns:
#       A string representation of a UUID.
#   """

#   filename = str(uuid.uuid4())
#   return filename

# # Generate a random string (UUID)
# random_string = generate_random_string()
# print(random_string)

68074a85-8f98-4a07-b189-4a7fe3de6167


In [11]:
!rm -rf training_images
!mkdir training_images

In [13]:
def generate_random_string(length=8):
  """Generates a random string of the specified length.

  Args:
      length: The desired length of the random string. Defaults to 16.

  Returns:
      A random string of the specified length.
  """

  letters = string.ascii_lowercase + string.digits
  result = ''.join(random.choice(letters) for i in range(length))
  return result

# Generate a random string of length 16
random_string = generate_random_string()
print(random_string)


def process_tile(tile: np.array) -> np.array:
  orig_tile_shape = tile.shape
  mask = do_inference_unet(tile, model_unet, orig_tile_shape)
  img_name = generate_random_string()
  output_path = "./training_images/"

  if np.any(mask):  # determine whether in mask are white pixels
    mask_dil = apply_dilation(mask, 5)
    img_removed_cell_nuclei = do_inpainting_biharmonic(tile, mask_dil)
    plt.imsave(os.path.join(output_path, img_name + ".png"), img_removed_cell_nuclei)
    # plt.imshow(img_removed_cell_nuclei)
    # plt.show()
    return img_removed_cell_nuclei

  else: # no mask in tile; returning the original tile
      plt.imsave(os.path.join(output_path, img_name + ".png"), tile)
      # plt.imshow(tile)
      # plt.show()
      return tile

ccv1l91k


## Final part

In [14]:
from wsitools.tile_image import ImageSplitterMerger

# Create an ImageSplitterMerger instance with the specified parameters
image = ImageSplitterMerger(filename, tilesize_px=150, overlap_px=0, pixelsize_mm=[0.001, 0.001], #150px and pixelsize_mm=[0.001, 0.001] is probably best
                            fcn=process_tile)

# Split and merge the image, applying the specified tile processing function
merged_image_processed = image.split_and_merge_image()

# Display the input and merged images using Matplotlib
plt.imshow(merged_image_processed)
plt.title("Merged Processed Image")
plt.show()

Output hidden; open in https://colab.research.google.com to view.

### Download dataset

In [15]:
import shutil
import zipfile
from google.colab import files

In [18]:
images_path = './training_images'

# Specify the desired zip file name
zip_file_name = 'dataset.zip'  # Replace with your preferred name

# Create a zip file in the Colab environment
shutil.make_archive(zip_file_name, 'zip', images_path)

'/content/dataset.zip'

In [19]:
files.download(zip_file_name)

FileNotFoundError: Cannot find file: dataset