For the package StainTools:
- GitHub: https://github.com/Peter554/StainTools/tree/master
  ```bash
  pip install staintools
  ```

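- The author suggests installing the SPAMS dependency with conda, as described at this [link](https://anaconda.org/conda-forge/python-spams). Run one of the following commands (they are alternative channels/labels for the same package, not steps to run in sequence):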
  ```bash
  conda install conda-forge::python-spams
  conda install conda-forge/label/broken::python-spams
  conda install conda-forge/label/cf201901::python-spams
  conda install conda-forge/label/cf202003::python-spams
  conda install conda-forge/label/gcc7::python-spams
  ```

In [1]:
from __future__ import division

import staintools
# import stain_utils as utils
# import stainNorm_Reinhard
# import stainNorm_Macenko
# import stainNorm_Vahadane
%load_ext autoreload
%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from multiprocessing import Pool, Lock
import os
import datetime
import subprocess

%matplotlib inline

In [4]:
# Move into the normalisation step's directory so that the relative paths used
# by the following cells resolve from there. The path is machine-specific.
os.chdir("/disk2/user/gabgam/work/gigi_env/the_project/2_image_normalisation/")
print(os.getcwd())  # echo the resulting working directory as a sanity check

/disk2/work/gabgam/gigi_env/the_project/2_image_normalisation


In [5]:
# Set paths
# Folder holding the tiles to normalise, relative to the working directory.
INPUT_FOLDER = "../1_tiling/outputs/satac_C1/tiling_output/v3_allspots/tiles_100/"  # Replace with the path to your folder containing images
# Path components of INPUT_FOLDER. Later cells index into this list (positions
# 3, 5 and 6) to build the output folder, so the depth of INPUT_FOLDER matters.
tiles_info = INPUT_FOLDER.split('/')

---
# 1. - Normalisation methods

## 1.1 - Macenko's method

The normalisation currently runs sequentially; a parallel version may be added in the future.

In [2]:
from __future__ import division

import staintools
#%load_ext autoreload
#%autoreload 2

import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
from multiprocessing import Pool, Lock
import os
import datetime

#%matplotlib inline

# Record when the run starts; the value is written into the timing log at the
# end of this cell.
starttime = datetime.datetime.now()

# Work from the normalisation step's directory so the relative paths below
# resolve from there. The path is machine-specific.
os.chdir("/disk2/user/gabgam/work/gigi_env/the_project/2_image_normalisation/")
print(os.getcwd())

# SET PATHS, HERE IS THE MOST IMPORTANT STEP, BE CAREFUL WITH IT.
INPUT_FOLDER = "../1_tiling/outputs/satac_C1/tiling_output/v3_allspots/tiles_100/"  # Replace with the path to your folder containing images
# Path components of INPUT_FOLDER. With the value above, positions 3, 5 and 6
# are "satac_C1", "v3_allspots" and "tiles_100"; they are used below to build
# the output folder, so a path with a different depth needs different indices.
tiles_info = INPUT_FOLDER.split('/')



# Define the paths
TARGET_IMAGE_PATH = "reference_images/reference_full.jpeg"
# File name of the target without its extension, prefixed with "target_is_".
# The [1] index assumes the path has exactly one directory level.
target_temp_path = "target_is_" + TARGET_IMAGE_PATH.split("/")[1].split(".")[0]
normalisation_method = 'staintools_macenko'
output_folder = f"./output/{tiles_info[3]}/{tiles_info[5]}/{tiles_info[6]}/{normalisation_method}/{target_temp_path}"

# Ensure the output folder exists
os.makedirs(output_folder, exist_ok=True)

# File to log normalization failures. Opening it with "w" discards the log of
# any previous run; process_image appends one line per failed tile.
normalisation_fails_file = f"{output_folder}/0_failed_to_normalise.txt"
with open(normalisation_fails_file, "w") as file:
    file.write("The following tiles have failed normalization:\n")

# Fit the Macenko normaliser on the target image once; process_image reuses
# this module-level `normalizer` for every tile.
target = staintools.read_image(TARGET_IMAGE_PATH)
normalizer = staintools.StainNormalizer(method='macenko')
normalizer.fit(target)

def process_image(image_path, output_path):
    '''
    Stain-normalise a single tile with the fitted Macenko normaliser and save it.

    The tile at `image_path` is read, transformed with the module-level
    `normalizer` and written into the folder `output_path` under its original
    stem followed by `_st_macenko_normalized.jpg`. Exceptions are not
    propagated: the tile path and the error message are appended to
    `normalisation_fails_file` instead. Returns None.
    '''
    try:
        source_tile = staintools.read_image(image_path)
        normalised_tile = normalizer.transform(source_tile)

        # Cast to 8-bit before handing the array to PIL.
        pil_tile = Image.fromarray(np.uint8(normalised_tile))

        # Name the result after the input tile, without its extension.
        tile_stem, _ = os.path.splitext(os.path.basename(image_path))
        destination = os.path.join(output_path, f"{tile_stem}_st_macenko_normalized.jpg")

        pil_tile.save(destination)

    except Exception as e:
        # Record the failure and carry on with the next tile.
        with open(normalisation_fails_file, "a") as failure_log:
            failure_log.write(f"{image_path}: {str(e)}\n")

def normalize_images_sequentially(input_folder, output_path):
    '''
    Normalise, one after the other, every image found directly in `input_folder`.

    Only entries whose name ends with `.jpg`, `.png` or `.jpeg` (case-sensitive)
    are handed to `process_image`, which writes its result into `output_path`.
    Returns None.
    '''
    accepted_suffixes = ('.jpg', '.png', '.jpeg')

    # Collect the full list first, then process it.
    tile_paths = []
    for entry in os.listdir(input_folder):
        if entry.endswith(accepted_suffixes):
            tile_paths.append(os.path.join(input_folder, entry))

    for tile_path in tile_paths:
        process_image(tile_path, output_path)

if __name__ == "__main__":
    normalize_images_sequentially(INPUT_FOLDER, output_folder)

    # Read the clock once so that the log's file name and its content report
    # the same finish time.
    finishtime = datetime.datetime.now()

    # Remove the timing log(s) left behind by previous runs, so that only the
    # log of the latest run remains in the output folder.
    for filename in os.listdir(output_folder):
        if filename.startswith("0_started_"):
            file_path = os.path.join(output_folder, filename)
            if os.path.isfile(file_path):  # Check if it is a file
                os.remove(file_path)      # Delete the file
                print(f"Deleted: {file_path}")

    # Start and finish times are also stored in the file name, so they can be
    # read without opening the file.
    with open(f"{output_folder}/0_started_at_{starttime}_finished_at_{finishtime}.txt", "w") as file:
        file.write(f"The run started at {starttime} and finished at {finishtime}.")


/disk2/work/gabgam/gigi_env/the_project/2_image_normalisation


  source_concentrations *= (self.maxC_target / maxC_source)
  source_concentrations *= (self.maxC_target / maxC_source)


KeyboardInterrupt: 

## 1.2 - Reinhard's method

In [None]:
# Record when the run starts; the value is written into the timing log at the
# end of this cell.
starttime = datetime.datetime.now()

# Define the paths
TARGET_IMAGE_PATH = "reference_images/reference_full.jpeg"
# File name of the target without its extension, prefixed with "target_is_".
# The [1] index assumes the path has exactly one directory level.
target_temp_path = "target_is_" + TARGET_IMAGE_PATH.split("/")[1].split(".")[0]
normalisation_method = 'staintools_reinhard'
# `tiles_info` (the '/'-split INPUT_FOLDER) comes from an earlier cell.
output_folder = f"./output/{tiles_info[3]}/{tiles_info[5]}/{tiles_info[6]}/{normalisation_method}/{target_temp_path}"

# Ensure the output folder exists
os.makedirs(output_folder, exist_ok=True)

# File to log normalization failures. Opening it with "w" discards the log of
# any previous run; process_image appends one line per failed tile.
normalisation_fails_file = f"{output_folder}/0_failed_to_normalise.txt"
with open(normalisation_fails_file, "w") as file:
    file.write("The following tiles have failed normalization:\n")

# Compute target statistics from target image. The fitted module-level
# `normalizer` is reused by process_image for every tile.
target = staintools.read_image(TARGET_IMAGE_PATH)
normalizer = staintools.ReinhardColorNormalizer()
normalizer.fit(target)

def process_image(image_path, output_path):
    '''
    Colour-normalise a single tile with the fitted Reinhard normaliser and save it.

    Reads `image_path`, transforms it with the module-level `normalizer` and
    saves the result in `output_path` as
    `<original stem>_st_reinhard_normalized.jpg`. Any exception is swallowed
    and reported as one line in `normalisation_fails_file`. Returns None.
    '''
    try:
        normalised = normalizer.transform(staintools.read_image(image_path))

        # PIL expects 8-bit data, hence the cast.
        as_pil = Image.fromarray(np.uint8(normalised))

        # Build the output name from the input file's stem.
        base_name = os.path.basename(image_path)
        stem = os.path.splitext(base_name)[0]
        target_file = os.path.join(output_path, f"{stem}_st_reinhard_normalized.jpg")

        as_pil.save(target_file)

    except Exception as e:
        # Keep going with the remaining tiles; just note which one failed and why.
        failure_line = f"{image_path}: {str(e)}\n"
        with open(normalisation_fails_file, "a") as log:
            log.write(failure_line)

def normalize_images_sequentially(input_folder, output_path):
    '''
    Run `process_image` on each image file of `input_folder`, one at a time.

    An entry counts as an image when its name ends with `.jpg`, `.png` or
    `.jpeg` (case-sensitive). Results are written by `process_image` into
    `output_path`. Returns None.
    '''
    image_names = filter(
        lambda name: name.endswith(('.jpg', '.png', '.jpeg')),
        os.listdir(input_folder),
    )

    for image_name in image_names:
        process_image(os.path.join(input_folder, image_name), output_path)

if __name__ == "__main__":
    normalize_images_sequentially(INPUT_FOLDER, output_folder)

    # Read the clock once so that the log's file name and its content report
    # the same finish time.
    finishtime = datetime.datetime.now()

    # NOTE(review): unlike the Macenko cell, timing logs from earlier runs are
    # not removed here, so they accumulate in output_folder across reruns.
    # Start and finish times are also stored in the file name, so they can be
    # read without opening the file.
    with open(f"{output_folder}/0_started_at_{starttime}_finished_at_{finishtime}.txt", "w") as file:
        file.write(f"The run started at {starttime} and finished at {finishtime}.")


## 1.3 - Vahadane's method

In [None]:
# Record when the run starts; the value is written into the timing log at the
# end of this cell.
starttime = datetime.datetime.now()

# Define the paths
TARGET_IMAGE_PATH = "reference_images/reference_full.jpeg"
# File name of the target without its extension, prefixed with "target_is_".
# The [1] index assumes the path has exactly one directory level.
target_temp_path = "target_is_" + TARGET_IMAGE_PATH.split("/")[1].split(".")[0]
normalisation_method = 'staintools_vahadane'
# `tiles_info` (the '/'-split INPUT_FOLDER) comes from an earlier cell.
output_folder = f"./output/{tiles_info[3]}/{tiles_info[5]}/{tiles_info[6]}/{normalisation_method}/{target_temp_path}"

# Ensure the output folder exists
os.makedirs(output_folder, exist_ok=True)

# File to log normalization failures. Opening it with "w" discards the log of
# any previous run; process_image appends one line per failed tile.
normalisation_fails_file = f"{output_folder}/0_failed_to_normalise.txt"
with open(normalisation_fails_file, "w") as file:
    file.write("The following tiles have failed normalization:\n")

# Fit the Vahadane normaliser on the target image once; process_image reuses
# this module-level `normalizer` for every tile.
target = staintools.read_image(TARGET_IMAGE_PATH)
normalizer = staintools.StainNormalizer(method='vahadane')
normalizer.fit(target)

def process_image(image_path, output_path):
    '''
    Stain-normalise a single tile with the fitted Vahadane normaliser and save it.

    The input is read from `image_path` and transformed with the module-level
    `normalizer`. The output lands in `output_path`, named after the input
    stem plus `_st_vahadane_normalized.jpg`. A failure at any step is caught
    and appended, with its message, to `normalisation_fails_file`.
    Returns None.
    '''
    try:
        tile = staintools.read_image(image_path)
        tile = normalizer.transform(tile)

        # 8-bit cast, then wrap as a PIL image so it can be written to disk.
        tile_as_image = Image.fromarray(np.uint8(tile))

        # Drop the directory and the extension to get the tile's bare name.
        bare_name = os.path.splitext(os.path.basename(image_path))[0]
        out_name = f"{bare_name}_st_vahadane_normalized.jpg"

        tile_as_image.save(os.path.join(output_path, out_name))

    except Exception as e:
        # One line per failed tile; the run continues.
        with open(normalisation_fails_file, "a") as fails:
            fails.write(f"{image_path}: {str(e)}\n")

def normalize_images_sequentially(input_folder, output_path):
    '''
    Sequentially normalise the images stored directly in `input_folder`.

    Entries not ending with `.jpg`, `.png` or `.jpeg` (case-sensitive) are
    skipped; every other entry is passed, with `output_path`, to
    `process_image`. Returns None.
    '''
    for candidate in os.listdir(input_folder):
        if not candidate.endswith(('.jpg', '.png', '.jpeg')):
            continue
        process_image(os.path.join(input_folder, candidate), output_path)

if __name__ == "__main__":
    normalize_images_sequentially(INPUT_FOLDER, output_folder)

    # Read the clock once so that the log's file name and its content report
    # the same finish time.
    finishtime = datetime.datetime.now()

    # NOTE(review): unlike the Macenko cell, timing logs from earlier runs are
    # not removed here, so they accumulate in output_folder across reruns.
    # Start and finish times are also stored in the file name, so they can be
    # read without opening the file.
    with open(f"{output_folder}/0_started_at_{starttime}_finished_at_{finishtime}.txt", "w") as file:
        file.write(f"The run started at {starttime} and finished at {finishtime}.")


---
# Final - Saving the environment requirements

In [4]:
# Save package versions to a .txt file
import sys

with open("requirements_for_staintools_env.txt", "w") as f:
    # Run pip through the interpreter that executes this notebook, so the
    # listing describes this kernel's environment rather than whichever `pip`
    # happens to be first on PATH. check=True raises CalledProcessError if pip
    # fails, instead of silently leaving an empty or partial file.
    subprocess.run([sys.executable, "-m", "pip", "freeze"], stdout=f, check=True)

---
**CODE FOR PARALLEL COMPUTING (to be done)**

In [None]:
def normalize_image(args):
    '''
    Normalise one image with the requested method and save it in the given folder.

    `args` is a single `(image_path, method, saving_folder)` tuple. The image
    is luminosity-standardised, normalised against the module-level `target`
    with a freshly fitted `staintools.StainNormalizer(method=method)`, and
    written to `saving_folder` as `<name up to the first dot>_<method>.jpeg`.
    Returns None.

    NOTE(review): a second `normalize_image` with a different signature is
    defined further down in the same cell and replaces this one at run time.
    '''
    image_path, method, saving_folder = args

    # Load the tile and standardise its brightness.
    source = staintools.LuminosityStandardizer.standardize(
        staintools.read_image(image_path)
    )

    # A new normaliser is fitted on the target for every call.
    stain_normalizer = staintools.StainNormalizer(method=method)
    stain_normalizer.fit(target)
    result = stain_normalizer.transform(source)

    # Write the result next to the others, tagged with the method name.
    result_pil = Image.fromarray(result)
    stem = os.path.basename(image_path).split(".")[0]
    result_pil.save(os.path.join(saving_folder, stem + f"_{method}.jpeg"))


def normalize_image(image_path, target, method, output_dir):
    '''
    Normalise one image with the requested method and save it in the given folder.

    Parameters:
        image_path: path of the image to normalise.
        target: reference image array the normaliser is fitted on.
        method: method name passed to `staintools.StainNormalizer`.
        output_dir: folder the normalised image is written to.

    Returns the path of the written file (`<input stem>_<method>.jpeg` inside
    `output_dir`), or None when the image is skipped because staintools raised
    `TissueMaskException`. Other exceptions propagate to the caller.
    '''
    # Imported here because neither name is imported anywhere else in the
    # notebook; without these lines the function fails with a NameError.
    import cv2
    from staintools.miscellaneous.exceptions import TissueMaskException

    try:
        # Read and standardize the image
        to_transform = staintools.read_image(image_path)
        to_transform = staintools.LuminosityStandardizer.standardize(to_transform)

        # Stain normalization (the normaliser is re-fitted on the target for
        # every call)
        normalizer = staintools.StainNormalizer(method=method)
        normalizer.fit(target)
        transformed = normalizer.transform(to_transform)

        # Save the transformed image; OpenCV writes channels in BGR order, so
        # the RGB array is converted first.
        image_name = os.path.basename(image_path)
        output_path = os.path.join(output_dir, f"{os.path.splitext(image_name)[0]}_{method}.jpeg")
        cv2.imwrite(output_path, cv2.cvtColor(transformed, cv2.COLOR_RGB2BGR))

    # Skipping when image is almost or completely empty
    except TissueMaskException:
        print(f"Skipping {image_path}: Empty tissue mask computed.")
        return None

    return output_path



def process_images_for_method(method):
    '''
    Normalise, in parallel, every tissue image of `input_dir` with the given method.

    Reads the module-level `input_dir`, `output_dir` and `target`. Results are
    written to `<output_dir>/<method>_staintools/`, which is created if
    missing. Only entries whose name starts with "tissue" are processed.
    Returns None.
    '''

    # Create output folder for the method
    saving_folder_per_method = os.path.join(output_dir, f"{method}_staintools/")
    os.makedirs(saving_folder_per_method, exist_ok=True)

    # Collect all image paths in the input directory
    image_paths = [
        os.path.join(input_dir, image)
        for image in os.listdir(input_dir)
        if image.startswith("tissue")
    ]

    # One argument tuple per image, in the parameter order of
    # normalize_image(image_path, target, method, output_dir), which is the
    # definition in effect at run time.
    args = [
        (image_path, target, method, saving_folder_per_method)
        for image_path in image_paths
    ]

    # starmap unpacks each tuple into positional arguments; plain map would
    # pass the whole tuple as the first argument and fail with a TypeError.
    with Pool() as pool:
        pool.starmap(normalize_image, args)


In [None]:
# input and output directories (module-level names read by
# process_images_for_method)
# NOTE(review): SAMPLE_NAME is not defined anywhere in the visible notebook —
# it must be set before this cell is run.
input_dir = f"../1_tiling/outputs/{SAMPLE_NAME}/tiling_output/"
output_dir = f"./output/{SAMPLE_NAME}/normalised_he/"

# preprocess the reference target image: read it, then standardise its luminosity
target = staintools.read_image("./reference_images/reference_sparser.jpeg") # check for the other images, results may change
target = staintools.LuminosityStandardizer.standardize(target)

# normalization methods; each one is run over the whole input folder in turn
methods = ["macenko", "vahadane"]
if __name__ == "__main__":
    for method in methods:
        process_images_for_method(method)