In [None]:
# Install a pinned PyYAML before detectron2's own requirements are installed.
!python -m pip install pyyaml==5.1
import sys, os, distutils.core
# NOTE(review): `distutils` is deprecated and missing from newer Python releases — confirm the kernel's Python still ships it.
# Note: This is a faster way to install detectron2 in Colab, but it does not include all functionalities (e.g. compiled operators).
# See https://detectron2.readthedocs.io/tutorials/install.html for full installation instructions
!git clone 'https://github.com/facebookresearch/detectron2'
# Run the cloned setup.py through distutils to obtain its distribution object;
# only its `install_requires` list is used, on the next line.
dist = distutils.core.run_setup("./detectron2/setup.py")
# Install every declared requirement, each one individually single-quoted for the shell.
!python -m pip install {' '.join([f"'{x}'" for x in dist.install_requires])}
# Put the cloned source tree at the front of the import path so `import detectron2` resolves to it.
sys.path.insert(0, os.path.abspath('./detectron2'))

# Properly install detectron2. (Please do not install twice in both ways)
# !python -m pip install 'git+https://github.com/facebookresearch/detectron2.git'

In [None]:
import torch, detectron2
!nvcc --version
TORCH_VERSION = ".".join(torch.__version__.split(".")[:2])
CUDA_VERSION = torch.__version__.split("+")[-1]
print("torch: ", TORCH_VERSION, "; cuda: ", CUDA_VERSION)
print("detectron2:", detectron2.__version__)

In [None]:
# Some basic setup:
# Setup detectron2 logger
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import os, json, cv2, random
from google.colab.patches import cv2_imshow
from matplotlib import pyplot as plt

# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.data import MetadataCatalog, DatasetCatalog
from detectron2.data.datasets import register_coco_instances

In [None]:
# Root of the COCO export; each split has its own sub-folder holding the images
# and an `_annotations.coco.json` file.
COCO_DATASET_ROOT = "/kaggle/input/coco-dataset-5/TIL.v5i.coco"

# Registering an already-registered name raises inside detectron2, which made
# this cell fail whenever it was re-run in the same kernel. Skip names that are
# already in the catalog so the cell is idempotent.
for _dataset_name, _split_dir in (
    ("my_dataset_train", "train"),
    ("my_dataset_val", "valid"),
    ("my_dataset_test", "test"),
):
    if _dataset_name in DatasetCatalog.list():
        continue
    _image_root = os.path.join(COCO_DATASET_ROOT, _split_dir)
    register_coco_instances(
        _dataset_name,
        {},
        os.path.join(_image_root, "_annotations.coco.json"),
        _image_root,
    )

In [None]:
# Metadata handle and dataset dicts for the registered training split.
# `train_dataset_dicts` is sampled from and indexed by "file_name" in the visualisation cell.
train_metadata = MetadataCatalog.get("my_dataset_train")
train_dataset_dicts = DatasetCatalog.get("my_dataset_train")

In [None]:
# Metadata handle and dataset dicts for the registered validation split.
val_metadata = MetadataCatalog.get("my_dataset_val")
val_dataset_dicts = DatasetCatalog.get("my_dataset_val")

In [None]:
# Metadata handle and dataset dicts for the registered test split.
test_metadata = MetadataCatalog.get("my_dataset_test")
test_dataset_dicts = DatasetCatalog.get("my_dataset_test")

In [None]:
# Visualize some random samples
for d in random.sample(train_dataset_dicts, 1):
    img = cv2.imread(d["file_name"])  # OpenCV decodes to BGR
    if img is None:
        # cv2.imread returns None instead of raising when a file cannot be read.
        raise FileNotFoundError(f"Could not read image: {d['file_name']}")
    # The Visualizer takes an RGB image, hence the channel flip on input.
    visualizer = Visualizer(img[:, :, ::-1], metadata=train_metadata, scale=1)
    vis = visualizer.draw_dataset_dict(d)
    # get_image() is already RGB, which is what matplotlib expects. Flipping it
    # back to BGR (only needed for cv2_imshow) swapped red and blue on screen.
    plt.imshow(vis.get_image())
    plt.show()

# Train

In [None]:
from detectron2.engine import DefaultTrainer

# Build the training configuration on top of the model-zoo Mask R-CNN R50-FPN 3x config.
cfg = get_cfg()
# Directory created below; the inference cell later reads model_final.pth from it.
cfg.OUTPUT_DIR = "/kaggle/working/models/Detectron2_Models"
cfg.merge_from_file(model_zoo.get_config_file("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"))
cfg.DATASETS.TRAIN = ("my_dataset_train",)
cfg.DATASETS.TEST = ()  # left empty: no test dataset is configured for the trainer
cfg.DATALOADER.NUM_WORKERS = 2
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url("COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml")  # Let training initialize from model zoo
cfg.SOLVER.IMS_PER_BATCH = 2  # This is the real "batch size" commonly known to deep learning people
cfg.SOLVER.BASE_LR = 0.00025  # pick a good LR
cfg.SOLVER.MAX_ITER = 5000    # 5000 iterations seems good enough for this dataset
cfg.SOLVER.STEPS = []        # do not decay learning rate
cfg.MODEL.ROI_HEADS.BATCH_SIZE_PER_IMAGE = 256   # Default is 512, using 256 for this dataset.
cfg.MODEL.ROI_HEADS.NUM_CLASSES = 3  # We have 3 classes
# NOTE: this config means the number of classes, without the background. Do not use num_classes+1 here.

os.makedirs(cfg.OUTPUT_DIR, exist_ok=True)
trainer = DefaultTrainer(cfg)  # Create an instance of DefaultTrainer with the given configuration
trainer.resume_or_load(resume=False)  # resume=False: initialise from cfg.MODEL.WEIGHTS instead of resuming from a checkpoint in OUTPUT_DIR

In [None]:
# Run the training loop with the configuration set up in the previous cell.
trainer.train()  # Start the training process

In [None]:
import yaml
# Save the configuration to a config.yaml file
# CfgNode.dump() serialises the config to plain YAML. Passing the CfgNode
# object itself to yaml.dump() writes Python-object tags instead, which is not
# the plain format merge_from_file() is meant to read back.
config_yaml_path = os.path.join(cfg.OUTPUT_DIR, "config.yaml")
with open(config_yaml_path, 'w') as file:
    file.write(cfg.dump())

In [None]:
# Inference should use the config with parameters that are used in training
# cfg now already contains everything we've set previously. We changed it a little bit for inference:
# NOTE: `cfg` is modified in place here, so after this cell it no longer points at the model-zoo weights.
cfg.MODEL.WEIGHTS = os.path.join(cfg.OUTPUT_DIR, "model_final.pth")  # path to the model we just trained
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5   # set a custom testing threshold
predictor = DefaultPredictor(cfg)

In [None]:
from detectron2.utils.visualizer import ColorMode

# Draw the model's predictions on randomly chosen validation images.
for d in random.sample(val_dataset_dicts, 1):    # the second argument is the number of images to display
    im = cv2.imread(d["file_name"])
    outputs = predictor(im)
    # Move the predicted instances to the CPU before drawing them.
    instances = outputs["instances"].to("cpu")
    # The channel axis is reversed on the way into the Visualizer ...
    v = Visualizer(im[:, :, ::-1],
                   metadata=val_metadata,
                   scale=1
    )
    out = v.draw_instance_predictions(instances)
    # ... and reversed back on the way out, before handing the result to cv2_imshow.
    cv2_imshow(out.get_image()[:, :, ::-1])

In [None]:
from detectron2.evaluation import COCOEvaluator, inference_on_dataset
from detectron2.data import build_detection_test_loader
# Evaluate the trained model on the validation split; the evaluator is given ./output as its output directory.
evaluator = COCOEvaluator("my_dataset_val", output_dir="./output")
val_loader = build_detection_test_loader(cfg, "my_dataset_val")
# Runs the predictor's underlying model over the loader and prints whatever the evaluator returns.
print(inference_on_dataset(predictor.model, val_loader, evaluator))
# another equivalent way to evaluate the model is to use `trainer.test`

In [None]:
# Test with new image
new_im = cv2.imread("/kaggle/input/coco-dataset-5/TIL.v5i.coco/test/146_image_png.rf.d544162a26ace1a67e988bc28fbdb313.jpg")
# The image is forced to 256x256 before prediction, regardless of its original size.
new_im = cv2.resize(new_im, (256, 256))
print(new_im.shape)

outputs = predictor(new_im)
instances = outputs["instances"].to("cpu")

# Keep only detections scoring above the threshold.
# NOTE(review): the predictor built earlier uses SCORE_THRESH_TEST = 0.5, so this looser 0.3 filter presumably removes nothing — verify.
confidence_threshold = 0.3
instances = instances[instances.scores > confidence_threshold]

# Drawn with the training split's metadata.
v = Visualizer(new_im[:, :, ::-1], metadata=train_metadata, scale=1)
out = v.draw_instance_predictions(instances)

cv2_imshow(out.get_image()[:, :, ::-1])

# TODO

- Extract the number of cells and the cell density
- Create a CSV metadata file containing this information
- Feature extraction

In [None]:
# Disabled cell: everything below sits inside a single triple-quoted string, so none of it executes.
# The wrapped code walks an input folder of .png files, runs `predictor` on each and writes the drawn predictions to an output folder.
"""
# Directory path to the input images folder
input_images_directory = "/kaggle/input/camelyon/dataset/patches"

# Output directory where the segmented images will be saved
output_directory = "/kaggle/working/test_results/masks"  # Replace this with the path to your desired output directory
os.makedirs(output_directory, exist_ok=True)

# Loop over the images in the input folder
def process_images_in_directory(input_dir, output_dir):
    for root, _, files in os.walk(input_dir):
        for file in files:
            if file.endswith(('.png')): 
                image_path = os.path.join(root, file)
                new_im = cv2.imread(image_path)

                # Perform prediction on the new image
                outputs = predictor(new_im)  # Replace `predictor` with your initialized model

                # Use Visualizer to draw the predictions on the image
                v = Visualizer(new_im[:, :, ::-1], metadata=train_metadata)
                out = v.draw_instance_predictions(outputs["instances"].to("cpu"))

                # Create the output path, maintaining the folder structure
                relative_path = os.path.relpath(image_path, input_dir)
                result_path = os.path.join(output_dir, os.path.splitext(relative_path)[0] + "_result.png")
                os.makedirs(os.path.dirname(result_path), exist_ok=True)

                # Save the segmented image
                cv2.imwrite(result_path, out.get_image()[:, :, ::-1])
                print(f"Image at {image_path} processed")

# Process all images in the directory
process_images_in_directory(input_images_directory, output_directory)

print("Segmentation of all images in subfolders completed.")
"""

In [None]:
# Disabled cell: everything below sits inside a single triple-quoted string, so none of it executes.
# The wrapped code writes one CSV row per predicted bounding box, tagged with fields parsed from the patch file name.
# NOTE(review): in the wrapped code `cell_density` divides the detection count by the summed bounding-box area, not by the patch area — confirm the intent before re-enabling.
"""
import csv
import os
import time
import re
import cv2
import numpy as np

# Directory path to the input images folder
input_images_directory = "/kaggle/input/custom-dataset/selected_patches"

# Output directory where the CSV file will be saved
output_csv_path = "/kaggle/working/test_results/output_objects.csv"
os.makedirs(os.path.dirname(output_csv_path), exist_ok=True)

# Gather all image paths
image_paths = []
for root, _, files in os.walk(input_images_directory):
    for file in files:
        if file.endswith('.png'):  # Process only PNG files
            image_paths.append(os.path.join(root, file))

# Function to extract patient, node, x, and y coordinates from file name
def parse_filename(file_name):
    pattern = r"patch_patient_(\d+)_node_(\d+)_x_(\d+)_y_(\d+)"
    match = re.search(pattern, file_name)
    if match:
        return match.group(1), match.group(2), match.group(3), match.group(4)
    return None, None, None, None

# Total number of images
total_images = len(image_paths)
print(f"Total Images: {total_images}")

# Open the CSV file for writing
with open(output_csv_path, 'w', newline='') as csvfile:
    csvwriter = csv.writer(csvfile)

    # Write the header row in the CSV file
    csvwriter.writerow(["patient", "node", "x_coord", "y_coord", "bounding_box", "area", "number_of_cells", "cell_density"])

    start_time = time.time()  # Record the start time of the entire process

    # Loop over the images
    for idx, image_path in enumerate(image_paths):
        image_start_time = time.time()  # Start time for the current image
        image = cv2.imread(image_path)

        file_name = os.path.basename(image_path)
        patient, node, x_coord, y_coord = parse_filename(file_name)

        # Perform prediction on the image
        outputs = predictor(image)  # Replace `predictor` with your initialized model

        # Extract bounding boxes and scores
        pred_boxes = outputs["instances"].pred_boxes.tensor.cpu().numpy()

        # Calculate total area covered by all bounding boxes
        total_area = 0
        for box in pred_boxes:
            box_area = (box[2] - box[0]) * (box[3] - box[1])
            total_area += box_area

        # Number of detected cells
        num_cells = len(pred_boxes)

        # Calculate cell density (number of cells per unit area)
        cell_density = num_cells / total_area if total_area > 0 else 0

        # Write information for each detected object
        for bounding_box in pred_boxes:
            area = (bounding_box[2] - bounding_box[0]) * (bounding_box[3] - bounding_box[1])  # Area of the bounding box

            # Write the object information to the CSV file
            csvwriter.writerow([
                patient, node, x_coord, y_coord, 
                bounding_box.tolist(),  # Convert NumPy array to list
                area, 
                num_cells, 
                cell_density
            ])

        # Calculate time taken for the current image
        image_time_taken = time.time() - image_start_time

        # Calculate estimated remaining time
        images_processed = idx + 1
        elapsed_time = time.time() - start_time
        avg_time_per_image = elapsed_time / images_processed
        remaining_time = avg_time_per_image * (total_images - images_processed)

        # Print progress
        print(f"Processed {images_processed}/{total_images} images. "
              f"Time for current image: {image_time_taken:.2f}s. "
              f"Estimated time remaining: {remaining_time:.2f}s.")

print("Object-level information saved to CSV file.")
"""

In [None]:
import os
import pandas as pd
import cv2
import numpy as np
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer
from detectron2.structures import BoxMode

# Load the trained model and return a predictor for it
def get_detector(
    config_path="/kaggle/working/models/Detectron2_Models/config.yaml",
    weights_path="/kaggle/working/models/Detectron2_Models/model_final.pth",
    score_thresh=0.5,
):
    """Build a ``DefaultPredictor`` from a saved config file and trained weights.

    Args:
        config_path: YAML config file merged on top of the detectron2 defaults.
            Defaults to the file saved by the training part of this notebook.
        weights_path: Checkpoint assigned to ``cfg.MODEL.WEIGHTS``. Defaults to
            the ``model_final.pth`` inside the training output directory.
        score_thresh: Value assigned to ``cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST``.

    Returns:
        A ``DefaultPredictor`` built from the resulting config.
    """
    cfg = get_cfg()
    cfg.merge_from_file(config_path)
    cfg.MODEL.WEIGHTS = weights_path
    cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = score_thresh  # prediction threshold
    return DefaultPredictor(cfg)

# Analyse the segmentation predicted for one image
def analyze_segmentation(predictions, image, tumor_class_id=1):
    """Summarise the instances predicted for one patch.

    Args:
        predictions: Mapping with an ``"instances"`` entry that exposes
            ``pred_classes`` and ``pred_masks`` tensors.
        image: The source image. Not used; kept so existing callers keep working.
        tumor_class_id: Class id whose presence marks the patch as tumoral.
            Defaults to 1, the value that used to be hard-coded.

    Returns:
        A tuple ``(num_cells, cell_types, area_cells, tumor_detected)``:
        the number of predicted instances, a sorted NumPy array of the unique
        predicted class ids, the total number of mask pixels summed over all
        instances (as a Python ``int``), and 1/0 for whether
        ``tumor_class_id`` is among the predicted classes.
    """
    instances = predictions["instances"]
    pred_classes = instances.pred_classes.cpu().numpy()
    pred_masks = instances.pred_masks.cpu().numpy()

    num_cells = len(pred_classes)
    cell_types = np.unique(pred_classes)  # unique predicted class ids

    # The patch counts as tumoral as soon as one instance has the tumor class.
    tumor_detected = int(tumor_class_id in cell_types)

    # One vectorised sum over the whole mask stack. Pixels shared by
    # overlapping instances are counted once per instance, as before.
    area_cells = int(pred_masks.sum())

    return num_cells, cell_types, area_cells, tumor_detected

# Process one image and collect its metadata
def process_image(image_path, predictor):
    """Run the predictor on one image file and summarise the result.

    Args:
        image_path: Path of the image to read with OpenCV.
        predictor: Callable applied to the decoded image; its result is passed
            to ``analyze_segmentation``.

    Returns:
        A dict with the keys ``patch_id`` (the file's base name), ``num_cells``,
        ``cell_types`` (unique class ids joined with ", "), ``area_cells`` and
        ``tumor_detected``.

    Raises:
        ValueError: If OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    # cv2.imread returns None instead of raising when a file is missing or
    # cannot be decoded; fail here with the offending path rather than deep
    # inside the predictor.
    if image is None:
        raise ValueError(f"Could not read image: {image_path}")

    predictions = predictor(image)
    num_cells, cell_types, area_cells, tumor_detected = analyze_segmentation(predictions, image)

    # The file name doubles as the patch identifier.
    return {
        "patch_id": os.path.basename(image_path),
        "num_cells": num_cells,
        "cell_types": ", ".join(map(str, cell_types)),
        "area_cells": area_cells,
        "tumor_detected": tumor_detected,
    }

# Process every image found under a folder
def process_images_in_folder(folder_path, predictor, output_csv="patch_metadata.csv"):
    """Collect per-image metadata for a folder tree and save it as CSV.

    Args:
        folder_path: Root folder; it is walked recursively.
        predictor: Forwarded to ``process_image`` for every image found.
        output_csv: Destination CSV path. Defaults to ``"patch_metadata.csv"``
            in the working directory, the location that used to be hard-coded.

    Returns:
        The ``pandas.DataFrame`` that was written, one row per ``.png``/``.jpg``
        file.
    """
    metadata_list = []

    for root, dirs, files in os.walk(folder_path):
        # os.walk yields entries in arbitrary order; sorting makes the row
        # order of the CSV reproducible between runs.
        dirs.sort()
        for file in sorted(files):
            if file.endswith((".png", ".jpg")):
                metadata_list.append(process_image(os.path.join(root, file), predictor))

    # Build the frame once from the collected records, then save it.
    df = pd.DataFrame(metadata_list)
    output_dir = os.path.dirname(output_csv)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    df.to_csv(output_csv, index=False)
    return df


# Driver: build the predictor from the saved config and weights, then write the metadata CSV
# for every .png/.jpg found under the dataset root (the walk is recursive, so all sub-folders are included).
# NOTE: this rebinds the notebook-level name `predictor`.
if __name__ == "__main__":
    folder_path = "/kaggle/input/coco-dataset-5/TIL.v5i.coco"  
    predictor = get_detector()
    process_images_in_folder(folder_path, predictor)