In [None]:
import numpy as np
from typing import Text
import base64
import numpy as np
import pandas as pd
from pycocotools import _mask as coco_mask
import zlib
import glob
import os
from pathlib import Path
import matplotlib.pyplot as plt


import torch
import torch.nn as nn


## Submission File
For each image in the test set, you must predict a list of instance segmentation masks and their associated detection score (Confidence). The submission.csv file uses the following format:

```
id,height,width,prediction_string
72e40acccadf,512,512,0 1.0 eNoLTDAwyrM3yI/PMwcAE94DZA==
```
where ```prediction_string``` has the format ```0 {confidence} {EncodedMask}```. Note that the metric has several "boilerplate" values needed to adapt it to this competition; namely, the ```height```, ```width```, and the leading ```0``` in prediction_string, which ordinarily is a class label.

**Separate** prediction strings multiple instance masks for the same image with a **space**

```
id,height,width,prediction_string
72e40acccadf,512,512,0 1.0 eNoLTDAwyrM3yI/PMwcAE94DZA== 0 0.5 eAndnnDS1A/mdmkE35Ek9d
```

https://www.kaggle.com/competitions/hubmap-hacking-the-human-vasculature/overview/evaluation

In [None]:
def encode_binary_mask(mask: np.ndarray) -> Text:
  """Converts a binary mask into OID challenge encoding ascii text."""

  # check input mask --
  if mask.dtype != bool:
    raise ValueError(
        "encode_binary_mask expects a binary mask, received dtype == %s" %
        mask.dtype)

  mask = np.squeeze(mask)
  if len(mask.shape) != 2:
    raise ValueError(
        "encode_binary_mask expects a 2d mask, received shape == %s" %
        mask.shape)

  # convert input mask to expected COCO API input --
  mask_to_encode = mask.reshape(mask.shape[0], mask.shape[1], 1)
  mask_to_encode = mask_to_encode.astype(np.uint8)
  mask_to_encode = np.asfortranarray(mask_to_encode)

  # RLE encode mask --
  encoded_mask = coco_mask.encode(mask_to_encode)[0]["counts"]

  # compress and base64 encoding --
  binary_str = zlib.compress(encoded_mask, zlib.Z_BEST_COMPRESSION)
  base64_str = base64.b64encode(binary_str)
  return base64_str

In [None]:
on_pc = os.path.isdir('data')
if on_pc: 
    data_directory = "data"
else:
    data_directory = "/kaggle/input/hubmap-hacking-the-human-vasculature"

In [None]:
device = "cuda" if torch.cuda.is_available() else "cpu"
device

In [None]:
import segmentation_models_pytorch as smp

class UNetInstanceSegmentation(nn.Module):
    def __init__(self, num_classes=1, num_channels=3):
        super(UNetInstanceSegmentation, self).__init__()
        self.model = smp.Unet(
            encoder_name="resnet34",  # You can choose different encoder backbones if desired.
            encoder_weights="imagenet",
            in_channels=num_channels,
            classes=num_classes,
            activation='sigmoid'
        )

    def forward(self, x):
        return self.model(x)

    def predict(self, x):
        with torch.no_grad():
            y_pred = self.forward(x)
        return (y_pred > 0.5) * 1.0

model = UNetInstanceSegmentation()
model.to(device)

inp = torch.randn(1, 3, 512, 512).to(device)
output_mask = model.predict(inp)


In [None]:
model_path = f"{data_directory}/models/onereg_20min"

# model.load_state_dict(torch.load(model_path))

model = UNetInstanceSegmentation()
model.load_state_dict(torch.load(model_path))


In [22]:
def convert_multiple_masks(masks: list[np.ndarray], confidence_values: list[int]) -> Text:
    strings = []
    for mask, confidence in zip(masks, confidence_values):
        strings.append(f"0 {confidence} {encode_binary_mask(mask)}")

    return ' '.join(strings)

In [None]:
def instance_confidence(masks, conf_matrixes) -> list:
    conf_values = []
    for mask, conf_matrix in zip(masks, conf_matrixes):
        num, denom = 0, 0
        for i in range(mask.shape[0]):
            for j in range(mask.shape[1]):
                if mask[i][j] == 1:
                    num += conf_matrix[i][j]
                    denom += 1
        conf_values.append(num/denom)
    return conf_values

In [32]:
submission_df = pd.DataFrame(columns=['id', 'height', 'width', 'prediction_string'])

tif_files = glob.glob(os.path.join(f'{data_directory}/test', '*.tif'))
rows = []
for file_path in tif_files:
    # Preparation of image
    image_id = Path(file_path).stem
    inp = torch.tensor(plt.imread(file_path) / 255) # imread returns (512, 512, 3)
    inp = inp.reshape(1, 3, 512, 512) # BATCH, CHANNELS, SIZEY, SIZEX
    inp = inp.float() # from float64 to float32

    # Prediction + post preparation
    with torch.no_grad():
        conf_matrix = model.forward(inp).numpy().reshape(512, 512)
    predicted = model.predict(inp).numpy().reshape(512, 512).astype(bool)

    # plt.imshow(predicted.numpy().reshape(512, 512, 1), vmax=1, vmin=0, cmap="gray")
    
    # Mask separation - TODO
    masks = []
    masks.append(predicted)
    #

    # Confidence
    conf_matrixes = []
    conf_matrixes.append(conf_matrix)

    # confidence =  sum of all pixels confidence over treshold divided by the number of  pixels confidence over treshold
    confidence_values = instance_confidence(masks, conf_matrixes)

    # Prediction string
    prediction_str = convert_multiple_masks(masks, confidence_values)  

    # Add row to list  
    new_row = {
        'id': image_id,
        'height': 512,
        'width': 512,
        'prediction_string': prediction_str
    }
    rows.append(new_row)


res = submission_df.from_dict(rows)
res.set_index("id", inplace=True)
#res.reset_index(inplace=True)

res.head()jjijfef

bool


Unnamed: 0_level_0,height,width,prediction_string
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
72e40acccadf,512,512,0 0.6643457779663025 b'eNpEfWeDHLeu7F8CU8cJO7E...
