# Preprocess mask

Combine the multiple labels into 10 categories:
- hard coral
- hard coral bleached
- dead coral
- other invertebrates
- sand/rubble
- other
- (macro) algae
- seagrass
- unknown
- no label

## Imports

In [27]:
# load custom scripts
from preprocess_inference import *

# import the necessary packages
from imutils import paths
from PIL import Image
from skimage import io
import cv2
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import plotly.offline as pyo
pyo.init_notebook_mode()
import plotly.express as px
import time
import warnings
warnings.filterwarnings('ignore')

## Load masks and labels

In [None]:
# for scene_3
#%cp -r /home/jonathan/aerial_wildlife_detection/export/scene_3/. /data/jantina/CoralNet/inference/segmentation_masks/scene_3/


In [28]:
# name of the scene sub-folder that the cells below read from and write to
scene = "scene_3"

In [None]:
# collect the segmentation mask files of the chosen scene, sorted by path
mask_dir = "/data/jantina/data/CoralNet/inference/segmentation_masks/" + scene
maskPaths = sorted(paths.list_images(mask_dir))

In [None]:
# show the class name / class index table exported with the masks
# (kept as the last expression of the cell so the notebook renders it)
pd.read_csv(
    '/data/jantina/data/CoralNet/inference/labelclasses.csv',
    usecols=["name", "labelclass_index"],
)

## Old classes distribution

In [None]:
# Label index -> class name, using the original (non-contiguous) label indices.
# The pairs are listed in display order: later cells rely on the insertion
# order of this dict when sorting the histogram bars.
classes = dict([
    (1, "Branching Dead Unknown"),
    (2, "Massive Dead Uknown"),
    (3, "Encrusting Dead"),
    (29, "Dead Unknown"),
    (5, "Acropora Unknown"),
    (6, "Stylophora Unknown"),
    (7, "Stylophora Pistillata"),
    (8, "Stylophora Wellsi"),
    (9, "Pocilopora Unknown"),
    (10, "Pocilopora Verrucosa"),
    (11, "Pocilopora Damicornis"),
    (12, "Seriatopora Hystrix"),
    (4, "Branching Unknown"),
    (118, "Lobophyllia Unknown"),
    (13, "Massive Unknown"),
    (14, "Encrusting Unknown"),
    (15, "Soft Coral Unknown"),
    (17, "Millepora Alcicornis"),
    (18, "Millepora Dichotoma"),
    (16, "Hydrozoans Unknown"),
    (19, "Unidentifiable"),
    (20, "Sand"),
    (21, "Rock"),
    (22, "Fish"),
    (23, "Human"),
    (24, "Background"),
    (25, "Macroalgae"),
    (185, "Trash"),
    (0, "Unkown"),
])

## Remapping segmentation masks into masks

In [81]:
# Slightly remapped variant of the class table: the indices are contiguous
# (1..27) and index 0 is appended last, so the insertion order matches the
# numeric order followed by 0.
classes = dict(enumerate([
    "Branching Dead Unknown",
    "Massive Dead Uknown",
    "Encrusting Dead",
    "Branching Unknown",
    "Acropora Unknown",
    "Stylophora Unknown",
    "Stylophora Pistillata",
    "Stylophora Wellsi",
    "Pocilopora Unknown",
    "Pocilopora Verrucosa",
    "Pocilopora Damicornis",
    "Seriatopora Hystrix",
    "Massive Unknown",
    "Encrusting Unknown",
    "Soft Coral Unknown",
    "Hydrozoans Unknown",
    "Millepora Alcicornis",
    "Millepora Dichotoma",
    "Sand",
    "Rock",
    "Fish",
    "Human",
    "Background",
    "Macroalgae",
    "Dead Unknown",
    "Lobophyllia Unknown",
    "Trash",
], start=1))
classes[0] = "Unkown"

In [31]:
# Run every segmentation mask of the chosen scene through merge_mask2 (imported
# from preprocess_inference) and save the result under inference/masks/<scene>/.
maskPaths = sorted(paths.list_images('/data/jantina/data/CoralNet/inference/segmentation_masks/'+scene+'/'))

# create the output folder up front so that the first save for a scene that
# has not been processed before does not fail with FileNotFoundError
outDir = '/data/jantina/data/CoralNet/inference/masks/'+scene+'/'
os.makedirs(outDir, exist_ok=True)

startTime = time.time()

for srcPath in maskPaths:
    mask = io.imread(srcPath)
    new_mask = merge_mask2(mask)
    # keep the source file name, only swapping a '..tiff' suffix for '.png.tif'
    fileName = os.path.basename(srcPath).replace('..tiff','.png.tif')
    Image.fromarray(new_mask).save(outDir + fileName)

endTime = time.time()
print("[INFO] total time taken to write the new masks: {:.2f}s".format(endTime - startTime))

[INFO] total time taken to write the new masks: 0.23s


### Old classes distribution

In [None]:
# Count, for every mask below inference/masks/total/, how many pixels carry
# each label value. `count` ends up with one row per (mask, label) pair and the
# columns 'label' and 'pixel count'.
new_maskPaths = sorted(paths.list_images("/data/jantina/data/CoralNet/inference/masks/total/"))

perMaskCounts = []
for maskFile in new_maskPaths:
    mask = io.imread(maskFile)
    unique, counts = np.unique(mask, return_counts=True)
    perMaskCounts.append(pd.DataFrame({'label': unique, 'pixel count': counts}))

# DataFrame.append was removed in pandas 2.0 and copied the whole frame on each
# call; collecting the pieces and concatenating once works on old and new pandas.
if perMaskCounts:
    count = pd.concat(perMaskCounts, ignore_index=True)
else:
    # no masks found: keep the expected columns so later cells can still group
    count = pd.DataFrame(columns=['label', 'pixel count'])

In [None]:
# Bar chart of the total pixel count per class, with the bars ordered like the
# entries of `classes`. The figure is written to histogram.pdf (the later
# "New classes distribution" cell writes to the same file name).
orderClasses = classes.values()
d = count.groupby('label')['pixel count'].sum().reset_index()

# Leave label 0 out of the plot. Filtering on the label value instead of
# dropping row position 0 avoids removing the smallest real class whenever no
# mask contains label 0.
d = d.loc[d['label'] != 0].copy()

# Translate numeric labels into class names in a single pass; labels without
# an entry in `classes` keep their numeric value. (The previous chained
# assignment `d.label[...] = ...` does not write through under pandas
# copy-on-write.)
d['label'] = d['label'].map(lambda value: classes.get(value, value))

# sort the rows by the position of their class name in `classes`;
# numpy's argsort places NaN keys (labels without a name) last
mapping = {label: i for i, label in enumerate(orderClasses)}
key = d['label'].map(mapping)
d = d.iloc[np.argsort(key.to_numpy(), kind='stable')].set_index('label')

fig = plt.figure(figsize=(12, 6))
colors = ['#88CCFF']
ax = plt.subplot(1, 1, 1)
d.plot(kind='bar', ax=ax, color=colors)
ax.set_xticklabels(d.index, rotation=80, fontsize=15)
# NOTE(review): the axis label text is kept exactly as it was, spelling included
ax.set_ylabel('Pixel Occurcences', fontsize=16, labelpad=20)
ax.set_xlabel('Semantic Class', fontsize=16)
ax.get_legend().remove()
plt.savefig('histogram.pdf', bbox_inches="tight")

## New label classes

In [None]:
# The ten merged categories: indices 1..9 in listing order, with index 0
# ('no label') appended last so the insertion order is 1, 2, ..., 9, 0.
classes = dict(enumerate([
    'hard coral',
    'hard coral bleached',
    'dead coral',
    'other invertebrates',
    'sand/rubble',
    'other',
    '(macro) algae',
    'seagrass',
    'unknown',
], start=1))
classes[0] = 'no label'

In [None]:
# Grouping of label indices into the ten merged categories. The numbers are the
# original label indices (e.g. 118, 29 and 185 only exist in the first class
# table of this notebook). Categories with no matching label stay empty.
# NOTE(review): no visible cell reads these lists — presumably they mirror the
# grouping applied by merge_mask; confirm against preprocess_inference.

# living hard corals: indices 4..14 plus 17, 18 and 118
hard_coral = [*range(4, 15), 17, 18, 118]
hard_coral_bleached = []

# dead corals
dead_coral = [1, 2, 3, 29]

# soft corals and hydrozoans
other_invertebrates = [15, 16]

# sand, rock and background
sand_rubble = [20, 21, 24]

# fish, human and trash
other = [22, 23, 185]

# macroalgae
algae = [25]
seagrass = []
unknown = []

# unidentifiable pixels
no_label = [19]

### Creating new labels + cropped images and saving to disk

In [32]:
# Run every segmentation mask in maskPaths through merge_mask (imported from
# preprocess_inference) and save the result under inference/labels/<scene>/.

# create the output folder up front so that the first save for a scene that
# has not been processed before does not fail with FileNotFoundError
outDir = '/data/jantina/data/CoralNet/inference/labels/'+scene+'/'
os.makedirs(outDir, exist_ok=True)

startTime = time.time()

for srcPath in maskPaths:
    mask = io.imread(srcPath)
    new_mask = merge_mask(mask)
    # keep the source file name, only swapping a '..tiff' suffix for '.png.tif'
    fileName = os.path.basename(srcPath).replace('..tiff','.png.tif')
    Image.fromarray(new_mask).save(outDir + fileName)

endTime = time.time()
print("[INFO] total time taken to write the new masks: {:.2f}s".format(endTime - startTime))

[INFO] total time taken to write the new masks: 0.21s


In [None]:
# Cut the region that belongs to every mask tile out of its source image and
# save it under inference/images/<scene>/.
startTime = time.time()

# folder holding the source images of the scene
directory = "/data/jonathan/labeling_test/"+scene+"/"

# create the output folder up front so the first save cannot fail on a new scene
outDir = '/data/jantina/data/CoralNet/inference/images/'+scene+'/'
os.makedirs(outDir, exist_ok=True)

# extent of the crop along the first (rows) and second (columns) image axis
cropRows, cropCols = 500, 800

# maskPaths is sorted, so tiles of the same source image are adjacent: keep the
# most recently loaded image instead of re-reading the file for every tile
loadedName, loadedImage = None, None

# iterate over the mask tiles; their file names are parsed as <image>_<y>_<x>.<ext>
for tilePath in maskPaths:
    fileName = os.path.basename(tilePath)
    parts = fileName.split("_")
    y = int(parts[1])
    x = int(parts[2].split(".")[0])
    name = parts[0] + ".png"
    if name != loadedName:
        loadedName, loadedImage = name, io.imread(directory+name)
    image = loadedImage[y:y+cropRows, x:x+cropCols]
    Image.fromarray(image).save(outDir + fileName.replace('..tiff','.png'))

endTime = time.time()
# this cell writes image crops, not masks, so the message says so
print("[INFO] total time taken to write the cropped images: {:.2f}s".format(endTime - startTime))

### New classes distribution

In [96]:
# Count, for every label image below inference/labels/total/, how many pixels
# carry each label value. `count` ends up with one row per (image, label) pair
# and the columns 'label' and 'pixel count'.
new_maskPaths = sorted(paths.list_images("/data/jantina/data/CoralNet/inference/labels/total/"))

perMaskCounts = []
for maskFile in new_maskPaths:
    mask = io.imread(maskFile)
    unique, counts = np.unique(mask, return_counts=True)
    perMaskCounts.append(pd.DataFrame({'label': unique, 'pixel count': counts}))

# DataFrame.append was removed in pandas 2.0 and copied the whole frame on each
# call; collecting the pieces and concatenating once works on old and new pandas.
if perMaskCounts:
    count = pd.concat(perMaskCounts, ignore_index=True)
else:
    # no label images found: keep the expected columns so later cells can still group
    count = pd.DataFrame(columns=['label', 'pixel count'])
    

In [None]:
# Bar chart of the total pixel count per merged category, with the bars ordered
# like the entries of `classes`. The figure is written to histogram.pdf (the
# earlier "Old classes distribution" cell writes to the same file name).
orderClasses = classes.values()
d = count.groupby('label')['pixel count'].sum().reset_index()

# Leave label 0 out of the plot. Filtering on the label value instead of
# dropping row position 0 avoids removing the smallest real class whenever no
# label image contains label 0.
d = d.loc[d['label'] != 0].copy()

# Translate numeric labels into class names in a single pass; labels without
# an entry in `classes` keep their numeric value. (The previous chained
# assignment `d.label[...] = ...` does not write through under pandas
# copy-on-write.)
d['label'] = d['label'].map(lambda value: classes.get(value, value))

# sort the rows by the position of their class name in `classes`;
# numpy's argsort places NaN keys (labels without a name) last
mapping = {label: i for i, label in enumerate(orderClasses)}
key = d['label'].map(mapping)
d = d.iloc[np.argsort(key.to_numpy(), kind='stable')].set_index('label')

fig = plt.figure(figsize=(12, 6))
colors = ['#88CCFF']
ax = plt.subplot(1, 1, 1)
d.plot(kind='bar', ax=ax, color=colors)
ax.set_xticklabels(d.index, rotation=25, fontsize=15)
# NOTE(review): the axis label text is kept exactly as it was, spelling included
ax.set_ylabel('Pixel Occurcences', fontsize=16, labelpad=20)
ax.set_xlabel('Semantic Class', fontsize=16)
ax.get_legend().remove()
plt.savefig('histogram.pdf', bbox_inches="tight")