<a href="https://colab.research.google.com/github/emely3h/Geospatial_ML/blob/main/data_exploration/physics_jaccard.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Physics Jaccard Index

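Calculate the physics Jaccard index, which will be the main benchmark metric. Channel 4 of every input tile is relabelled to the three mask classes (0 = invalid, 1 = valid, 2 = land) and compared pixel by pixel with the ground-truth mask.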

### 0. Prepare Colab

In [1]:
# Mount Google Drive under /content/drive (only works inside Google Colab).
from google.colab import drive

drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Change into the project checkout on the mounted drive and try to update it.
! ls
%cd drive/MyDrive/MachineLearning/Geospatial_ML
# NOTE(review): the recorded output shows this pull failing with
# "You are not currently on a branch", so the checkout is not actually updated.
! git pull
! ls

drive  sample_data
/content/drive/.shortcut-targets-by-id/15HUD3sGdfvxy5Y_bjvuXgrzwxt7TzRfm/MachineLearning/Geospatial_ML
You are not currently on a branch.
Please specify which branch you want to merge with.
See git-pull(1) for details.

    git pull <remote> <branch>

data_exploration  experiments	     models	   pyproject.toml    scripts
docs		  image_processing   poetry.lock   README.md	     sripts
evaluation	  metrics_bug.ipynb  prepare_data  requirements.txt


In [3]:
import numpy as np
import os
import pandas as pd
import pickle
from data_exploration.mask_stats import Mask_Stats
from sklearn.metrics import jaccard_score, recall_score, precision_score, f1_score, accuracy_score


In [4]:
class EvaluationMetrics:
    """Container for per-class and mean scores of a three-class segmentation.

    ``iou``, ``recall``, ``precision`` and ``f1`` are each indexed as
    0 = invalid, 1 = valid, 2 = land and must offer ``.sum()``; their mean is
    the sum divided by 3. ``accuracy`` is stored unchanged as ``mean_accuracy``.
    """

    def __init__(self, iou, recall, precision, f1, accuracy):
        # Attributes are created in the order iou, precision, recall, f1,
        # accuracy, which is also the key order of the instance ``__dict__``.
        score_groups = (
            ("iou", iou),
            ("precision", precision),
            ("recall", recall),
            ("f1", f1),
        )
        for metric, scores in score_groups:
            for position, pixel_class in enumerate(("invalid", "valid", "land")):
                setattr(self, f"{metric}_{pixel_class}", scores[position])
            setattr(self, f"mean_{metric}", scores.sum() / 3)

        self.mean_accuracy = accuracy

    def print_metrics(self):
        """Print all stored scores, one per line, with a blank line after each metric group."""
        # The iou labels use a space between metric and class, the others an underscore.
        layout = (("iou", " "), ("precision", "_"), ("recall", "_"), ("f1", "_"))
        for metric, sep in layout:
            mean_value = getattr(self, "mean_" + metric)
            invalid_value = getattr(self, metric + "_invalid")
            valid_value = getattr(self, metric + "_valid")
            land_value = getattr(self, metric + "_land")

            print(f"mean {metric}: {mean_value}")
            print(f"{metric}{sep}invalid: {invalid_value}")
            print(f"{metric}{sep}valid: {valid_value}")
            print(f"{metric}{sep}land: {land_value}\n")

        print(f"mean accuracy: {self.mean_accuracy}")

In [5]:
def label_pixels(img):
    """Relabel raw pixel values to class ids, in place, and return the same array.

    Mapping: 0 -> 2, 253 or 255 -> 0, any other value -> 1. All three masks are
    derived from the original values before anything is overwritten, so the
    order of the assignments below does not matter. ``img`` itself is modified.
    """
    is_zero = img == 0
    is_flagged = (img == 253) | (img == 255)
    is_other = ~is_zero & ~is_flagged

    img[is_other] = 1
    img[is_flagged] = 0
    img[is_zero] = 2
    return img


def get_save_metrics(mmap_true, mmap_pred, saving_path, pred_channel=4):
    """Score one channel of the input tiles against the masks and pickle the result.

    The selected channel of ``mmap_pred`` is relabelled with ``label_pixels``
    and compared pixel-wise with ``mmap_true`` using per-class Jaccard index,
    recall, precision and F1 plus overall accuracy.

    Args:
        mmap_true: array of class ids (0, 1, 2), e.g. a read-only ``np.memmap``.
            It is copied into memory and never modified.
        mmap_pred: 4-D array whose last axis holds the channels. Only
            ``pred_channel`` is copied into memory; the source is never modified.
        saving_path: file the pickled ``EvaluationMetrics`` is written to.
            Missing parent directories are created.
        pred_channel: index on the last axis of ``mmap_pred`` to evaluate.
            Defaults to 4, the channel this function always used.

    Returns:
        The ``EvaluationMetrics`` instance that was written to ``saving_path``.
    """
    print('start')
    # Create the output directory first so a bad path fails before the long
    # metric computation instead of after it.
    target_dir = os.path.dirname(saving_path)
    if target_dir:
        os.makedirs(target_dir, exist_ok=True)

    # Copy only what is needed: the mask once, and the single evaluated
    # channel instead of every channel. ``order="C"`` makes the copy
    # contiguous so the flattening below is a view, not another copy.
    true = np.array(mmap_true).reshape(-1)
    pred = np.array(mmap_pred[:, :, :, pred_channel], order="C")
    print('copied arrays')

    # ``pred`` is a private copy, so relabelling it in place is safe.
    pred = label_pixels(pred).reshape(-1)

    print(pred.shape)
    print(true.shape)
    print(type(pred[0]))
    print(type(true[0]))
    print(np.max(true))
    print(np.max(pred))
    print(np.min(true))
    print(np.min(pred))

    # Pin the label set so every score array has exactly one entry per class
    # (0 = invalid, 1 = valid, 2 = land) even if a class is absent from a split;
    # EvaluationMetrics indexes positions 0..2 and averages over three classes.
    class_labels = [0, 1, 2]

    print('start calculate iou...')
    iou = jaccard_score(true, pred, labels=class_labels, average=None)
    print('start calculate recall...')
    recall = recall_score(true, pred, labels=class_labels, average=None)
    print('start calculate precision...')
    precision = precision_score(true, pred, labels=class_labels, average=None)
    print('start calculate f1...')
    f1 = f1_score(true, pred, labels=class_labels, average=None)
    print('start calculate accuracy...')
    accuracy = accuracy_score(true, pred)

    metrics = EvaluationMetrics(iou, recall, precision, f1, accuracy)

    with open(saving_path, 'wb') as file:
        pickle.dump(metrics, file)
    print('saving complete')
    return metrics


### 2. Jaccard index for overlapping tiles

In [9]:
data_path = "../data_colab/256_200"
train_tiles, val_tiles, test_tiles = 11063, 3545, 3699

# (file name, array shape) for each split file; x files carry 5 channels per
# pixel, y files a single value per pixel.
memmap_specs = (
    ("train_split_x.npy", (train_tiles, 256, 256, 5)),
    ("train_split_y.npy", (train_tiles, 256, 256)),
    ("val_split_x.npy", (val_tiles, 256, 256, 5)),
    ("val_split_y.npy", (val_tiles, 256, 256)),
    ("test_split_x.npy", (test_tiles, 256, 256, 5)),
    ("test_split_y.npy", (test_tiles, 256, 256)),
)

# Map every file read-only as uint8; nothing is loaded into memory here.
(
    train_split_x,
    train_split_y,
    val_split_x,
    val_split_y,
    test_split_x,
    test_split_y,
) = (
    np.memmap(os.path.join(data_path, file_name), mode="r", shape=shape, dtype=np.uint8)
    for file_name, shape in memmap_specs
)

# Report the mask statistics of each split, separated by a blank line.
stats_per_split = []
for position, mask in enumerate((train_split_y, val_split_y, test_split_y)):
    if position > 0:
        print()
    split_stats = Mask_Stats(mask)
    split_stats.print_stats()
    stats_per_split.append(split_stats)
train_stats, val_stats, test_stats = stats_per_split

Shape: (11063, 256, 256)
Land pixels: 326666615  45.056 %
Valid pixels: 231026701  31.865 %
Invalid pixels: 167331452  23.079 %
Sum: 11063

Shape: (3545, 256, 256)
Land pixels: 100682317  43.337 %
Valid pixels: 76811432  33.062 %
Invalid pixels: 54831371  23.601 %
Sum: 3545

Shape: (3699, 256, 256)
Land pixels: 112712687  46.495 %
Valid pixels: 71301683  29.413 %
Invalid pixels: 58403294  24.092 %
Sum: 3699


In [10]:
# (progress message, masks, input tiles, pickle target) for each split,
# processed in the order test, val, train.
metric_jobs = (
    ('Calc test metrics...', test_split_y, test_split_x, '../metrics/physics_iou_overlapping_test_data.pkl'),
    ('Calc val metrics...', val_split_y, val_split_x, '../metrics/physics_iou_overlapping_val_data.pkl'),
    ('Calc train metrics...', train_split_y, train_split_x, '../metrics/physics_iou_overlapping_train_data.pkl'),
)

computed = []
for announcement, masks, tiles, pickle_path in metric_jobs:
    print(announcement)
    computed.append(get_save_metrics(masks, tiles, pickle_path))
test_metrics, val_metrics, train_metrics = computed

# Print the reports only after all three computations have finished.
reports = (
    ('METRICS TEST DATA', test_metrics),
    ('METRICS VAL DATA', val_metrics),
    ('METRICS TRAIN DATA', train_metrics),
)
for heading, split_metrics in reports:
    print(heading)
    split_metrics.print_metrics()
    print()
    print()


Calc test metrics...
start
copied arrays
(242417664,)
(242417664,)
<class 'numpy.uint8'>
<class 'numpy.uint8'>
2
2
0
0
start calculate iou...
start calculate recall...
start calculate precision...
start calculate f1...
start calculate accuracy...
saving complete
Calc val metrics...
start
copied arrays
(232325120,)
(232325120,)
<class 'numpy.uint8'>
<class 'numpy.uint8'>
2
2
0
0
start calculate iou...
start calculate recall...
start calculate precision...
start calculate f1...
start calculate accuracy...
saving complete
Calc train metrics...
start
copied arrays
(725024768,)
(725024768,)
<class 'numpy.uint8'>
<class 'numpy.uint8'>
2
2
0
0
start calculate iou...
start calculate recall...
start calculate precision...
start calculate f1...
start calculate accuracy...
saving complete
METRICS TEST DATA
mean iou: 0.8674488378312623
iou invalid: 0.7642157135862919
iou valid: 0.8381307999074951
iou land: 1.0

mean precision: 0.946043599969165
precision_invalid: 1.0
precision_valid: 0.83813079990

### 3. Jaccard index for non-overlapping tiles

In [6]:
total_tiles = 11121
train_tiles = 6672
test_val_tiles = 2224  # the val and test splits have the same number of tiles
data_path = "../data_colab/256_256"

# (file name, array shape) for each split file; x files carry 5 channels per
# pixel, y files a single value per pixel.
memmap_specs = (
    ("train_split_x.npy", (train_tiles, 256, 256, 5)),
    ("train_split_y.npy", (train_tiles, 256, 256)),
    ("val_split_x.npy", (test_val_tiles, 256, 256, 5)),
    ("val_split_y.npy", (test_val_tiles, 256, 256)),
    ("test_split_x.npy", (test_val_tiles, 256, 256, 5)),
    ("test_split_y.npy", (test_val_tiles, 256, 256)),
)

# Map every file read-only as uint8; nothing is loaded into memory here.
(
    train_split_x,
    train_split_y,
    val_split_x,
    val_split_y,
    test_split_x,
    test_split_y,
) = (
    np.memmap(os.path.join(data_path, file_name), mode="r", shape=shape, dtype=np.uint8)
    for file_name, shape in memmap_specs
)

# Report the mask statistics of each split, separated by a blank line.
stats_per_split = []
for position, mask in enumerate((train_split_y, val_split_y, test_split_y)):
    if position > 0:
        print()
    split_stats = Mask_Stats(mask)
    split_stats.print_stats()
    stats_per_split.append(split_stats)
train_stats, val_stats, test_stats = stats_per_split

Shape: (6672, 256, 256)
Land pixels: 195058814  44.610 %
Valid pixels: 138904480  31.767 %
Invalid pixels: 103292898  23.623 %
Sum: 6672

Shape: (2224, 256, 256)
Land pixels: 65320265  44.816 %
Valid pixels: 46246663  31.730 %
Invalid pixels: 34185136  23.454 %
Sum: 2224

Shape: (2224, 256, 256)
Land pixels: 64786699  44.450 %
Valid pixels: 46892391  32.173 %
Invalid pixels: 34072974  23.377 %
Sum: 2224


In [7]:
# (progress message, masks, input tiles, pickle target) for each split,
# processed in the order test, val, train.
metric_jobs = (
    ('Calc test metrics...', test_split_y, test_split_x, '../metrics/physics_iou_non-overlapping_test_data.pkl'),
    ('Calc val metrics...', val_split_y, val_split_x, '../metrics/physics_iou_non-overlapping_val_data.pkl'),
    ('Calc train metrics...', train_split_y, train_split_x, '../metrics/physics_iou_non-overlapping_train_data.pkl'),
)

computed = []
for announcement, masks, tiles, pickle_path in metric_jobs:
    print(announcement)
    computed.append(get_save_metrics(masks, tiles, pickle_path))
test_metrics, val_metrics, train_metrics = computed

# Print the reports only after all three computations have finished.
reports = (
    ('METRICS TEST DATA', test_metrics),
    ('METRICS VAL DATA', val_metrics),
    ('METRICS TRAIN DATA', train_metrics),
)
for heading, split_metrics in reports:
    print(heading)
    split_metrics.print_metrics()
    print()
    print()


Calc test metrics...
start
copied arrays
(145752064,)
(145752064,)
<class 'numpy.uint8'>
<class 'numpy.uint8'>
2
2
0
0
start calculate iou...
start calculate recall...
start calculate precision...
start calculate f1...
start calculate accuracy...
saving complete
Calc val metrics...
start
copied arrays
(145752064,)
(145752064,)
<class 'numpy.uint8'>
<class 'numpy.uint8'>
2
2
0
0
start calculate iou...
start calculate recall...
start calculate precision...
start calculate f1...
start calculate accuracy...
saving complete
Calc train metrics...
start
copied arrays
(437256192,)
(437256192,)
<class 'numpy.uint8'>
<class 'numpy.uint8'>
2
2
0
0
start calculate iou...
start calculate recall...
start calculate precision...
start calculate f1...
start calculate accuracy...
saving complete
METRICS TEST DATA
mean iou: 0.8974743870995937
iou invalid: 0.81239073644316
iou valid: 0.8800332274895075
iou land: 0.9999991973661136

mean precision: 0.9600107266083836
precision_invalid: 0.9999989523356431
p

### Summary

In [11]:
labels = ['non-o test', 'non-o val', 'non-o train', 'overlap test', 'overlap val', 'overlap train']

# Pickle files in the same order as `labels`: entry i of `metrics` belongs to labels[i].
metric_files = [
    '../metrics/physics_iou_non-overlapping_test_data.pkl',
    '../metrics/physics_iou_non-overlapping_val_data.pkl',
    '../metrics/physics_iou_non-overlapping_train_data.pkl',
    '../metrics/physics_iou_overlapping_test_data.pkl',
    '../metrics/physics_iou_overlapping_val_data.pkl',
    '../metrics/physics_iou_overlapping_train_data.pkl',
]

# Keep only the attribute dict of every unpickled metrics object.
metrics = []
for metric_file in metric_files:
    with open(metric_file, 'rb') as file:
        metrics.append(pickle.load(file).__dict__)

In [13]:
# One row per dataset (named by `labels`); transposed for display so each
# metric is a row and each dataset a column.
df = pd.DataFrame(metrics, index=labels)
df.T

Unnamed: 0,non-o test,non-o val,non-o train,overlap test,overlap val,overlap train
iou_invalid,0.812391,0.792307,0.801342,0.764216,0.820772,0.809363
iou_valid,0.880033,0.866908,0.871285,0.838131,0.886571,0.878673
iou_land,0.999999,1.0,0.999996,1.0,1.0,0.999996
mean_iou,0.897474,0.886405,0.890874,0.867449,0.902448,0.896011
precision_invalid,0.999999,1.0,0.999998,1.0,1.0,0.999998
precision_valid,0.880033,0.866908,0.871285,0.838131,0.886571,0.878673
precision_land,1.0,1.0,1.0,1.0,1.0,1.0
mean_precision,0.960011,0.955636,0.957094,0.946044,0.96219,0.959557
recall_invalid,0.812391,0.792307,0.801343,0.764216,0.820772,0.809365
recall_valid,1.0,1.0,1.0,1.0,1.0,1.0
