<a href="https://colab.research.google.com/github/emely3h/Geospatial_ML/blob/main/data_exploration/physical_jaccard.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Physical Jaccard Index

Calculate the physical Jaccard index, which will be the main success metric.

### 0. Get Stats for each image

In [1]:
# Mount Google Drive inside the Colab VM so that files stored on it can be
# reached from this notebook.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
! ls
%cd drive/MyDrive/MachineLearning/Geospatial_ML
! ls

drive  sample_data
/content/drive/.shortcut-targets-by-id/15HUD3sGdfvxy5Y_bjvuXgrzwxt7TzRfm/MachineLearning/Geospatial_ML
architecture.drawio  evaluation   models	README.md
data_exploration     experiments  prepare_data	requirements.txt


In [3]:
import numpy as np
import os
import matplotlib.pyplot as plt
import pandas as pd
import pickle
from data_exploration.mask_stats import Mask_Stats
from prepare_data.create_mask import create_physical_mask
from tensorflow.keras.utils import to_categorical
from tensorflow import keras

In [4]:
# Tile bookkeeping: the split sizes are derived from the total tile count as
# (total // 100) * percentage + 1.
total_tiles = 11121
_tiles_per_percent = total_tiles // 100
train_tiles = _tiles_per_percent * 60 + 1
test_val_tiles = _tiles_per_percent * 20 + 1
data_path = "../data_colab/256_256"

# Open the training split read-only as memory maps (nothing is loaded into RAM
# yet). Inputs carry 5 channels per pixel; the mask has one value per pixel.
_tile_hw = (256, 256)
train_split_x = np.memmap(
    os.path.join(data_path, "train_split_x.npy"),
    dtype=np.float32,
    mode="r",
    shape=(train_tiles, *_tile_hw, 5),
)
train_split_y = np.memmap(
    os.path.join(data_path, "train_split_y.npy"),
    dtype=np.float32,
    mode="r",
    shape=(train_tiles, *_tile_hw),
)

# Print the statistics of the training mask via the project helper.
train_stats = Mask_Stats(train_split_y)
train_stats.print_stats()


Shape: (6661, 256, 256)
Land pixels: 176919986  40.528 %
Valid pixels: 125877821  28.836 %
Invalid pixels: 133737489  30.636 %
Sum: 6661


In [6]:
# Allocate an in-memory float32 buffer for the training inputs and print its
# shape and value range (max, then min) before it is filled.
x_input = np.zeros((train_tiles, 256, 256, 5), dtype=np.float32)
print(x_input.shape)
for reduce_fn in (np.max, np.min):
    print(reduce_fn(x_input))

# Copy the tiles from the memory-mapped file into the buffer and report the
# same figures again.
np.copyto(x_input, train_split_x[:train_tiles])
print(x_input.shape)
for reduce_fn in (np.max, np.min):
    print(reduce_fn(x_input))

(6661, 256, 256, 5)
0.0
0.0
(6661, 256, 256, 5)
255.0
0.0


In [7]:
# Allocate an in-memory float32 buffer for the training masks and print its
# shape and value range (max, then min) before it is filled.
y_mask = np.zeros((train_tiles, 256, 256), dtype=np.float32)
print(y_mask.shape)
for reduce_fn in (np.max, np.min):
    print(reduce_fn(y_mask))

# Copy the masks from the memory-mapped file into the buffer and report the
# same figures again.
np.copyto(y_mask, train_split_y[:train_tiles])
print(y_mask.shape)
for reduce_fn in (np.max, np.min):
    print(reduce_fn(y_mask))

# Open question: why is the max 128 and not 255/254? => suspected cause is the
# dtype the file is read with (np.uint8 vs. np.float32).

(6661, 256, 256)
0.0
0.0
(6661, 256, 256)
2.0
0.0


In [8]:
# Build the physical mask from the input tiles with the project helper; it is
# scored against the one-hot labels further down.
pred_physical = create_physical_mask(x_input)
print(pred_physical.shape)

(6661, 256, 256, 3)


In [9]:
# One-hot encode the label mask into 3 classes (adds a trailing class axis).
y_one_hot = to_categorical(y_mask, num_classes=3)
print(y_one_hot.shape)

(6661, 256, 256, 3)


In [10]:
def jaccard_coef(y_true, y_pred, smooth=1.0):
    """Compute the smoothed Jaccard index (intersection over union) of two masks.

    Both inputs are flattened first, so the result is a single scalar computed
    over all elements at once rather than an average per tile or per class.

    Args:
        y_true: Ground-truth mask (tensor or array), e.g. one-hot encoded.
        y_pred: Predicted mask; expected to have the same number of elements
            as ``y_true`` so the element-wise product is well defined.
        smooth: Constant added to both numerator and denominator. The default
            of 1.0 matches the previously hard-coded value and keeps the
            result defined when both masks are all zeros. With ``smooth=0``
            two empty masks yield a 0/0 division.

    Returns:
        Scalar ``(intersection + smooth) / (union + smooth)`` where
        ``intersection = sum(y_true * y_pred)`` and
        ``union = sum(y_true) + sum(y_pred) - intersection``.
    """
    y_true_f = keras.backend.flatten(y_true)
    y_pred_f = keras.backend.flatten(y_pred)

    intersection = keras.backend.sum(y_true_f * y_pred_f)
    # |A union B| = |A| + |B| - |A intersect B|
    union = keras.backend.sum(y_true_f) + keras.backend.sum(y_pred_f) - intersection
    return (intersection + smooth) / (union + smooth)

In [11]:
# Jaccard index of the physical mask against the one-hot training labels.
jaccard = jaccard_coef(y_one_hot, pred_physical)
print(jaccard)

tf.Tensor(0.7695766, shape=(), dtype=float32)


In [None]:
# TODO: compute this over the entire dataset (not only the training split), for both the non-overlapping and the overlapping tiles => the results should be equal

In [None]:


# Second dataset variant ("256_200").
# NOTE: this rebinds data_path and train_split_x / train_split_y from the
# cells above, and the arrays are now read as uint8 instead of float32.
data_path = "../data_colab/256_200"
train_total_tiles = 11063
val_total_tiles = 3545
test_total_tiles = 3699


def _open_split(split_name, n_tiles):
    """Memory-map the x and y arrays of one split read-only and return (x, y)."""

    # np.memmap maps the raw bytes of the file.
    # NOTE(review): this assumes the files carry no .npy header -- confirm
    # against the code that writes them.
    def _open(suffix, shape):
        file_path = os.path.join(data_path, f"{split_name}_split_{suffix}.npy")
        return np.memmap(file_path, mode="r", shape=shape, dtype=np.uint8)

    # Inputs carry 5 channels per pixel; masks have one value per pixel.
    return _open("x", (n_tiles, 256, 256, 5)), _open("y", (n_tiles, 256, 256))


train_split_x, train_split_y = _open_split("train", train_total_tiles)
val_split_x, val_split_y = _open_split("val", val_total_tiles)
test_split_x, test_split_y = _open_split("test", test_total_tiles)
