<a href="https://colab.research.google.com/github/emely3h/Geospatial_ML/blob/fix%2Fcreate_mmaps/jaccard_index/data_exploration/physics_jaccard.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Physics Jaccard Index

Calculate the physics Jaccard index (intersection over union between the physics-based mask and the ground-truth mask), which will be the main success metric.

### 0. Prepare Colab

In [1]:
# Mount Google Drive into the Colab runtime at /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [16]:
! ls
#%cd drive/MyDrive/MachineLearning/Geospatial_ML
#! git pull
! git checkout fix/create_mmaps
! ls

architecture.drawio  evaluation		   models	 requirements.txt
data_exploration     experiment_1_2.ipynb  prepare_data
debug_mmaps.ipynb    experiments	   README.md
Already on 'fix/create_mmaps'
Your branch is up to date with 'origin/fix/create_mmaps'.
architecture.drawio  evaluation		   models	 requirements.txt
data_exploration     experiment_1_2.ipynb  prepare_data
debug_mmaps.ipynb    experiments	   README.md


In [3]:
import numpy as np
import os
import matplotlib.pyplot as plt
import pandas as pd
import pickle
from data_exploration.mask_stats import Mask_Stats
from prepare_data.create_mask import create_physical_mask
from tensorflow.keras.utils import to_categorical
from tensorflow import keras
from datetime import datetime
from typing import Tuple
from evaluation.jaccard_calculator import JaccardIndexCalculator

In [4]:
def jaccards_entire_dataset(train_jaccard, val_jaccard, test_jaccard):
  """Print Jaccard scores pooled over the train, validation and test splits.

  Intersections and unions are first added up across the three calculators and
  only then divided. Numerator and denominator are each increased by 1.0 before
  the division.

  Args:
    train_jaccard: calculator for the training split.
    val_jaccard: calculator for the validation split.
    test_jaccard: calculator for the testing split.
      Each must expose `intersections_sum` and `unions_sum` (overall totals)
      as well as `intersections` and `unions`, indexable by class 0, 1 and 2.

  Returns:
    None. All scores are written to stdout.
  """
  # Overall score across all classes.
  overall_intersection = (
      train_jaccard.intersections_sum
      + val_jaccard.intersections_sum
      + test_jaccard.intersections_sum
  )
  overall_union = (
      train_jaccard.unions_sum
      + val_jaccard.unions_sum
      + test_jaccard.unions_sum
  )
  overall_score = (overall_intersection + 1.0) / (overall_union + 1.0)
  print(f'total_mean_jaccard: {overall_score}')

  # Per-class scores; the position in this tuple is the class index.
  for class_index, class_name in enumerate(("invalid", "valid", "land")):
    class_intersection = (
        train_jaccard.intersections[class_index]
        + val_jaccard.intersections[class_index]
        + test_jaccard.intersections[class_index]
    )
    class_union = (
        train_jaccard.unions[class_index]
        + val_jaccard.unions[class_index]
        + test_jaccard.unions[class_index]
    )
    class_score = (class_intersection + 1.0) / (class_union + 1.0)
    print(f'total_jaccard_{class_name}: {class_score}')

### 2. Jaccard index for overlapping tiles

In [9]:
# Location of the overlapping-tile dataset and the number of tiles per split.
data_path = "../data_colab/256_200"
train_tiles, val_tiles, test_tiles = 11063, 3545, 3699

# Per-tile shapes; the leading dimension of each array is the split's tile count.
x_tile_shape = (256, 256, 5)
y_tile_shape = (256, 256)

# NOTE(review): np.memmap maps raw bytes starting at offset 0 and does not parse
# an .npy header, so these files are assumed to be headerless despite their
# suffix - confirm against the code that writes them.
memmap_options = dict(mode="r", dtype=np.uint8)

train_split_x = np.memmap(
    os.path.join(data_path, "train_split_x.npy"),
    shape=(train_tiles, *x_tile_shape),
    **memmap_options,
)
train_split_y = np.memmap(
    os.path.join(data_path, "train_split_y.npy"),
    shape=(train_tiles, *y_tile_shape),
    **memmap_options,
)
val_split_x = np.memmap(
    os.path.join(data_path, "val_split_x.npy"),
    shape=(val_tiles, *x_tile_shape),
    **memmap_options,
)
val_split_y = np.memmap(
    os.path.join(data_path, "val_split_y.npy"),
    shape=(val_tiles, *y_tile_shape),
    **memmap_options,
)
test_split_x = np.memmap(
    os.path.join(data_path, "test_split_x.npy"),
    shape=(test_tiles, *x_tile_shape),
    **memmap_options,
)
test_split_y = np.memmap(
    os.path.join(data_path, "test_split_y.npy"),
    shape=(test_tiles, *y_tile_shape),
    **memmap_options,
)

# Class distribution of the masks, one report per split separated by a blank line.
train_stats = Mask_Stats(train_split_y)
train_stats.print_stats()
print()

val_stats = Mask_Stats(val_split_y)
val_stats.print_stats()
print()

test_stats = Mask_Stats(test_split_y)
test_stats.print_stats()

Shape: (11063, 256, 256)
Land pixels: 326666615  45.056 %
Valid pixels: 231026701  31.865 %
Invalid pixels: 167331452  23.079 %
Sum: 11063

Shape: (3545, 256, 256)
Land pixels: 100682317  43.337 %
Valid pixels: 76811432  33.062 %
Invalid pixels: 54831371  23.601 %
Sum: 3545

Shape: (3699, 256, 256)
Land pixels: 112712687  46.495 %
Valid pixels: 71301683  29.413 %
Invalid pixels: 58403294  24.092 %
Sum: 3699


In [10]:
# Time the full pass over all three splits.
start = datetime.now()

print("Calculate Intersection and Union for training set \n")
train_jaccard = JaccardIndexCalculator(
    train_split_x, train_split_y, train_tiles, chunk_size=1000
)

print("\n\nCalculate Intersection and Union for validation set \n")
val_jaccard = JaccardIndexCalculator(
    val_split_x, val_split_y, val_tiles, chunk_size=1000
)

print("\n\nCalculate Intersection and Union for testing set \n")
test_jaccard = JaccardIndexCalculator(
    test_split_x, test_split_y, test_tiles, chunk_size=1000
)

end = datetime.now()
elapsed = end - start
print(f'time needed: {elapsed}')

Calculate Intersection and Union for training set 

Chunk No.0
copying chunk from mmap [0:1000]
shape x_input_chunk: (1000, 256, 256, 5), shape y_mask_chunk: (1000, 256, 256)
shape y_true: (1000, 256, 256, 3), shape pred_physical: (1000, 256, 256, 3)
chunk_jaccard_invalid: 0.5083847641944885, chunk_intersection: 2094040.0, chunk_union: 4119007.0
chunk_jaccard_valid: 0.9374726414680481, chunk_intersection: 30360340.0, chunk_union: 32385308.0
chunk_jaccard_land: 1.0, chunk_intersection: 31056652.0, chunk_union: 31056652.0


Chunk No.1
copying chunk from mmap [1000:2000]
shape x_input_chunk: (1000, 256, 256, 5), shape y_mask_chunk: (1000, 256, 256)
shape y_true: (1000, 256, 256, 3), shape pred_physical: (1000, 256, 256, 3)
chunk_jaccard_invalid: 0.6571222543716431, chunk_intersection: 3616129.0, chunk_union: 5502978.0
chunk_jaccard_valid: 0.9423701763153076, chunk_intersection: 30853776.0, chunk_union: 32740612.0
chunk_jaccard_land: 0.9999964237213135, chunk_intersection: 29179200.0, chun

In [11]:
# Pool the three splits and print the overall and per-class scores.
jaccards_entire_dataset(
    train_jaccard=train_jaccard,
    val_jaccard=val_jaccard,
    test_jaccard=test_jaccard,
)

total_mean_jaccard: 0.911575198173523
total_jaccard_invalid: 0.8021949294373217
total_jaccard_valid: 0.8723119327500691
total_jaccard_land: 0.9999978669101273


### 3. Jaccard index for non-overlapping tiles

In [23]:
# Location of the non-overlapping-tile dataset and the number of tiles per split.
# Validation and test share one count. Note that 6672 + 2 * 2224 = 11120, which
# is one tile fewer than total_tiles.
data_path = "../data_colab/256_256"
total_tiles = 11121
train_tiles = 6672
test_val_tiles = 2224

# Per-tile shapes; the leading dimension of each array is the split's tile count.
x_tile_shape = (256, 256, 5)
y_tile_shape = (256, 256)

# NOTE(review): np.memmap maps raw bytes starting at offset 0 and does not parse
# an .npy header, so these files are assumed to be headerless despite their
# suffix - confirm against the code that writes them.
memmap_options = dict(mode="r", dtype=np.uint8)

train_split_x = np.memmap(
    os.path.join(data_path, "train_split_x.npy"),
    shape=(train_tiles, *x_tile_shape),
    **memmap_options,
)
train_split_y = np.memmap(
    os.path.join(data_path, "train_split_y.npy"),
    shape=(train_tiles, *y_tile_shape),
    **memmap_options,
)
val_split_x = np.memmap(
    os.path.join(data_path, "val_split_x.npy"),
    shape=(test_val_tiles, *x_tile_shape),
    **memmap_options,
)
val_split_y = np.memmap(
    os.path.join(data_path, "val_split_y.npy"),
    shape=(test_val_tiles, *y_tile_shape),
    **memmap_options,
)
test_split_x = np.memmap(
    os.path.join(data_path, "test_split_x.npy"),
    shape=(test_val_tiles, *x_tile_shape),
    **memmap_options,
)
test_split_y = np.memmap(
    os.path.join(data_path, "test_split_y.npy"),
    shape=(test_val_tiles, *y_tile_shape),
    **memmap_options,
)

# Class distribution of the masks, one report per split separated by a blank line.
train_stats = Mask_Stats(train_split_y)
train_stats.print_stats()
print()

val_stats = Mask_Stats(val_split_y)
val_stats.print_stats()
print()

test_stats = Mask_Stats(test_split_y)
test_stats.print_stats()

Shape: (6672, 256, 256)
Land pixels: 195058814  44.610 %
Valid pixels: 138904480  31.767 %
Invalid pixels: 103292898  23.623 %
Sum: 6672

Shape: (2224, 256, 256)
Land pixels: 65320265  44.816 %
Valid pixels: 46246663  31.730 %
Invalid pixels: 34185136  23.454 %
Sum: 2224

Shape: (2224, 256, 256)
Land pixels: 64786699  44.450 %
Valid pixels: 46892391  32.173 %
Invalid pixels: 34072974  23.377 %
Sum: 2224


In [24]:
# Time the full pass over all three splits; validation and test share one tile count.
start = datetime.now()

print("Calculate Intersection and Union for training set \n")
train_jaccard = JaccardIndexCalculator(
    train_split_x, train_split_y, train_tiles, chunk_size=1000
)

print("\n\nCalculate Intersection and Union for validation set \n")
val_jaccard = JaccardIndexCalculator(
    val_split_x, val_split_y, test_val_tiles, chunk_size=1000
)

print("\n\nCalculate Intersection and Union for testing set \n")
test_jaccard = JaccardIndexCalculator(
    test_split_x, test_split_y, test_val_tiles, chunk_size=1000
)

end = datetime.now()
elapsed = end - start
print(f'time needed: {elapsed}')

Calculate Intersection and Union for training set 

Chunk No.0
copying chunk from mmap [0:1000]
shape x_input_chunk: (1000, 256, 256, 5), shape y_mask_chunk: (1000, 256, 256)
shape y_true: (1000, 256, 256, 3), shape pred_physical: (1000, 256, 256, 3)
chunk_jaccard_invalid: 0.8029453754425049, chunk_intersection: 16969458.0, chunk_union: 21134014.0
chunk_jaccard_valid: 0.8042755722999573, chunk_intersection: 17113096.0, chunk_union: 21277652.0
chunk_jaccard_land: 1.0, chunk_intersection: 27288892.0, chunk_union: 27288892.0


Chunk No.1
copying chunk from mmap [1000:2000]
shape x_input_chunk: (1000, 256, 256, 5), shape y_mask_chunk: (1000, 256, 256)
shape y_true: (1000, 256, 256, 3), shape pred_physical: (1000, 256, 256, 3)
chunk_jaccard_invalid: 0.8608508110046387, chunk_intersection: 12112116.0, chunk_union: 14069936.0
chunk_jaccard_valid: 0.9152489304542542, chunk_intersection: 21143014.0, chunk_union: 23100834.0
chunk_jaccard_land: 1.0, chunk_intersection: 30323052.0, chunk_union: 30

In [25]:
# Pool the three splits and print the overall and per-class scores.
jaccards_entire_dataset(
    train_jaccard=train_jaccard,
    val_jaccard=val_jaccard,
    test_jaccard=test_jaccard,
)

total_mean_jaccard: 0.9108173847198486
total_jaccard_invalid: 0.8017360591242783
total_jaccard_valid: 0.8721593017004603
total_jaccard_land: 0.9999975766207699
