<a href="https://colab.research.google.com/github/emely3h/Geospatial_ML/blob/feature%2Fphysics-jaccard-index/data_exploration/physics_jaccard.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Physics Jaccard Index

Calculate the physics jaccard index which will be the main success metric.

# Small Subset(256x256: Training)

### 0. Get Stats for each image

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
! ls
%cd drive/MyDrive/MachineLearning/Geospatial_ML
! ls

drive  sample_data
/content/drive/.shortcut-targets-by-id/15HUD3sGdfvxy5Y_bjvuXgrzwxt7TzRfm/MachineLearning/Geospatial_ML
architecture.drawio  experiment_1_2.ipynb  prepare_data
data_exploration     experiments	   README.md
evaluation	     models		   requirements.txt


In [3]:
# ! git checkout feature/physics-jaccard-index
! git pull

Already on 'feature/physics-jaccard-index'
Your branch is up to date with 'origin/feature/physics-jaccard-index'.
Already up to date.


In [4]:
import numpy as np
import os
import matplotlib.pyplot as plt
import pandas as pd
import pickle
from data_exploration.mask_stats import Mask_Stats
from prepare_data.create_mask import create_physical_mask
from tensorflow.keras.utils import to_categorical
from tensorflow import keras
from models.unet_model import unet_2d
from models.helpers import initialize_saved_data, jaccard_coef

### 1. Loading + Preparing Training Data

In [None]:
total_tiles = 11121
train_tiles = total_tiles // 100 * 60 +1
test_val_tiles = total_tiles // 100 * 20 +1
data_path = "../data_colab/256_256"

train_split_x = np.memmap(os.path.join(data_path, "train_split_x.npy"), mode="r", shape=(train_tiles, 256, 256, 5), dtype=np.float32)
train_split_y = np.memmap(os.path.join(data_path, "train_split_y.npy"), mode="r", shape=(train_tiles, 256, 256), dtype=np.float32)

train_stats = Mask_Stats(train_split_y)
train_stats.print_stats()


Shape: (6661, 256, 256)
Land pixels: 176919986  40.528 %
Valid pixels: 125877821  28.836 %
Invalid pixels: 133737489  30.636 %
Sum: 6661


### 2. Initializing training data

In [None]:
x_input,y_mask = initialize_saved_data(train_split_x, train_split_y, train_tiles)

Initializing saved data...
x_input shape: (6661, 256, 256, 5)
x_min: 0.0 x_max: 0.0

Copying saved data to x_input...
x_input shape: (6661, 256, 256, 5)
x_min: 0.0 x_max: 255.0

Initializing y_mask...
y_mask shape: (6661, 256, 256)
y_min: 0.0 y_max: 0.0

Copying saved data to y_mask...
y_mask shape: (6661, 256, 256)
y_min: 0.0 y_max: 2.0


### 3. Create Physical mask(training data)

In [None]:
pred_physical = create_physical_mask(x_input)

### 4. Encoding (training data)

In [None]:
y_one_hot = to_categorical(y_mask, num_classes=3)

### 5. Calculate Physics jaccard index(training data)

# All dataset(Overlap: 56px)

In [None]:
jaccard = jaccard_coef(y_one_hot, pred_physical)
print(jaccard)

tf.Tensor(0.7695766, shape=(), dtype=float32)


In [None]:
# todo: over entire dataset, not only training and both for non-overlapping and overlapping => should be equal

### 6. Loading all data(256_200)

In [None]:
data_path = "../data_colab/256_200"
train_total_tiles = 11063
val_total_tiles = 3545
test_total_tiles = 3699

train_split_x = np.memmap(os.path.join(data_path, "train_split_x.npy"), mode="r", shape=(train_total_tiles, 256, 256, 5), dtype=np.uint8)
train_split_y = np.memmap(os.path.join(data_path, "train_split_y.npy"), mode="r", shape=(train_total_tiles, 256, 256), dtype=np.uint8)
val_split_x = np.memmap(os.path.join(data_path, "val_split_x.npy"), mode="r", shape=(val_total_tiles, 256, 256, 5), dtype=np.uint8)
val_split_y = np.memmap(os.path.join(data_path, "val_split_y.npy"), mode="r", shape=(val_total_tiles, 256, 256), dtype=np.uint8)
test_split_x = np.memmap(os.path.join(data_path, "test_split_x.npy"), mode="r", shape=(test_total_tiles, 256, 256, 5), dtype=np.uint8)
test_split_y = np.memmap(os.path.join(data_path, "test_split_y.npy"), mode="r", shape=(test_total_tiles, 256, 256), dtype=np.uint8)


### 7. Initializing all data(256_200)

In [None]:
print("1: Training Dataset")
train_x_input, train_y_mask = initialize_saved_data(train_split_x, train_split_y, train_total_tiles)
print("\n2: Validation Dataset")
val_x_input, val_y_mask = initialize_saved_data(val_split_x, val_split_y, val_total_tiles)
print("\n3: Test Dataset")
test_x_input, test_y_mask = initialize_saved_data(test_split_x, test_split_y, test_total_tiles)

1: Training Dataset
Initializing saved data...
x_input shape: (11063, 256, 256, 5)
x_min: 0.0 x_max: 0.0

Copying saved data to x_input...
x_input shape: (11063, 256, 256, 5)
x_min: 0.0 x_max: 255.0

Initializing y_mask...
y_mask shape: (11063, 256, 256)
y_min: 0.0 y_max: 0.0

Copying saved data to y_mask...
y_mask shape: (11063, 256, 256)
y_min: 0.0 y_max: 2.0

2: Validation Dataset
Initializing saved data...
x_input shape: (3545, 256, 256, 5)
x_min: 0.0 x_max: 0.0

Copying saved data to x_input...
x_input shape: (3545, 256, 256, 5)
x_min: 0.0 x_max: 255.0

Initializing y_mask...
y_mask shape: (3545, 256, 256)
y_min: 0.0 y_max: 0.0

Copying saved data to y_mask...
y_mask shape: (3545, 256, 256)
y_min: 0.0 y_max: 2.0

3: Test Dataset
Initializing saved data...
x_input shape: (3699, 256, 256, 5)
x_min: 0.0 x_max: 0.0

Copying saved data to x_input...
x_input shape: (3699, 256, 256, 5)
x_min: 0.0 x_max: 255.0

Initializing y_mask...
y_mask shape: (3699, 256, 256)
y_min: 0.0 y_max: 0.0

C

### 8. Create all Physical masks (256_200)

In [None]:
train_pred_physical = create_physical_mask(train_x_input)
val_pred_physical = create_physical_mask(val_x_input)
test_pred_physical = create_physical_mask(test_x_input)

### 9. Encoding all data (256_200)

In [None]:
train_y_one_hot = to_categorical(train_y_mask, num_classes=3)
val_y_one_hot = to_categorical(val_y_mask, num_classes=3)
test_y_one_hot = to_categorical(test_y_mask, num_classes=3)

### 10. Calculate all Physics jaccard index (256_200)

In [None]:
train_jaccard = jaccard_coef(train_y_one_hot, train_pred_physical)
val_jaccard = jaccard_coef(val_y_one_hot, val_pred_physical)
test_jaccard = jaccard_coef(test_y_one_hot, test_pred_physical)

print("1.Traing dataset")
print(train_jaccard)
print("\n2.Validation dataset")
print(val_jaccard)
print("\n3.Test dataset")
print(test_jaccard)

# All dataset(No overlap)

### 11. Loading all data(256_256)

In [5]:
data_path = "../data_colab/256_256"
train_total_tiles = 11063
val_total_tiles = 3545
test_total_tiles = 3699

train_split_x = np.memmap(os.path.join(data_path, "train_split_x.npy"), mode="r", shape=(train_total_tiles, 256, 256, 5), dtype=np.uint8)
train_split_y = np.memmap(os.path.join(data_path, "train_split_y.npy"), mode="r", shape=(train_total_tiles, 256, 256), dtype=np.uint8)
val_split_x = np.memmap(os.path.join(data_path, "val_split_x.npy"), mode="r", shape=(val_total_tiles, 256, 256, 5), dtype=np.uint8)
val_split_y = np.memmap(os.path.join(data_path, "val_split_y.npy"), mode="r", shape=(val_total_tiles, 256, 256), dtype=np.uint8)
test_split_x = np.memmap(os.path.join(data_path, "test_split_x.npy"), mode="r", shape=(test_total_tiles, 256, 256, 5), dtype=np.uint8)
test_split_y = np.memmap(os.path.join(data_path, "test_split_y.npy"), mode="r", shape=(test_total_tiles, 256, 256), dtype=np.uint8)

### 12. Initializing all data(256_256)

In [6]:
print("1: Training Dataset")
train_x_input, train_y_mask = initialize_saved_data(train_split_x, train_split_y, train_total_tiles)
print("\n2: Validation Dataset")
val_x_input, val_y_mask = initialize_saved_data(val_split_x, val_split_y, val_total_tiles)
print("\n3: Test Dataset")
test_x_input, test_y_mask = initialize_saved_data(test_split_x, test_split_y, test_total_tiles)

1: Training Dataset
Initializing saved data...
x_input shape: (11063, 256, 256, 5)
x_min: 0.0 x_max: 0.0

Copying saved data to x_input...
x_input shape: (11063, 256, 256, 5)
x_min: 0.0 x_max: 254.0

Initializing y_mask...
y_mask shape: (11063, 256, 256)
y_min: 0.0 y_max: 0.0

Copying saved data to y_mask...
y_mask shape: (11063, 256, 256)
y_min: 0.0 y_max: 128.0

2: Validation Dataset
Initializing saved data...
x_input shape: (3545, 256, 256, 5)
x_min: 0.0 x_max: 0.0

Copying saved data to x_input...
x_input shape: (3545, 256, 256, 5)
x_min: 0.0 x_max: 254.0

Initializing y_mask...
y_mask shape: (3545, 256, 256)
y_min: 0.0 y_max: 0.0

Copying saved data to y_mask...
y_mask shape: (3545, 256, 256)
y_min: 0.0 y_max: 128.0

3: Test Dataset
Initializing saved data...
x_input shape: (3699, 256, 256, 5)
x_min: 0.0 x_max: 0.0

Copying saved data to x_input...
x_input shape: (3699, 256, 256, 5)
x_min: 0.0 x_max: 254.0

Initializing y_mask...
y_mask shape: (3699, 256, 256)
y_min: 0.0 y_max: 0.

### 13. Create all Physical masks (256_256)

In [7]:
train_pred_physical = create_physical_mask(train_x_input)
val_pred_physical = create_physical_mask(val_x_input)
test_pred_physical = create_physical_mask(test_x_input)

### 14. Encoding all data (256_256)

In [None]:
train_y_one_hot = to_categorical(train_y_mask, num_classes=3)
val_y_one_hot = to_categorical(val_y_mask, num_classes=3)
test_y_one_hot = to_categorical(test_y_mask, num_classes=3)

### 15. Calculate all Physics jaccard index (256_256)

In [None]:
train_jaccard = jaccard_coef(train_y_one_hot, train_pred_physical)
val_jaccard = jaccard_coef(val_y_one_hot, val_pred_physical)
test_jaccard = jaccard_coef(test_y_one_hot, test_pred_physical)

print("1.Traing dataset")
print(train_jaccard)
print("\n2.Validation dataset")
print(val_jaccard)
print("\n3.Test dataset")
print(test_jaccard)