In [None]:
import os
from pathlib import Path
from pydantic import BaseSettings
from matplotlib import  pyplot as plt

import numpy as np
from tqdm import tqdm

In [None]:
class StatsConfig(BaseSettings):
    """Settings consumed by this notebook, declared as a pydantic ``BaseSettings``.

    Fields that are not passed to the constructor are looked up by pydantic in
    the process environment and in the dotenv file named in ``Config``.
    """

    # Location of the processed data; no default, so a value must be supplied.
    data_processed: Path
    # Name of the dataset split; falls back to "train" when not supplied.
    subset: str = "train"

    class Config:
        # Dotenv file read by pydantic and the encoding used to decode it.
        # The path is relative, so it depends on the working directory in
        # effect when the settings object is created.
        env_file_encoding = "utf-8"
        env_file = ".env"

In [None]:
# Move the working directory one level above the directory the kernel was
# started in (presumably the project root — confirm), so that relative paths
# such as the ".env" file resolve from there.
# The launch directory is captured only the first time this cell runs:
# without the guard, every re-run would climb one more level up the tree.
if "cwd" not in globals():
    cwd = Path(os.getcwd())
os.chdir(str(cwd.parent))

In [None]:
# Build the settings object; `data_processed` has no default, so it must be
# available from the environment or the ".env" file at this point.
cfg = StatsConfig()
# Bare last expression: the notebook renders the loaded configuration.
cfg

In [None]:
import cv2
from floods.datasets.flood import WeightedFloodDataset

In [None]:
# Statistics exposed by the dataset class (presumably per-channel normalisation
# values — confirm). NOTE(review): neither is used in the cells visible here.
mean = WeightedFloodDataset.mean()
std = WeightedFloodDataset.std()


# Build a single dataset for the configured subset ("train" unless overridden
# through the environment / .env file), requesting the DEM channel and no base
# transform. The subset comes from `cfg` instead of a hard-coded literal so the
# StatsConfig.subset setting is actually honoured.
dataset = WeightedFloodDataset(path=cfg.data_processed,
                               subset=cfg.subset,
                               include_dem=True,
                               transform_base=None)

In [None]:
def rgb_image(image: np.ndarray, weights: tuple = (2.5, 4.0, 25.0)) -> np.ndarray:
    """Build a displayable composite from an image without modifying the input.

    Channel 0 is multiplied by ``vv`` and channel 1 by ``vh``; channel 2 is then
    multiplied by the product of the two *already scaled* channels divided by
    ``both``. Finally every value is clipped to ``[0, 1]``.

    Args:
        image: array indexed as ``[row, column, channel]`` with at least three
            channels. It must have a floating dtype, because the in-place
            scaling by float factors cannot be cast back to an integer dtype.
        weights: ``(vv, vh, both)`` scaling factors (the names suggest SAR VV/VH
            polarisations — assumption, not verifiable from this file).

    Returns:
        A new array with the same shape and dtype as ``image``.
    """
    vv, vh, both = weights
    # Work on a copy: scaling the caller's array in place would compound the
    # factors every time the same image is passed again (e.g. on a cell re-run).
    # `copy()` keeps the dtype, which downstream OpenCV calls may depend on.
    scaled = image.copy()
    scaled[:,:,0] *= vv
    scaled[:,:,1] *= vh
    # NOTE(review): this multiplies the existing third channel rather than
    # replacing it with the VV*VH ratio — confirm that is the intent.
    scaled[:,:,2] *= scaled[:,:,0] * scaled[:,:,1] / both
    return np.clip(scaled, 0, 1)

In [None]:
def create_round_kernel(kernel_size: int):
    """Return a square ``uint8`` mask of side ``kernel_size`` shaped like a disc.

    The disc is centred on index ``kernel_size // 2`` along both axes and its
    radius is the smaller of that index and ``kernel_size`` minus that index.
    Cells whose Euclidean distance from the centre does not exceed the radius
    are set to 1, all others to 0.
    """
    middle = kernel_size // 2
    reach = min(middle, kernel_size - middle)
    # signed offsets from the centre, broadcast into a column and a row
    offsets = np.arange(kernel_size) - middle
    row_offsets = offsets[:, np.newaxis]
    col_offsets = offsets[np.newaxis, :]
    distance = np.sqrt(col_offsets**2 + row_offsets**2)
    inside = distance <= reach
    return inside.astype(np.uint8)

In [None]:
# Draw 4 distinct positions (replace=False) in the range [0, len(dataset)).
# NOTE(review): the global NumPy RNG is not seeded in the cells visible here,
# so the selection presumably changes on every run — seed it if the figures
# below need to be reproducible.
indices = list(np.random.choice(len(dataset), size=4, replace=False))

In [None]:
# Fetch the sampled items once, then draw one three-panel figure per item:
# the composite, its median-filtered version, and the weight map.
batches = [dataset[idx] for idx in indices]
for image, label, weight in batches:
    rgb = rgb_image(image, weights=(5.0, 15.0, 20.0))
    rgb2 = cv2.medianBlur(rgb, ksize=5)
    f, axes = plt.subplots(nrows=1, ncols=3, figsize=(18, 6))
    # each panel is paired with the extra keyword arguments for its imshow call
    panels = (
        (rgb, {}),
        (rgb2, {}),
        (weight, {"vmin": 0, "vmax": 5}),
    )
    for ax, (picture, options) in zip(axes, panels):
        ax.imshow(picture, **options)
    plt.tight_layout()
    plt.show()

In [None]:

def entropy(label: np.ndarray, ignore: int = 255) -> np.ndarray:
    """Shannon entropy, in bits, of a label map split into two value bins.

    Pixels equal to ``ignore`` are re-labelled as 0 on a copy (the input is left
    untouched), so they are counted together with the zero-valued pixels. The
    values are then histogrammed into two equal-width bins, the counts are
    divided by the total number of pixels, and empty bins are dropped before
    the entropy is summed.

    Args:
        label: array of label values.
        ignore: value to be re-labelled as 0 before counting.

    Returns:
        The entropy as a NumPy floating scalar.
    """
    cleaned = label.copy()
    cleaned[cleaned == ignore] = 0
    counts, _edges = np.histogramdd(cleaned.ravel(), bins=2)
    probabilities = np.ravel(counts / label.size)
    # log2(0) is undefined, so only occupied bins contribute
    occupied = probabilities[probabilities > 0]
    return -np.sum(occupied * np.log2(occupied))

In [None]:
# One entropy value per dataset item, in iteration order; only the label of
# each (image, label, weight) item is needed. tqdm reports the progress.
entropies = np.array([entropy(label) for _, label, _ in tqdm(dataset)])

In [None]:
# Fold any entropy above 1 back by one unit (in place, later cells read the
# adjusted array). NOTE(review): a two-bin entropy cannot exceed 1 bit, so this
# looks like a no-op with the `entropy` function of this notebook — confirm.
entropies[entropies > 1] -= 1
# Dataset positions ordered from the highest to the lowest entropy.
ranked = np.argsort(entropies)[::-1]
# Number of samples to display. The window starts 1000 places before the
# low-entropy end of the ranking; slicing in two steps makes `n` the single
# knob for the window size and still yields samples when the dataset holds
# fewer than 1000 items (the previous fixed -1000:-950 slice was empty then).
n = 50
for index in ranked[-1000:][:n]:
    img, lab, w = dataset[index]
    f, axes = plt.subplots(1, 2, figsize=(6, 3))
    # first channel, brightened by a factor of 5 for display
    axes[0].imshow(img[:,:,0] * 5, cmap="gray")
    axes[1].imshow(lab)
    # set the title before tight_layout so the layout can make room for it
    plt.title(f"index: {index} - entropy: {entropies[index]:.4f}")
    plt.tight_layout()
    plt.show()

In [None]:
def sigmoid(x):
    """Logistic function ``1 / (1 + exp(-x))``, evaluated without overflow.

    Args:
        x: a scalar or NumPy array.

    Returns:
        Values in ``[0, 1]`` with the same shape as ``x``.
    """
    # log(1 + exp(-x)) is computed by logaddexp(0, -x) without ever forming
    # exp(-x) itself, which overflows (RuntimeWarning) for large negative x.
    return np.exp(-np.logaddexp(0, -x))

In [None]:
# Limit the entropies to [0, 1], then map them linearly onto [0.2, 1.0].
clipped = np.clip(entropies, 0, 1)
scaled = 0.2 + 0.8 * clipped
# Optional extra squashing step, currently disabled:
# scaled = sigmoid(scaled)
# Rescaled values taken in the order given by `ranked`.
positions = np.arange(len(ranked))
plt.plot(positions, scaled[ranked])

### Test bed for weights

In [None]:
# Structuring element of size 5 from create_round_kernel; it is handed to
# cv2.morphologyEx in the following cell.
morph_kernel = create_round_kernel(5)

In [None]:
import cv2
from skimage.restoration import denoise_nl_means
from skimage.morphology import opening

# Prototype of an alternative weight map: denoise the composite, threshold the
# dark areas, clean the mask with a morphological opening and add the label.
batches = [dataset[i] for i in indices]
# Dataset items are unpacked as (image, label, weight) triples in the earlier
# cells of this notebook; unpacking only two names here raised a ValueError.
# The stored weight is discarded because a new one is being derived.
for image, label, _ in batches:
    # Copy the last channel before the image is handed to rgb_image
    # (presumably the DEM, given include_dem=True — confirm).
    dem = image[:,:,-1].copy()
    # pixels whose label is not the ignore value 255
    mask = label != 255

    # NOTE(review): dmin/dmax are computed but not used below — possibly meant
    # as vmin/vmax for the DEM panel. They also fail on an all-ignored label.
    dmin = dem[mask].min()
    dmax = dem[mask].max()
    img = rgb_image(image)
    # NOTE(review): `multichannel` was replaced by `channel_axis` in newer
    # scikit-image releases; kept as-is to match the installed version.
    denoised = denoise_nl_means(img, h = 0.1, multichannel=True)
    # candidate pixels: both of the first two channels at or below 0.1
    flooded = ((denoised[:,:,0] <= 0.1) * (denoised[:,:,1] <= 0.1)).astype(np.uint8)
    # opening with the round kernel removes isolated specks from the mask
    flooded = cv2.morphologyEx(flooded, cv2.MORPH_OPEN, morph_kernel)
    # flooded = opening(flooded)
    # NOTE(review): if label is uint8 and contains 255, this sum wraps around
    # on ignored pixels — verify the label dtype.
    weights = flooded + label
    f, axes = plt.subplots(1, 4, figsize=(24, 6))
    axes[0].imshow(denoised)
    axes[1].imshow(flooded)
    axes[2].imshow(dem)
    axes[3].imshow(weights)
    plt.tight_layout()
    plt.show()