# Analyze results from first run

In [None]:
from pathlib import Path
import time

import numpy as np
import rasterio
from skimage.color import rgb2gray
from tqdm.notebook import tqdm
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
import seaborn as sns

In [None]:
# Directory holding the prediction rasters; the helpers below read "<item>.tif" from it.
pred_path = Path("../data/predict/attempt3_zero/")
# Directory holding the test chips; the helpers below read "<item>/<item>.tif" from it.
test_path = Path("../data/test/")

## Helper functions

In [None]:
def plot(image, title="", cmap=None, ax=None, vmin=None, vmax=None, size=6, side=False):
    """Draw ``image`` on an axes with a title and without tick marks.

    Args:
        image: Array-like passed straight to ``imshow``.
        title: Title set on the axes.
        cmap: Colormap for ``imshow``. When falsy, a three-colour map
            (gray, white, blue) is used and ``vmin``/``vmax`` are forced to
            -1 and 1, regardless of what the caller passed.
        ax: Axes to draw on. When falsy, a new figure is created with
            ``figsize=(size, size)``.
        vmin, vmax: Value range for ``imshow`` (only honoured with ``cmap``).
        size: Side of the figure created when no ``ax`` is given.
        side: When true, skip ``plt.show()`` so the caller can compose
            several axes and show the figure itself.
    """
    if cmap:
        colours = cmap
    else:
        # Default palette; the value range is pinned to [-1, 1] along with it.
        colours = ListedColormap(["gray", "white", "blue"])
        vmin, vmax = -1, 1

    target = ax if ax else plt.subplots(1, 1, figsize=(size, size))[1]

    target.imshow(image, cmap=colours, vmin=vmin, vmax=vmax)
    target.set_title(title)
    for clear_ticks in (target.set_xticks, target.set_yticks):
        clear_ticks([], [])

    if side:
        return
    plt.show()

In [None]:
def sideplot(image1, image2, title1="", title2="", cmap=None, vmin=None, vmax=None, size=6):
    """Plot two images side by side in a single figure.

    Args:
        image1, image2: Images drawn on the left and right axes.
        title1, title2: Titles for the left and right axes.
        cmap, vmin, vmax: Forwarded unchanged to ``plot`` for both images.
        size: Height of the figure; the width is twice that, giving two
            square panels. The default ``size=6`` yields a 12 x 6 figure.
    """
    # Derive the figure size from ``size`` so that the parameter actually
    # takes effect; ``plot`` ignores ``size`` once an axes is supplied.
    _, (ax1, ax2) = plt.subplots(1, 2, figsize=(2 * size, size))
    for axis, image, title in ((ax1, image1, title1), (ax2, image2, title2)):
        # side=True defers plt.show() until both panels are drawn.
        plot(image, title, cmap=cmap, ax=axis, vmin=vmin, vmax=vmax, size=size, side=True)
    plt.show()

In [None]:
def get_chips_less_than(items, lower_bound=0, upper_bound=0):
    """Return the sorted items whose prediction sum lies in a percent range.

    For every item, band 1 of ``pred_path / "<item>.tif"`` is read and summed.
    The item is kept when ``lower_bound < sum <= upper_bound`` after both
    bounds are converted from percent of a 1024 x 1024 chip to a raw sum
    (e.g. ``upper_bound=2`` means 2% of 1024 * 1024).

    Args:
        items: Iterable of item ids.
        lower_bound: Exclusive lower bound, in percent.
        upper_bound: Inclusive upper bound, in percent.

    Returns:
        Sorted list of the matching item ids.
    """
    # NOTE(review): the sum equals a pixel percentage only if the prediction
    # holds 0/1 values and the chip is 1024 x 1024 — confirm against the data.
    sum_per_percent = 1024 * 1024 / 100
    lower_limit = lower_bound * sum_per_percent
    upper_limit = upper_bound * sum_per_percent

    selected = []
    for it in tqdm(items):
        with rasterio.open(pred_path / f"{it}.tif") as rd:
            # Reduce once per chip instead of once per comparison.
            total = np.sum(rd.read(1))
        if lower_limit < total <= upper_limit:
            selected.append(it)
    return sorted(selected)

In [None]:
def save(li, file):
    """Write each element of ``li`` to ``file``, one element per line.

    The file is opened in text write mode, so any existing content is
    replaced. Elements are converted with ``str`` before being written.
    """
    with open(file, "w") as handle:
        handle.writelines(f"{entry!s}\n" for entry in li)

In [None]:
def load(file):
    """Read ``file`` and return its lines with surrounding whitespace stripped.

    Blank lines are kept as empty strings; an empty file gives an empty list.
    """
    with open(file) as handle:
        return [raw.strip() for raw in handle]

In [None]:
def check(items, start=0, end=-1, size=3, pred=False):
    """Print and plot the test chips for a slice of ``items``.

    For each selected item the id is printed and bands 1-3 of
    ``test_path / "<item>" / "<item>.tif"`` are shown. With ``pred=True`` the
    chip is shown next to band 1 of ``pred_path / "<item>.tif"``.

    Args:
        items: Sequence of item ids.
        start: Index of the first item to show.
        end: Slice stop (exclusive). The default ``-1`` means "through the
            last item"; any other value is used as a regular slice stop.
        size: Figure size forwarded to ``plot`` / ``sideplot``.
        pred: Whether to show the prediction beside the chip.
    """
    # Used literally, a stop of -1 would silently drop the final item, so it
    # is treated as an open-ended slice instead.
    stop = None if end == -1 else end
    for it in items[start:stop]:
        tif = test_path / f"{it}" / f"{it}.tif"
        print(it)
        with rasterio.open(tif) as rd:
            # Move the band axis (first) to the end so imshow gets an RGB image.
            a = np.moveaxis(rd.read([1, 2, 3]), 0, -1)
        if pred:
            pred_tif = pred_path / f"{it}.tif"
            with rasterio.open(pred_tif) as rd:
                pred_a = rd.read(1)
            sideplot(a, pred_a, size=size, cmap="viridis")
        else:
            plot(a, size=size)

In [None]:
def histogram(items):
    """Return one value per item: the sum of its prediction band over 1024**2.

    Band 1 of ``pred_path / "<item>.tif"`` is read for every item, in order.
    """
    chip_area = 1024**2
    fractions = []
    for name in tqdm(items):
        with rasterio.open(pred_path / f"{name}.tif") as src:
            band = src.read(1)
        fractions.append(np.sum(band) / chip_area)
    return fractions

In [None]:
def get_means(items):
    """Return per-band averages of the per-chip mean and standard deviation.

    Bands 1-3 of ``test_path / "<item>" / "<item>.tif"`` are read for every
    item. The result is a pair of dicts keyed by band index 0-2:

    * ``means[band]`` is the mean of the per-chip means.
    * ``stds[band]`` is the mean of the per-chip standard deviations, which
      is not the same as the standard deviation over all pixels.
    """
    bands = range(3)
    per_chip_mean = {band: [] for band in bands}
    per_chip_std = {band: [] for band in bands}

    for name in tqdm(items):
        with rasterio.open(test_path / name / f"{name}.tif") as src:
            pixels = src.read([1, 2, 3])
            for band in bands:
                layer = pixels[band, :, :]
                per_chip_mean[band].append(np.mean(layer))
                per_chip_std[band].append(np.std(layer))

    means = {band: np.mean(values) for band, values in per_chip_mean.items()}
    stds = {band: np.mean(values) for band, values in per_chip_std.items()}
    return means, stds

## Get main list of test items

In [None]:
# Every entry under test_path whose name does not contain "catalog" is a test
# item; the item id is the entry name without its suffix.
test_items = [entry.stem for entry in test_path.iterdir() if "catalog" not in entry.name]
len(test_items)

## Get list of no buildings predictions and save

In [None]:
# Select chips whose prediction sum falls in (-1%, 0%]; that leaves only
# sum == 0, assuming prediction values are non-negative — TODO confirm.
no_buildings = get_chips_less_than(test_items, lower_bound=-1, upper_bound=0)
# Persist the list so it can be reloaded without re-reading every prediction.
save(no_buildings, "nob.txt")
len(no_buildings)

## Manually check no buildings predictions

In [None]:
# Eyeball the first entries of the "no buildings" list at a larger plot size.
check(no_buildings, start=0, end=10, size=6)

## Check preds with <2% buildings?

In [None]:
# NOTE(review): this reads "nobuildings.txt", whereas the cell that saves the
# list writes "nob.txt" — confirm which file is intended.
no_buildings = load("nobuildings.txt")
len(no_buildings)

In [None]:
# Chips whose prediction sum is above 0% and at most 2% of a 1024 x 1024 chip.
less_2pc_buildings = get_chips_less_than(test_items, lower_bound=0, upper_bound=2)
len(less_2pc_buildings)

In [None]:
# NOTE(review): this browses no_buildings (from index 4300), not the
# less_2pc_buildings list computed in the previous cell — confirm which
# list was meant to be inspected here.
check(no_buildings, start=4300, end=-1, size=3)

## Now check higher %s

In [None]:
# Chips whose prediction sum is above 20% and at most 40% of a 1024 x 1024 chip.
higher_buildings = get_chips_less_than(test_items, lower_bound=20, upper_bound=40)
len(higher_buildings)

In [None]:
# Browse the 20-40% chips starting at index 1000, without predictions.
check(higher_buildings, size=3, start=1000, end=-1)

## Create histogram

In [None]:
# One value per test item: prediction sum divided by 1024**2 (see histogram()).
dist = histogram(test_items)

In [None]:
# Distribution of the per-chip values in `dist`.
# NOTE(review): `dist` holds fractions (sum / 1024**2), not percentages,
# despite the printed label — confirm the intended unit.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases;
# consider sns.histplot once the environment's seaborn version is confirmed.
fig, ax = plt.subplots(figsize=(12, 6))
print("Distribution of % building")
sns.distplot(dist, ax=ax)
plt.show()

In [None]:
# Same data, but each chip's histogram contribution is weighted by its own
# value in `dist`, so chips with larger values count for more.
# NOTE(review): hist_kws presumably affects only the histogram bars, leaving
# the KDE curve unweighted — confirm against the seaborn version in use.
# NOTE(review): sns.distplot is deprecated in recent seaborn releases.
fig, ax = plt.subplots(figsize=(12, 6))
print("Distribution of IoU 'points'")
sns.distplot(dist, hist_kws={"weights": dist}, ax=ax)
plt.show()

## Check some stats

In [None]:
# Per-band (0-2) averages of the per-chip mean and standard deviation over
# all test items; the trailing expression displays both dicts.
means, stds = get_means(test_items)
means, stds

## Check predictions

In [None]:
# Show the leading test items side by side with their predictions.
check(test_items, start=0, end=50, size=6, pred=True)