In [1]:
# geopython env + mxnet1.6.0 env
import numpy as np
import pandas as pd
from tqdm import tqdm
import imageio
import os
from scipy.stats.stats import pearsonr
from sklearn.metrics import matthews_corrcoef
# import higra as hg

import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
# mxnet1.6.0 env
import cv2
import mxnet as mx
from mxnet import gluon
from mxnet import autograd
from mxnet import image

import sys
sys.path.append('../../resuneta/src')
sys.path.append('../../decode/FracTAL_ResUNet/models/semanticsegmentation')
sys.path.append('../../decode/FracTAL_ResUNet/nn/loss')
sys.path.append('../../')
sys.path.append('../MXNet-ResUNeta/')

# from resunet_d6_causal_mtskcolor_ddist import *
# from resunet_d7_causal_mtskcolor_ddist import *
from FracTAL_ResUNet import FracTAL_ResUNet_cmtsk
from datasets import *

# Helper functions

In [3]:
def dice_coef(x, y):
    """Dice coefficient ``2 * sum(x AND y) / (sum(x) + sum(y))`` of two masks.

    Inputs whose type lives in the ``numpy`` module are reduced with NumPy;
    anything else is handed to the MXNet NDArray operators. No guard is
    applied when both sums are zero.
    """
    if type(x).__module__ != 'numpy':
        # MXNet path: logical AND with broadcasting, then NDArray reductions.
        overlap = mx.ndarray.op.broadcast_logical_and(x, y)
        numerator = 2. * mx.nd.sum(overlap)
        denominator = mx.nd.sum(x) + mx.nd.sum(y)
        return numerator / denominator

    # NumPy path.
    overlap = np.logical_and(x, y)
    numerator = 2. * np.sum(overlap)
    denominator = np.sum(x) + np.sum(y)
    return numerator / denominator

In [4]:
def InstSegm(extent, boundary, t_ext=0.4, t_bound=0.2):
    """
    Turn extent and boundary predictions into an instance label map by
    cutting a higra watershed hierarchy.

    INPUTS:
    extent : extent prediction (pixels >= t_ext are kept as field)
    boundary : boundary prediction, indexable with a mask shaped like extent
    t_ext : threshold for extent
    t_bound : threshold for boundary (altitude at which the hierarchy is cut)
    OUTPUT:
    instances : label array in which pixels below the extent threshold
                are set to -1
    RAISES:
    ImportError : if higra is not installed in the active environment
    """
    # The module-level `import higra as hg` is commented out in the import
    # cell, so `hg` was never bound and every call failed with NameError.
    # Importing here keeps the notebook loadable in environments without
    # higra while making this function self-contained.
    import higra as hg

    # Threshold extent mask
    ext_binary = np.uint8(extent >= t_ext)

    # Artificially create strong boundaries for
    # pixels with non-field labels (work on a copy so the caller's
    # boundary array is left untouched)
    input_hws = np.copy(boundary)
    input_hws[ext_binary == 0] = 1

    # Create the 8-connected pixel graph and weight each edge with the
    # mean of the boundary values at its two end pixels
    size = input_hws.shape[:2]
    graph = hg.get_8_adjacency_graph(size)
    edge_weights = hg.weight_graph(
        graph,
        input_hws,
        hg.WeightFunction.mean
    )

    tree, altitudes = hg.watershed_hierarchy_by_dynamics(
        graph,
        edge_weights
    )

    # Get individual fields
    # by cutting the graph using altitude
    instances = hg.labelisation_horizontal_cut_from_threshold(
        tree,
        altitudes,
        threshold=t_bound)

    # Non-field pixels get the sentinel label -1
    instances[ext_binary == 0] = -1

    return instances

# Load model

In [5]:
# ==================== USER SETTINGS ==================== #
# Select the trained model, describe its architecture, and load its weights
# onto the requested device. The names defined here (model, model_name,
# batch_size, ctx_name, gpu, ...) are read by the inference cells below.

# === MODELS TRAINED ON MONTHS SEPARATELY === #
# === PARTIAL LABELS FRANCE === #
# model_name = 'resunet-d6_2019_10_nfilter-16_bs-8_lr-0.001_4x-downsampled_allfield_n6759/'
# model_name = 'fractal-resunet_3month-separate_nfilter-32_depth-6_bs-8_lr-0.001_1x-downsampled_allfields_n6759'
model_name = 'fractal-resunet_3month-separate_nfilter-32_depth-6_bs-8_lr-0.001_2x-3x-downsampled_allfields_n6759'
# model_name = 'fractal-resunet_3month-separate_nfilter-32_depth-6_bs-4_lr-0.001_3x-downsampled_n125_80field/'
# model_name = 'fractal-resunet_3month-separate_nfilter-32_depth-6_bs-4_lr-0.001_3x-downsampled_n1000_10field/'
# model_name = 'fractal-resunet_3month-separate_nfilter-32_depth-6_bs-4_lr-0.001_3x-downsampled_n5000_2field/'
file_name = '../experiments/partial-france/{}/model.params'.format(model_name)

# === FULL LABELS FRANCE === #
# model_name = 'fractal-resunet_3month-separate_nfilter-32_depth-6_bs-7_lr-0.001_1x-downsampled_allfields_n6759_1250px_thickness2'
# file_name = '../experiments/full-france/{}/model.params'.format(model_name)

n_filters = 32
depth = 6
n_classes = 1
n_channels = 3
batch_size = 8
codes_to_keep = [1]
boundary_kernel_size = (2,2)
ctx_name = 'gpu' # 'cpu'
gpu = 0

# model = ResUNet_d6(_nfilters_init=n_filters, 
#                    _NClasses=n_classes,
#                    hsv_outchannels=hsv_channels)

# Resolve the device once and reject unknown names: previously a typo in
# ctx_name skipped load_parameters silently and left the model without
# trained weights.
if ctx_name == 'gpu':
    model_ctx = mx.gpu(gpu)
elif ctx_name == 'cpu':
    model_ctx = mx.cpu()
else:
    raise ValueError("ctx_name must be 'gpu' or 'cpu', got {!r}".format(ctx_name))

model = FracTAL_ResUNet_cmtsk(nfilters_init=n_filters, depth=depth, NClasses=n_classes)
model.load_parameters(file_name, ctx=model_ctx)
# ======================================================= #

depth:= 0, nfilters: 32, nheads::8, widths::1
depth:= 1, nfilters: 64, nheads::16, widths::1
depth:= 2, nfilters: 128, nheads::32, widths::1
depth:= 3, nfilters: 256, nheads::64, widths::1
depth:= 4, nfilters: 512, nheads::128, widths::1
depth:= 5, nfilters: 1024, nheads::256, widths::1
depth:= 6, nfilters: 512, nheads::256, widths::1
depth:= 7, nfilters: 256, nheads::128, widths::1
depth:= 8, nfilters: 128, nheads::64, widths::1
depth:= 9, nfilters: 64, nheads::32, widths::1
depth:= 10, nfilters: 32, nheads::16, widths::1


# Run France images through model

## Planet

### 3-month separate

#### 2x+3x downsampled

In [27]:
# Build the DataLoader over the 2x- and 3x-downsampled France tiles that
# belong to the selected fold.
months = ['2019_{:02d}'.format(m) for m in (4, 7, 10)]

splits_path = '../data/splits/sherrie10k_planetImagery_splits_20x20_4x-downsampled.csv'
splits_df = pd.read_csv(splits_path)
# Image ids are matched as zero-padded 5-character strings.
splits_df['image_id'] = splits_df['image_id'].astype(str).str.zfill(5)

fold = 'test' # 'val'
img_base_dir = '../data/planet/france/1250px/'
label_base_dir = '../data/planet/france/extent_labels/1250px/'
image_ids = splits_df['image_id'].values

# Collect every (image, label) pair for which both files exist on disk.
# Images are named '<image_id>_<month>.tif', labels '<image_id>.png', each
# inside a '<factor>_downsample' sub-directory.
all_img_names = []
all_label_names = []
for downsample in ['2x', '3x']:
    img_dir = os.path.join(img_base_dir, '{}_downsample'.format(downsample))
    label_dir = os.path.join(label_base_dir, '{}_downsample'.format(downsample))
    for month in months:
        for image_id in image_ids:
            img_name = '{}_{}.tif'.format(image_id, month)
            img_path = os.path.join(img_dir, img_name)
            label_path = os.path.join(label_dir, '{}.png'.format(image_id))
            if not (os.path.exists(img_path) and os.path.exists(label_path)):
                continue
            all_img_names.append(img_path)
            all_label_names.append(label_path)

# Attach the fold of each image by recovering its id from the file name:
# '<dir>/<image_id>_<month>.tif' -> '<image_id>'
all_images = pd.DataFrame({'img_path': all_img_names})
all_images['image_id'] = (
    all_images['img_path']
    .str.split('/').str[-1]
    .str.split('.').str[0]
    .str.split('_').str[0]
)
all_images = all_images.merge(splits_df[['image_id', 'fold']], on='image_id', how='left')
test_names = all_images.loc[all_images['fold'] == fold, 'img_path'].values

# Same for the labels: '<dir>/<image_id>.png' -> '<image_id>'
all_labels = pd.DataFrame({'label_path': all_label_names})
all_labels['image_id'] = (
    all_labels['label_path']
    .str.split('/').str[-1]
    .str.split('.').str[0]
)
all_labels = all_labels.merge(splits_df[['image_id', 'fold']], on='image_id', how='left')
test_names_label = all_labels.loc[all_labels['fold'] == fold, 'label_path'].values

test_dataset = AirbusMasked(
    fold=fold,
    image_names=test_names,
    label_names=test_names_label,
    classes=codes_to_keep,
    boundary_kernel_size=boundary_kernel_size,
    random_crop=False,
)

test_dataloader = gluon.data.DataLoader(test_dataset, batch_size=batch_size)

In [28]:
# Run the model over the 2x/3x test tiles and save the extent and boundary
# predictions as .npy files. All months and both downsampling factors share
# one output directory; the factor is encoded in the file name instead.
save_dir = '../results/partial-france/{}'.format(model_name)

# exist_ok=True: the previous guard only checked 'extent_predictions' and
# then created both folders, which raised FileExistsError whenever
# 'boundary_predictions' was already there.
os.makedirs(os.path.join(save_dir, 'boundary_predictions'), exist_ok=True)
os.makedirs(os.path.join(save_dir, 'extent_predictions'), exist_ok=True)

for batch_i, (img, extent, boundary, distance, hsv, mask) in enumerate(test_dataloader):

    if batch_i % 10 == 0:
        print("    batch {}".format(batch_i))

    if ctx_name == 'gpu':
        img = img.as_in_context(mx.gpu(gpu))
    elif ctx_name == 'cpu':
        img = img.as_in_context(mx.cpu())
    logits, bound, _ = model(img)
    logits = logits.detach().asnumpy()
    bound = bound.detach().asnumpy()

    for i in range(logits.shape[0]):
        # Sample i of batch batch_i is looked up by position in test_names,
        # so this relies on test_dataloader iterating in test_names order
        # (i.e. without shuffling).
        path_parts = test_names[batch_i * batch_size + i].split('/')
        image_id = path_parts[-1][:-4]   # file name without its 4-char extension
        downsample = path_parts[-2][:2]  # first two chars of the parent folder name

        # save predictions (channel 0 of each output)
        np.save(os.path.join(save_dir, 'boundary_predictions/{}-{}.npy'.format(image_id, downsample)), bound[i][0])
        np.save(os.path.join(save_dir, 'extent_predictions/{}-{}.npy'.format(image_id, downsample)), logits[i][0])

    batch 0
    batch 10
    batch 20
    batch 30
    batch 40
    batch 50
    batch 60
    batch 70
    batch 80
    batch 90
    batch 100
    batch 110
    batch 120
    batch 130
    batch 140
    batch 150
    batch 160
    batch 170
    batch 180
    batch 190
    batch 200
    batch 210
    batch 220
    batch 230
    batch 240
    batch 250
    batch 260
    batch 270
    batch 280
    batch 290
    batch 300
    batch 310
    batch 320
    batch 330
    batch 340
    batch 350
    batch 360
    batch 370
    batch 380
    batch 390
    batch 400
    batch 410
    batch 420
    batch 430
    batch 440
    batch 450
    batch 460
    batch 470
    batch 480
    batch 490
    batch 500
    batch 510
    batch 520
    batch 530
    batch 540
    batch 550
    batch 560
    batch 570
    batch 580
    batch 590
    batch 600
    batch 610
    batch 620
    batch 630
    batch 640
    batch 650
    batch 660
    batch 670
    batch 680
    batch 690
    batch 700
    batch 710
   

#### 1x original resolution

In [6]:
# Build the DataLoader over the 300px France tiles that belong to the
# selected fold.
months = ['2019_{:02d}'.format(m) for m in (4, 7, 10)]

splits_path = '../data/splits/sherrie10k_planetImagery_splits_20x20_4x-downsampled.csv'
splits_df = pd.read_csv(splits_path)
# Image ids are matched as zero-padded 5-character strings.
splits_df['image_id'] = splits_df['image_id'].astype(str).str.zfill(5)

fold = 'test'
img_dir = '../data/planet/france/sherrie10k/monthly_mosaics_renamed_clipped_merged/300px/'
label_dir = '../data/planet/france/sherrie10k/extent_labels/300px/'
image_ids = splits_df['image_id'].values

# Collect every (image, label) pair for which both files exist on disk.
# Images live in one sub-directory per month; labels are shared by all months.
all_img_names = []
all_label_names = []
for month in months:
    for image_id in image_ids:
        img_name = '{}_{}.tif'.format(image_id, month)
        img_path = os.path.join(img_dir, month, img_name)
        label_path = os.path.join(label_dir, '{}.png'.format(image_id))
        if not (os.path.exists(img_path) and os.path.exists(label_path)):
            continue
        all_img_names.append(img_path)
        all_label_names.append(label_path)

# Attach the fold of each image by recovering its id from the file name:
# '<dir>/<image_id>_<month>.tif' -> '<image_id>'
all_images = pd.DataFrame({'img_path': all_img_names})
all_images['image_id'] = (
    all_images['img_path']
    .str.split('/').str[-1]
    .str.split('.').str[0]
    .str.split('_').str[0]
)
all_images = all_images.merge(splits_df[['image_id', 'fold']], on='image_id', how='left')
test_names = all_images.loc[all_images['fold'] == fold, 'img_path'].values

# Same for the labels: '<dir>/<image_id>.png' -> '<image_id>'
all_labels = pd.DataFrame({'label_path': all_label_names})
all_labels['image_id'] = (
    all_labels['label_path']
    .str.split('/').str[-1]
    .str.split('.').str[0]
)
all_labels = all_labels.merge(splits_df[['image_id', 'fold']], on='image_id', how='left')
test_names_label = all_labels.loc[all_labels['fold'] == fold, 'label_path'].values

test_dataset = AirbusMasked(
    fold=fold,
    image_names=test_names,
    label_names=test_names_label,
    classes=codes_to_keep,
    boundary_kernel_size=boundary_kernel_size,
    random_crop=False,
)

test_dataloader = gluon.data.DataLoader(test_dataset, batch_size=batch_size)

In [7]:
# Run the model over the test tiles and save the extent and boundary
# predictions as .npy files, one output directory per month.
results_root = '../results/partial-france/{}'.format(model_name)

for month in months:
    save_dir = '../results/partial-france/{}/{}'.format(model_name, month)

    # exist_ok=True: the previous guard only created the sub-folders when the
    # month directory itself was missing, so an existing month directory
    # without them made np.save fail later.
    os.makedirs(os.path.join(save_dir, 'boundary_predictions'), exist_ok=True)
    os.makedirs(os.path.join(save_dir, 'extent_predictions'), exist_ok=True)

for batch_i, (img, extent, boundary, distance, hsv, mask) in enumerate(test_dataloader):

    if batch_i % 10 == 0:
        print("    batch {}".format(batch_i))

    if ctx_name == 'gpu':
        img = img.as_in_context(mx.gpu(gpu))
    elif ctx_name == 'cpu':
        img = img.as_in_context(mx.cpu())
    logits, bound, _ = model(img)
    logits = logits.detach().asnumpy()
    bound = bound.detach().asnumpy()

    for i in range(logits.shape[0]):
        # Sample i of batch batch_i is looked up by position in test_names,
        # so this relies on test_dataloader iterating in test_names order
        # (i.e. without shuffling).
        path_parts = test_names[batch_i * batch_size + i].split('/')
        save_month = path_parts[-2]     # parent folder name of the image
        image_id = path_parts[-1][:-4]  # file name without its 4-char extension
        save_dir = os.path.join(results_root, save_month)

        # save predictions (channel 0 of each output)
        np.save(os.path.join(save_dir, 'boundary_predictions/{}.npy'.format(image_id)), bound[i][0])
        np.save(os.path.join(save_dir, 'extent_predictions/{}.npy'.format(image_id)), logits[i][0])

    batch 0
    batch 10
    batch 20
    batch 30
    batch 40
    batch 50
    batch 60
    batch 70
    batch 80
    batch 90
    batch 100
    batch 110
    batch 120
    batch 130
    batch 140
    batch 150
    batch 160
    batch 170
    batch 180
    batch 190
    batch 200
    batch 210
    batch 220
    batch 230
    batch 240
    batch 250
    batch 260
    batch 270
    batch 280
    batch 290
    batch 300
    batch 310
    batch 320
    batch 330
    batch 340
    batch 350
    batch 360
    batch 370
    batch 380
    batch 390
    batch 400
    batch 410
    batch 420
    batch 430
    batch 440
    batch 450
    batch 460
    batch 470
    batch 480
    batch 490
    batch 500
    batch 510
    batch 520
    batch 530
    batch 540


### 3-month stacked (India, GeneralBlockchain)

In [24]:
# Build the DataLoader over the India (GeneralBlockchain) test tiles, with
# one image directory and one file-name suffix per month.
months = ['2020_10', '2020_12', '2021_02']
image_directories = [
    '../data/planet/india/GeneralBlockchain/monthly_mosaics_renamed_clipped_merged_test/{}'.format(month)
    for month in months
]
label_dir = '../data/planet/india/GeneralBlockchain/extent_labels_test/'
image_suffixes = ['_{}.tif'.format(x) for x in months]

splits_path = '../data/splits/india_planetImagery_splits_20x20_v2.csv'
splits_df = pd.read_csv(splits_path)
# NOTE(review): ids are padded to 4 characters here but matched against the
# first 5 characters of the label file stem below -- confirm the id width.
splits_df['image_id'] = splits_df['image_id'].astype(str).str.zfill(4)

# Every file in the label directory defines one sample; the image name is
# the label file name up to its first '.'.
all_img_names = []
all_label_names = []
label_folder_imgs = sorted(os.listdir(label_dir))
for label_name in label_folder_imgs:
    img_name = label_name.split('.')[0]
    label_path = os.path.join(label_dir, label_name)
    all_img_names.append(img_name)
    all_label_names.append(label_path)

# Attach the fold via the first five characters of each sample name.
all_images = pd.DataFrame({'image_id_new': all_img_names})
all_images['image_id'] = all_images['image_id_new'].str.slice(0, 5)
all_images = all_images.merge(splits_df[['image_id', 'fold']], on='image_id', how='left')
test_names = all_images.loc[all_images['fold'] == 'test', 'image_id_new'].values

# Same for the labels: '<dir>/<name>.<ext>' -> '<name>'
all_labels = pd.DataFrame({'label_path': all_label_names})
all_labels['image_id_new'] = (
    all_labels['label_path']
    .str.split('/').str[-1]
    .str.split('.').str[0]
)
all_labels['image_id'] = all_labels['image_id_new'].str.slice(0, 5)
all_labels = all_labels.merge(splits_df[['image_id', 'fold']], on='image_id', how='left')
test_names_label = all_labels.loc[all_labels['fold'] == 'test', 'label_path'].values

test_dataset = MultitempMasked(
    image_directories=image_directories,
    fold='test',
    image_names=test_names,
    label_names=test_names_label,
    classes=codes_to_keep,
    boundary_kernel_size=boundary_kernel_size,
    n_channels=n_channels,
    image_suffixes=image_suffixes,
)

test_dataloader = gluon.data.DataLoader(test_dataset, batch_size=batch_size)

In [25]:
# Run the model over the India test tiles and save the extent and boundary
# predictions as .npy files in a single output directory.
save_dir = '../results/india/GeneralBlockchain/{}'.format(model_name)

# exist_ok=True: the previous guard created both folders whenever either
# one was missing, which raised FileExistsError if exactly one existed.
os.makedirs(os.path.join(save_dir, 'boundary_predictions'), exist_ok=True)
os.makedirs(os.path.join(save_dir, 'extent_predictions'), exist_ok=True)

for batch_i, (img, extent, boundary, distance, hsv, mask) in enumerate(test_dataloader):

    if batch_i % 10 == 0:
        print("    batch {}".format(batch_i))

    if ctx_name == 'gpu':
        img = img.as_in_context(mx.gpu(gpu))
    elif ctx_name == 'cpu':
        img = img.as_in_context(mx.cpu())
    logits, bound, _ = model(img)
    logits = logits.detach().asnumpy()
    bound = bound.detach().asnumpy()

    for i in range(logits.shape[0]):
        # Sample i of batch batch_i is looked up by position in test_names,
        # so this relies on test_dataloader iterating in test_names order
        # (i.e. without shuffling).
        image_id = test_names[batch_i * batch_size + i]

        # save predictions (channel 0 of each output)
        np.save(os.path.join(save_dir, 'boundary_predictions/{}.npy'.format(image_id)), bound[i][0])
        np.save(os.path.join(save_dir, 'extent_predictions/{}.npy'.format(image_id)), logits[i][0])

    batch 0
    batch 10
    batch 20
    batch 30
    batch 40
    batch 50


## Partial label experiments

In [6]:
# Build the DataLoader over the 3x-downsampled France tiles that belong to
# the selected fold of the partial-label ('2field') split.
months = ['2019_{:02d}'.format(m) for m in (4, 7, 10)]

splits_path = '../data/splits/sherrie10k_planetImagery_splits_20x20_2field.csv'
splits_df = pd.read_csv(splits_path)
# Image ids are matched as zero-padded 5-character strings.
splits_df['image_id'] = splits_df['image_id'].astype(str).str.zfill(5)

fold = 'test'
img_dir = '../data/planet/france/sherrie10k/monthly_mosaics_renamed_clipped_merged/1250px/3x_downsample/'
label_dir = '../data/planet/france/sherrie10k/extent_labels/1250px/3x_downsample/'
image_ids = splits_df['image_id'].values

# Collect every (image, label) pair for which both files exist on disk.
# Images live in one sub-directory per month; labels are shared by all months.
all_img_names = []
all_label_names = []
for month in months:
    for image_id in image_ids:
        img_name = '{}_{}.tif'.format(image_id, month)
        img_path = os.path.join(img_dir, month, img_name)
        label_path = os.path.join(label_dir, '{}.png'.format(image_id))
        if not (os.path.exists(img_path) and os.path.exists(label_path)):
            continue
        all_img_names.append(img_path)
        all_label_names.append(label_path)

# Attach the fold of each image by recovering its id from the file name:
# '<dir>/<image_id>_<month>.tif' -> '<image_id>'
all_images = pd.DataFrame({'img_path': all_img_names})
all_images['image_id'] = (
    all_images['img_path']
    .str.split('/').str[-1]
    .str.split('.').str[0]
    .str.split('_').str[0]
)
all_images = all_images.merge(splits_df[['image_id', 'fold']], on='image_id', how='left')
test_names = all_images.loc[all_images['fold'] == fold, 'img_path'].values

# Same for the labels: '<dir>/<image_id>.png' -> '<image_id>'
all_labels = pd.DataFrame({'label_path': all_label_names})
all_labels['image_id'] = (
    all_labels['label_path']
    .str.split('/').str[-1]
    .str.split('.').str[0]
)
all_labels = all_labels.merge(splits_df[['image_id', 'fold']], on='image_id', how='left')
test_names_label = all_labels.loc[all_labels['fold'] == fold, 'label_path'].values

# NOTE(review): random_crop is not passed here, unlike the other France
# cells which set random_crop=False -- confirm the dataset default is intended.
test_dataset = AirbusMasked(
    fold=fold,
    image_names=test_names,
    label_names=test_names_label,
    classes=codes_to_keep,
    boundary_kernel_size=boundary_kernel_size,
)

test_dataloader = gluon.data.DataLoader(test_dataset, batch_size=batch_size)

In [7]:
# Run the model over the test tiles and save the extent and boundary
# predictions as .npy files, one output directory per month.
results_root = '../results/partial-france/{}'.format(model_name)

for month in months:
    save_dir = '../results/partial-france/{}/{}'.format(model_name, month)

    # exist_ok=True: the previous guard only created the sub-folders when the
    # month directory itself was missing, so an existing month directory
    # without them made np.save fail later.
    os.makedirs(os.path.join(save_dir, 'boundary_predictions'), exist_ok=True)
    os.makedirs(os.path.join(save_dir, 'extent_predictions'), exist_ok=True)

for batch_i, (img, extent, boundary, distance, hsv, mask) in enumerate(test_dataloader):

    if batch_i % 10 == 0:
        print("    batch {}".format(batch_i))

    if ctx_name == 'gpu':
        img = img.as_in_context(mx.gpu(gpu))
    elif ctx_name == 'cpu':
        img = img.as_in_context(mx.cpu())
    logits, bound, _ = model(img)
    logits = logits.detach().asnumpy()
    bound = bound.detach().asnumpy()

    for i in range(logits.shape[0]):
        # Sample i of batch batch_i is looked up by position in test_names,
        # so this relies on test_dataloader iterating in test_names order
        # (i.e. without shuffling).
        path_parts = test_names[batch_i * batch_size + i].split('/')
        save_month = path_parts[-2]     # parent folder name of the image
        image_id = path_parts[-1][:-4]  # file name without its 4-char extension
        save_dir = os.path.join(results_root, save_month)

        # save predictions (channel 0 of each output)
        np.save(os.path.join(save_dir, 'boundary_predictions/{}.npy'.format(image_id)), bound[i][0])
        np.save(os.path.join(save_dir, 'extent_predictions/{}.npy'.format(image_id)), logits[i][0])

    batch 0
    batch 10
    batch 20
    batch 30
    batch 40
    batch 50
    batch 60
    batch 70
    batch 80
    batch 90
    batch 100
    batch 110
    batch 120
    batch 130
    batch 140
    batch 150
    batch 320
    batch 330
    batch 340
    batch 350
    batch 360
    batch 370
    batch 380
    batch 390
    batch 400
    batch 410
    batch 420
    batch 430
    batch 440
    batch 450
    batch 460
    batch 470
    batch 480
    batch 490
    batch 500
    batch 510
    batch 520
    batch 530
    batch 540
    batch 550
    batch 560
    batch 570
    batch 580
    batch 590
    batch 600
    batch 610
    batch 620
    batch 630
    batch 640
    batch 650
    batch 660
    batch 670
    batch 680
    batch 690
    batch 700
    batch 710
    batch 720
    batch 730
    batch 740
