In [1]:
%matplotlib inline

import cv2
import json
import matplotlib.pyplot as plt
import numpy as np
import os

In [2]:
IMAGE_EXTENSION = '.jp2'
JSON_EXTENSION = '.json'

def loadImage(base_path):
    print(f'loading {base_path}')
    img = cv2.imread(base_path + IMAGE_EXTENSION, cv2.IMREAD_COLOR)
    mask = np.zeros(img.shape[:-1], np.uint8)
    if os.path.exists(base_path + JSON_EXTENSION):
        with open(base_path + JSON_EXTENSION) as f:
            labels = json.load(f)
        shapes = labels['shapes']
        for shape in shapes:
            points = np.array([shape['points']]).astype('int32')
            cv2.fillPoly(mask, points, 255)
    return img, mask

def loadData(directory):
    file_names = [os.path.splitext(n)[0] for _, _, names in os.walk(directory) for n in names if n.endswith(".jp2")]
    images = []
    actuals = []
    for f in file_names:
        img, mask = loadImage(os.path.join(directory, f))
        images.append(img)
        actuals.append(mask)
    return file_names, np.array(images), np.array(actuals)

names, inputs, actuals = loadData('/home/dennis/projects/wcc/images')

loading /home/dennis/projects/wcc/images/BQ31_500_042066
loading /home/dennis/projects/wcc/images/BQ31_500_042067
loading /home/dennis/projects/wcc/images/BQ31_500_042068
loading /home/dennis/projects/wcc/images/BQ31_500_041064
loading /home/dennis/projects/wcc/images/BQ31_500_041065
loading /home/dennis/projects/wcc/images/BQ31_500_041066
loading /home/dennis/projects/wcc/images/BQ31_500_043065
loading /home/dennis/projects/wcc/images/BQ31_500_044065
loading /home/dennis/projects/wcc/images/BQ31_500_041067
loading /home/dennis/projects/wcc/images/BQ31_500_041068
loading /home/dennis/projects/wcc/images/BQ31_500_042064
loading /home/dennis/projects/wcc/images/BQ31_500_042065
loading /home/dennis/projects/wcc/images/BQ31_500_043068
loading /home/dennis/projects/wcc/images/BQ31_500_043066
loading /home/dennis/projects/wcc/images/BQ31_500_043067
loading /home/dennis/projects/wcc/images/BQ31_500_044066


In [21]:
counts = (actuals != 0).sum(axis=(1,2))
for name, image, actual, count in zip(names, inputs, actuals, counts):
    print(f'{name} image {image.shape} actual {actual.shape} with count {count}')

BQ31_500_042066 image (4800, 3200, 3) actual (4800, 3200) with count 4008
BQ31_500_042067 image (4800, 3200, 3) actual (4800, 3200) with count 5744
BQ31_500_042068 image (4800, 3200, 3) actual (4800, 3200) with count 0
BQ31_500_041064 image (4800, 3200, 3) actual (4800, 3200) with count 0
BQ31_500_041065 image (4800, 3200, 3) actual (4800, 3200) with count 13517
BQ31_500_041066 image (4800, 3200, 3) actual (4800, 3200) with count 0
BQ31_500_043065 image (4800, 3200, 3) actual (4800, 3200) with count 0
BQ31_500_044065 image (4800, 3200, 3) actual (4800, 3200) with count 8466
BQ31_500_041067 image (4800, 3200, 3) actual (4800, 3200) with count 0
BQ31_500_041068 image (4800, 3200, 3) actual (4800, 3200) with count 14868
BQ31_500_042064 image (4800, 3200, 3) actual (4800, 3200) with count 12797
BQ31_500_042065 image (4800, 3200, 3) actual (4800, 3200) with count 4707
BQ31_500_043068 image (4800, 3200, 3) actual (4800, 3200) with count 0
BQ31_500_043066 image (4800, 3200, 3) actual (4800, 3

In [26]:
# select test images with at least some positives, not the highest number of positives
sorted_indexes = np.argsort(counts)
print(sorted_indexes)
test_candidates = sorted_indexes[sum(counts == 0)-1:-2]
test_candidates

[ 2  3  5  6  8 12 14 15  0 11  1  7 13 10  4  9]


array([15,  0, 11,  1,  7, 13, 10])

In [39]:
test_indexes = np.random.choice(test_candidates, 2, replace=False)
train_indexes = [i for i in sorted_indexes if i not in test_indexes]
print(test_indexes)
print(train_indexes)
test_inputs = inputs[test_indexes]
test_actuals = actuals[test_indexes]
train_inputs = inputs[train_indexes]
train_actuals = actuals[train_indexes]
print(test_inputs.shape)

[ 1 10]
[2, 3, 5, 6, 8, 12, 14, 15, 0, 11, 7, 13, 4, 9]
(2, 4800, 3200, 3)


In [65]:
SAMPLE_SIZE = 1600

def split_samples(images, actuals):
    rcount = images.shape[1] // SAMPLE_SIZE
    ccount = images.shape[2] // SAMPLE_SIZE
    split_images = []
    split_actuals = []
    for image, actual in zip(images, actuals):
        for row in range(0, image.shape[0], SAMPLE_SIZE):
            for col in range(0, image.shape[1], SAMPLE_SIZE):
                split_images.append(image[row:row+SAMPLE_SIZE, col:col+SAMPLE_SIZE])
                split_actuals.append(actual[row:row+SAMPLE_SIZE, col:col+SAMPLE_SIZE])
    return np.array(split_images), np.array(split_actuals)
    
test_samples, test_expects = split_samples(test_inputs, test_actuals)
train_samples, train_expects = split_samples(train_inputs, train_actuals)

(12, 1600, 1600, 3)

In [51]:
onein = test_inputs[0]
print(onein.shape)
splits = np.array([np.vsplit(onein, 3)])
print(splits.shape)

(4800, 3200, 3)


AttributeError: 'list' object has no attribute 'shape'

In [53]:
np.reshape(onein, (6, 1600, 1600, 3)).shape

(6, 1600, 1600, 3)

In [54]:
splits[0].shape

(1600, 3200, 3)

In [56]:
samples = []
for split in splits:
    samples += np.hsplit(split, 2)
np.array(samples).shape

(6, 1600, 1600, 3)

In [60]:
matches = (np.reshape(onein, (6, 1600, 1600, 3)) == samples)
matches

array([[[[ True,  True,  True],
         [ True,  True,  True],
         [ True,  True,  True],
         ...,
         [ True,  True,  True],
         [ True,  True,  True],
         [ True,  True,  True]],

        [[False, False, False],
         [False, False, False],
         [ True, False, False],
         ...,
         [False, False, False],
         [False, False, False],
         [False, False, False]],

        [[False, False, False],
         [False, False,  True],
         [ True,  True, False],
         ...,
         [False, False, False],
         [False, False, False],
         [False, False, False]],

        ...,

        [[False, False, False],
         [False, False, False],
         [False, False, False],
         ...,
         [False, False, False],
         [False, False, False],
         [False, False, False]],

        [[False, False, False],
         [False, False, False],
         [False, False, False],
         ...,
         [False, False, False],
         [Fa

In [61]:
reshaped = np.reshape(onein, (6, 1600, 1600, 3))
(reshaped[0] == samples[0]).all()

False