In [None]:
import os
from collections import Counter
import numpy as np
import cv2
from tqdm import tqdm_notebook

# (row, column) grid positions of the nine tiles, listed in row-major order.
# The position of a pair in this tuple is used as the tile's index suffix.
ij_pairs_3x3 = ((0, 0), (0, 1), (0, 2),
                (1, 0), (1, 1), (1, 2),
                (2, 0), (2, 1), (2, 2))
# Tile edge length in pixels; same value as get_tile's default `sz`.
sz = 256
ship_dir = "data/input/"
# Source images are read from here ...
train_768_dir = os.path.join("data", "train_768")
# ... and the tiles cut from them are written here.
train_256_dir = os.path.join(ship_dir, 'train_256')
os.makedirs(train_256_dir, exist_ok=True)

# os.listdir returns entries in arbitrary order; sorting makes the processing
# order (and the periodic progress printouts) reproducible between runs.
img_ids = sorted(os.listdir(train_768_dir))
len(img_ids)

In [None]:
def get_tile(img, i, j, sz=256):
    """Slice one square tile out of a 3-D image array.

    Args:
        img: Array indexed as ``img[rows, cols, :]`` (three subscripts are
            applied, so it must have at least three dimensions).
        i: Tile row index; the tile starts at row ``i * sz``.
        j: Tile column index; the tile starts at column ``j * sz``.
        sz: Tile edge length in array elements.

    Returns:
        The ``img[i*sz:(i+1)*sz, j*sz:(j+1)*sz, :]`` slice. For a NumPy array
        this is a view onto ``img``, not a copy.
    """
    top = i * sz
    left = j * sz
    bottom = top + sz
    right = left + sz
    return img[top:bottom, left:right, :]

## Create the image tiles

In [None]:
# Cut every source image into the nine tiles listed in ij_pairs_3x3 and save
# each one as "<base>_<tile index><ext>" in train_256_dir.
skipped_ids = []  # directory entries that cv2 could not decode as an image
for img_id in tqdm_notebook(img_ids):
    img = cv2.imread(os.path.join(train_768_dir, img_id))
    if img is None:
        # cv2.imread reports failure by returning None instead of raising;
        # without this guard the slicing below dies with an opaque TypeError.
        skipped_ids.append(img_id)
        continue
    # splitext splits on the last dot only, so names with extra dots (or no
    # dot at all) no longer break the two-name unpacking. Note that fileext
    # keeps its leading dot, hence no literal '.' in the f-string below.
    filebase, fileext = os.path.splitext(img_id)
    for idx, (i, j) in enumerate(ij_pairs_3x3):
        tile_id = f'{filebase}_{idx}{fileext}'
        tile = get_tile(img, i, j)
        tile_path = os.path.join(train_256_dir, tile_id)
        # imwrite returns False when nothing was written; fail loudly rather
        # than leave a silently incomplete tile set behind.
        if not cv2.imwrite(tile_path, tile):
            raise OSError(f'could not write tile {tile_path}')

if skipped_ids:
    print(f'skipped {len(skipped_ids)} unreadable entries, e.g. {skipped_ids[:10]}')

## Check that tiles are exact copies of parent (sanity check on jpg compression)

In [None]:
def fuzzy_diff(tile1, tile2):
    """Return the total element-wise difference between two arrays.

    Both inputs are stacked (so they must have identical shapes) and the
    element-wise maximum is taken. At each position the distance from that
    maximum down to each input is computed; one of the two distances is
    always zero, so their sum is the absolute difference at that position.
    Subtracting from the maximum never produces a negative intermediate.

    Args:
        tile1: First array.
        tile2: Second array, same shape as ``tile1``.

    Returns:
        The sum over all elements of the absolute difference, as a NumPy
        scalar.
    """
    upper = np.stack([tile1, tile2]).max(axis=0)
    gap_to_second = upper - tile2
    gap_to_first = upper - tile1
    return (gap_to_second + gap_to_first).sum()

# Re-read every saved tile and compare it with the matching region of its
# parent image, tallying exact matches and bucketing the size of mismatches.
ii = 0                    # images visited so far (stays 0 if img_ids is empty)
img_matches = Counter()   # exact-match tiles per image -> number of images
tile_matches = Counter()  # tile index -> number of exact matches
diff_counts = Counter()   # fuzzy_diff // 1000 bucket -> number of differing tiles
unverified_ids = []       # parents or tiles that could not be read back
for ii, img_id in enumerate(tqdm_notebook(img_ids), start=1):
    img = cv2.imread(os.path.join(train_768_dir, img_id))
    if img is None:
        # cv2.imread returns None on failure; record it instead of crashing
        # inside get_tile.
        unverified_ids.append(img_id)
    else:
        # splitext tolerates names with several dots (or none); fileext
        # carries its own leading dot.
        filebase, fileext = os.path.splitext(img_id)
        n_matches = 0
        for idx, (i, j) in enumerate(ij_pairs_3x3):
            tile_id = f'{filebase}_{idx}{fileext}'
            tile = cv2.imread(os.path.join(train_256_dir, tile_id))
            if tile is None:
                # A missing/unreadable tile is not a match and has no diff.
                unverified_ids.append(tile_id)
                continue
            # Slice the reference region once and reuse it for both checks.
            expected = get_tile(img, i, j)
            if np.array_equal(tile, expected):
                tile_matches[idx] += 1
                n_matches += 1
            else:
                diff = fuzzy_diff(tile, expected)
                # Convert to a plain int first: floor-dividing a NumPy
                # unsigned scalar by a Python int yields a float under
                # NumPy's legacy (pre-2.0) promotion rules, which would make
                # the bucket keys floats.
                diff_counts[int(diff) // 1000] += 1

        img_matches[n_matches] += 1

    if ii % 1000 == 0:
        print(f'{ii:>6} {img_matches}')
        print(f'{len(diff_counts):>6} {diff_counts}')

# Report the final tallies too; otherwise everything after the last multiple
# of 1000 images is never shown.
if ii % 1000 != 0:
    print(f'{ii:>6} {img_matches}')
    print(f'{len(diff_counts):>6} {diff_counts}')
if unverified_ids:
    print(f'could not read {len(unverified_ids)} files, e.g. {unverified_ids[:10]}')