In [None]:
import os
import shutil
import hashlib
from collections import Counter
from collections import namedtuple

import numpy as np
import matplotlib.pyplot as plt
import cv2
from IPython.display import set_matplotlib_formats
from skimage.transform import resize
from sdcdup.utils import get_project_root
from sdcdup.utils import generate_boundingbox_corners
from sdcdup.utils import overlap_tag_pairs
from sdcdup.utils import overlap_tag_maps
from sdcdup.utils import get_tile
from sdcdup.utils import load_duplicate_truth
from sdcdup.utils import ImgMod
from sdcdup.features import SDCImageContainer
from sdcdup.visualization import get_ticks
from sdcdup.visualization import draw_bbox
from sdcdup.visualization import draw_tile_number
from sdcdup.visualization import draw_tile_bbox
from sdcdup.visualization import show_image_pair
from sdcdup.visualization import ChannelShift

%load_ext dotenv
%dotenv
%matplotlib inline
%reload_ext autoreload
%autoreload 2

# Bounding-box colours as RGB triples.
RED = (244, 67, 54)  #F44336
GREEN = (76, 175, 80)  #4CAF50
BLUE = (3, 169, 244)  #03A9F4

set_matplotlib_formats('pdf', 'png')

# Plot styling shared by every figure in this notebook.
plt.rcParams.update({
    'font.size': 16,
    'axes.titlesize': 20,
    'axes.labelsize': 18,
    'xtick.labelsize': 14,
    'ytick.labelsize': 14,
    'legend.fontsize': 14,
    'savefig.dpi': 75,
    'figure.autolayout': False,
    'figure.figsize': (10, 6),
    'lines.linewidth': 2.0,
    'lines.markersize': 8,
})

# Optional LaTeX text rendering (disabled).
# plt.rcParams['text.usetex'] = True
# plt.rcParams['font.family'] = "serif"
# plt.rcParams['font.serif'] = "cm"
# plt.rcParams['text.latex.preamble'] = r"\usepackage{subdepth}, \usepackage{type1cm}"

# Data locations; RAW_DATA_DIR and PERSISTENT_DATA_DIR are read from the
# environment (populated by %dotenv above) and resolved against the project root.
project_root = get_project_root()
raw_data_dir = os.getenv('RAW_DATA_DIR')
train_image_dir = os.path.join(project_root, raw_data_dir, 'train_768')
persistent_data_dir = os.path.join(project_root, os.getenv('PERSISTENT_DATA_DIR'))
samples_dir = os.path.join(persistent_data_dir, 'samples')
figures_dir = os.path.join(project_root, 'notebooks', 'figures')

# Shared lookup tables used by the drawing cells below.
boundingbox_corners = generate_boundingbox_corners()
ticks = get_ticks()

In [None]:
def copy2persist(img_id):
    """Copy a training image into the persistent samples directory.

    The copy is skipped when a file with the same name already exists in
    ``samples_dir``, so an existing sample is never overwritten.

    Args:
        img_id: Image file name (e.g. ``'21d7ea9bf.jpg'``), looked up in
            ``train_image_dir``.
    """
    dst = os.path.join(samples_dir, img_id)
    if os.path.exists(dst):
        return
    # On a fresh checkout the samples directory may not exist yet, in which
    # case the copy would fail on the destination path.
    os.makedirs(samples_dir, exist_ok=True)
    shutil.copy2(os.path.join(train_image_dir, img_id), dst)

def get_overlap_pixel_scores(img1_id, img2_id, img1_overlap_tag):
    """Return the ``px0``/``pix`` scores for one tagged image pair.

    A non-None entry already present in ``overlap_image_maps`` is returned
    as-is. Otherwise both score sets are generated through ``sdcic`` and
    wrapped in a ``Scores(px0, pix)`` namedtuple.
    """
    key = (img1_id, img2_id, img1_overlap_tag)
    if key in overlap_image_maps:
        cached = overlap_image_maps[key]
        if cached is not None:
            return cached

    Scores = namedtuple('Scores', 'px0 pix')
    px0_scores = sdcic.gen_px0_scores(*key)
    pix_scores = sdcic.gen_pix_scores(*key)
    return Scores(px0_scores, pix_scores)

def plot_image_grid(img_ids, ncols, nrows, filebase=None):
    """Show training images in an ``nrows`` x ``ncols`` grid, filled row by row.

    Args:
        img_ids: Image file names, looked up in ``train_image_dir``.
        ncols: Number of grid columns.
        nrows: Number of grid rows.
        filebase: If given, the figure is also written to
            ``figures_dir`` as ``{filebase}_{ncols}_{nrows}.jpg``.

    Raises:
        ValueError: If there are more images than grid cells.
        OSError: If an image file cannot be read.
    """
    img_ids = list(img_ids)
    n_cells = ncols * nrows
    if len(img_ids) > n_cells:
        raise ValueError(
            f'{len(img_ids)} images do not fit in a {nrows}x{ncols} grid ({n_cells} cells)')

    # squeeze=False keeps the axes array 2-D even for a single row or column,
    # so the same indexing works for every grid shape.
    fig, axes = plt.subplots(nrows, ncols, figsize=(ncols*4, nrows*4), squeeze=False)
    flat_axes = axes.ravel()  # row-major: cell i is (i // ncols, i % ncols)

    for ax, img_id in zip(flat_axes, img_ids):
        img_path = os.path.join(train_image_dir, img_id)
        img_bgr = cv2.imread(img_path)
        if img_bgr is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise OSError(f'Could not read image: {img_path}')
        ax.imshow(cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB))
        ax.set_title(img_id)
        ax.set_xticks(ticks)
        ax.set_yticks(ticks)

    # Hide the cells that received no image rather than showing empty frames.
    for ax in flat_axes[len(img_ids):]:
        ax.set_axis_off()

    fig.tight_layout()

    if filebase:
        filename = f'{filebase}_{ncols}_{nrows}.jpg'
        fig.savefig(os.path.join(figures_dir, filename))

In [None]:
matches_files = ['matches_bmh96_0.9.csv']
sdcic = SDCImageContainer()
overlap_image_maps = sdcic.load_image_overlap_properties(matches_files, score_types=['px0', 'pix'])
print(len(overlap_image_maps))

In [None]:
dup_truth = load_duplicate_truth(filepath=persistent_data_dir, filename='duplicate_truth.txt')
print(len(dup_truth))

In [None]:
# Draw one schematic per overlap tag: two 768x768 image outlines on a blank
# canvas, the region they share shaded grey, and a count printed in the middle.
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 5
bbox_thickness = 16

# Canvas shape (rows, cols, channels) used for each img1 overlap tag.
img_sizes = {
    '00': (1280, 1280, 3),
    '01': (1280, 1024, 3),
    '02': (1280,  768, 3),
    '12': (1280, 1024, 3),
    '22': (1280, 1280, 3),
    '03': (1024, 1280, 3),
    '04': (1024, 1024, 3),
    '05': (1024,  768, 3),
    '15': (1024, 1024, 3),
    '25': (1024, 1280, 3),
    '06': ( 768, 1280, 3),
    '07': ( 768, 1024, 3),
    '08': ( 768,  768, 3),
    '18': ( 768, 1024, 3),
    '28': ( 768, 1280, 3),
    '36': (1024, 1280, 3),
    '37': (1024, 1024, 3),
    '38': (1024,  768, 3),
    '48': (1024, 1024, 3),
    '58': (1024, 1280, 3),
    '66': (1280, 1280, 3),
    '67': (1280, 1024, 3),
    '68': (1280,  768, 3),
    '78': (1280, 1024, 3),
    '88': (1280, 1280, 3),
}
nrows = 5
ncols = 5
fig, ax = plt.subplots(nrows, ncols, figsize=(16, 16))
for ii, (overlap_img1_tag, img_size) in enumerate(img_sizes.items()):
    overlap_img2_tag = overlap_tag_pairs[overlap_img1_tag]
    bbox1a = boundingbox_corners[overlap_img1_tag]
    bbox2a = boundingbox_corners[overlap_img2_tag]
    # Each box is a full 768x768 image; img1's box is anchored at the corner
    # stored for img2's tag and vice versa.
    bbox1 = np.array([bbox2a[0], bbox2a[0] + np.array([768, 768])])
    bbox2 = np.array([bbox1a[0], bbox1a[0] + np.array([768, 768])])

    # Region covered by both images. Both slices must stop at 768; the column
    # slice previously stopped at 767 and left the last overlap column unshaded.
    shaded_overlap = (slice(img_size[0] - 768, 768), slice(img_size[1] - 768, 768))

    # Start from a blank white canvas. The previous version read and resized a
    # sample photo here, then overwrote every pixel, so the file was never needed.
    img = np.full(img_size, 255, dtype=np.uint8)
    img[shaded_overlap] = 196

    draw_bbox(img, bbox1, bbox_thickness, GREEN, img_size=np.array([img_size[1], img_size[0]]))
    draw_bbox(img, bbox2, bbox_thickness, RED, img_size=np.array([img_size[1], img_size[0]]))

    # Print the number of entries overlap_tag_maps holds for this tag near the canvas centre.
    n_overlaps = len(overlap_tag_maps[overlap_img1_tag])
    cv2.putText(img, str(n_overlaps), ((img_size[1] // 2) - 64, (img_size[0] // 2) + 64), font, font_scale, (0, 0, 0), 12)

    ax[ii // ncols, ii % ncols].imshow(img)
    ax[ii // ncols, ii % ncols].set_axis_off()

plt.tight_layout();
# plt.savefig(os.path.join(figures_dir, f'overlap-5x5.jpg'))

In [None]:
# Key figure: one sample image with all nine tiles outlined and numbered.
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 5
line_type = 8
bbox_thickness = 8
tile_indexes = list(range(9))
img_id = '21d7ea9bf.jpg'

fig, ax = plt.subplots(1, 1, figsize=(4, 4))

img_bgr = cv2.imread(os.path.join(samples_dir, img_id))
img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

# Annotate the image array itself before it is shown.
for tile_idx in tile_indexes:
    draw_tile_bbox(img, tile_idx, bbox_thickness, RED)
    draw_tile_number(img, tile_idx, color=RED)

ax.imshow(img)
ax.set(title=img_id, xticks=ticks, yticks=ticks);

# plt.savefig(os.path.join(figures_dir, f'overlap_key.jpg'))

## Check to see how many exact duplicate tiles we have.

In [None]:
# tile hash -> {img_id: [tile indexes carrying that hash]}
dup_tile_hashes = {}
# tile hash -> number of distinct (image, tile index) positions recorded
dup_tile_counts = Counter()
# first image in which each repeated hash was seen: (img_id, hash, tile indexes)
dup_image_samples = []


def _record_tile_positions(img_id, tile_hash, tile_indexes):
    """Add the tile positions of ``tile_hash`` in ``img_id``, counting each once."""
    per_image = dup_tile_hashes[tile_hash]
    for tile_idx in tile_indexes:
        positions = per_image.setdefault(img_id, [])
        if tile_idx not in positions:
            positions.append(tile_idx)
            dup_tile_counts[tile_hash] += 1


# Pass 1: hashes that occur more than once within a single image.
for img_id, tile_hashes in sdcic.img_metrics['md5'].items():
    for tile_hash, n_tiles in Counter(tile_hashes).items():
        if n_tiles == 1:
            continue
        tile_indexes = np.where(tile_hashes == tile_hash)[0]

        if tile_hash not in dup_tile_hashes:
            dup_image_samples.append((img_id, tile_hash, tile_indexes))
            dup_tile_hashes[tile_hash] = {}

        _record_tile_positions(img_id, tile_hash, tile_indexes)

print(dup_tile_counts)

dup_hashes = tuple(dup_tile_hashes)

# Pass 2: images that carry one of those hashes exactly once.
for img_id, tile_hashes in sdcic.img_metrics['md5'].items():
    for tile_hash, n_tiles in Counter(tile_hashes).items():
        if n_tiles > 1 or tile_hash not in dup_hashes:
            continue
        tile_indexes = np.where(tile_hashes == tile_hash)[0]
        _record_tile_positions(img_id, tile_hash, tile_indexes)

print(dup_tile_counts)

dup_image_counts = {tile_hash: len(per_image) for tile_hash, per_image in dup_tile_hashes.items()}
print(dup_image_counts)

bbox_thickness = 8
ncols = 3
nrows = 2
# The grid below is laid out for exactly this many sample images.
assert ncols * nrows == len(dup_image_samples)

fig, ax = plt.subplots(nrows, ncols, figsize=(15, 10))

for i, (img_id, tile_hash, tile_indexes) in enumerate(dup_image_samples):
    copy2persist(img_id)
    img_bgr = cv2.imread(os.path.join(samples_dir, img_id))
    img = cv2.cvtColor(img_bgr, cv2.COLOR_BGR2RGB)

    if len(tile_indexes) > 0:
        print(img_id, tile_hash, dup_image_counts[tile_hash], dup_tile_counts[tile_hash])

    for tile_idx in tile_indexes:
        draw_tile_bbox(img, tile_idx, bbox_thickness, RED)
        draw_tile_number(img, tile_idx, color=RED)

    row, col = divmod(i, ncols)
    cell = ax[row, col]
    cell.imshow(img)
    cell.set_title(f'{img_id} | {tile_hash}')
    cell.set_xticks(ticks)
    cell.set_yticks(ticks)

plt.tight_layout()

# plt.savefig(os.path.join(figures_dir, f'images_with_duplicate_tiles.jpg'))

In [None]:
# mostly black images
img_ids = [
    '03ffa7680.jpg', '8d5521663.jpg', '5a70ef013.jpg', '9a2f9d347.jpg', '37a912dca.jpg', 
    '4add7aa1d.jpg', '3db3ef7cc.jpg', '73fec0637.jpg', '7df214d98.jpg', 'c2955cd21.jpg', 
    'de018b2a8.jpg', '8ce769141.jpg', 'fc0e22a0a.jpg', '770c46cd4.jpg', 'd6e432b79.jpg', 
    'd5d1b6fb8.jpg', '0e4d7dd93.jpg', '9ddeed533.jpg', 'addc11de0.jpg', '65418dfe4.jpg', 
    '119d6a3d6.jpg', '1b287c905.jpg', 'b264b0f96.jpg', '996f92939.jpg', 'e5c3b1f59.jpg']
plot_image_grid(img_ids, 5, 5, filebase='black_images')

In [None]:
def _select_image_dir(img1_id, img2_id, persist):
    """Return the directory to read both images from, copying them to samples first if asked."""
    if not persist:
        return train_image_dir
    copy2persist(img1_id)
    copy2persist(img2_id)
    return samples_dir


def _resolve_bbox_style(img1_id, img2_id, img1_overlap_tag, bbox_color, draw_bboxes):
    """Return ``(bbox_color, draw_bboxes)``, filling in defaults when no colour was given."""
    if bbox_color:
        # An explicit colour is used as-is and leaves draw_bboxes untouched.
        return bbox_color, draw_bboxes

    if img1_overlap_tag == '08':
        # NOTE(review): '08' looks like the full-image overlap, where a box
        # would only outline the whole image - confirm.
        draw_bboxes = False

    truth_key = (img1_id, img2_id, img1_overlap_tag)
    if truth_key in dup_truth:
        bbox_color = GREEN if dup_truth[truth_key] else RED
    else:
        # Pair has no entry in dup_truth.
        bbox_color = BLUE
    return bbox_color, draw_bboxes


def _pair_titles(img1_id, img2_id, img1_overlap_tag, with_scores):
    """Return the two subplot titles: score ranges if requested, else the image ids."""
    if not with_scores:
        return img1_id, img2_id
    scores = get_overlap_pixel_scores(img1_id, img2_id, img1_overlap_tag)
    title1 = f'px0: min: {np.min(scores.px0)}, max: {np.max(scores.px0)}'
    title2 = f'pix: min: {np.min(scores.pix)}, max: {np.max(scores.pix)}'
    return title1, title2


def _save_pair_figure(fig, img1_id, img2_id, img1_overlap_tag):
    """Write ``fig`` to ``figures_dir`` and report whether a file was overwritten."""
    filename = os.path.join(figures_dir, f'{img1_id}_{img2_id}_{img1_overlap_tag}.jpg')
    already_there = os.path.exists(filename)
    if already_there:
        print(f'{filename} already exists. Overwriting...')
    fig.savefig(filename)
    if not already_there:
        print(f'{filename} saved.')


def draw_image_quad(
    img1_id, img2_id, 
    img1_overlap_tag='08', 
    with_scores=True,
    bbox_thickness=4,
    bbox_color=None,
    draw_bboxes=True,
    persist=True,
    save=False):
    """Show an image pair twice in a 2x2 figure: as-is, then with a median channel shift.

    Args:
        img1_id, img2_id: Image file names.
        img1_overlap_tag: Overlap tag of the pair, given from img1's side.
        with_scores: Title the bottom row with the min/max of the px0 and pix
            scores instead of the image ids.
        bbox_thickness: Line thickness passed to ``show_image_pair``.
        bbox_color: Box colour. When omitted it is GREEN or RED according to
            ``dup_truth``, or BLUE if the pair has no entry there; boxes are
            also switched off for tag '08'.
        draw_bboxes: Whether to draw the overlap boxes.
        persist: Copy both images into ``samples_dir`` and read them from
            there; otherwise read from ``train_image_dir``.
        save: Write the figure to
            ``figures_dir/{img1_id}_{img2_id}_{img1_overlap_tag}.jpg``. This is
            the same file name ``draw_image_pair`` uses.
    """
    image_dir = _select_image_dir(img1_id, img2_id, persist)
    bbox_color, draw_bboxes = _resolve_bbox_style(
        img1_id, img2_id, img1_overlap_tag, bbox_color, draw_bboxes)

    fig, ((ax11, ax12), (ax21, ax22)) = plt.subplots(2, 2, figsize=(12, 12))

    imgmod1 = ImgMod(os.path.join(image_dir, img1_id))
    imgmod2 = ImgMod(os.path.join(image_dir, img2_id))

    # Top row: the pair without any channel shift, titled with the image ids.
    show_image_pair(ax11, ax12,
                    imgmod1, imgmod2, img1_overlap_tag,
                    draw_bboxes, bbox_thickness, bbox_color,
                    img1_id, img2_id, ticks)

    title1, title2 = _pair_titles(img1_id, img2_id, img1_overlap_tag, with_scores)

    # Bottom row: the same pair with ChannelShift('median', True) applied.
    show_image_pair(ax21, ax22,
                    imgmod1, imgmod2, img1_overlap_tag,
                    draw_bboxes, bbox_thickness, bbox_color,
                    title1, title2, ticks, shift=ChannelShift('median', True))

    fig.tight_layout()

    if save:
        _save_pair_figure(fig, img1_id, img2_id, img1_overlap_tag)


def draw_image_pair(
    img1_id, img2_id, 
    img1_overlap_tag='08',
    shift=ChannelShift('', True),
    with_scores=False,
    bbox_thickness=4,
    bbox_color=None, 
    draw_bboxes=True,
    persist=True,
    save=False):
    """Show an image pair side by side in a 1x2 figure.

    Args:
        img1_id, img2_id: Image file names.
        img1_overlap_tag: Overlap tag of the pair, given from img1's side.
        shift: ``ChannelShift`` forwarded to ``show_image_pair``.
        with_scores: Title the subplots with the min/max of the px0 and pix
            scores instead of the image ids.
        bbox_thickness: Line thickness passed to ``show_image_pair``.
        bbox_color: Box colour. When omitted it is GREEN or RED according to
            ``dup_truth``, or BLUE if the pair has no entry there; boxes are
            also switched off for tag '08'.
        draw_bboxes: Whether to draw the overlap boxes.
        persist: Copy both images into ``samples_dir`` and read them from
            there; otherwise read from ``train_image_dir``.
        save: Write the figure to
            ``figures_dir/{img1_id}_{img2_id}_{img1_overlap_tag}.jpg``. This is
            the same file name ``draw_image_quad`` uses.
    """
    image_dir = _select_image_dir(img1_id, img2_id, persist)
    bbox_color, draw_bboxes = _resolve_bbox_style(
        img1_id, img2_id, img1_overlap_tag, bbox_color, draw_bboxes)
    title1, title2 = _pair_titles(img1_id, img2_id, img1_overlap_tag, with_scores)

    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 6))

    imgmod1 = ImgMod(os.path.join(image_dir, img1_id))
    imgmod2 = ImgMod(os.path.join(image_dir, img2_id))

    show_image_pair(ax1, ax2, 
                    imgmod1, imgmod2, img1_overlap_tag, 
                    draw_bboxes, bbox_thickness, bbox_color, 
                    title1, title2, ticks, shift=shift)

    fig.tight_layout()

    if save:
        _save_pair_figure(fig, img1_id, img2_id, img1_overlap_tag)


## Need 2 examples of "no dup_tool required, obvious" duplicate and non-duplicate image pairs.

In [None]:
# ship location
# ship heading
# wake pattern
# wave crests
draw_image_pair('c53ccd713.jpg', 'e64e7ff78.jpg', bbox_color=GREEN, save=True)

## Here we have two images of the same location, taken at different times.
Notice the two ships missing from the bottom right of `89a2baf91.jpg`.  Also, notice the difference in water texture: rough vs. calm. So these are definitely not duplicates.

In [None]:
# ship location
# water state (calm vs rough)
# land erosion
draw_image_pair('0efcd3f26.jpg', '89a2baf91.jpg', bbox_color=RED, save=True)
# draw_image_pair('0efcd3f26.jpg', '89a2baf91.jpg', shift=ChannelShift('median', True), with_scores=True)

In [None]:
# ship location
# ship heading
# water state (polluted)
draw_image_pair('21d7ea9bf.jpg', '89d46f4c4.jpg', bbox_color=GREEN, save=True)
# draw_image_pair('21d7ea9bf.jpg', '89d46f4c4.jpg', shift=ChannelShift('median', True), with_scores=True)

In [None]:
draw_image_pair('21d7ea9bf.jpg', '89d46f4c4.jpg', shift=ChannelShift('median', False), with_scores=True)

In [None]:
# ship location
# water state (calm vs rough)
# vegetation
# shadows
draw_image_pair('37c75e67a.jpg', 'c2212d548.jpg', bbox_color=RED)

In [None]:
# ship location
# ship heading
# wave crests
# shadows
draw_image_pair('0a78f9786.jpg', 'f432c322a.jpg', bbox_color=RED)

## Now show examples of overlaps

In [None]:
draw_image_pair('001234638.jpg', 'dde85f1d2.jpg')

In [None]:
draw_image_pair('001234638.jpg', 'dde85f1d2.jpg', '07', save=True)

In [None]:
# draw_image_pair('001234638.jpg', 'dde85f1d2.jpg', '07', shift=ChannelShift('median', True), with_scores=True)

In [None]:
draw_image_pair('000e37fc6.jpg', 'bba55046f.jpg')

In [None]:
draw_image_pair('000e37fc6.jpg', 'bba55046f.jpg', '18', with_scores=True)

In [None]:
# draw_image_pair('000e37fc6.jpg', 'bba55046f.jpg', '18', shift=ChannelShift('median', True), with_scores=True)

## These examples are not so obvious.  Use ChannelShift to compare overlapping regions.

### Example 1

In [None]:
draw_image_pair('46b87e21c.jpg', 'f881c203f.jpg', bbox_color=RED, save=True)

In [None]:
draw_image_pair('46b87e21c.jpg', 'f881c203f.jpg', '48', bbox_color=BLUE)
# mv 46b87e21c.jpg_f881c203f.jpg_48.jpg 46b87e21c.jpg_f881c203f.jpg_48a.jpg

In [None]:
draw_image_pair('46b87e21c.jpg', 'f881c203f.jpg', '48', shift=ChannelShift('median', True), with_scores=True, save=True)

In [None]:
draw_image_pair('46b87e21c.jpg', 'f881c203f.jpg', '07', shift=ChannelShift('median', True), with_scores=True, save=True)

### Example 2

In [None]:
draw_image_pair('9b34f2f64.jpg', 'e8b058856.jpg', bbox_color=RED)

In [None]:
draw_image_pair('9b34f2f64.jpg', 'e8b058856.jpg', '05', bbox_color=BLUE)

In [None]:
draw_image_pair('9b34f2f64.jpg', 'e8b058856.jpg', '05', shift=ChannelShift('median', True), with_scores=True)

In [None]:
draw_image_pair('9b34f2f64.jpg', 'e8b058856.jpg', '18', shift=ChannelShift('median', True), with_scores=True)

### Example 3

In [None]:
draw_image_pair('356f4c539.jpg', '6dd7430f6.jpg')

In [None]:
draw_image_pair('356f4c539.jpg', '6dd7430f6.jpg', shift=ChannelShift('median', True), with_scores=True)

In [None]:
draw_image_pair('356f4c539.jpg', '6dd7430f6.jpg', '02', shift=ChannelShift('median', True), with_scores=True)

## Same location but different timestamps
Notice the same ships are not present in both images.

Also the water is more calm/smooth in one than the other.

These are non-duplicates.

In [None]:
draw_image_pair('2556bfc6c.jpg', 'd3474ec95.jpg', bbox_color=RED)
# draw_image_pair('2556bfc6c.jpg', 'd3474ec95.jpg', bbox_color=RED, shift=ChannelShift('median', True))

In [None]:
draw_image_pair('2c09a2423.jpg', 'b4eba96e8.jpg', bbox_color=RED)
# draw_image_pair('2c09a2423.jpg', 'b4eba96e8.jpg', bbox_color=RED, shift=ChannelShift('median', True), with_scores=True)

In [None]:
draw_image_pair('2556bfc6c.jpg', '2c09a2423.jpg', '28')

In [None]:
draw_image_pair('2556bfc6c.jpg', '2c09a2423.jpg', '28', shift=ChannelShift('median', True), with_scores=True)

In [None]:
# A Duplicate example with the same images.
# draw_image_pair('2556bfc6c.jpg', 'b4eba96e8.jpg', '28')
# draw_image_pair('2556bfc6c.jpg', 'b4eba96e8.jpg', '28', shift=ChannelShift('median', True), with_scores=True)

## These are duplicates but very different pixelwise (large hamming distance)

In [None]:
draw_image_pair('536356d11.jpg', '783d9495a.jpg', '15')

In [None]:
draw_image_pair('536356d11.jpg', '783d9495a.jpg', '15', shift=ChannelShift('median', True), with_scores=True)

In [None]:
draw_image_pair('536356d11.jpg', '88c2acaf8.jpg', '15', save=True)

In [None]:
draw_image_pair('536356d11.jpg', '88c2acaf8.jpg', '15', shift=ChannelShift('median', True), with_scores=True, save=True)

## What about these? dup or non-dup?

In [None]:
draw_image_pair('908f11dd6.jpg', 'f127f2891.jpg', '08')

In [None]:
draw_image_pair('908f11dd6.jpg', 'f127f2891.jpg', '08', shift=ChannelShift('median', False), with_scores=True)

In [None]:
draw_image_pair('908f11dd6.jpg', 'c5d9bc753.jpg', '02')

In [None]:
draw_image_pair('908f11dd6.jpg', 'c5d9bc753.jpg', '02', shift=ChannelShift('median', True), with_scores=True)

In [None]:
draw_image_pair('2b6c7fd55.jpg', 'c5d9bc753.jpg', '15')

In [None]:
draw_image_pair('2b6c7fd55.jpg', 'c5d9bc753.jpg', '15', shift=ChannelShift('median', False), with_scores=True)

## Clouds look the same but are actually different.

In [None]:
draw_image_quad('0ef6cd331.jpg', '2095da0cb.jpg', save=True)

## Blue edges. The images are different but look the same.

In [None]:
draw_image_quad('b8ce38df4.jpg', 'ddfc36407.jpg')

In [None]:
draw_image_quad('8a0542232.jpg', 'ddfc36407.jpg')

In [None]:
draw_image_quad('8a0542232.jpg', 'ddfc36407.jpg', '68')

## This is the example used in [hls_shift](notebooks/eda/hls_shift.ipynb)

In [None]:
draw_image_quad('03a5fd8d2.jpg', '676f4cfd0.jpg', with_scores=True, save=True)

## Find all images that overlap with img_id

In [None]:
# Regroup the flat {(img1_id, img2_id, overlap_tag): scores} mapping into
# {(img1_id, img2_id): {overlap_tag: scores}} so that every tag of an image
# pair can be looked up together.
# The loop used to be wrapped in tqdm_notebook, which is never imported in this
# notebook and raised a NameError on a fresh kernel; the progress bar is dropped.
overlap_image_maps2 = {}
for (img1_id, img2_id, img1_overlap_tag), scores in overlap_image_maps.items():
    overlap_image_maps2.setdefault((img1_id, img2_id), {})[img1_overlap_tag] = scores

In [None]:
# (image, pix threshold) settings tried so far; the last entry is the active one.
candidates = [
    ('d049cb0be.jpg', 5000),
    ('c5d9bc753.jpg', 10000),
    ('2556bfc6c.jpg', 5000000),
    ('d3474ec95.jpg', 5000000),
]
img_id, pix_thresh = candidates[-1]

pset = set()         # ids of every image taking part in an accepted overlap
overlay_set = set()  # accepted (img1_id, img2_id, overlap_tag) triples
for (img1_id, img2_id), overlap_map in overlap_image_maps2.items():
    if img_id not in (img1_id, img2_id):
        continue
    # Each pair involving img_id is expected to overlap in exactly one way.
    assert len(overlap_map) == 1
    img1_overlap_tag, scores = next(iter(overlap_map.items()))
    if np.max(scores.pix) > pix_thresh:
        continue
    overlay_set.add((img1_id, img2_id, img1_overlap_tag))
    pset.update((img1_id, img2_id))

plot_image_grid(sorted(pset), 4, 4)

# List the accepted pairs whose largest pix score stays under 5000.
for overlay in sorted(overlay_set):
    pixel_scores = sdcic.gen_pix_scores(*overlay)
    if np.max(pixel_scores) < 5000:
        print(overlay[:2])

To run the next few cells, you'll need to download and extract the Airbus_SDC dataset as outlined in the project README.  I didn't want the project repo to be bloated with tons of images.  However, I have included the results of the cells for your convenience.

In [None]:
overlap_group1 = [
    '3e98c83f7.jpg', 
    'b356b1f4a.jpg', 
    'd42dcdc8c.jpg', 
    '861367193.jpg', 
    '536356d11.jpg', 
    '9b82e7a76.jpg',
    '783d9495a.jpg', 
    'cc29cb437.jpg',
    '385df9573.jpg']

plot_image_grid(overlap_group1, 3, 3, filebase='overlap_group1')

![](../figures/overlap_group1_3_3.jpg)

In [None]:
overlap_group2 = [
    'd0e99b467.jpg', 
    'f0d46bbd8.jpg', 
    'd049cb0be.jpg', 
    '813a4728e.jpg',
    'a4e6f04a8.jpg', 
    'e80ae5e73.jpg', 
    '88c2acaf8.jpg', 
    '30d3278a2.jpg', 
    'e6e729afa.jpg']

plot_image_grid(overlap_group2, 3, 3, filebase='overlap_group2')

![](../figures/overlap_group2_3_3.jpg)

In [None]:
# Pair the two groups position by position and score each pair with tag '08'.
image_pairs = list(zip(overlap_group1, overlap_group2))

pixel_scores = [
    sdcic.gen_pix_scores(img1_id, img2_id, '08')
    for img1_id, img2_id in image_pairs
]

print(np.asarray(pixel_scores))

In [None]:
pixel_scores9 = np.asarray(pixel_scores).reshape((9, 3, 3))
print('pixel scores between tiles of overlap_group1 and overlap_group2\n')
print(pixel_scores9)

In [None]:
for img1_id, img2_id in image_pairs:
    draw_image_pair(img1_id, img2_id, shift=ChannelShift('median', True), persist=False, save=True)

![](../figures/3e98c83f7.jpg_d0e99b467.jpg_08.jpg)