# recovering original image from patches
Knowing that all patches overlap each other in a predictable way, we can (approximately) recover the original images from these patches. The recovered images will help to build better stratifications and augmentations. Let's start.

In [None]:
import numpy as np, pandas as pd, matplotlib.pyplot as plt
import os, tqdm
from skimage.io import imread
from skimage.util import img_as_float32

# Segmentation tables for both splits; only the ImageId column is used below.
dftest = pd.read_csv('../input/test_ship_segmentations.csv')
dftrain = pd.read_csv('../input/train_ship_segmentations.csv')

# Unique image ids of the test split.
testids = dftest['ImageId'].unique().tolist()

# Unique image ids of the train split, minus one known-bad file.
trainids = dftrain['ImageId'].unique().tolist()
trainids.remove('6384c3e78.jpg')  # bad format
trainset = set(trainids)  # set form for fast membership tests in read_image

# Train ids first, then test ids.
allids = trainids + testids

def read_image(imgid):
    """Load the image file for ``imgid`` and convert it with ``img_as_float32``.

    The file is looked up under ``../input/train`` when ``imgid`` is a member
    of ``trainset`` and under ``../input/test`` otherwise.
    """
    if imgid in trainset:
        dirn = 'train'
    else:
        dirn = 'test'
    path = '../input/{}/{}'.format(dirn, imgid)
    pixels = imread(path)
    return img_as_float32(pixels)

### define feature extractor

In [None]:
from skimage.feature import BRIEF

# Centres of the 3x3 grid of 256-pixel cells: 128, 384, 640 along each axis.
r = 128 + np.arange(0, 768, 256)
xx, yy = np.meshgrid(r, r)
xx, yy = xx.ravel(), yy.ravel()
# One (row, col) keypoint per cell, listed row by row.
pts = np.column_stack([yy, xx])

# Shared BRIEF descriptor extractor, configured with patch_size=17 and a fixed sample_seed.
# NOTE(review): the fixed seed presumably keeps the sampled pixel pairs identical for every
# image and worker process, which the hash comparison relies on — confirm in the scikit-image docs.
extractor = BRIEF(patch_size=17, sample_seed=123)

def brief_features(imgid):
    """Return one integer fingerprint per grid keypoint of an image.

    The image is read with ``read_image``, averaged over its last axis to get
    a single-channel image, and BRIEF descriptors are extracted at the
    keypoints in ``pts``. Each binary descriptor is packed into bytes and
    reduced to a signed 64-bit integer.

    The fingerprint is derived from a SHA-1 digest instead of the built-in
    ``hash()``: ``hash()`` of ``bytes`` is salted per interpreter process, so
    its values are not reproducible between runs and may differ between pool
    workers that were not forked from the same parent.

    Parameters
    ----------
    imgid : str
        Image file name, as accepted by ``read_image``.

    Returns
    -------
    list of int
        One value per descriptor produced by the extractor, in the order of
        ``extractor.descriptors``. Every value fits in a signed 64-bit
        integer.
    """
    import hashlib  # local import: only this function needs it

    img = read_image(imgid)
    extractor.extract(img.mean(axis=-1), pts)
    l1 = []
    for d in extractor.descriptors:
        packed = np.packbits(d).tobytes()
        # First 8 digest bytes -> signed 64-bit int (storable in an int64 array).
        digest = hashlib.sha1(packed).digest()[:8]
        l1.append(int.from_bytes(digest, 'little', signed=True))
    return l1

### extract brief features on 256x256 divisions 

In [None]:
from multiprocessing import Pool
# Compute the per-image fingerprints in parallel; imap keeps the order of allids.
with Pool() as pool:
    bfs = list(tqdm.tqdm(pool.imap(brief_features, allids), total=len(allids)))

# Grid coordinates of the k-th fingerprint of an image (keypoints are listed row by row).
cols, rows = [0, 1, 2, 0, 1, 2, 0, 1, 2], [0, 0, 0, 1, 1, 1, 2, 2, 2]

# One table row per (image, patch): [ImageId, fingerprint, row, col].
l = [
    [image_id, fingerprint, row, col]
    for image_id, fingerprints in zip(allids, bfs)
    for fingerprint, row, col in zip(fingerprints, rows, cols)
]

df_patch = pd.DataFrame(l, columns=['ImageId', 'bf', 'row', 'col'])

#del l, bfs
#gc.collect()

### remove hash entries that collide, come from constant patches, etc.

In [None]:
# The eight most frequent fingerprints are treated as collisions / uninformative
# patches (value_counts lists the most frequent values first) and are dropped.
colls = set(df_patch['bf'].value_counts().index.values[:8])
df_patch = df_patch.loc[~df_patch['bf'].isin(colls)].copy()

### build predicates between patches

In [None]:
# Neighbour tables keyed by patch fingerprint: p_right[a] is the fingerprint seen
# immediately right of a inside some image, and likewise for the other directions.
p_left, p_right, p_top, p_bottom = {}, {}, {}, {}
grid = np.zeros((3, 3), dtype=np.int64)  # 0 marks a cell with no surviving fingerprint
for _, patches in tqdm.tqdm(df_patch.groupby('ImageId'), total=df_patch.ImageId.nunique()):
    # Lay this image's fingerprints out on its 3x3 grid.
    grid[:] = 0
    grid[patches.row.values, patches.col.values] = patches.bf.values

    # horizontally adjacent pairs
    for i in range(3):
        for j in range(2):
            west, east = grid[i, j], grid[i, j + 1]
            if west == 0 or east == 0:
                continue
            p_right[west] = east
            p_left[east] = west

    # vertically adjacent pairs
    for i in range(2):
        for j in range(3):
            north, south = grid[i, j], grid[i + 1, j]
            if north == 0 or south == 0:
                continue
            p_bottom[north] = south
            p_top[south] = north

### stitching into a big image

In [None]:
def stitch_by_expand(seed):
    """Grow a connected group of patches outwards from ``seed``.

    Starting with ``seed`` at grid position (0, 0), the neighbour tables
    ``p_left``, ``p_right``, ``p_top`` and ``p_bottom`` are followed level by
    level. The first position recorded for a fingerprint is kept; later visits
    to the same fingerprint are ignored.

    Returns
    -------
    (bfs, rows, cols) : three lists of equal length
        The fingerprints reached, with their row and column positions shifted
        so that the smallest row and the smallest column are both 0.
    """
    # (table, row step, column step) in the order the directions are explored.
    directions = (
        (p_left, 0, -1),
        (p_right, 0, 1),
        (p_top, -1, 0),
        (p_bottom, 1, 0),
    )

    placed = {}
    frontier = [(seed, 0, 0)]
    while frontier:
        upcoming = []
        for bf, r, c in frontier:
            if bf in placed:
                continue
            placed[bf] = [r, c]
            for table, dr, dc in directions:
                if bf in table:
                    upcoming.append((table[bf], r + dr, c + dc))
        frontier = upcoming

    # Shift coordinates so the top-left occupied cell becomes (0, 0).
    pos = np.array(list(placed.values()))
    pos -= pos.min(axis=0)
    return list(placed), pos[:, 0].tolist(), pos[:, 1].tolist()

In [None]:
# Stitch every not-yet-visited fingerprint into a group and number the groups.
l = []  # rows of [BigImageId, fingerprint, row in group, column in group]
explored = set()
bigImageId = 0
for seed_bf in tqdm.tqdm(df_patch.bf.unique()):
    if seed_bf not in explored:
        members, member_rows, member_cols = stitch_by_expand(seed_bf)
        explored.update(members)
        l.extend(
            [bigImageId, member, brow, bcol]
            for member, brow, bcol in zip(members, member_rows, member_cols)
        )
        bigImageId += 1

In [None]:
# Display the group counter: it was incremented once per stitched group.
bigImageId

In [None]:
# Group table joined with one representative source patch per fingerprint
# (drop_duplicates keeps a single ImageId/row/col for each bf).
dfbig = pd.DataFrame(l, columns=['BigImageId', 'bf', 'brow', 'bcol']).merge(
    df_patch.drop_duplicates('bf'),
    on='bf',
    how='left',
)

In [None]:
def read_big_image(bimgid, max_tiles=15, tile=256):
    """Assemble the stitched mosaic for one ``BigImageId``.

    Parameters
    ----------
    bimgid
        Value matched against ``dfbig.BigImageId``.
    max_tiles : int, optional
        Only cells with ``brow < max_tiles`` and ``bcol < max_tiles`` are
        drawn, so the result is at most ``max_tiles * tile`` pixels per side
        (3840 x 3840 with the defaults).
    tile : int, optional
        Side length in pixels of one patch, both in the source image and in
        the mosaic.

    Returns
    -------
    numpy.ndarray
        float32 array of shape ``(nrows * tile, ncols * tile, 3)``; cells
        without a patch stay zero.

    Raises
    ------
    ValueError
        If no row of ``dfbig`` matches ``bimgid`` within the size limit.
    """
    wanted = (
        (dfbig.BigImageId == bimgid)
        & (dfbig.brow < max_tiles)
        & (dfbig.bcol < max_tiles)
    )
    # Sorting by source image lets each file be read only once in the loop below.
    df1 = dfbig[wanted].sort_values(by='ImageId')
    if df1.empty:
        raise ValueError('no patches found for BigImageId {!r}'.format(bimgid))

    nrows = int(df1.brow.max()) + 1
    ncols = int(df1.bcol.max()) + 1
    dest = np.zeros((nrows * tile, ncols * tile, 3), dtype=np.float32)

    img = None
    imgid = None  # id of the source image currently held in img
    for src_id, row, col, brow, bcol in zip(df1.ImageId, df1.row, df1.col, df1.brow, df1.bcol):
        if src_id != imgid:
            imgid = src_id
            img = read_image(imgid)
        dy, dx = brow * tile, bcol * tile  # destination corner in the mosaic
        sy, sx = row * tile, col * tile    # source corner in the original image
        dest[dy:dy + tile, dx:dx + tile, :] = img[sy:sy + tile, sx:sx + tile, :]

    return dest

### show some results

In [None]:
# Ids of the (up to) 1024 groups with the most patches; value_counts sorts by count, descending.
bimgids = dfbig['BigImageId'].value_counts().iloc[:1024].index.values

In [None]:
# Draw one stitched image picked at random from bimgids.
bimg = read_big_image(np.random.choice(bimgids))
plt.figure(figsize=(12,12))
plt.imshow(bimg)
plt.show()

In [None]:
# Draw another randomly picked stitched image from bimgids.
bimg = read_big_image(np.random.choice(bimgids))
plt.figure(figsize=(12,12))
plt.imshow(bimg)
plt.show()

In [None]:
# Draw another randomly picked stitched image from bimgids.
bimg = read_big_image(np.random.choice(bimgids))
plt.figure(figsize=(12,12))
plt.imshow(bimg)
plt.show()

In [None]:
# Draw another randomly picked stitched image from bimgids.
bimg = read_big_image(np.random.choice(bimgids))
plt.figure(figsize=(12,12))
plt.imshow(bimg)
plt.show()

In [None]:
# Draw another randomly picked stitched image from bimgids.
bimg = read_big_image(np.random.choice(bimgids))
plt.figure(figsize=(12,12))
plt.imshow(bimg)
plt.show()

In [None]:
# Draw another randomly picked stitched image from bimgids.
bimg = read_big_image(np.random.choice(bimgids))
plt.figure(figsize=(12,12))
plt.imshow(bimg)
plt.show()