In [None]:
import json

import joblib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import skimage.transform

In [None]:
def crop(fn):
    """Crop an image to a padded square around its detection box, resized to 384x384.

    Reads three notebook-level globals: ``image_folder`` (directory that
    contains ``fn``), ``df2`` (detection table) and ``detix`` (file name ->
    row position in ``df2``).  The box corners are read from columns 2 onward
    of the matching ``df2`` row, in the order ``x1, y1, x2, y2``.

    Parameters
    ----------
    fn : str
        Image file name, relative to ``image_folder``.

    Returns
    -------
    im : numpy.ndarray
        The cropped region resized to 384x384 and cast to ``uint8``.
    bounds : tuple
        ``(y0, x0, y1, x1)``: the crop window in original-image pixel
        coordinates, after clipping to the image borders.
    """
    im = plt.imread('{}/{}'.format(image_folder, fn))

    # `.ix` was removed in pandas 1.0; `.iloc` is the positional equivalent
    # (detix stores row positions, and the column slice is positional).
    bx1, by1, bx2, by2 = df2.iloc[detix[fn], 2:]
    if np.isnan((bx1, by1, bx2, by2)).any():
        # No usable box: use a huge one so the clipping below keeps the whole image.
        bx1, by1 = -1e6, -1e6
        bx2, by2 = 1e6, 1e6

    # Half-side of the square window: longest box side plus a 25% margin.
    half = max(np.abs(bx2 - bx1), np.abs(by2 - by1))
    half = int(half * 1.25 / 2)
    cy, cx = int((by2 + by1) / 2), int((bx2 + bx1) / 2)

    # Clip the window to the image; the result may no longer be square.
    x0 = max(0, cx - half)
    x1 = min(im.shape[1], cx + half)
    y0 = max(0, cy - half)
    y1 = min(im.shape[0], cy + half)

    im = im[y0:y1, x0:x1]
    im = skimage.transform.resize(im, (384, 384), preserve_range=True).astype('uint8')
    return im, (y0, x0, y1, x1)

# I. Crop Training and Test Images

In [None]:
mkdir -p ./data/detcrop_train

In [None]:
# Folder that crop() reads the training images from.
image_folder = './data/train'

df = pd.read_csv('./data/train.csv')
df2 = pd.read_csv('../kaggle-whale/whaledet_train.csv')
fns = df2.Image
# File name -> row position in df2.  The mapping is sized from df2 itself
# (it was previously sized from len(df)), because crop() and the results loop
# look every name up in df2.  enumerate() also replaces `.ix`, which was
# removed in pandas 1.0.
detix = {fn: i for i, fn in enumerate(df2.Image)}

def process(fn):
    """Crop one training image, save the crop, and report its crop window.

    The crop is written to ``./data/detcrop_train/<fn>``.  Returns a
    ``(fn, bounds)`` pair, where ``bounds`` is the second value returned
    by ``crop``.
    """
    cropped, bounds = crop(fn)
    out_path = '{}/{}'.format('./data/detcrop_train', fn)
    plt.imsave(out_path, cropped)
    return fn, bounds

# One (initially empty) row per df2 entry; rows are filled in from the crop results.
df3 = pd.DataFrame(columns=['Image', 'y0', 'x0', 'y1', 'x1'], index=np.arange(len(df2)))
# Crop and save every image with 10 parallel joblib jobs.
tasks = (joblib.delayed(process)(fn) for fn in fns)
results = joblib.Parallel(n_jobs=10)(tasks)
for fn, bounds in results:
    df3.loc[detix[fn]] = [fn] + list(bounds)
df3.to_csv('./data/detcrop_train.csv')

In [None]:
mkdir -p ./data/detcrop_test

In [None]:
# Folder that crop() reads the test images from.
image_folder = './data/test'
df2 = pd.read_csv('../kaggle-whale/whaledet_test.csv')
fns = df2.Image
# File name -> row position in df2 (enumerate() replaces `.ix`, removed in pandas 1.0).
detix = {fn: i for i, fn in enumerate(df2.Image)}

def process(fn):
    """Crop one test image, save the crop, and report its crop window.

    The crop is written to ``./data/detcrop_test/<fn>``.  Returns a
    ``(fn, bounds)`` pair, where ``bounds`` is the second value returned
    by ``crop``.
    """
    cropped, bounds = crop(fn)
    out_path = '{}/{}'.format('./data/detcrop_test', fn)
    plt.imsave(out_path, cropped)
    return fn, bounds

# One (initially empty) row per df2 entry; rows are filled in from the crop results.
df3 = pd.DataFrame(columns=['Image', 'y0', 'x0', 'y1', 'x1'], index=np.arange(len(df2)))
# Crop and save every image with 10 parallel joblib jobs.
tasks = (joblib.delayed(process)(fn) for fn in fns)
results = joblib.Parallel(n_jobs=10)(tasks)
for fn, bounds in results:
    df3.loc[detix[fn]] = [fn] + list(bounds)
df3.to_csv('./data/detcrop_test.csv')

# II. Transform Annotations for Cropped Images

In [None]:
# Crop windows written by the training crop step.
df = pd.read_csv('./data/detcrop_train.csv')
# File name -> row position in df (enumerate() replaces `.ix`, removed in pandas 1.0).
detix = {fn: i for i, fn in enumerate(df.Image)}

# Each entry carries a 'filename' and an 'annotations' list whose first item holds 'x' and 'y'.
# Context managers make sure the file handles are closed.
with open('./whale-2015/points1.json') as f:
    points1 = json.load(f)
with open('./whale-2015/points2.json') as f:
    points2 = json.load(f)

p1s = np.array([(p['annotations'][0]['x'], p['annotations'][0]['y']) for p in points1])
p2s = np.array([(p['annotations'][0]['x'], p['annotations'][0]['y']) for p in points2])
fns = np.array([p['filename'] for p in points1])

In [None]:
# Map the first annotation of every points1 entry into 384x384 crop coordinates,
# keeping only the points that fall strictly inside the crop.
points1b = []
for p in points1:
    fn = p['filename']
    x = p['annotations'][0]['x']
    y = p['annotations'][0]['y']
    # Crop window of this image (columns 2 onward of its df row).
    # `.iloc` replaces `.ix`, which was removed in pandas 1.0.
    y0, x0, y1, x1 = df.iloc[detix[fn], 2:]
    ya = (y - y0) * 384. / (y1 - y0)
    xa = (x - x0) * 384. / (x1 - x0)
    if 0 < xa < 384 and 0 < ya < 384:
        # Work on a copy so points1 keeps its original coordinates and
        # re-running this cell does not apply the transform twice.
        q = dict(p)
        q['annotations'] = [dict(a) for a in p['annotations']]
        q['annotations'][0]['x'] = xa
        q['annotations'][0]['y'] = ya
        points1b.append(q)

In [None]:
# Map the first annotation of every points2 entry into 384x384 crop coordinates,
# keeping only the points that fall strictly inside the crop.
points2b = []
for p in points2:
    fn = p['filename']
    x = p['annotations'][0]['x']
    y = p['annotations'][0]['y']
    # Crop window of this image (columns 2 onward of its df row).
    # `.iloc` replaces `.ix`, which was removed in pandas 1.0.
    y0, x0, y1, x1 = df.iloc[detix[fn], 2:]
    ya = (y - y0) * 384. / (y1 - y0)
    xa = (x - x0) * 384. / (x1 - x0)
    if 0 < xa < 384 and 0 < ya < 384:
        # Work on a copy so points2 keeps its original coordinates and
        # re-running this cell does not apply the transform twice.
        q = dict(p)
        q['annotations'] = [dict(a) for a in p['annotations']]
        q['annotations'][0]['x'] = xa
        q['annotations'][0]['y'] = ya
        points2b.append(q)

In [None]:
# Rebuild the (x, y) arrays from the filtered annotation lists.
p1s = np.array([(a['x'], a['y']) for a in (p['annotations'][0] for p in points1b)])
p2s = np.array([(a['x'], a['y']) for a in (p['annotations'][0] for p in points2b)])

In [None]:
# Write the crop-space annotations; the context managers flush and close the files.
with open('./data/detcrop_points1.json', 'w') as f:
    json.dump(points1b, f, indent=0)
with open('./data/detcrop_points2.json', 'w') as f:
    json.dump(points2b, f, indent=0)

# III. Transform test points back to uncropped images

In [None]:
# NOTE(review): the following cell reloads points1/points2 from
# 'detcrop_testpoints*.json' in the working directory and overwrites these
# values - confirm which location is the intended input.
with open('./data/detcrop_testpoints1.json') as f:
    points1 = json.load(f)
with open('./data/detcrop_testpoints2.json') as f:
    points2 = json.load(f)

p1s = np.array([(p['annotations'][0]['x'], p['annotations'][0]['y']) for p in points1])
p2s = np.array([(p['annotations'][0]['x'], p['annotations'][0]['y']) for p in points2])
fns = np.array([p['filename'] for p in points1])

In [None]:
# Crop windows written by the test crop step.
df = pd.read_csv('./data/detcrop_test.csv')
# File name -> row position in df (enumerate() replaces `.ix`, removed in pandas 1.0).
detix = {fn: i for i, fn in enumerate(df.Image)}

# Context managers make sure the file handles are closed.
with open('detcrop_testpoints1.json') as f:
    points1 = json.load(f)
with open('detcrop_testpoints2.json') as f:
    points2 = json.load(f)

p1s = np.array([(p['annotations'][0]['x'], p['annotations'][0]['y']) for p in points1])
p2s = np.array([(p['annotations'][0]['x'], p['annotations'][0]['y']) for p in points2])
fns = np.array([p['filename'] for p in points1])

In [None]:
# Map the first annotation of every points1 entry from 384x384 crop
# coordinates back to original-image pixel coordinates.
points1b = []
for p in points1:
    fn = p['filename']
    xa = p['annotations'][0]['x']
    ya = p['annotations'][0]['y']
    # Crop window of this image (columns 2 onward of its df row).
    # `.iloc` replaces `.ix`, which was removed in pandas 1.0.
    y0, x0, y1, x1 = df.iloc[detix[fn], 2:]
    # Undo the resize to 384, then add the crop offset back.
    y = ya * (y1 - y0) / 384. + y0
    x = xa * (x1 - x0) / 384. + x0
    # Work on a copy so points1 keeps its crop-space coordinates and
    # re-running this cell does not apply the transform twice.
    q = dict(p)
    q['annotations'] = [dict(a) for a in p['annotations']]
    q['annotations'][0]['x'] = x
    q['annotations'][0]['y'] = y
    points1b.append(q)

In [None]:
# Map the first annotation of every points2 entry from 384x384 crop
# coordinates back to original-image pixel coordinates.
points2b = []
for p in points2:
    fn = p['filename']
    xa = p['annotations'][0]['x']
    ya = p['annotations'][0]['y']
    # Crop window of this image (columns 2 onward of its df row).
    # `.iloc` replaces `.ix`, which was removed in pandas 1.0.
    y0, x0, y1, x1 = df.iloc[detix[fn], 2:]
    # Undo the resize to 384, then add the crop offset back.
    y = ya * (y1 - y0) / 384. + y0
    x = xa * (x1 - x0) / 384. + x0
    # Work on a copy so points2 keeps its crop-space coordinates and
    # re-running this cell does not apply the transform twice.
    q = dict(p)
    q['annotations'] = [dict(a) for a in p['annotations']]
    q['annotations'][0]['x'] = x
    q['annotations'][0]['y'] = y
    points2b.append(q)

In [None]:
# Write the original-space test points; the context managers flush and close the files.
# NOTE(review): these are the same paths an earlier cell loads crop-space points
# from, so re-running that cell afterwards would read already-transformed
# coordinates - consider a distinct output name.
with open('./data/detcrop_testpoints1.json', 'w') as f:
    json.dump(points1b, f, indent=0)
with open('./data/detcrop_testpoints2.json', 'w') as f:
    json.dump(points2b, f, indent=0)