In [1]:
import os, random, glob, pickle, collections, math
import numpy as np
import pandas as pd
import ujson as json
from PIL import Image
import gc
import glob
import shutil, csv, time

#import utils; reload(utils)
#from utils import *

from sklearn.metrics import log_loss
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
%matplotlib inline 

from keras.models import Sequential, Model, load_model, model_from_json
from keras.layers import GlobalAveragePooling2D, Flatten, Dropout, Dense, LeakyReLU
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from keras.preprocessing import image
from keras import backend as K
# Select the 'tf' image dimension ordering in the Keras backend.
# NOTE(review): presumably this means channels-last (rows, cols, channels), which
# matches the x[:, :, c] indexing in preprocess_input -- confirm for the Keras
# version in use.
K.set_image_dim_ordering('tf')

Using Theano backend.
Using gpu device 0: Tesla K80 (CNMeM is disabled, cuDNN 5103)


In [2]:
# ---- Notebook configuration ----
# Root of the training images (not referenced by the code visible in this notebook).
TRAIN_DIR = '../data/fish/train-all/'
# Root of the test images; a sub-folder name and '<fname>.jpg' are appended when
# the images are opened.
TEST_DIR =  '../data/fish/test/' 
# Class labels. Not referenced by the code visible here.
FISH_CLASSES = ['NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']
# Not referenced by the code visible here.
# NOTE(review): looks like a detection-confidence cut-off from a related notebook -- confirm.
CONF_THRESH = 0.8
# Target crop size in pixels: crops are resized with target_size=(ROWS, COLS) and
# the same values enter the box-expansion ratio together with `p`.
ROWS = 224
COLS = 224
# The next six settings are not referenced by the code visible here.
# NOTE(review): they look like training hyper-parameters -- confirm before removing.
BATCHSIZE = 32 # 256 #64
LEARNINGRATE = 1e-4
BG_THRESH_HI = 0.3
BG_THRESH_LO = 0.1
bags = 5
learn_round = 2
# Base data directory: read by refresh_directory_structure and used when saving crops.
path = '../data/fish/'
# Padding parameter: detection boxes are expanded on each side by p/(COLS-2*p) of
# their width and p/(ROWS-2*p) of their height.
p=16
# Not referenced by the code visible here.
full = True
# Sub-folder of <path>/check/ that receives the cropped test detections.
chk_folder = 'yolo544_0.7up'

def load_img(path, bbox, target_size=None):
    """Open the image at ``path``, convert it to RGB and crop it to ``bbox``.

    path:        image file to open.
    bbox:        indexable whose first four items are passed to ``crop`` as
                 (bbox[0], bbox[1], bbox[2], bbox[3]).
    target_size: optional pair; when truthy the crop is resized to
                 (target_size[1], target_size[0]).

    Returns the cropped (and possibly resized) PIL image.
    """
    rgb = Image.open(path).convert('RGB')
    region = rgb.crop((bbox[0], bbox[1], bbox[2], bbox[3]))
    if not target_size:
        return region
    # resize() is given (target_size[1], target_size[0]), i.e. the pair is swapped.
    return region.resize((target_size[1], target_size[0]))

def preprocess_input(x):
    """Reverse the channel order of an image array and subtract per-channel means.

    The original author labels this as "resnet50 image preprocessing"
    ('RGB' -> 'BGR' followed by mean subtraction).

    x: array indexed as x[:, :, channel]; it is NOT modified.

    Returns a new floating-point array with the last axis reversed and
    103.939 / 116.779 / 123.68 subtracted from channels 0 / 1 / 2 of the
    reversed array. Floating-point inputs keep their dtype; any other dtype
    (e.g. uint8) is promoted to float64 so the subtraction neither fails nor
    truncates.
    """
    x = np.asarray(x)
    out_dtype = x.dtype if np.issubdtype(x.dtype, np.floating) else np.float64
    # x[:, :, ::-1] is only a view of the caller's data; np.array(...) makes a
    # real copy so the in-place subtractions below cannot leak back into x.
    out = np.array(x[:, :, ::-1], dtype=out_dtype)
    out[:, :, 0] -= 103.939
    out[:, :, 1] -= 116.779
    out[:, :, 2] -= 123.68
    return out
def refresh_directory_structure(name, sub_dirs):
    """Recreate ``<path>/<name>`` from scratch with the given sub-directories.

    name:     directory name joined onto the module-level ``path``.
    sub_dirs: iterable of directory names created inside it.

    Any existing ``<path>/<name>`` tree is deleted first, so everything
    previously stored under it is lost.
    """
    root = os.path.join(path, name)
    already_there = os.path.exists(root)
    if already_there:
        # Wipe the old tree so the new structure starts empty.
        shutil.rmtree(root)
    os.makedirs(root)
    for child in sub_dirs:
        child_dir = os.path.join(root, child)
        os.makedirs(child_dir)

In [3]:
# Load up YOLO bounding boxes for each class.
# Every line of a results file is read as six space-separated fields:
# fname proba x0 y0 x1 y1.
all_files = glob.glob(os.path.join('../darknet/results', "*.txt"))
# Match on the file name only (not on directory names) and sort so that the
# load order does not depend on the order glob happens to return.
allFiles = sorted(f for f in all_files if 'FISH544.' in os.path.basename(f))
if not allFiles:
    # Fail here with a clear message instead of pandas' "No objects to concatenate".
    raise IOError("No 'FISH544.' result files found in ../darknet/results")
list_ = []
for file_ in allFiles:
    df = pd.read_csv(file_, index_col=None, header=None, sep=" ",
                     names=['fname', 'proba', 'x0', 'y0', 'x1', 'y1'])
    # Class label = text after the last '_' of the FILE NAME, up to the first '.'.
    # Parsing the basename avoids the leading '..' of the relative directory
    # producing an empty label when the file name contains no underscore.
    df['class'] = os.path.basename(file_).split('_')[-1].split('.')[0]
    list_.append(df)
yolo_frame = pd.concat(list_)
# Filtering and sorting of the detections happen in the following cells.
yolo_frame.head(2)

Unnamed: 0,fname,proba,x0,y0,x1,y1,class
0,img_06237,0.018711,593.570007,0.157189,703.270691,29.283024,FISH544
1,img_06237,0.759979,582.109192,363.861847,679.074768,544.313232,FISH544


In [4]:
# Drop low-confidence detections. .copy() gives an independent frame so the
# column assignment below does not write into a slice of the unfiltered data
# (which is what triggers pandas' SettingWithCopyWarning).
yolo_frame = yolo_frame[yolo_frame['proba'] > 0.4].copy()
# Highest remaining score per image, repeated on each of that image's rows.
# Computed from yolo_frame itself: the earlier code grouped `df`, which is only
# the last results file read by the loading loop and is aligned by index.
yolo_frame['proba_max'] = yolo_frame.groupby('fname')['proba'].transform('max')
# Order by image name, best detection first. sort_values replaces the
# deprecated DataFrame.sort.
yolo_frame = yolo_frame.sort_values(['fname', 'proba'], ascending=[True, False])
yolo_frame.head(5)

  app.launch_new_instance()


Unnamed: 0,fname,proba,x0,y0,x1,y1,class,proba_max
5582,img_00007,0.937557,692.043762,277.020538,1171.319214,503.0737,FISH544,0.937557
7014,img_00009,0.898794,308.747009,133.555511,671.860291,267.542755,FISH544,0.898794
7013,img_00009,0.85489,591.912354,100.679451,941.899048,209.58194,FISH544,0.898794
7016,img_00009,0.834543,921.427551,139.406799,1161.772217,295.115021,FISH544,0.898794
1626,img_00018,0.883484,603.907715,208.838745,961.977539,415.435059,FISH544,0.883484


In [5]:
## Cut off the predictions on a probability
# Other cut-offs, left commented out in the original:
#yolo_frame = yolo_frame[yolo_frame['proba_max']>0.6]
#yolo_frame = yolo_frame[yolo_frame['proba_max']<0.7]
# Keep only images whose best detection scores above 0.7.
yolo_frame = yolo_frame[yolo_frame['proba_max'] > 0.7]
# Per image keep either
#   * its single best box, when that best score is below 0.85, or
#   * every box scoring above 0.8449.
# One OR-ed mask is used instead of concatenating the two selections: the
# thresholds overlap, so a best box with 0.8449 < proba < 0.85 satisfied both
# conditions and used to appear twice in the result.
is_best_box = yolo_frame['proba'] == yolo_frame['proba_max']
keep = (is_best_box & (yolo_frame['proba_max'] < 0.85)) | (yolo_frame['proba'] > .8449)
# sort_values replaces the deprecated DataFrame.sort.
yolo_frame = (yolo_frame[keep]
              .sort_values(['fname', 'proba'], ascending=[True, False])
              .reset_index(drop=True))



In [6]:
# Number of distinct image names left in yolo_frame, followed by a preview.
distinct_names = yolo_frame['fname'].unique()
print(len(distinct_names))
yolo_frame.head(5)

787


Unnamed: 0,fname,proba,x0,y0,x1,y1,class,proba_max
0,img_00007,0.937557,692.043762,277.020538,1171.319214,503.0737,FISH544,0.937557
1,img_00009,0.898794,308.747009,133.555511,671.860291,267.542755,FISH544,0.898794
2,img_00009,0.85489,591.912354,100.679451,941.899048,209.58194,FISH544,0.898794
3,img_00018,0.883484,603.907715,208.838745,961.977539,415.435059,FISH544,0.883484
4,img_00018,0.868072,673.314819,111.830704,1022.509033,219.649155,FISH544,0.883484


In [7]:
# Build one row per YOLO detection: the box is expanded by a margin on every
# side and then clipped to the image bounds.
bbox_columns = ['image_folder', 'image_file', 'crop_index', 'crop_class',
                'xmin', 'ymin', 'xmax', 'ymax']

# Margin per side as a fraction of the box width / height.
# float() matters: with integer p, COLS and ROWS, Python 2 evaluates
# p/(COLS-2*p) as integer division (16/192 == 0), which silently disabled the
# expansion altogether.
pad_frac_x = float(p) / (COLS - 2 * p)
pad_frac_y = float(p) / (ROWS - 2 * p)

iddict = {}        # image name -> index of the most recent crop for that image
image_sizes = {}   # image path -> (width, height), so each file is opened once
bbox_rows = []     # collected rows; the frame is built once at the end
for c in ['test']:
    print(c)
    # Columns are read by name rather than by position in the frame.
    detections = zip(yolo_frame['fname'], yolo_frame['class'],
                     yolo_frame['x0'], yolo_frame['y0'],
                     yolo_frame['x1'], yolo_frame['y1'])
    for image_file, fish_class, xmin, ymin, xmax, ymax in detections:
        # Crops of one image are numbered 0, 1, 2, ... in order of appearance.
        iddict[image_file] = iddict.get(image_file, -1) + 1

        image_path = TEST_DIR + c + '/' + image_file + '.jpg'
        if image_path not in image_sizes:
            # `with` closes the file handle; the local name avoids shadowing
            # the `image` module imported from keras.preprocessing.
            with Image.open(image_path) as img_file:
                image_sizes[image_path] = img_file.size
        width_image, height_image = image_sizes[image_path]

        delta_width = pad_frac_x * (xmax - xmin)
        delta_height = pad_frac_y * (ymax - ymin)
        # Expand, then clip to the image.
        xmin_clip = max(xmin - delta_width, 0)
        ymin_clip = max(ymin - delta_height, 0)
        xmax_clip = min(xmax + delta_width, width_image)
        ymax_clip = min(ymax + delta_height, height_image)
        # A box lying completely outside the image would be empty after clipping.
        assert xmin_clip < xmax_clip
        assert ymin_clip < ymax_clip

        bbox_rows.append([c, image_file + '.jpg', iddict[image_file], fish_class,
                          xmin_clip, ymin_clip, xmax_clip, ymax_clip])

# Building the frame once avoids the quadratic cost of appending with
# .loc[len(df)] inside the loop; sort_values replaces the deprecated sort.
GTbbox_test_df = pd.DataFrame(bbox_rows, columns=bbox_columns)
GTbbox_test_df = GTbbox_test_df.sort_values(['image_file', 'crop_index']).reset_index(drop=True)

test




In [8]:
# Sanity check: (number of crops, number of columns) of the crop table.
GTbbox_test_df.shape

(880, 8)

In [9]:
# Recreate <path>/check/<chk_folder> as an empty folder for the crops written below.
# refresh_directory_structure deletes any existing <path>/check tree first.
refresh_directory_structure('check', [chk_folder])

In [10]:
def load_img(path, bbox, target_size=None):
    """Crop a squared, slightly padded region around ``bbox`` from an image.

    path:        image file to open.
    bbox:        sequence [x0, y0, x1, y1]; indices 0/2 are compared against
                 the image width and 1/3 against its height. The sequence is
                 copied and NOT modified.
    target_size: optional pair; when truthy the crop is resized to
                 (target_size[1], target_size[0]).

    Processing steps:
      1. Each axis is stretched to ``length`` = the larger of the two box
         extents, growing towards the high-coordinate side unless the image
         bound check below fires.
      2. 5% of ``length`` is added on every side.
      3. The image is converted to RGB, cropped and optionally resized.
      4. If the ORIGINAL box was taller than wide, the result is rotated
         by -90 degrees.

    Returns the resulting PIL image.
    """
    # Open the file once and release the handle as soon as the pixel data has
    # been converted into an independent in-memory image.
    with Image.open(path) as src:
        img = src.convert('RGB')
    imsize = img.size  # (width, height)

    box = list(bbox)  # private copy: the caller's bbox must stay untouched
    x_extent = box[2] - box[0]
    y_extent = box[3] - box[1]
    length = max(x_extent, y_extent)

    # Make it square.
    # NOTE(review): the shift term for each axis is derived from the OTHER
    # axis' extent (x uses the y extent and vice versa). This is exactly what
    # the previous code computed and is kept so the crops stay the same --
    # confirm whether it is intentional.
    other_extent = [y_extent, x_extent]
    for i in range(2):
        offset = length - other_extent[i]
        if box[i] + length + (offset/2) > imsize[i]:
            box[i] = box[2+i] - length + (offset/2)
            box[2+i] = box[2+i] + (offset/2)
        else:
            box[2+i] = box[i] + length
        # 5% margin on both sides of this axis.
        box[i] -= length*0.05
        box[2+i] += length*0.05

    cropped = img.crop((box[0], box[1], box[2], box[3]))
    if target_size:
        cropped = cropped.resize((target_size[1], target_size[0]))
    # Rotation is decided on the extents measured BEFORE squaring.
    if x_extent < y_extent:
        cropped = cropped.rotate(-90)
    return cropped

# Write every test crop to <path>/check/<chk_folder>/ for visual inspection.
for index, row in GTbbox_test_df.iterrows():
    image_folder = row['image_folder']
    image_name = row['image_file']
    bbox = [row['xmin'], row['ymin'], row['xmax'], row['ymax']]
    cropped = load_img(os.path.join('../data/fish', image_folder, image_name),
                       bbox, target_size=(ROWS, COLS))
    # Put crop_index in the output name: an image can have several detections,
    # and saving them all under the bare image name made each crop overwrite
    # the previous one.
    stem, ext = os.path.splitext(image_name)
    crop_name = '%s_%d%s' % (stem, int(row['crop_index']), ext)
    cropped.save(os.path.join(path, 'check', chk_folder, crop_name))
    

In [None]:
chk_folder = 'train'
# Now look at the training images
file_name = 'GTbbox_df.pickle'
pickle_path = '../data/' + file_name
# Check for the cached boxes BEFORE touching the check directory: without the
# file the cell used to fall through and fail later with a NameError on
# GTbbox_df, after the directory had already been wiped.
if not os.path.exists(pickle_path):
    raise IOError('Bounding-box cache not found: ' + pickle_path)
# NOTE: refresh_directory_structure removes the whole <path>/check tree first,
# so crops written there by earlier cells are deleted.
refresh_directory_structure('check', [chk_folder])
print ('Loading from file '+file_name)
# Unpickling can execute arbitrary code -- only load a file you produced yourself.
GTbbox_df = pd.read_pickle(pickle_path)
GTbbox_df.head(3)

In [None]:
# Write one crop per GTbbox_df row to <path>/check/<chk_folder>/.
# Positional layout used below: [0] folder, [1] file name, [3] label, [4:8] box.
# NOTE(review): assumes GTbbox_df has the same column order as GTbbox_test_df --
# confirm against the notebook that produced the pickle.
# NOTE(review): the output name is the file name only, so several rows for the
# same image overwrite each other.
for index, record in GTbbox_df.iterrows():
    values = record.tolist()
    image_file = os.path.join(values[0], values[1])
    fish = values[3]
    bbox = values[4:8]
    source_path = os.path.join('../data/fish/train-all', image_file)
    target_path = os.path.join(path, 'check', chk_folder, values[1])
    cropped = load_img(source_path, bbox, target_size=(ROWS,COLS))
    cropped.save(target_path)

In [None]:

# Base data directory; same value as the `path` assigned in the configuration cell.
path = '../data/fish/'

In [None]:
# Write the training crops again, this time through load_img2.
# NOTE(review): load_img2 is not defined anywhere in the visible notebook, so on
# a fresh kernel this cell raises NameError -- define/import it or call load_img.
# NOTE(review): the output name is row[1] only, so rows sharing a file name
# overwrite each other, and files are written to the same folder as the
# previous training-crop cell.
chk_folder = 'train'
for index, row in GTbbox_df.iterrows():
    # Positional access: [0] folder, [1] file name, [3] label, [4:8] box.
    row = row.tolist()
    image_file = os.path.join(row[0], row[1])
    fish = row[3]
    bbox = row[4:8]
    cropped = load_img2(os.path.join('../data/fish/train-all',image_file),bbox,target_size=(ROWS,COLS))
    cropped.save(os.path.join(path, 'check', chk_folder, row[1]))
