In [39]:
import os, random, glob, pickle, collections, math
import numpy as np
import pandas as pd
import ujson as json
from PIL import Image
import gc
import glob
import shutil, csv, time

#import utils; reload(utils)
#from utils import *

from sklearn.metrics import log_loss
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import matplotlib.pyplot as plt
%matplotlib inline 

from keras.models import Sequential, Model, load_model, model_from_json
from keras.layers import GlobalAveragePooling2D, Flatten, Dropout, Dense, LeakyReLU
from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import np_utils
from keras.preprocessing import image
from keras import backend as K
# Use TensorFlow-style image tensors (channels as the last axis).
# `set_image_dim_ordering('tf')` is the legacy spelling; Keras 2 exposes the
# same setting as `set_image_data_format('channels_last')` and later releases
# dropped the old name, so prefer the new call whenever the backend has it.
if hasattr(K, 'set_image_data_format'):
    K.set_image_data_format('channels_last')
else:
    K.set_image_dim_ordering('tf')

In [40]:
# --- Input data locations ---------------------------------------------------
path = '../data/fish/'                   # data root; check folders are created under it
TRAIN_DIR = '../data/fish/train-all/'
TEST_DIR = '../data/fish/test/'          # alternative: '../RFCN/JPEGImages/'
# RFCN_MODEL = 'resnet101_rfcn_ohem_iter_30000'

# --- Output locations -------------------------------------------------------
CHECKPOINT_DIR = './checkpoints/checkpoint05/'
LOG_DIR = './logs/log05/'
chk_folder = 'yolo_11k_to0.85'           # sub-folder of <path>/check that receives crops

# --- Classes and crop geometry ----------------------------------------------
FISH_CLASSES = ['NoF', 'ALB', 'BET', 'DOL', 'LAG', 'OTHER', 'SHARK', 'YFT']
ROWS = 224                               # crop height passed to load_img
COLS = 224                               # crop width passed to load_img
p = 16                                   # margin used when expanding detected boxes

# --- Training / detection settings ------------------------------------------
# NOTE(review): none of these are read by the cells shown next to this one;
# presumably they are consumed by training cells elsewhere in the notebook.
BATCHSIZE = 32                           # 256 #64
LEARNINGRATE = 1e-4
CONF_THRESH = 0.8
BG_THRESH_HI = 0.3
BG_THRESH_LO = 0.1
bags = 5
learn_round = 2
full = True

def load_img(path, bbox, target_size=None):
    """Open an image, convert it to RGB and return the region given by `bbox`.

    Args:
        path: location of the image file, handed to `Image.open`.
        bbox: indexable whose first four items form the box passed to
            `Image.crop`.
        target_size: optional `(rows, cols)` pair. When truthy the crop is
            resized; the pair is swapped to `(cols, rows)` for `resize`.

    Returns:
        The cropped (and possibly resized) image object.
    """
    rgb = Image.open(path).convert('RGB')
    box = (bbox[0], bbox[1], bbox[2], bbox[3])
    region = rgb.crop(box)
    if not target_size:
        return region
    # target_size is (rows, cols) but resize wants the column count first.
    return region.resize((target_size[1], target_size[0]))

def preprocess_input(x):
    """ResNet50-style image preprocessing: RGB -> BGR, then per-channel offsets.

    The channel axis (the third one) is reversed and the constants
    103.939, 116.779 and 123.68 are subtracted from the resulting channels
    0, 1 and 2 (presumably the ImageNet channel means -- confirm against the
    pretrained weights in use).

    The result is always a new array. Reversing with a slice alone returns a
    view that shares memory with the argument, so subtracting in place on that
    view would silently modify the caller's image as well.

    Args:
        x: array-like indexed as (rows, cols, channels).

    Returns:
        A new array of the same shape. Floating-point inputs keep their
        dtype; any other dtype (e.g. uint8 pixels) is promoted to float32 so
        the subtraction neither truncates nor fails.
    """
    x = np.asarray(x)
    out_dtype = x.dtype if np.issubdtype(x.dtype, np.floating) else np.float32
    # 'RGB'->'BGR'; np.array(...) copies, detaching the result from the input.
    bgr = np.array(x[:, :, ::-1], dtype=out_dtype)
    bgr[:, :, 0] -= 103.939
    bgr[:, :, 1] -= 116.779
    bgr[:, :, 2] -= 123.68
    return bgr
def refresh_directory_structure(name, sub_dirs, base_dir=None):
    """Delete and recreate the directory `<base_dir>/<name>` with empty sub-folders.

    Anything already stored under `<base_dir>/<name>` is removed.

    Args:
        name: directory to refresh, relative to `base_dir`.
        sub_dirs: iterable of sub-directory names to create inside it.
        base_dir: root directory; defaults to the module-level `path`.

    Raises:
        ValueError: if `name` resolves to `base_dir` itself (e.g. '' or '.'),
            because refreshing it would wipe the whole data root.
    """
    root = path if base_dir is None else base_dir
    gdir = os.path.join(root, name)
    # Guard the rmtree below: os.path.join(root, '') is the root itself.
    if os.path.abspath(gdir) == os.path.abspath(root):
        raise ValueError(
            'refusing to refresh %r: name %r resolves to the base directory' % (gdir, name))
    if os.path.exists(gdir):
        shutil.rmtree(gdir)
    os.makedirs(gdir)
    for sub_dir in sub_dirs:
        os.makedirs(os.path.join(gdir, sub_dir))

In [41]:
# Load up YOLO bounding boxes for each class.
# Every matching file in '../yolo_coords' is space-separated with one
# detection per line: fname proba x0 y0 x1 y1.
all_files = glob.glob(os.path.join('../yolo_coords', "*.txt"))
allFiles = [f for f in all_files if 'FISH' in f]
if not allFiles:
    # pd.concat([]) further down would fail with a much less helpful message.
    raise ValueError("no YOLO coordinate files containing 'FISH' found in ../yolo_coords")

list_ = []
for file_ in allFiles:
    df = pd.read_csv(file_, index_col=None, header=None, sep=" ",
                     names=['fname', 'proba', 'x0', 'y0', 'x1', 'y1'])
    # Class label = the text between the last '_' of the path and the next '.'.
    df['class'] = file_.split('_')[-1].split('.')[0]
    list_.append(df)
yolo_frame = pd.concat(list_)

# Cut off the predictions on a probability (.copy() so the column added
# below is written to an independent frame, not to a slice of the concat).
yolo_frame = yolo_frame[yolo_frame['proba'] > 0.85].copy()

# Sort the predictions on the area: per image, largest box first.
yolo_frame['area'] = (yolo_frame['x1'] - yolo_frame['x0']) * (yolo_frame['y1'] - yolo_frame['y0'])
yolo_frame = yolo_frame.sort_values(['fname', 'area'], ascending=[True, False]).reset_index(drop=True)

# Each box is padded on every side by p/(COLS-2p) of its width (resp.
# p/(ROWS-2p) of its height): once the padded box is resized to COLS x ROWS
# the original box spans COLS-2p x ROWS-2p pixels with a p-pixel margin.
# float() keeps this a true division on Python 2, where p/(COLS-2*p) is 0.
pad_frac_w = float(p) / (COLS - 2 * p)
pad_frac_h = float(p) / (ROWS - 2 * p)

bbox_columns = ['image_folder', 'image_file', 'crop_index', 'crop_class',
                'xmin', 'ymin', 'xmax', 'ymax']
detections = yolo_frame[['fname', 'proba', 'x0', 'y0', 'x1', 'y1', 'class', 'area']].values.tolist()

records = []      # rows are collected here and turned into a frame once
image_sizes = {}  # image path -> (width, height); each image is opened only once
iddict = {}       # image name -> index of its most recent crop
for c in ['test']:
    print(c)
    for image_file, proba, xmin, ymin, xmax, ymax, fish_class, area in detections:
        iddict[image_file] = iddict.get(image_file, -1) + 1

        # NOTE(review): this reads from TEST_DIR/<c>/ while the cell that saves
        # the crops opens '../data/fish/<image_folder>/' -- confirm both exist.
        image_path = os.path.join(TEST_DIR, c, image_file + '.jpg')
        if image_path not in image_sizes:
            with Image.open(image_path) as image:
                image_sizes[image_path] = image.size
        width_image, height_image = image_sizes[image_path]

        width = xmax - xmin
        height = ymax - ymin
        delta_width = pad_frac_w * width
        delta_height = pad_frac_h * height
        xmin_expand = xmin - delta_width
        ymin_expand = ymin - delta_height
        xmax_expand = xmin + width + delta_width
        ymax_expand = ymin + height + delta_height

        # The padded box, clipped to the image, must still have positive area.
        assert max(xmin_expand, 0) < min(xmax_expand, width_image)
        assert max(ymin_expand, 0) < min(ymax_expand, height_image)
        records.append([c, image_file + '.jpg', iddict[image_file], fish_class,
                        max(xmin_expand, 0), max(ymin_expand, 0),
                        min(xmax_expand, width_image), min(ymax_expand, height_image)])

GTbbox_test_df = pd.DataFrame(records, columns=bbox_columns)
GTbbox_test_df = GTbbox_test_df.sort_values(['image_file', 'crop_index']).reset_index(drop=True)




test




In [42]:
# Preview the first two expanded test boxes.
GTbbox_test_df.head(2)

Unnamed: 0,image_folder,image_file,crop_index,crop_class,xmin,ymin,xmax,ymax
0,test,img_00007.jpg,0.0,FISH,740.284363,264.430939,1153.247192,521.133301
1,test,img_00009.jpg,0.0,FISH,345.490326,130.617371,669.393555,264.911346


In [43]:
# Start from an empty <path>/check directory containing only the crop folder;
# anything previously stored under <path>/check is removed.
refresh_directory_structure(name='check', sub_dirs=[chk_folder])

In [44]:
# Save every expanded test box as a ROWS x COLS crop for visual inspection.
# `load_img` (defined with the other helpers near the top of the notebook)
# does the RGB conversion, crop and resize, so it is not redefined here.
check_dir = os.path.join(path, 'check', chk_folder)
for index, row in GTbbox_test_df.iterrows():
    bbox = [row['xmin'], row['ymin'], row['xmax'], row['ymax']]
    source_file = os.path.join('../data/fish', row['image_folder'], row['image_file'])
    cropped = load_img(source_file, bbox, target_size=(ROWS, COLS))
    # One image can carry several boxes: put crop_index in the file name so
    # later crops do not overwrite earlier ones (img_00007_0.jpg, img_00007_1.jpg, ...).
    stem, ext = os.path.splitext(row['image_file'])
    crop_name = '%s_%d%s' % (stem, int(row['crop_index']), ext)
    cropped.save(os.path.join(check_dir, crop_name))
    

In [47]:
chk_folder = 'train'
# Now look at the training images
file_name = 'GTbbox_df.pickle'
pickle_file = os.path.join('../data', file_name)
if not os.path.exists(pickle_file):
    # Fail before touching the check folder, instead of falling through to a
    # NameError (fresh kernel) or to a stale GTbbox_df from an earlier run.
    raise IOError('Ground-truth box file not found: ' + pickle_file)

# NOTE(review): this removes the whole <path>/check directory, including any
# crops saved there by earlier cells -- confirm that is intended.
refresh_directory_structure('check', [chk_folder])

print ('Loading from file '+file_name)
# Unpickling executes arbitrary code: only load pickles produced by this project.
GTbbox_df = pd.read_pickle(pickle_file)
GTbbox_df.head(3)

Loading from file GTbbox_df.pickle


Unnamed: 0,image_folder,image_file,crop_index,crop_class,xmin,ymin,xmax,ymax
0,ALB,img_00003.jpg,0.0,ALB,377.0,66.0,730.0,173.0
1,ALB,img_00003.jpg,1.0,ALB,670.0,95.0,1008.0,219.0
2,ALB,img_00003.jpg,2.0,ALB,820.0,328.0,1123.0,485.0


In [50]:
# Save every ground-truth training box as a ROWS x COLS crop for visual inspection.
check_dir = os.path.join(path, 'check', chk_folder)
for index, row in GTbbox_df.iterrows():
    bbox = [row['xmin'], row['ymin'], row['xmax'], row['ymax']]
    source_file = os.path.join(TRAIN_DIR, row['image_folder'], row['image_file'])
    cropped = load_img(source_file, bbox, target_size=(ROWS, COLS))
    # An image can hold several fish (img_00003.jpg has crop_index 0, 1 and 2):
    # put crop_index in the file name so the crops do not overwrite each other.
    stem, ext = os.path.splitext(row['image_file'])
    crop_name = '%s_%d%s' % (stem, int(row['crop_index']), ext)
    cropped.save(os.path.join(check_dir, crop_name))

In [None]:
def load_img2(path, bbox, target_size=None):
    """Crop a square region around `bbox` from the image at `path`.

    The box is grown to a square whose side is the longer of its two sides.
    Along each axis the far edge is pushed out; if that would pass the image
    border, the near edge is pulled back instead. The caller's `bbox` is left
    untouched.

    Args:
        path: location of the image file.
        bbox: indexable whose first four items are (x0, y0, x1, y1).
        target_size: optional `(rows, cols)` pair; when truthy the crop is
            resized to it.

    Returns:
        The cropped (and possibly resized) RGB image.

    Note:
        When the pulled-back edge lands below 0 (square larger than the room
        available) the box is handed to `crop` unchanged, i.e. it may extend
        outside the image.
    """
    # Open the file once and release it as soon as the pixels are converted.
    with Image.open(path) as img:
        imsize = img.size
        rgb = img.convert('RGB')

    # Work on a private copy: also lets tuples and other read-only boxes in.
    box = [bbox[0], bbox[1], bbox[2], bbox[3]]
    box_width, box_height = box[2] - box[0], box[3] - box[1]
    length = max(box_width, box_height)
    for i in [0, 1]:  # 0: x axis / image width, 1: y axis / image height
        if box[i] + length > imsize[i]:
            box[i] = box[2 + i] - length
        else:
            box[2 + i] = box[i] + length

    cropped = rgb.crop((box[0], box[1], box[2], box[3]))
    if target_size:
        cropped = cropped.resize((target_size[1], target_size[0]))
    return cropped

In [56]:
# Quick check: first entry of the `size` tuple of one training image.
# NOTE(review): this rebinds the module-level `path`, which the config cell
# sets to the data root and which refresh_directory_structure and the crop
# loops read -- re-running those cells after this one will use the image
# path instead. Consider a dedicated name here.
image_file = 'ALB/img_00003.jpg'
path = os.path.join('../data/fish/train-all',image_file)
Image.open(path).size[0]


1280