# The Nature Conservancy Fisheries Monitoring Kaggle competition

The goal of this competition is to develop algorithms to automatically detect and classify species of tunas, sharks and more that fishing boats catch, which will accelerate the video review process. Faster review and more reliable data will enable countries to reallocate human capital to management and enforcement activities which will have a positive impact on conservation and our planet.

### Import libraries : 

In [25]:
import os,glob,cv2,datetime,time,warnings
import numpy as np
from sklearn.cross_validation import KFold
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D, AveragePooling2D
from keras.optimizers import SGD, Adagrad
from keras.callbacks import EarlyStopping
from keras.utils import np_utils
from sklearn.metrics import log_loss
from keras import __version__ as keras_version

### Import images : 
Get image and resize them to 48*48 pixels

In [26]:
#Get and resize image
def get_im_cv2(path, img_rows, img_cols):
    img = cv2.imread(path, 0)
    resized = cv2.resize(img, (img_cols, img_rows), interpolation = cv2.INTER_LINEAR)
    return resized

#Load train data 
def load_train():
    X_train = []
    X_train_id = []
    y_train = []
    start_time = time.time()
    
    print('Read train images')
    folders = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']
    for fold in folders:
        index = folders.index(fold) #Get the folder index
        print('Load folder {} (Index : {})'.format(fold,index))
        path = os.path.join('.','data','train',fold,'*.jpg')
        files = glob.glob(path)
        for file in files:
            flbase = os.path.basename(file)
            img = get_im_cv2(file,32,32)
            X_train.append(img) #Append the image file in the train array
            X_train_id.append(file) #Append the image id in the X_train_id array
            y_train.append(index) #Add the index of folder 
    print('Read train data time {} seconds'.format(round(time.time() - start_time,2)))
    return X_train,y_train,X_train_id

#Load test data
def load_test():
    path = os.path.join('.','data','test_stg1','*.jpg')
    files = sorted(glob.glob(path))
    
    X_test = []
    X_test_id = []
    
    print('Read train images')
    for file in files:
        flbase = os.path.basename(file)
        img = get_im_cv2(file,32,32)
        X_test.append(img)
        X_train_id.append(flbase)
    return X_test,X_test_id

### Read and normalize data 

In [31]:
def read_and_normalize_train_data():
    train_data, train_target, train_id = load_train()

    print('Convert to numpy...')
    train_data = np.array(train_data, dtype=np.uint8)
    train_target = np.array(train_target, dtype=np.uint8)

    print('Reshape...')
    train_data = train_data.transpose()

    print('Convert to float...')
    train_data = train_data.astype('float32')
    train_data = train_data / 255
    train_target = np_utils.to_categorical(train_target, 8)

    print('Train shape:', train_data.shape)
    print(train_data.shape[0], 'train samples')
    return train_data, train_target, train_id

In [29]:
X_train,y_train,X_train_id = load_train()

Read train images
Load folder ALB (Index : 0)
Load folder BET (Index : 1)
Load folder DOL (Index : 2)
Load folder LAG (Index : 3)
Load folder NoF (Index : 4)
Load folder OTHER (Index : 5)
Load folder SHARK (Index : 6)
Load folder YFT (Index : 7)
Read train data time 39.89 seconds


In [32]:
t_data, y_train, t_id = read_and_normalize_train_data()

Read train images
Load folder ALB (Index : 0)
Load folder BET (Index : 1)
Load folder DOL (Index : 2)
Load folder LAG (Index : 3)
Load folder NoF (Index : 4)
Load folder OTHER (Index : 5)
Load folder SHARK (Index : 6)
Load folder YFT (Index : 7)
Read train data time 37.23 seconds
Convert to numpy...
Reshape...
Convert to float...
('Train shape:', (32, 32, 3777))
(32, 'train samples')


In [36]:
t_data[0]

array([[ 0.29019609,  0.29019609,  0.6156863 , ...,  0.65098041,
         0.27450982,  0.29019609],
       [ 0.15686275,  0.22352941,  0.65490198, ...,  0.65098041,
         0.21568628,  0.17254902],
       [ 0.47450981,  0.39607844,  0.96862745, ...,  0.61960787,
         0.36078432,  0.50196081],
       ..., 
       [ 0.43921569,  0.41568628,  1.        , ...,  0.27058825,
         0.25098041,  0.45490196],
       [ 0.41568628,  0.3882353 ,  0.98039216, ...,  0.36470589,
         0.30980393,  0.43137255],
       [ 0.35686275,  0.34901962,  1.        , ...,  0.30980393,
         0.23137255,  0.41176471]], dtype=float32)