# import *  (not valid Python on its own; kept only as a comment)

# In[ ]:
import cv2 as cv
import glob
import random
import numpy as np
from scipy.misc import imread
import os
from keras.utils import np_utils
from keras.models import Sequential
from keras.layers import Dense,Convolution2D,MaxPooling2D,Flatten,Activation
from keras.optimizers import Adam
from sklearn.cross_validation import train_test_split

# In[ ]:
# Class labels. A label's position in this list is used as its integer class
# index (see species.index(...) in load_train_data).
species = [
    'ALB', 'BET', 'DOL', 'LAG',
    'NoF', 'OTHER', 'SHARK', 'YFT',
]

# Upper bound on the number of training images loaded.
select = 1000

# Target image size after resizing; the trailing comments give the
# full-size values noted by the original author.
ROWS, COLS = 90, 160  # 720, 1280
CHANNELS = 3

# Root of the data directory (the loaders append '/train/...' and
# '/test_stg1/...' to it).
PATH = './input/'

def get_image(file):
    """Return the ``img_...`` file name found at the end of a path.

    The path is split at the last occurrence of ``'/img_'`` and everything
    after that slash is returned.  When the marker does not occur, the
    input is returned unchanged.
    """
    _, marker, remainder = file.rpartition('/img_')
    # On a match `marker` is '/img_' (drop its leading slash); on a miss it
    # is '' and `remainder` already holds the whole input string.
    return marker[1:] + remainder

def get_id(file):
    """Return the text between the last ``'_'`` and the last ``'.'``.

    For a path such as ``.../img_00003.jpg`` this yields ``'00003'``.

    Both positions come from ``str.rfind``, so a missing ``'_'`` makes the
    slice start at index 0 and a missing ``'.'`` makes it stop at index -1
    (i.e. the final character is dropped).
    """
    start = file.rfind('_') + 1
    stop = file.rfind('.')
    return file[start:stop]

def _load_resized(path):
    """Read one image file and shrink it to ROWS x COLS pixels.

    Returns the resized image with its two leading axes swapped, i.e. laid
    out as (COLS, ROWS, channels) for a colour image.  That is the layout
    the loaders below have always returned; callers in this file turn it
    into channels-first (channels, ROWS, COLS) with ``transpose((0, 3, 2, 1))``.
    """
    img = imread(path)
    # cv.resize takes dsize as (width, height), so COLS must come first to
    # get an image that is ROWS tall and COLS wide.
    resized = cv.resize(img, (COLS, ROWS))
    # Swap the spatial axes to keep the historical (COLS, ROWS, ...) layout.
    return np.swapaxes(resized, 0, 1)


def load_train_data(select):
    """Load a random subset of the training images with labels and ids.

    Args:
        select: maximum number of images to load.  If fewer files exist,
            all of them are loaded.

    Returns:
        A tuple ``(X_train, y, ids)``:
        X_train -- float32 array of pixel values scaled by 1/255, shaped
            (n, COLS, ROWS, 3) for colour input.
        y -- integer array; each entry is the index in ``species`` of the
            image's parent directory name.
        ids -- array of strings produced by ``get_id`` for each file.

    Raises:
        ValueError: if an image sits in a directory whose name is not
            listed in ``species`` (raised by ``list.index``).
    """
    # Sort first so the shuffle is reproducible under a fixed random seed,
    # regardless of the order the filesystem returns entries in.
    train_files = sorted(glob.glob(PATH+'/train/*/*.jpg'))
    random.shuffle(train_files)
    train_files = train_files[:select]

    # Resize each image as it is read: the raw frames need not share one
    # size, and stacking unequal shapes into a single array is an error on
    # recent NumPy versions.
    images = [_load_resized(path) for path in train_files]
    X_train = np.array(images, dtype=np.float32)/255

    # The class label is the name of the directory that holds the image.
    y = np.array([
        species.index(os.path.basename(os.path.dirname(path)))
        for path in train_files
    ])
    ids = np.array([get_id(path) for path in train_files])
    return X_train, y, ids


def load_test_data():
    """Load every stage-1 test image, in sorted file-name order.

    Returns:
        A tuple ``(X_test, ids)``:
        X_test -- float32 array of pixel values scaled by 1/255, in the
            same layout as the training images, (n, COLS, ROWS, 3) for
            colour input.
        ids -- array of file names produced by ``get_image``.
    """
    test_files = sorted(glob.glob(PATH+'/test_stg1/*.jpg'))
    images = [_load_resized(path) for path in test_files]
    X_test = np.array(images, dtype=np.float32)/255
    ids = np.array([get_image(path) for path in test_files])
    return X_test, ids

# In[ ]:
# Load up to `select` training images with their integer labels and ids.
X, y, ids = load_train_data(select)

# Sanity check on the loaded array's dimensions.
print(X.shape)

# In[ ]:
# Hold out 33% of the loaded images for evaluation; the fixed random_state
# makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.33,
    random_state=42,
)

# In[ ]:
from keras.models import load_model
from keras.layers import Dropout

# The network below is declared with input_shape=(3, ROWS, COLS), while the
# arrays produced by load_train_data are laid out as (n, COLS, ROWS, 3).
# Apply the same (0, 3, 2, 1) transpose that is used on the test images
# before prediction, so fit/evaluate receive the declared shape.
# New names are used (rather than rebinding X_train/X_test) so re-running
# this cell does not transpose twice.
# NOTE(review): input_shape=(3, ROWS, COLS) presumes Keras is configured for
# channels-first ('th') image ordering -- confirm in the Keras config.
X_train_cf = X_train.transpose((0, 3, 2, 1))
X_test_cf = X_test.transpose((0, 3, 2, 1))

# categorical_crossentropy compares against one-hot rows, but the labels are
# integer class indices; expand them to one column per species.
n_classes = len(species)
y_train_oh = np_utils.to_categorical(y_train, n_classes)
y_test_oh = np_utils.to_categorical(y_test, n_classes)

model = Sequential()

# Block 1: 32 5x5 filters, then 2x2 max-pooling.
model.add(Convolution2D(
    nb_filter=32,
    nb_row=5,
    nb_col=5,
    border_mode='same',
    input_shape=(3,ROWS,COLS)
))
model.add(Activation('relu'))
model.add(MaxPooling2D(
    pool_size=(2,2),
    strides=(2,2),
    border_mode='same',
))

# Block 2: 64 5x5 filters, then 2x2 max-pooling.
model.add(Convolution2D(64,5,5,border_mode='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(
    pool_size=(2,2),
    strides=(2,2),
    border_mode='same',
))

# Block 3: 128 5x5 filters, then 2x2 max-pooling.
model.add(Convolution2D(128,5,5,border_mode='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(
    pool_size=(2,2),
    strides=(2,2),
    border_mode='same',
))

# Classifier head.
model.add(Flatten())
model.add(Dense(128))
model.add(Dropout(0.5))
model.add(Activation('relu'))

# Output layer: one unit per species.  No Dropout here -- randomly zeroing
# class logits right before the softmax only corrupts the training signal.
model.add(Dense(n_classes))
model.add(Activation('softmax'))

adam = Adam()
model.compile(optimizer=adam,loss='categorical_crossentropy',metrics=['accuracy'])

model.fit(X_train_cf,y_train_oh,nb_epoch=50,batch_size=32)
loss, accuracy = model.evaluate(X_test_cf,y_test_oh)

print ('\n test loss:',loss)
print ('\n test accuracy',accuracy)

model.save("my_mode.h5")

# In[ ]:
# Load the stage-1 test images and their file names.  Note that `ids` now
# refers to the test file names, replacing the training ids loaded earlier.
test, ids = load_test_data()

# Reverse the last three axes so the channel axis comes first, matching the
# model's declared input_shape of (3, ROWS, COLS).
data = np.transpose(test, (0, 3, 2, 1))

# One row of class probabilities per test image.
predictions = model.predict(data, verbose=1)

# In[ ]:
import pandas as pd
import datetime

# One probability column per class, in the same order as the model outputs,
# followed by an 'image' column holding the test file names.
class_columns = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']
result1 = pd.DataFrame(predictions, columns=class_columns)
result1['image'] = pd.Series(ids, index=result1.index)

# Timestamp the file name (to the minute) so successive runs do not
# overwrite each other's submissions.
now = datetime.datetime.now()
sub_file = 'submission_' + now.strftime("%Y-%m-%d-%H-%M") + '.csv'
result1.to_csv(sub_file, index=False)