# Final step:
这是最后一步，使用训练好的classifier对localizer截取好的图片进行分类

In [17]:
# import os
import h5py
import numpy as np
np.random.seed(2017)
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
K.set_image_dim_ordering('th')


In [2]:
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Flatten
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D, AveragePooling2D
from keras.optimizers import SGD, Adagrad
from keras.utils import np_utils
from keras.constraints import maxnorm
from sklearn.metrics import log_loss
from keras import __version__ as keras_version
from keras.callbacks import EarlyStopping

In [3]:
# Input image dimensions; these feed the `input_shape` of the network's first layer.
img_width, img_height = 100, 100

# 这里只构建分类器的网络结构，不进行训练，目的是为了加载已经训练好的权重（weights）

In [4]:
# Build the VGG16 convolutional base.
#
# Each entry is (number of filters, number of 3x3 conv layers) for one block.
# Every conv layer is preceded by a 1-pixel zero padding, and every block ends
# with a 2x2 max-pooling of stride 2. Layer order and the `convB_N` names are
# exactly those of the hand-unrolled definition, so saved weights still match.
VGG16_BLOCKS = [(64, 2), (128, 2), (256, 3), (512, 3), (512, 3)]

model = Sequential()
for block_idx, (n_filters, n_convs) in enumerate(VGG16_BLOCKS, start=1):
    for conv_idx in range(1, n_convs + 1):
        if block_idx == 1 and conv_idx == 1:
            # Only the very first layer declares the input shape. With the
            # 'th' dim ordering the layout is (channels, rows, cols), i.e.
            # (3, height, width) -- not (3, width, height).
            model.add(ZeroPadding2D((1, 1), input_shape=(3, img_height, img_width)))
        else:
            model.add(ZeroPadding2D((1, 1)))
        model.add(Convolution2D(n_filters, 3, 3, activation='relu',
                                name='conv{}_{}'.format(block_idx, conv_idx)))
    model.add(MaxPooling2D((2, 2), strides=(2, 2)))

In [5]:
# Fully connected classification head that sits on top of the conv base.
top_model = Sequential()
top_model.add(Flatten(input_shape=model.output_shape[1:]))

# Two hidden stages, each a he_uniform-initialised ReLU Dense layer followed
# by Dropout: (units, dropout rate).
for hidden_units, drop_rate in [(96, 0.4), (24, 0.2)]:
    top_model.add(Dense(hidden_units, activation='relu', init='he_uniform'))
    top_model.add(Dropout(drop_rate))

# Softmax output over 8 classes.
top_model.add(Dense(8, activation='softmax'))

In [6]:
# Append the dense head to the convolutional base so both form a single model.
model.add(top_model)
# Restore previously saved weights into the combined model. The architecture
# built above has to match the one that produced this file
# (presumably the fine-tuned classifier -- TODO confirm).
model.load_weights('classification/local-fine-tune-model_da_forth10epoch.h5')

LOAD TEST DATA

In [7]:
def load_from_file(filename):
    """Load a NumPy array stored next to ``filename`` with a ``.npy`` suffix.

    Parameters
    ----------
    filename : str
        Path of the array file *without* the ``.npy`` extension.

    Returns
    -------
    The object returned by ``numpy.load`` for ``filename + '.npy'``.
    """
    import numpy as np

    npy_path = filename + '.npy'
    return np.load(npy_path)

In [8]:
# Load the training targets and the training image array from their .npy files.
train_target = load_from_file('classification/train_target_100')
train_data = load_from_file('classification/train_data_100')

In [13]:
# Show the dimensions of the loaded training array.
train_data.shape

(3764, 3, 100, 100)

In [8]:
import os
import glob
import cv2
import datetime
import pandas as pd
import time
import warnings

In [9]:
def get_im_cv2(path, size=(100, 100)):
    """Read an image from disk with OpenCV and resize it.

    Parameters
    ----------
    path : str
        Location of the image file.
    size : tuple of int, optional
        Target ``(width, height)`` handed to ``cv2.resize``. Defaults to
        ``(100, 100)``, the value that used to be hard-coded.

    Returns
    -------
    The resized image as returned by ``cv2.resize``.

    Raises
    ------
    IOError
        If ``cv2.imread`` could not read the file. ``imread`` signals failure
        by returning ``None`` instead of raising, which would otherwise only
        surface as an obscure error inside ``cv2.resize``.
    """
    img = cv2.imread(path)
    if img is None:
        raise IOError('Could not read image: {}'.format(path))
    # The third positional parameter of cv2.resize is `dst`, not the
    # interpolation flag, so the flag has to be passed by keyword.
    return cv2.resize(img, size, interpolation=cv2.INTER_LINEAR)

In [10]:
def load_test(foldname):
    """Load every ``*.jpg`` image of one test sub-folder.

    Files are looked up under ``../input/test_stg1/<foldname>/`` and processed
    in sorted path order.

    Parameters
    ----------
    foldname : str
        Name of the sub-folder inside ``../input/test_stg1``.

    Returns
    -------
    tuple of (list, list)
        The images as returned by ``get_im_cv2`` and, in the same order, the
        base file names they were read from.
    """
    pattern = os.path.join('..', 'input', 'test_stg1', foldname, '*.jpg')
    image_paths = sorted(glob.glob(pattern))
    images = [get_im_cv2(image_path) for image_path in image_paths]
    image_names = [os.path.basename(image_path) for image_path in image_paths]
    return images, image_names

In [11]:
def read_and_normalize_test_data(name):
    """Load one test folder and convert it into a normalised float32 batch.

    The images returned by ``load_test`` are stacked, moved from
    channels-last to channels-first layout via ``transpose((0, 3, 1, 2))``,
    cast to ``float32`` and divided by 255.

    Parameters
    ----------
    name : str
        Folder name forwarded to ``load_test``.

    Returns
    -------
    tuple of (numpy.ndarray, list)
        The normalised image batch and the matching file names.

    Raises
    ------
    ValueError
        If ``load_test`` returns no images. Without this guard the transpose
        below fails with an unhelpful "axes don't match array" message.
    """
    start_time = time.time()
    test_data, test_id = load_test(name)
    if len(test_data) == 0:
        raise ValueError('No test images found for fold {!r}'.format(name))

    test_data = np.array(test_data, dtype=np.uint8)
    test_data = test_data.transpose((0, 3, 1, 2))
    test_data = test_data.astype('float32') / 255

    # Single formatted strings print identically on Python 2 and 3; a
    # multi-argument print(...) shows up as a tuple repr on Python 2.
    print('Test shape: {}'.format(test_data.shape))
    print('{} test samples'.format(test_data.shape[0]))
    print('Read and process test data time: {} seconds'.format(round(time.time() - start_time, 2)))
    return test_data, test_id

In [28]:
# Load and normalise the test images of the 'local7' folder.
test_data, test_id = read_and_normalize_test_data('local7')

('Test shape:', (1000, 3, 100, 100))
(1000, 'test samples')
Read and process test data time: 10.1 seconds


In [29]:
# Load and normalise the test images of the 'local1_1' folder for comparison.
test_data1, test_id1 = read_and_normalize_test_data('local1_1')

('Test shape:', (1000, 3, 100, 100))
(1000, 'test samples')
Read and process test data time: 9.28 seconds


In [32]:
# Spot-check that both folders list the same file name at the same index.
# The call form works on Python 2 and Python 3 alike.
print(test_id[100])
print(test_id1[100])

img_00730.jpg
img_00730.jpg


In [12]:
def merge_several_folds_mean(data, nfolds):
    """Return the element-wise mean of the first ``nfolds`` entries of ``data``.

    Parameters
    ----------
    data : sequence of array-like
        Per-fold predictions; all entries must share one shape.
    nfolds : int
        Number of leading entries of ``data`` to average. Must be >= 1.

    Returns
    -------
    list
        The averaged values as (nested) Python lists.

    Raises
    ------
    ValueError
        If ``nfolds`` is smaller than 1.
    IndexError
        If ``data`` holds fewer than ``nfolds`` entries.
    """
    if nfolds < 1:
        raise ValueError('nfolds must be at least 1, got {}'.format(nfolds))

    # np.array copies, so the caller's first fold is never modified in place.
    total = np.array(data[0])
    if not np.issubdtype(total.dtype, np.inexact):
        # An integer accumulator would floor-divide (Python 2) or make the
        # in-place division below raise (Python 3); average in floating point.
        total = total.astype(np.float64)

    for i in range(1, nfolds):
        total += np.asarray(data[i])
    total /= nfolds
    return total.tolist()


In [15]:
def run_cross_validation_process_test(model):
    """Predict each test folder with ``model``, average the results and write a submission.

    For every folder in the fixed list below, the images are loaded with
    ``read_and_normalize_test_data`` and scored with ``model.predict``. The
    per-folder predictions are averaged by ``merge_several_folds_mean`` and
    passed to ``create_submission``.

    Parameters
    ----------
    model
        Object exposing ``predict(data, batch_size=..., verbose=...)``.

    Raises
    ------
    ValueError
        If a folder does not list exactly the same file names, in the same
        order, as the first folder. Averaging is row-by-row, so misaligned
        folders would silently attach predictions to the wrong images.
    """
    batch_size = 24
    # Presumably crops of the same test images from different localizer runs
    # -- TODO confirm.
    fold_names = ['local1_1', 'local3', 'local7', 'local_v2_2']
    nfolds = len(fold_names)

    yfull_test = []
    test_id = None
    for num_fold, name in enumerate(fold_names, start=1):
        print('Start KFold number {} from {}'.format(num_fold, nfolds))
        test_data, fold_test_id = read_and_normalize_test_data(name)

        # Check alignment before spending time on prediction.
        if test_id is None:
            test_id = fold_test_id
        elif list(fold_test_id) != list(test_id):
            raise ValueError(
                'Test image ids of fold {!r} do not match those of fold {!r}'.format(
                    name, fold_names[0]))

        test_prediction = model.predict(test_data, batch_size=batch_size, verbose=2)
        yfull_test.append(test_prediction)

    test_res = merge_several_folds_mean(yfull_test, nfolds)
    info_string = 'localizer4subtest_' + 'loss_' + '_folds_' + str(nfolds)
    create_submission(test_res, test_id, info_string)



In [13]:
def create_submission(predictions, test_id, info):
    """Write the predictions to a timestamped CSV file in the working directory.

    Parameters
    ----------
    predictions : array-like
        One row per image with eight values, in the column order
        ALB, BET, DOL, LAG, NoF, OTHER, SHARK, YFT.
    test_id : sequence
        Image identifiers, one per prediction row; stored in an ``image``
        column appended after the class columns.
    info : str
        Free-form tag embedded in the file name.

    The file is named ``submission_<info>_<YYYY-mm-dd-HH-MM>.csv`` and is
    written without the DataFrame index.
    """
    class_columns = ['ALB', 'BET', 'DOL', 'LAG', 'NoF', 'OTHER', 'SHARK', 'YFT']
    submission = pd.DataFrame(predictions, columns=class_columns)
    submission.loc[:, 'image'] = pd.Series(test_id, index=submission.index)

    timestamp = str(datetime.datetime.now().strftime("%Y-%m-%d-%H-%M"))
    sub_file = '_'.join(['submission', info, timestamp]) + '.csv'
    submission.to_csv(sub_file, index=False)


In [None]:
# Predict all test folders with the loaded model and write the averaged submission CSV.
run_cross_validation_process_test(model)

Start KFold number 1 from 4
('Test shape:', (1000, 3, 100, 100))
(1000, 'test samples')
Read and process test data time: 9.84 seconds
Start KFold number 2 from 4
('Test shape:', (1000, 3, 100, 100))
(1000, 'test samples')
Read and process test data time: 10.08 seconds
Start KFold number 3 from 4
('Test shape:', (1000, 3, 100, 100))
(1000, 'test samples')
Read and process test data time: 9.71 seconds
('Test shape:', (1000, 3, 100, 100))
(1000, 'test samples')
Read and process test data time: 9.86 seconds
