### Xception_35_bagging

In [1]:
import os
import shutil
from os.path import isfile, isdir, join, pardir

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from keras.preprocessing import image
from keras.models import Model, load_model
from keras.preprocessing.image import ImageDataGenerator
from keras.layers import Input, Dense, Flatten, GlobalAveragePooling2D, Dropout
from keras import optimizers
from keras.utils.vis_utils import model_to_dot, plot_model
from keras.utils import to_categorical
from keras import regularizers
from keras.callbacks import EarlyStopping, ModelCheckpoint

from keras.applications import xception
from keras.applications.xception import Xception

import pydot
import cv2
import h5py

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


将训练集数据按司机（subject）分为5组：每次取其中1组作为验证集，其余4组作为训练集，共训练5个模型

In [2]:
# Driver (subject) IDs partitioned into five folds. The last fold holds six
# drivers, so the rows are ragged: dtype=object keeps this a 1-D array of five
# lists (recent NumPy versions refuse to build a ragged array implicitly).
driver_group = np.array([['p002', 'p012', 'p014', 'p015', 'p016'],
                         ['p021', 'p022', 'p024', 'p026', 'p035'],
                         ['p039', 'p041', 'p042', 'p045', 'p047'],
                         ['p049', 'p050', 'p051', 'p052', 'p056'],
                         ['p061', 'p064', 'p066', 'p072', 'p075', 'p081']],
                        dtype=object)

In [3]:
# Dataset locations, all resolved relative to the parent directory.
driver_imgs_list = pd.read_csv(join(pardir, 'data', 'driver_imgs_list.csv'))
driver_dataset_folder_path = join(pardir, 'data', 'driver_dataset')
image_valid_folder_path = join(driver_dataset_folder_path, 'valid')
test_image_path = join(driver_dataset_folder_path, 'test')

# Test images only receive the Xception input preprocessing (no augmentation).
xception_test_datagen = ImageDataGenerator(
    preprocessing_function=xception.preprocess_input)

# shuffle=False so the iteration order over the test files is fixed.
xception_test_generator = xception_test_datagen.flow_from_directory(
    directory=test_image_path,
    class_mode='categorical',
    shuffle=False,
    batch_size=32,
    target_size=(299, 299))

# File names of the test images as an (n_images, 1) column, ready to be joined
# with an (n_images, 10) prediction matrix.
# The names are read from the generator itself so that row i names exactly the
# file that produces prediction row i; os.walk yields files in an arbitrary,
# filesystem-dependent order and therefore cannot be relied on for that.
image_names = np.array(
    [os.path.basename(relative_path)
     for relative_path in xception_test_generator.filenames]
).reshape(-1, 1)

def rmrf_mkdir(dirname):
    """Recreate ``dirname`` as an empty directory.

    An existing directory tree at ``dirname`` is deleted first; a plain file
    or a symlink at that path is unlinked instead (``shutil.rmtree`` rejects
    both). Missing parent directories are created as needed.
    """
    if os.path.islink(dirname) or os.path.isfile(dirname):
        os.remove(dirname)
    elif os.path.exists(dirname):
        shutil.rmtree(dirname)
    # makedirs (rather than mkdir) so a missing parent does not abort the run.
    os.makedirs(dirname)

Found 79726 images belonging to 1 classes.


In [18]:
# Fold 1 working directories for the training and validation images.
train1_dir, valid1_dir = (
    join(pardir, 'data', 'driver_dataset', split + '1')
    for split in ('train', 'valid'))

In [19]:
# Start fold 1 from empty train/valid directories.
for fold_dir in (train1_dir, valid1_dir):
    rmrf_mkdir(fold_dir)

# Fold 1: driver group 4 validates; groups 0-3 train.
valid_driver_id1 = np.array(driver_group[4])
train_driver_id1 = np.array([driver_group[k] for k in range(4)]).flatten()

In [20]:
# Display the fold-1 training driver IDs to check the split.
train_driver_id1

array(['p002', 'p012', 'p014', 'p015', 'p016', 'p021', 'p022', 'p024',
       'p026', 'p035', 'p039', 'p041', 'p042', 'p045', 'p047', 'p049',
       'p050', 'p051', 'p052', 'p056'], dtype='<U4')

In [21]:
# Copy every image of the fold-1 validation drivers into valid1_dir/<class>/.
# The class of an image is found by probing train/c0 ... train/c9 for its name.
valid_rows = driver_imgs_list[driver_imgs_list['subject'].isin(valid_driver_id1)]
for img_file in valid_rows['img']:
    for class_name in ('c%s' % i for i in range(10)):
        source_file = join(driver_dataset_folder_path, 'train', class_name, img_file)
        if not os.path.exists(source_file):
            continue
        target_dir = join(valid1_dir, class_name)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        shutil.copy(source_file, join(target_dir, img_file))

In [22]:
# Copy every image of the fold-1 training drivers into train1_dir/<class>/.
# The class of an image is found by probing train/c0 ... train/c9 for its name.
train_rows = driver_imgs_list[driver_imgs_list['subject'].isin(train_driver_id1)]
for img_file in train_rows['img']:
    for class_name in ('c%s' % i for i in range(10)):
        source_file = join(driver_dataset_folder_path, 'train', class_name, img_file)
        if not os.path.exists(source_file):
            continue
        target_dir = join(train1_dir, class_name)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        shutil.copy(source_file, join(target_dir, img_file))

In [4]:
# Training pipeline: light augmentation (small rotations and shifts) followed
# by the Xception input preprocessing.
xception_train_datagen = ImageDataGenerator(
    preprocessing_function=xception.preprocess_input,
    rotation_range=10.0,
    height_shift_range=0.05,
    width_shift_range=0.05)

# Validation pipeline: preprocessing only, no augmentation.
xception_valid_datagen = ImageDataGenerator(preprocessing_function=xception.preprocess_input)

In [23]:
# Options shared by the fold-1 training and validation iterators.
fold1_flow_options = dict(
    target_size=(299, 299),
    batch_size=16,
    class_mode='categorical')

xception_train_generator1 = xception_train_datagen.flow_from_directory(
    train1_dir, **fold1_flow_options)

# Validation batches are read in a fixed order.
xception_valid_generator1 = xception_valid_datagen.flow_from_directory(
    valid1_dir, shuffle=False, **fold1_flow_options)

Found 17778 images belonging to 10 classes.
Found 4646 images belonging to 10 classes.


In [24]:
# ImageNet-pretrained Xception without its classification head.
model_xception = Xception(weights='imagenet', include_top=False)

# List layer indices and names so the freeze boundary below can be checked.
for position, base_layer in enumerate(model_xception.layers):
    print (position, base_layer.name)

# Freeze the first 35 layers (indices 0-34); the remaining layers stay trainable.
for base_layer in model_xception.layers[:35]:
    base_layer.trainable = False

# NOTE: `input` shadows the Python builtin; the name is kept because the
# model-building cells further down refer to `input` and `x`.
# NOTE(review): every Model built from `input`/`x` reuses these very layers,
# i.e. such models share one set of weights.
input = Input(shape=(299, 299, 3), name='image_input')

output_xception_conv = model_xception(input)

# Head: global average pooling -> dropout -> 10-way softmax.
pooled = GlobalAveragePooling2D()(output_xception_conv)
regularized = Dropout(0.5)(pooled)
x = Dense(10, activation='softmax')(regularized)

adam = optimizers.Adam(lr=1e-4)

0 input_2
1 block1_conv1
2 block1_conv1_bn
3 block1_conv1_act
4 block1_conv2
5 block1_conv2_bn
6 block1_conv2_act
7 block2_sepconv1
8 block2_sepconv1_bn
9 block2_sepconv2_act
10 block2_sepconv2
11 block2_sepconv2_bn
12 conv2d_5
13 block2_pool
14 batch_normalization_5
15 add_13
16 block3_sepconv1_act
17 block3_sepconv1
18 block3_sepconv1_bn
19 block3_sepconv2_act
20 block3_sepconv2
21 block3_sepconv2_bn
22 conv2d_6
23 block3_pool
24 batch_normalization_6
25 add_14
26 block4_sepconv1_act
27 block4_sepconv1
28 block4_sepconv1_bn
29 block4_sepconv2_act
30 block4_sepconv2
31 block4_sepconv2_bn
32 conv2d_7
33 block4_pool
34 batch_normalization_7
35 add_15
36 block5_sepconv1_act
37 block5_sepconv1
38 block5_sepconv1_bn
39 block5_sepconv2_act
40 block5_sepconv2
41 block5_sepconv2_bn
42 block5_sepconv3_act
43 block5_sepconv3
44 block5_sepconv3_bn
45 add_16
46 block6_sepconv1_act
47 block6_sepconv1
48 block6_sepconv1_bn
49 block6_sepconv2_act
50 block6_sepconv2
51 block6_sepconv2_bn
52 block6_se

In [26]:
# Fold-1 classifier: image input -> Xception base -> pooled softmax head.
model_1 = Model(outputs=x, inputs=input)

model_1.compile(
    loss='categorical_crossentropy',
    optimizer=adam,
    metrics=['categorical_accuracy'])

In [27]:
# Train fold 1 for two epochs, validating on the held-out drivers each epoch.
history_1 = model_1.fit_generator(
    generator=xception_train_generator1,
    validation_data=xception_valid_generator1,
    epochs=2)

Epoch 1/2
Epoch 2/2


In [28]:
# Persist the trained fold-1 model.
xception_1_path = join(pardir, 'model', 'xception_1.h5')
model_1.save(xception_1_path)
print("xception model saved.")

xception model saved.


In [29]:
# Reload the fold-1 model from disk for inference.
model_xception_1 = load_model(
    filepath=join(pardir, 'model', 'xception_1.h5'))
print("Model loaded.")

Model loaded.


In [30]:
# Class probabilities of the fold-1 model for every test image.
pred_xception_1 = model_xception_1.predict_generator(
    generator=xception_test_generator,
    verbose=1)
print(pred_xception_1.shape)

(79726, 10)


In [31]:
# Put the image-name column in front of the ten probability columns and
# write the fold-1 predictions to CSV.
prediction_columns = ['img'] + ['c%d' % k for k in range(10)]
result_xception_1 = np.concatenate((image_names, pred_xception_1), axis=1)
predict_result_xception_1 = pd.DataFrame(result_xception_1,
                                         columns=prediction_columns)
predict_result_xception_1.to_csv('predict_result_xception_1.csv', index=False)

In [63]:
# Fold 2 working directories, wiped and recreated on every run.
train2_dir, valid2_dir = (
    join(pardir, 'data', 'driver_dataset', split + '2')
    for split in ('train', 'valid'))
for fold_dir in (train2_dir, valid2_dir):
    rmrf_mkdir(fold_dir)

# Fold 2: driver group 1 validates; groups 0, 2, 3 and 4 train.
valid_driver_id2 = np.array(driver_group[1])
train_driver_id2 = np.concatenate(
    [driver_group[k] for k in (0, 2, 3, 4)])

In [64]:
# Display the fold-2 training driver IDs to check the split.
train_driver_id2

array(['p002', 'p012', 'p014', 'p015', 'p016', 'p039', 'p041', 'p042',
       'p045', 'p047', 'p049', 'p050', 'p051', 'p052', 'p056', 'p061',
       'p064', 'p066', 'p072', 'p075', 'p081'], dtype='<U4')

In [65]:
# Copy every image of the fold-2 validation drivers into valid2_dir/<class>/.
# The class of an image is found by probing train/c0 ... train/c9 for its name.
valid_rows = driver_imgs_list[driver_imgs_list['subject'].isin(valid_driver_id2)]
for img_file in valid_rows['img']:
    for class_name in ('c%s' % i for i in range(10)):
        source_file = join(driver_dataset_folder_path, 'train', class_name, img_file)
        if not os.path.exists(source_file):
            continue
        target_dir = join(valid2_dir, class_name)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        shutil.copy(source_file, join(target_dir, img_file))

In [66]:
# Copy every image of the fold-2 training drivers into train2_dir/<class>/.
# The class of an image is found by probing train/c0 ... train/c9 for its name.
train_rows = driver_imgs_list[driver_imgs_list['subject'].isin(train_driver_id2)]
for img_file in train_rows['img']:
    for class_name in ('c%s' % i for i in range(10)):
        source_file = join(driver_dataset_folder_path, 'train', class_name, img_file)
        if not os.path.exists(source_file):
            continue
        target_dir = join(train2_dir, class_name)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        shutil.copy(source_file, join(target_dir, img_file))

In [68]:
# Options shared by the fold-2 training and validation iterators.
fold2_flow_options = dict(
    target_size=(299, 299),
    batch_size=16,
    class_mode='categorical')

xception_train_generator2 = xception_train_datagen.flow_from_directory(
    train2_dir, **fold2_flow_options)

# Validation batches are read in a fixed order.
xception_valid_generator2 = xception_valid_datagen.flow_from_directory(
    valid2_dir, shuffle=False, **fold2_flow_options)

Found 16684 images belonging to 10 classes.
Found 5740 images belonging to 10 classes.


In [70]:
# Build an independent network for fold 2. Reusing `input`/`x` from the fold-1
# cell would make model_2 share the (already trained) layers of model_1, so
# this fold would not start from the ImageNet weights and its validation
# drivers (group 1) would already have been seen during fold-1 training.
xception_base_2 = Xception(include_top=False, weights='imagenet')
for frozen_layer in xception_base_2.layers[:35]:
    # Same policy as fold 1: layers 0-34 stay frozen.
    frozen_layer.trainable = False

image_input_2 = Input(shape=(299, 299, 3), name='image_input')
head_2 = GlobalAveragePooling2D()(xception_base_2(image_input_2))
head_2 = Dropout(0.5)(head_2)
head_2 = Dense(10, activation='softmax')(head_2)

model_2 = Model(inputs=image_input_2, outputs=head_2)

# A fresh Adam instance as well, so no optimizer state is shared between folds.
model_2.compile(optimizer=optimizers.Adam(lr=1e-4),
                loss='categorical_crossentropy',
                metrics=['categorical_accuracy'])

In [71]:
# Train fold 2 for two epochs, validating on the held-out drivers each epoch.
history_2 = model_2.fit_generator(
    generator=xception_train_generator2,
    validation_data=xception_valid_generator2,
    epochs=2)

Epoch 1/2
Epoch 2/2


In [72]:
# Persist the trained fold-2 model.
xception_2_path = join(pardir, 'model', 'xception_2.h5')
model_2.save(xception_2_path)
print("xception model saved.")

xception model saved.


In [73]:
# Reload the fold-2 model from disk for inference.
model_xception_2 = load_model(
    filepath=join(pardir, 'model', 'xception_2.h5'))
print("Model loaded.")

Model loaded.


In [76]:
# Class probabilities of the fold-2 model for every test image.
pred_xception_2 = model_xception_2.predict_generator(
    generator=xception_test_generator,
    verbose=1)
print(pred_xception_2.shape)

(79726, 10)


In [77]:
# Put the image-name column in front of the ten probability columns and
# write the fold-2 predictions to CSV.
prediction_columns = ['img'] + ['c%d' % k for k in range(10)]
result_xception_2 = np.concatenate((image_names, pred_xception_2), axis=1)
predict_result_xception_2 = pd.DataFrame(result_xception_2,
                                         columns=prediction_columns)
predict_result_xception_2.to_csv('predict_result_xception_2.csv', index=False)

In [7]:
# Fold 3 working directories, wiped and recreated on every run.
train3_dir, valid3_dir = (
    join(pardir, 'data', 'driver_dataset', split + '3')
    for split in ('train', 'valid'))
for fold_dir in (train3_dir, valid3_dir):
    rmrf_mkdir(fold_dir)

# Fold 3: driver group 2 validates; groups 0, 1, 3 and 4 train.
valid_driver_id3 = np.array(driver_group[2])
train_driver_id3 = np.concatenate(
    [driver_group[k] for k in (0, 1, 3, 4)])

In [8]:
# Copy every image of the fold-3 validation drivers into valid3_dir/<class>/.
# The class of an image is found by probing train/c0 ... train/c9 for its name.
valid_rows = driver_imgs_list[driver_imgs_list['subject'].isin(valid_driver_id3)]
for img_file in valid_rows['img']:
    for class_name in ('c%s' % i for i in range(10)):
        source_file = join(driver_dataset_folder_path, 'train', class_name, img_file)
        if not os.path.exists(source_file):
            continue
        target_dir = join(valid3_dir, class_name)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        shutil.copy(source_file, join(target_dir, img_file))
        # Disabled alternative from the original notebook: link instead of copy,
        # e.g. os.symlink(source_file, join(target_dir, img_file)).

In [9]:
# Copy every image of the fold-3 training drivers into train3_dir/<class>/.
# The class of an image is found by probing train/c0 ... train/c9 for its name.
train_rows = driver_imgs_list[driver_imgs_list['subject'].isin(train_driver_id3)]
for img_file in train_rows['img']:
    for class_name in ('c%s' % i for i in range(10)):
        source_file = join(driver_dataset_folder_path, 'train', class_name, img_file)
        if not os.path.exists(source_file):
            continue
        target_dir = join(train3_dir, class_name)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        shutil.copy(source_file, join(target_dir, img_file))
        # Disabled alternative from the original notebook: link instead of copy,
        # e.g. os.symlink(source_file, join(target_dir, img_file)).

In [10]:
# Options shared by the fold-3 training and validation iterators.
fold3_flow_options = dict(
    target_size=(299, 299),
    batch_size=16,
    class_mode='categorical')

xception_train_generator3 = xception_train_datagen.flow_from_directory(
    train3_dir, **fold3_flow_options)

# Validation batches are read in a fixed order.
xception_valid_generator3 = xception_valid_datagen.flow_from_directory(
    valid3_dir, shuffle=False, **fold3_flow_options)

Found 19018 images belonging to 10 classes.
Found 3406 images belonging to 10 classes.


In [13]:
# Build an independent network for fold 3. Reusing `input`/`x` from the
# earlier model-building cell would make model_3 share layers with the models
# of the previous folds, whose training sets contain this fold's validation
# drivers (group 2).
xception_base_3 = Xception(include_top=False, weights='imagenet')
for frozen_layer in xception_base_3.layers[:35]:
    # Same policy as the other folds: layers 0-34 stay frozen.
    frozen_layer.trainable = False

image_input_3 = Input(shape=(299, 299, 3), name='image_input')
head_3 = GlobalAveragePooling2D()(xception_base_3(image_input_3))
head_3 = Dropout(0.5)(head_3)
head_3 = Dense(10, activation='softmax')(head_3)

model_3 = Model(inputs=image_input_3, outputs=head_3)

# A fresh Adam instance as well, so no optimizer state is shared between folds.
model_3.compile(optimizer=optimizers.Adam(lr=1e-4),
                loss='categorical_crossentropy',
                metrics=['categorical_accuracy'])

In [14]:
# Train fold 3 for two epochs, validating on the held-out drivers each epoch.
history_3 = model_3.fit_generator(
    generator=xception_train_generator3,
    validation_data=xception_valid_generator3,
    epochs=2)

Epoch 1/2
Epoch 2/2


In [15]:
# Persist the trained fold-3 model.
xception_3_path = join(pardir, 'model', 'xception_3.h5')
model_3.save(xception_3_path)
print("xception model saved.")

xception model saved.


In [16]:
# Reload the fold-3 model from disk for inference.
model_xception_3 = load_model(
    filepath=join(pardir, 'model', 'xception_3.h5'))
print("Model loaded.")

Model loaded.


In [17]:
# Class probabilities of the fold-3 model for every test image.
pred_xception_3 = model_xception_3.predict_generator(
    generator=xception_test_generator,
    verbose=1)
print(pred_xception_3.shape)

(79726, 10)


In [18]:
# Put the image-name column in front of the ten probability columns and
# write the fold-3 predictions to CSV.
prediction_columns = ['img'] + ['c%d' % k for k in range(10)]
result_xception_3 = np.concatenate((image_names, pred_xception_3), axis=1)
predict_result_xception_3 = pd.DataFrame(result_xception_3,
                                         columns=prediction_columns)
predict_result_xception_3.to_csv('predict_result_xception_3.csv', index=False)

In [19]:
# Fold 4 working directories, wiped and recreated on every run.
train4_dir, valid4_dir = (
    join(pardir, 'data', 'driver_dataset', split + '4')
    for split in ('train', 'valid'))
for fold_dir in (train4_dir, valid4_dir):
    rmrf_mkdir(fold_dir)

# Fold 4: driver group 3 validates; groups 0, 1, 2 and 4 train.
valid_driver_id4 = np.array(driver_group[3])
train_driver_id4 = np.concatenate(
    [driver_group[k] for k in (0, 1, 2, 4)])

In [21]:
# Copy every image of the fold-4 validation drivers into valid4_dir/<class>/.
# The class of an image is found by probing train/c0 ... train/c9 for its name.
valid_rows = driver_imgs_list[driver_imgs_list['subject'].isin(valid_driver_id4)]
for img_file in valid_rows['img']:
    for class_name in ('c%s' % i for i in range(10)):
        source_file = join(driver_dataset_folder_path, 'train', class_name, img_file)
        if not os.path.exists(source_file):
            continue
        target_dir = join(valid4_dir, class_name)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        shutil.copy(source_file, join(target_dir, img_file))
        # Disabled alternative from the original notebook: link instead of copy,
        # e.g. os.symlink(source_file, join(target_dir, img_file)).

In [22]:
# Copy every image of the fold-4 training drivers into train4_dir/<class>/.
# The class of an image is found by probing train/c0 ... train/c9 for its name.
train_rows = driver_imgs_list[driver_imgs_list['subject'].isin(train_driver_id4)]
for img_file in train_rows['img']:
    for class_name in ('c%s' % i for i in range(10)):
        source_file = join(driver_dataset_folder_path, 'train', class_name, img_file)
        if not os.path.exists(source_file):
            continue
        target_dir = join(train4_dir, class_name)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        shutil.copy(source_file, join(target_dir, img_file))
        # Disabled alternative from the original notebook: link instead of copy,
        # e.g. os.symlink(source_file, join(target_dir, img_file)).

In [23]:
# Options shared by the fold-4 training and validation iterators.
fold4_flow_options = dict(
    target_size=(299, 299),
    batch_size=16,
    class_mode='categorical')

xception_train_generator4 = xception_train_datagen.flow_from_directory(
    train4_dir, **fold4_flow_options)

# Validation batches are read in a fixed order.
xception_valid_generator4 = xception_valid_datagen.flow_from_directory(
    valid4_dir, shuffle=False, **fold4_flow_options)

Found 18169 images belonging to 10 classes.
Found 4255 images belonging to 10 classes.


In [24]:
# Build an independent network for fold 4. Reusing `input`/`x` from the
# earlier model-building cell would make model_4 share layers with the models
# of the previous folds, whose training sets contain this fold's validation
# drivers (group 3).
xception_base_4 = Xception(include_top=False, weights='imagenet')
for frozen_layer in xception_base_4.layers[:35]:
    # Same policy as the other folds: layers 0-34 stay frozen.
    frozen_layer.trainable = False

image_input_4 = Input(shape=(299, 299, 3), name='image_input')
head_4 = GlobalAveragePooling2D()(xception_base_4(image_input_4))
head_4 = Dropout(0.5)(head_4)
head_4 = Dense(10, activation='softmax')(head_4)

model_4 = Model(inputs=image_input_4, outputs=head_4)

# A fresh Adam instance as well, so no optimizer state is shared between folds.
model_4.compile(optimizer=optimizers.Adam(lr=1e-4),
                loss='categorical_crossentropy',
                metrics=['categorical_accuracy'])

In [25]:
# Train fold 4 for two epochs, validating on the held-out drivers each epoch.
history_4 = model_4.fit_generator(
    generator=xception_train_generator4,
    validation_data=xception_valid_generator4,
    epochs=2)

Epoch 1/2
Epoch 2/2


In [26]:
# Persist the trained fold-4 model.
xception_4_path = join(pardir, 'model', 'xception_4.h5')
model_4.save(xception_4_path)
print("xception model saved.")

xception model saved.


In [27]:
# Reload the fold-4 model from disk for inference.
model_xception_4 = load_model(
    filepath=join(pardir, 'model', 'xception_4.h5'))
print("Model loaded.")

Model loaded.


In [28]:
# Class probabilities of the fold-4 model for every test image.
pred_xception_4 = model_xception_4.predict_generator(
    generator=xception_test_generator,
    verbose=1)
print(pred_xception_4.shape)

(79726, 10)


In [29]:
# Put the image-name column in front of the ten probability columns and
# write the fold-4 predictions to CSV.
prediction_columns = ['img'] + ['c%d' % k for k in range(10)]
result_xception_4 = np.concatenate((image_names, pred_xception_4), axis=1)
predict_result_xception_4 = pd.DataFrame(result_xception_4,
                                         columns=prediction_columns)
predict_result_xception_4.to_csv('predict_result_xception_4.csv', index=False)

In [8]:
# Fold 5 working directories for the training and validation images.
train5_dir, valid5_dir = (
    join(pardir, 'data', 'driver_dataset', split + '5')
    for split in ('train', 'valid'))

In [7]:
# Start fold 5 from empty train/valid directories.
for fold_dir in (train5_dir, valid5_dir):
    rmrf_mkdir(fold_dir)

# Fold 5: driver group 0 validates; groups 1, 2, 3 and 4 train.
valid_driver_id5 = np.array(driver_group[0])
train_driver_id5 = np.concatenate(
    [driver_group[k] for k in (1, 2, 3, 4)])

In [8]:
# Copy every image of the fold-5 validation drivers into valid5_dir/<class>/.
# The class of an image is found by probing train/c0 ... train/c9 for its name.
valid_rows = driver_imgs_list[driver_imgs_list['subject'].isin(valid_driver_id5)]
for img_file in valid_rows['img']:
    for class_name in ('c%s' % i for i in range(10)):
        source_file = join(driver_dataset_folder_path, 'train', class_name, img_file)
        if not os.path.exists(source_file):
            continue
        target_dir = join(valid5_dir, class_name)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        shutil.copy(source_file, join(target_dir, img_file))
        # Disabled alternative from the original notebook: link instead of copy,
        # e.g. os.symlink(source_file, join(target_dir, img_file)).

In [9]:
# Copy every image of the fold-5 training drivers into train5_dir/<class>/.
# The class of an image is found by probing train/c0 ... train/c9 for its name.
train_rows = driver_imgs_list[driver_imgs_list['subject'].isin(train_driver_id5)]
for img_file in train_rows['img']:
    for class_name in ('c%s' % i for i in range(10)):
        source_file = join(driver_dataset_folder_path, 'train', class_name, img_file)
        if not os.path.exists(source_file):
            continue
        target_dir = join(train5_dir, class_name)
        if not os.path.exists(target_dir):
            os.makedirs(target_dir)
        shutil.copy(source_file, join(target_dir, img_file))
        # Disabled alternative from the original notebook: link instead of copy,
        # e.g. os.symlink(source_file, join(target_dir, img_file)).

In [9]:
# Options shared by the fold-5 training and validation iterators.
fold5_flow_options = dict(
    target_size=(299, 299),
    batch_size=16,
    class_mode='categorical')

xception_train_generator5 = xception_train_datagen.flow_from_directory(
    train5_dir, **fold5_flow_options)

# Validation batches are read in a fixed order.
xception_valid_generator5 = xception_valid_datagen.flow_from_directory(
    valid5_dir, shuffle=False, **fold5_flow_options)

Found 18047 images belonging to 10 classes.
Found 4377 images belonging to 10 classes.


In [10]:
# Build an independent network for fold 5. Reusing `input`/`x` from the
# earlier model-building cell would make model_5 share layers with the models
# of the previous folds, whose training sets contain this fold's validation
# drivers (group 0).
xception_base_5 = Xception(include_top=False, weights='imagenet')
for frozen_layer in xception_base_5.layers[:35]:
    # Same policy as the other folds: layers 0-34 stay frozen.
    frozen_layer.trainable = False

image_input_5 = Input(shape=(299, 299, 3), name='image_input')
head_5 = GlobalAveragePooling2D()(xception_base_5(image_input_5))
head_5 = Dropout(0.5)(head_5)
head_5 = Dense(10, activation='softmax')(head_5)

model_5 = Model(inputs=image_input_5, outputs=head_5)

# A fresh Adam instance as well, so no optimizer state is shared between folds.
model_5.compile(optimizer=optimizers.Adam(lr=1e-4),
                loss='categorical_crossentropy',
                metrics=['categorical_accuracy'])

In [12]:
# Train fold 5 for two epochs, validating on the held-out drivers each epoch.
# (The stray second comma after `epochs=2` was a syntax error and is removed.)
history_5 = model_5.fit_generator(
    xception_train_generator5,
    epochs=2,
    validation_data=xception_valid_generator5)

Epoch 1/2
Epoch 2/2


In [13]:
# Persist the trained fold-5 model.
xception_5_path = join(pardir, 'model', 'xception_5.h5')
model_5.save(xception_5_path)
print("xception model saved.")

xception model saved.


In [14]:
# Reload the fold-5 model from disk for inference.
model_xception_5 = load_model(
    filepath=join(pardir, 'model', 'xception_5.h5'))
print("Model loaded.")

Model loaded.


In [15]:
# Class probabilities of the fold-5 model for every test image.
pred_xception_5 = model_xception_5.predict_generator(
    generator=xception_test_generator,
    verbose=1)
print(pred_xception_5.shape)

(79726, 10)


In [16]:
# Put the image-name column in front of the ten probability columns and
# write the fold-5 predictions to CSV.
prediction_columns = ['img'] + ['c%d' % k for k in range(10)]
result_xception_5 = np.concatenate((image_names, pred_xception_5), axis=1)
predict_result_xception_5 = pd.DataFrame(result_xception_5,
                                         columns=prediction_columns)
predict_result_xception_5.to_csv('predict_result_xception_5.csv', index=False)

#### 融合5个模型的结果

In [32]:
# Probability column names (c0 ... c9) and the image-name column taken from
# the sample submission file.
category = ['c%d' % k for k in range(10)]
img_name = pd.read_csv('sample_submission.csv')['img']

In [39]:
# Load the per-fold predictions written earlier.
result1 = pd.read_csv('predict_result_xception_1.csv')
result2 = pd.read_csv('predict_result_xception_2.csv')
result3 = pd.read_csv('predict_result_xception_3.csv')
result4 = pd.read_csv('predict_result_xception_4.csv')
result5 = pd.read_csv('predict_result_xception_5.csv')

# Element-wise mean of the class probabilities over all five models.
fold_results = [result1, result2, result3, result4, result5]
average = fold_results[0][category]
for fold_result in fold_results[1:]:
    average = average + fold_result[category]
average = average / len(fold_results)

# Image names come from the sample submission and go in the first column.
average.insert(0, 'img', pd.Series(img_name))

average.to_csv('Xception_35_bagging.csv', index=False)

#### 融合3个模型的结果（第2、3、4个模型）

In [37]:
# Load the predictions of models 2, 3 and 4 only.
result2 = pd.read_csv('predict_result_xception_2.csv')
result3 = pd.read_csv('predict_result_xception_3.csv')
result4 = pd.read_csv('predict_result_xception_4.csv')

# Element-wise mean of the class probabilities over these three models.
selected_results = [result2, result3, result4]
average = selected_results[0][category]
for selected_result in selected_results[1:]:
    average = average + selected_result[category]
average = average / len(selected_results)

# Image names come from the sample submission and go in the first column.
average.insert(0, 'img', pd.Series(img_name))

average.to_csv('Xception_35_bagging_3.csv', index=False)