created on: Fri Jan 15 11:21:27 2020
<br>
Group 7
<br>
@author: E.G.

In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import random
import os
from PIL import Image
from shutil import copyfile

# deep learning
import keras
from keras import backend as K
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, InputLayer, ReLU, AveragePooling2D
from keras.layers.core import Dense, Dropout, Activation, Flatten
from keras.optimizers import SGD, Adam
from keras.utils import np_utils

from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.applications.resnet50 import preprocess_input
from keras.callbacks import ReduceLROnPlateau
from keras.applications.vgg16 import VGG16

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
# Root folder of the project. The default is the original author's location;
# set the INTERPROMO_PROJECT_PATH environment variable to run the notebook on
# another machine without editing this cell.
project_path = os.environ.get(
    'INTERPROMO_PROJECT_PATH',
    'C:/Users/emma.grandgirard/Documents/B - Projet Interpromo/')
# Later cells build paths by plain string concatenation, so the root must end
# with a path separator.
if not project_path.endswith(('/', '\\')):
    project_path += '/'

# Folder (relative to project_path) that holds the SeatGuru images
data_path = 'Data/data_Interpromo2020/Interpromo2020/All Data/ANALYSE IMAGE/IMG SEATGURU/'

# Images parameters
size = (224, 224)  # target size passed to the generators and to the VGG16 input shape
greys = False      # when True, the model input is declared with a single channel

In [3]:
# Load the annotation CSV and keep only the values of its 'Picture name' column
ind_int = pd.read_csv(
    project_path + 'CSV_annotate/SEATGURU/g7_SEATGURU_Int.csv',
    sep=';',
    engine='python',
    index_col=None,
    encoding='utf-8',
)['Picture name'].values

## Create directories

In [4]:
# Files of the SeatGuru image folder whose name contains 'Airbus'
imgs_list = [
    fname
    for fname in os.listdir(project_path + data_path)
    if 'Airbus' in fname
]
print(len(imgs_list))

1043


In [11]:
def create_dirs_seatguru(aircraft_types: list, new_path: str):
    """Create one directory per aircraft type and copy the matching images into it.

    An image is copied into the directory of a type when its file name contains
    the type string and the file name is listed in ``ind_int``.

    Relies on the notebook-level names ``project_path``, ``data_path``,
    ``imgs_list`` and ``ind_int``.

    Parameters
    ----------
    aircraft_types : list
        Aircraft type strings (e.g. 'A320'); each one becomes a sub-directory
        of ``new_path``.
    new_path : str
        Directory under which the per-type directories are created.

    Prints, for each type, the number of entries found in its directory after
    the copy.
    """
    src_dir = project_path + data_path
    # Build the lookup once: membership on the raw array is a linear scan that
    # would otherwise be repeated for every (type, image) pair.
    annotated = set(ind_int)

    for typ in aircraft_types:
        # os.path.join keeps this correct whether or not new_path ends with a separator
        typ_dir = os.path.join(new_path, typ)
        os.makedirs(typ_dir, exist_ok=True)

        for img in imgs_list:
            if typ in img and img in annotated:
                copyfile(os.path.join(src_dir, img), os.path.join(typ_dir, img))

        print(f'{typ}: {len(os.listdir(typ_dir))} images')

In [12]:
# Destination folder and the aircraft types that each get their own sub-folder
crea_path = project_path + 'Data/Int/Airbus/'
airbus_planes = ['A320', 'A321', 'A330', 'A350']

create_dirs_seatguru(airbus_planes, crea_path)

A320: 161 images
A321: 117 images
A330: 250 images
A350: 39 images


## Train-test split and data loading with Keras

In [13]:
def split_train_test_seatguru(new_paths: list, path: str, aircraft_types: list, split_limit: float=.7, s: int=8, 
                              ext: str='.jpg'):
    """Copy the images of each aircraft type into a train and a test directory.

    For every type, the files of ``path/<type>`` whose name ends with ``ext``
    are shuffled with seed ``s``; the first ``int(split_limit * n)`` files are
    copied to ``new_paths[0]/<type>`` and the remaining ones to
    ``new_paths[1]/<type>``. Source files are left in place.

    Parameters
    ----------
    new_paths : list
        Two directories: ``[train_dir, test_dir]``.
    path : str
        Directory containing one sub-directory per aircraft type (with or
        without a trailing separator).
    aircraft_types : list
        Names of the sub-directories of ``path`` to split.
    split_limit : float
        Fraction of the images of each type sent to the train directory.
    s : int
        Seed of the shuffle; the same seed is applied to every type.
    ext : str
        File-name suffix an image must have to be included.
    """
    train_root, test_root = new_paths[0], new_paths[1]

    for typ in aircraft_types:
        # Build every path the same way so a missing trailing separator on
        # `path` cannot produce a source path different from the listed one.
        src_dir = os.path.join(path, typ)
        train_dir = os.path.join(train_root, typ)
        test_dir = os.path.join(test_root, typ)
        os.makedirs(train_dir, exist_ok=True)
        os.makedirs(test_dir, exist_ok=True)

        # os.listdir returns entries in arbitrary order: sort first so that the
        # seeded shuffle yields the same split on every run and platform.
        # endswith() also supports suffixes that are not exactly 4 characters.
        picts = sorted(pic for pic in os.listdir(src_dir) if pic.endswith(ext))

        # A private generator gives the same permutation as seeding the global
        # one, without resetting the random state of the rest of the notebook.
        random.Random(s).shuffle(picts)

        cut = int(split_limit * len(picts))
        for pict in picts[:cut]:
            copyfile(os.path.join(src_dir, pict), os.path.join(train_dir, pict))
        for pict in picts[cut:]:
            copyfile(os.path.join(src_dir, pict), os.path.join(test_dir, pict))

In [14]:
# [train directory, test directory], both located under crea_path
new_paths = [crea_path + subdir for subdir in ('data_train', 'data_test')]

In [16]:
# Copy the images of every Airbus type into the train / test folders
# (default split fraction, seed and extension of split_train_test_seatguru)
split_train_test_seatguru(new_paths, crea_path, airbus_planes)

In [17]:
# Image data generators
# NOTE(review): preprocess_input is imported from keras.applications.resnet50
# while the base network built below is VGG16 — confirm both models expect the
# same preprocessing.
train_datagen = ImageDataGenerator(preprocessing_function=preprocess_input)

# Settings shared by the train and the test directory iterators
flow_options = dict(
    target_size=size,
    color_mode='rgb',
    batch_size=32,
    class_mode='categorical',
    shuffle=True,
)

# new_paths[0] is the train folder, new_paths[1] the test folder;
# both iterators are created from the same ImageDataGenerator.
train_generator = train_datagen.flow_from_directory(new_paths[0], **flow_options)

test_generator = train_datagen.flow_from_directory(new_paths[1], **flow_options)

Found 395 images belonging to 4 classes.
Found 172 images belonging to 4 classes.


# Transfer learning

In [18]:
# Create the base pre-trained model (convolutional part of VGG16 only)
# NOTE(review): the channel count becomes 1 when `greys` is True, while the
# generators are created with color_mode='rgb' — confirm that the ImageNet
# weights can be loaded with a single-channel input before enabling `greys`.
base_model = VGG16(weights='imagenet', include_top=False,
                   input_shape=(size[0], size[1], 1 if greys else 3))

# Add layers on top of the pre-trained output
x = base_model.output

# Two identical Conv -> BatchNorm -> ReLU stages
for _ in range(2):
    x = Conv2D(256, kernel_size=(3, 3))(x)
    x = keras.layers.BatchNormalization()(x)
    x = ReLU()(x)

x = MaxPooling2D(pool_size=(2, 2))(x)

x = Flatten()(x)

# Two Dense -> BatchNorm -> ReLU stages of decreasing width
for units in (1024, 512):
    x = Dense(units)(x)
    x = keras.layers.BatchNormalization()(x)
    x = ReLU()(x)

# Last layer used to predict our classes: one unit per class found by the
# training generator, so the head follows the data instead of a fixed 4.
predictions = Dense(train_generator.num_classes, activation='softmax')(x)

# This is the model we will train
model = Model(inputs=base_model.input, outputs=predictions)

# Don't retrain pre-trained layers
for layer in base_model.layers:
    layer.trainable = False

# Compile the model (should be done *after* setting layers to non-trainable)
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.001),
              loss='categorical_crossentropy', metrics=['accuracy'])

model.summary()

Instructions for updating:
Colocations handled automatically by placer.
Model: "model_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_____________________________________________________

In [19]:
# Callback: multiply the learning rate by 0.5 when 'val_accuracy' has not
# improved for 2 epochs, never going below 1e-5
reduce_lr = ReduceLROnPlateau(
    monitor='val_accuracy',
    factor=0.5,
    patience=2,
    min_lr=1e-5,
    verbose=1,
)

In [32]:
# Number of full batches in the training set (integer division)
step_size_train = train_generator.n // train_generator.batch_size

# Train for 10 epochs, validating on the test generator; the learning rate is
# lowered by the reduce_lr callback.
model.fit_generator(
    generator=train_generator,
    steps_per_epoch=step_size_train,
    epochs=10,
    validation_data=test_generator,
    callbacks=[reduce_lr],
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 9/10
Epoch 10/10

Epoch 00010: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.


<keras.callbacks.callbacks.History at 0x2d0bac9cb70>

In [None]:
# Execute the companion notebook inside this kernel.
# NOTE(review): `save_model_classes`, called in the next cell, is not defined in
# this notebook — presumably it is provided by the notebook run here (TODO confirm).
%run g7_pickle_save_load.ipynb

In [33]:
# Save the trained model under the 'Models/' folder of the project; the training
# generator is passed along as well (presumably for its class mapping — TODO confirm).
save_model_classes(
    project_path + 'Models/',
    'model_int_Airbus',
    train_generator,
    model,
)