In [1]:
from keras.preprocessing.image import ImageDataGenerator, Iterator
from keras.models import Sequential, Model
from keras.layers import Dropout, Flatten, Dense, Input
from keras import applications
from keras.layers import AveragePooling2D, GlobalAveragePooling2D
from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping, CSVLogger
from keras.applications.mobilenet import MobileNet
from keras.layers import Conv2D, Convolution2D, MaxPooling2D, ZeroPadding2D, BatchNormalization, Activation
from keras.optimizers import RMSprop
from keras import backend as K
import numpy as np
import pandas as pd
from keras import layers
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from keras.models import load_model

Using TensorFlow backend.


In [2]:
# Global settings shared by the cells below.
#   classes          - size of the softmax output layer
#   batch_size       - batch size of the training / test generators
#   train_total      - sample count used to derive training step counts
#   validation_total - sample count used to derive validation step counts
# NOTE(review): the two totals are hard-coded; presumably they equal the number
# of files found under train/ and validation/ - confirm.
classes, batch_size = 12, 6
train_total, validation_total = 3820, 930

In [3]:
# MobileNet backbone without its classifier. weights=None means no pretrained
# weights are fetched here (the notebook loads its own weights file later);
# the (None, None, 3) input shape leaves height and width unspecified.
model_mobilenet = MobileNet(
    input_shape=(None, None, 3),
    alpha=0.5,
    include_top=False,
    weights=None,
)

In [4]:
# Classification head stacked on the backbone output. The layers are created
# in the same order as they are applied, then chained one after another.
head_layers = [
    Dropout(0.5),
    GlobalAveragePooling2D(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(classes, activation='softmax'),
]
x = model_mobilenet.output
for head_layer in head_layers:
    x = head_layer(x)

In [5]:
# Full network: backbone input -> classification head output.
model = Model(
    inputs=model_mobilenet.input,
    outputs=x,
)

In [6]:
# Start from previously saved weights.
# NOTE(review): judging by the file name this is an earlier from-scratch
# training run - confirm where the file comes from.
initial_weights_path = 'mobilenet_from_scratch_image_1500_local_split.hdf5'
model.load_weights(initial_weights_path)

In [7]:
# Print the layer-by-layer architecture (output shapes and parameter counts).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, None, None, 3)     0         
_________________________________________________________________
conv1 (Conv2D)               (None, None, None, 16)    432       
_________________________________________________________________
conv1_bn (BatchNormalization (None, None, None, 16)    64        
_________________________________________________________________
conv1_relu (Activation)      (None, None, None, 16)    0         
_________________________________________________________________
conv_dw_1 (DepthwiseConv2D)  (None, None, None, 16)    144       
_________________________________________________________________
conv_dw_1_bn (BatchNormaliza (None, None, None, 16)    64        
_________________________________________________________________
conv_dw_1_relu (Activation)  (None, None, None, 16)    0         
__________

In [8]:
# Compile for evaluation of the loaded weights. The model is compiled again
# with a larger learning rate before training further down.
model.compile(
    optimizer=RMSprop(lr=1e-6),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [9]:
# Augmentation settings for training images.
# NOTE(review): the generators in this notebook only call
# train_datagen.random_transform(...) and divide pixels by 255 themselves, so
# `rescale` is presumably never applied through this object - confirm.
augmentation_options = dict(
    rescale=1./255,
    rotation_range=20,
    shear_range=0.2,
    zoom_range=0.2,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True,
    vertical_flip=True,
    fill_mode='reflect',
)
train_datagen = ImageDataGenerator(**augmentation_options)

In [10]:
# Species name -> integer label. The index is the position of the name in the
# list below; directory names under train/ and validation/ are looked up here.
species_names = [
    'Black-grass',
    'Charlock',
    'Cleavers',
    'Common Chickweed',
    'Common wheat',
    'Fat Hen',
    'Loose Silky-bent',
    'Maize',
    'Scentless Mayweed',
    'Shepherds Purse',
    'Small-flowered Cranesbill',
    'Sugar beet',
]
class_indices = {name: index for index, name in enumerate(species_names)}

In [11]:
from keras.utils import to_categorical
import random
from keras.preprocessing.image import load_img, img_to_array
from random import shuffle

def vary_size_gen(files, preds, batch_size, imggen):
    """Yield endless (images, labels) batches at a random square size per batch.

    For every batch one side length is drawn uniformly from 500..1500
    (inclusive) and all images of that batch are loaded at that size, so each
    batch stacks into one array while consecutive batches differ in resolution.

    Args:
        files: iterable of image file paths.
        preds: iterable of labels aligned with ``files``. Pairs are built with
            ``zip``, so surplus items on either side are silently dropped.
        batch_size: maximum number of samples per batch; the last batch of a
            pass over the data may be smaller.
        imggen: object exposing ``random_transform(x)``; applied to every image
            after its pixel values have been divided by 255.

    Yields:
        ``(x_batch, y_batch)`` as float32 numpy arrays.

    Raises:
        ValueError: if there are no samples or ``batch_size`` is below 1.
            Without these guards the endless loop would spin forever without
            yielding anything.
    """
    this_data = list(zip(files, preds))
    if not this_data:
        raise ValueError('vary_size_gen needs at least one (file, label) pair')
    if batch_size < 1:
        raise ValueError('batch_size must be >= 1, got %r' % (batch_size,))

    while True:
        # Reshuffle before every pass over the data.
        shuffle(this_data)
        for start in range(0, len(this_data), batch_size):
            rand_num = random.randint(500, 1500)
            batch = this_data[start:start + batch_size]
            y_batch = [label for _, label in batch]

            x_batch = [img_to_array(load_img(path, target_size=(rand_num, rand_num)),
                                    'channels_last')
                       for path, _ in batch]
            x_batch = [x / 255. for x in x_batch]
            x_batch = [imggen.random_transform(x) for x in x_batch]

            yield (np.array(x_batch, np.float32), np.array(y_batch, np.float32))

In [12]:
import re
import os

# Collect every training image path together with its class label.
# The label is the name of the sub-directory holding the image, so it is taken
# straight from the directory name instead of being parsed back out of the
# path (which only works with '/' separators).
train_class = []
train_names = []
for class_dir in sorted(os.listdir('train')):
    class_path = 'train/' + class_dir
    if not os.path.isdir(class_path):
        # Stray files directly under train/ would make os.scandir raise.
        continue
    for entry in sorted(os.scandir(class_path), key=lambda e: e.name):
        if not entry.is_file():
            # Nested directories are not images.
            continue
        train_class.append(class_dir)
        train_names.append(entry.path)

In [13]:
# Translate each directory-derived species name into its integer label.
train_classes = list(map(class_indices.__getitem__, train_class))

In [14]:
# Collect every validation image path together with its class label.
# The label is the name of the sub-directory holding the image, so it is taken
# straight from the directory name instead of being parsed back out of the
# path (which only works with '/' separators).
valid_class = []
valid_names = []
for class_dir in sorted(os.listdir('validation')):
    class_path = 'validation/' + class_dir
    if not os.path.isdir(class_path):
        # Stray files directly under validation/ would make os.scandir raise.
        continue
    for entry in sorted(os.scandir(class_path), key=lambda e: e.name):
        if not entry.is_file():
            # Nested directories are not images.
            continue
        valid_class.append(class_dir)
        valid_names.append(entry.path)

In [15]:
# Translate each directory-derived species name into its integer label.
validation_classes = list(map(class_indices.__getitem__, valid_class))

In [16]:
def vary_size_gen2(files, preds, batch_size, imggen = None, sizes = None):
    """Yield endless (images, labels) batches, one full data pass per image size.

    The size list is shuffled, then for each size the data is reshuffled and
    walked once in batches with every image loaded at ``size x size``. With the
    default sizes (500, 700, ..., 1500: six values) one complete cycle is
    therefore ``6 * ceil(n_samples / batch_size)`` batches.

    Args:
        files: iterable of image file paths.
        preds: iterable of labels aligned with ``files``. Pairs are built with
            ``zip``, so surplus items on either side are silently dropped.
        batch_size: maximum number of samples per batch; the last batch of a
            pass may be smaller.
        imggen: optional object exposing ``random_transform(x)``; when given it
            is applied to every image after the division by 255.
        sizes: optional iterable of square side lengths to cycle through.
            Defaults to ``range(500, 1700, 200)``. A private copy is shuffled,
            so the caller's sequence is never modified.

    Yields:
        ``(x_batch, y_batch)`` as float32 numpy arrays.

    Raises:
        ValueError: if there are no samples, no sizes, or ``batch_size`` is
            below 1. Without these guards the endless loop would spin forever
            without yielding anything.
    """
    this_data = list(zip(files, preds))
    if not this_data:
        raise ValueError('vary_size_gen2 needs at least one (file, label) pair')
    if batch_size < 1:
        raise ValueError('batch_size must be >= 1, got %r' % (batch_size,))
    sizes = list(range(500, 1700, 200)) if sizes is None else list(sizes)
    if not sizes:
        raise ValueError('sizes must contain at least one image size')

    while True:
        shuffle(sizes)
        for size in sizes:
            shuffle(this_data)
            for start in range(0, len(this_data), batch_size):
                batch = this_data[start:start + batch_size]
                y_batch = [label for _, label in batch]

                x_batch = [img_to_array(load_img(path, target_size=(size, size)),
                                        'channels_last')
                           for path, _ in batch]
                x_batch = [x / 255. for x in x_batch]
                if imggen is not None:
                    x_batch = [imggen.random_transform(x) for x in x_batch]

                yield (np.array(x_batch, np.float32), np.array(y_batch, np.float32))

In [17]:
# Validation loss / accuracy without augmentation.
# vary_size_gen2 walks the whole set once for each of its 6 image sizes, so a
# complete cycle is 6 * ceil(validation_total / 16) batches of (up to) 16.
valid_eval_steps = int(6*np.ceil(validation_total/16))
model.evaluate_generator(
    vary_size_gen2(valid_names, to_categorical(validation_classes), 16),
    steps=valid_eval_steps)

[0.47224498745636917, 0.91792114695340499]

In [18]:
# Same validation evaluation as the previous cell, run a second time on a
# freshly created generator (6 sizes x ceil(validation_total / 16) batches).
valid_eval_steps = int(6*np.ceil(validation_total/16))
model.evaluate_generator(
    vary_size_gen2(valid_names, to_categorical(validation_classes), 16),
    steps=valid_eval_steps)

[0.47424255480176314, 0.91774193548387095]

In [None]:
# Training-set loss / accuracy without augmentation
# (6 sizes x ceil(train_total / 16) batches).
train_eval_steps = int(6*np.ceil(train_total/16))
model.evaluate_generator(
    vary_size_gen2(train_names, to_categorical(train_classes), 16),
    steps=train_eval_steps)

[0.35734229576716536, 0.93869982546952802]

In [None]:
# Training-set loss / accuracy with the training augmentation switched on
# (6 sizes x ceil(train_total / 16) batches).
train_eval_steps = int(6*np.ceil(train_total/16))
model.evaluate_generator(
    vary_size_gen2(train_names, to_categorical(train_classes), 16,
                   imggen=train_datagen),
    steps=train_eval_steps)

In [None]:
# NOTE(review): scratch expression with no effect on the analysis; candidate for removal.
1+1

In [None]:
# Augmented, multi-size training batches of `batch_size` images.
train_target_size_generator = vary_size_gen2(
    train_names,
    to_categorical(train_classes),
    batch_size,
    imggen=train_datagen)

In [None]:
# Un-augmented, multi-size validation batches of 16 images.
validation_target_size_generator = vary_size_gen2(
    valid_names,
    to_categorical(validation_classes),
    16)

In [None]:
# Un-augmented generator over the TRAINING images, for checking train-set
# metrics. It is built with vary_size_gen2 (the generator defined in this
# notebook) and kept under its own name so that
# `validation_target_size_generator` keeps pointing at the validation images
# that fit_generator validates on.
train_eval_size_generator = vary_size_gen2(train_names,
                                           to_categorical(train_classes),
                                           16)

In [None]:
# Train-set metrics without augmentation. The generator is created here from
# the training files so that the data matches the train_total-based step count
# (6 image sizes x ceil(train_total / 16) batches).
model.evaluate_generator(vary_size_gen2(train_names,
                                        to_categorical(train_classes),
                                        16),
                         int(6*np.ceil(train_total/16)))

In [None]:
# Re-compile with the learning rate used for the training run below.
model.compile(
    optimizer=RMSprop(lr=1e-4),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [None]:
# Training callbacks, passed to fit_generator in this order.
checkpoint_cb = ModelCheckpoint(
    filepath='mobilenet_image_1500_local_split_vary_size.hdf5',
    save_best_only=True,
    save_weights_only=True,
    verbose=1)
reduce_lr_cb = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=5, verbose=1)
early_stop_cb = EarlyStopping(monitor='val_loss', patience=20, verbose=1)
csv_log_cb = CSVLogger('./13-metrics_vary_size.csv')

callbacks = [checkpoint_cb, reduce_lr_cb, early_stop_cb, csv_log_cb]

In [None]:
# One epoch = one pass over the training files at a single image size;
# validation covers all 6 sizes (6 x ceil(validation_total / 16) batches).
fit_steps_per_epoch = int(np.ceil(train_total/batch_size))
fit_validation_steps = int(6*np.ceil(validation_total/16))

history = model.fit_generator(
    train_target_size_generator,
    steps_per_epoch=fit_steps_per_epoch,
    epochs=100,
    verbose=2,
    callbacks=callbacks,
    validation_data=validation_target_size_generator,
    validation_steps=fit_validation_steps)

In [None]:
# Restore the checkpoint file that ModelCheckpoint(save_best_only=True) writes
# during training.
best_weights_path = 'mobilenet_image_1500_local_split_vary_size.hdf5'
model.load_weights(best_weights_path)

In [None]:
# Score the restored checkpoint on the validation set.
# A fresh generator is used because the shared validation generator has already
# been consumed by fit_generator and no longer starts at the beginning of a
# size cycle. The step count is 6 * ceil(validation_total / 16), the same
# formula as the other evaluation cells: one full pass per image size,
# including the short final batch of each pass.
model.evaluate_generator(vary_size_gen2(valid_names,
                                        to_categorical(validation_classes),
                                        16),
                         int(6*np.ceil(validation_total/16)))

In [None]:
# Side length (pixels) the test images are resized to.
# NOTE(review): `image_size` was not defined anywhere in the visible notebook,
# so this cell raised NameError. 1500 is taken from the "1500" in the weight /
# submission file names and is the largest size used during training - confirm
# the intended value.
image_size = 1500

# Un-augmented, unshuffled test images, so predictions stay aligned with
# test_generator.filenames. class_mode=None: the iterator yields images only.
test_datagen = ImageDataGenerator(rescale = 1./255)
test_generator = test_datagen.flow_from_directory(
                    'test',
                    shuffle=False,
                    target_size=(image_size,image_size),
                    batch_size=batch_size,
                    class_mode=None)

In [None]:
# Restart the iterator at the first file so the predictions line up with
# test_generator.filenames even when this cell is run more than once.
test_generator.reset()
# One pass over every test file (count taken from the iterator, not hard-coded).
test_steps = int(np.ceil(len(test_generator.filenames)/batch_size))
predictions_2 = model.predict_generator(test_generator, test_steps)

# Label index -> species name: the inverse of `class_indices`, the mapping the
# training labels in this notebook were built from.
class_ids = {index: name for name, index in class_indices.items()}
predicted_classes_2 = [class_ids[x] for x in np.argmax(predictions_2, axis=1)]

In [None]:
# Entries of `predicted_classes` at the positions where it disagrees with
# `predicted_classes_2`.
# NOTE(review): `predicted_classes` is not defined anywhere in the visible
# notebook (presumably predictions from an earlier model/run) - this cell
# raises NameError on a fresh kernel; confirm its source or remove the cell.
m = [i for i,j in zip(predicted_classes,predicted_classes_2) if(i!=j)]
m

In [None]:
from os.path import basename

# Build the submission table: one row per test file (directory part stripped)
# with its predicted species, written as CSV without the index column.
submission_csv = 'submission_mobilenet_size_increase_1500_pseudo_label.csv'
test_ids = list(map(basename, test_generator.filenames))
submission = pd.DataFrame({'file': test_ids, 'species': predicted_classes_2})
submission.to_csv(submission_csv, index=False, encoding="utf8")

In [None]:
from IPython.display import FileLink
# Display a link to the submission CSV as the cell output.
FileLink('submission_mobilenet_size_increase_1500_pseudo_label.csv')

In [None]:
# 97.607