In [0]:
import numpy as np
import pandas as pd

import zipfile
import os

import tensorflow as tf
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import LearningRateScheduler, ModelCheckpoint, EarlyStopping
from tensorflow.keras.callbacks import Callback
from tensorflow.keras.regularizers import l2
from tensorflow.keras import optimizers
from tensorflow.keras.models import Model
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.layers import *
import tensorflow.keras.models as M
import tensorflow.keras.layers as L

import efficientnet.tfkeras as efn

In [0]:
RANDOM_SEED = 33

tf.test.gpu_device_name()

'/device:GPU:0'

In [0]:
!nvidia-smi

Mon May  4 15:01:57 2020       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 440.64.00    Driver Version: 418.67       CUDA Version: 10.1     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|   0  Tesla P100-PCIE...  Off  | 00000000:00:04.0 Off |                    0 |
| N/A   39C    P0    30W / 250W |    353MiB / 16280MiB |      0%      Default |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Processes:                                                       GPU Memory |
|  GPU       PID   Type   Process name                             Usage      |
+-------

In [0]:
train_df = pd.read_csv('train.csv')
sample_submission = pd.read_csv('sample-submission.csv')
train_df.head(1)

Unnamed: 0,Id,Category
0,100155.jpg,0


In [0]:
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    rotation_range = 5,
    width_shift_range=0.1,
    height_shift_range=0.1,
    validation_split=0.15, # set validation split
    horizontal_flip=False)

test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    'train/',
    target_size=(200, 200),
    batch_size=20,
    class_mode='categorical',
    shuffle=True, seed=RANDOM_SEED,
    subset='training') # set as training data

test_generator = train_datagen.flow_from_directory(
    'train/',
    target_size=(200, 200),
    batch_size=20,
    class_mode='categorical',
    shuffle=True, seed=RANDOM_SEED,
    subset='validation') # set as validation data

test_sub_generator = test_datagen.flow_from_dataframe(
    dataframe=sample_submission,
    directory= 'test_upload/',
    x_col="Id",
    y_col=None,
    shuffle=False,
    class_mode=None,
    seed=RANDOM_SEED,
    target_size=(200, 200),
    batch_size=20,)

Found 13232 images belonging to 10 classes.
Found 2329 images belonging to 10 classes.
Found 6675 validated image filenames.


In [0]:
base_model = efn.EfficientNetB6(weights='imagenet', include_top=False, input_shape=(200, 200, 3))
#base_model = Xception(weights='imagenet', include_top=False, input_shape = (200, 200, 3))

Downloading data from https://github.com/Callidior/keras-applications/releases/download/efficientnet/efficientnet-b6_weights_tf_dim_ordering_tf_kernels_autoaugment_notop.h5


In [0]:
base_model.trainable = False

In [0]:
"""
model=M.Sequential()
model.add(base_model)
model.add(L.GlobalAveragePooling2D(),)
model.add(L.Dense(256, activation='elu'))
model.add(L.BatchNormalization())
model.add(L.Dropout(0.25))
model.add(L.Dense(128, activation='elu'))
model.add(L.BatchNormalization())
model.add(L.Dropout(0.25))
model.add(L.Dense(10, activation='softmax'))
"""



x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dense(256, activation='elu')(x)
x = BatchNormalization()(x)
x = Dropout(0.25)(x)
x = Dense(128, activation='elu')(x)
x = BatchNormalization()(x)
x = Dropout(0.25)(x)
predictions = Dense(10, activation='softmax')(x)


model = Model(inputs=base_model.input, outputs=predictions)

In [0]:
tf.keras.backend.clear_session()
LR=0.001
model.compile(loss="categorical_crossentropy", optimizer=optimizers.Adam(lr=LR), metrics=["accuracy"])

In [0]:
checkpoint = ModelCheckpoint('best_model.hdf5' , 
                             monitor = ['val_accuracy'] , 
                             verbose = 1, 
                             mode = 'max')
callbacks_list = [checkpoint]


history = model.fit(
        train_generator,
        steps_per_epoch = train_generator.samples//train_generator.batch_size,
        validation_data = test_generator, 
        epochs = 5,
        validation_steps = test_generator.samples//test_generator.batch_size,
        callbacks = callbacks_list
)

Epoch 1/5
Epoch 00001: saving model to best_model.hdf5
Epoch 2/5
Epoch 00002: saving model to best_model.hdf5
Epoch 3/5
Epoch 00003: saving model to best_model.hdf5
Epoch 4/5
Epoch 00004: saving model to best_model.hdf5
Epoch 5/5
Epoch 00005: saving model to best_model.hdf5


In [0]:
scores = model.evaluate(test_generator, steps=len(test_generator), verbose=1)
print("Accuracy: %.2f%%" % (scores[1]*100))

Accuracy: 67.50%


In [0]:
from google.colab import drive
drive.mount('/content/drive')

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive


In [0]:
model.save('/content/drive/My Drive/MODEL_DL/model_1_SF_car_pred.hdf5')

In [0]:
len(base_model.layers)

659

In [0]:
# 2 2 2 2 2 2 2 2 2 2 2 2 2 -------------------------

base_model.trainable = True

# Fine-tune from this layer onwards
fine_tune_at = len(base_model.layers)//2

# Freeze all the layers before the `fine_tune_at` layer
for layer in base_model.layers[:fine_tune_at]:
  layer.trainable =  False

In [0]:
len(base_model.trainable_variables)

291

In [0]:
LR=0.0001
model.compile(loss="categorical_crossentropy", optimizer=optimizers.Adam(lr=LR), metrics=["accuracy"])

In [0]:
history = model.fit(
        train_generator,
        steps_per_epoch = train_generator.samples//train_generator.batch_size,
        validation_data = test_generator, 
        validation_steps = test_generator.samples//test_generator.batch_size,
        epochs = 7,
        callbacks = callbacks_list
)

Epoch 1/7
Epoch 00001: saving model to best_model.hdf5
Epoch 2/7
Epoch 00002: saving model to best_model.hdf5
Epoch 3/7
Epoch 00003: saving model to best_model.hdf5
Epoch 4/7
Epoch 00004: saving model to best_model.hdf5
Epoch 5/7
Epoch 00005: saving model to best_model.hdf5
Epoch 6/7
Epoch 00006: saving model to best_model.hdf5
Epoch 7/7
Epoch 00007: saving model to best_model.hdf5


In [0]:
model.save('/content/drive/My Drive/MODEL_DL/model_2_SF_car_pred.hdf5')

In [0]:
# 3333333333 -------------------
base_model.trainable = True

In [0]:
LR=0.00001
model.compile(loss="categorical_crossentropy", optimizer=optimizers.Adam(lr=LR), metrics=["accuracy"])

In [0]:
# Обучаем
history = model.fit(
        train_generator,
        steps_per_epoch = train_generator.samples//train_generator.batch_size,
        validation_data = test_generator, 
        validation_steps = test_generator.samples//test_generator.batch_size,
        epochs = 9,
        callbacks = callbacks_list
)

Epoch 1/9
Epoch 00001: saving model to best_model.hdf5
Epoch 2/9
Epoch 00002: saving model to best_model.hdf5
Epoch 3/9
Epoch 00003: saving model to best_model.hdf5
Epoch 4/9
Epoch 00004: saving model to best_model.hdf5
Epoch 5/9
Epoch 00005: saving model to best_model.hdf5
Epoch 6/9
Epoch 00006: saving model to best_model.hdf5
Epoch 7/9
Epoch 00007: saving model to best_model.hdf5
Epoch 8/9
Epoch 00008: saving model to best_model.hdf5
Epoch 9/9
Epoch 00009: saving model to best_model.hdf5


In [0]:
model.save('/content/drive/My Drive/MODEL_DL/model_3_SF_car_pred.hdf5')

In [0]:
scores = model.evaluate(test_generator, steps=len(test_generator), verbose=1)
print("Accuracy: %.2f%%" % (scores[1]*100))

Accuracy: 95.36%


In [0]:
train_datagen = ImageDataGenerator(rescale=1. / 255, 
                                    #rotation_range = 90,
                                    #shear_range=0.2,
                                    zoom_range=[0.75,1.25],
                                   validation_split=0.15,
                                    #brightness_range=[0.5, 1.5],
                                    #width_shift_range=0.1,
                                    #height_shift_range=0.1,
                                    horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    'train/',
    target_size=(400, 400),
    batch_size=6,
    class_mode='categorical',
    shuffle=True, seed=RANDOM_SEED,
    subset='training') # set as training data

test_generator = train_datagen.flow_from_directory(
    'train/',
    target_size=(400, 400),
    batch_size=6,
    class_mode='categorical',
    shuffle=True, seed=RANDOM_SEED,
    subset='validation') # set as validation data

test_sub_generator = test_datagen.flow_from_dataframe(
    dataframe=sample_submission,
    directory= 'test_upload/',
    x_col="Id",
    y_col=None,
    shuffle=False,
    class_mode=None,
    seed=RANDOM_SEED,
    target_size=(400, 400),
    batch_size=6,)

Found 13232 images belonging to 10 classes.
Found 2329 images belonging to 10 classes.
Found 6675 validated image filenames.


In [0]:
base_model = efn.EfficientNetB6(weights='imagenet', include_top=False, input_shape=(400, 400, 3))

LR=0.00001
model.compile(loss="categorical_crossentropy", optimizer=optimizers.Adam(lr=LR), metrics=["accuracy"])
#model.load_weights('best_model.hdf5')

In [0]:
history = model.fit(
        train_generator,
        steps_per_epoch = train_generator.samples//train_generator.batch_size,
        validation_data = test_generator, 
        validation_steps = test_generator.samples//test_generator.batch_size,
        epochs = 9,
        callbacks = callbacks_list
)

Epoch 1/10
Epoch 00001: saving model to best_model.hdf5
Epoch 2/10
Epoch 00002: saving model to best_model.hdf5
Epoch 3/10
Epoch 00003: saving model to best_model.hdf5
Epoch 4/10
Epoch 00004: saving model to best_model.hdf5
Epoch 5/10
Epoch 00005: saving model to best_model.hdf5
Epoch 6/10
Epoch 00006: saving model to best_model.hdf5
Epoch 7/10
Epoch 00007: saving model to best_model.hdf5
Epoch 8/10
Epoch 00008: saving model to best_model.hdf5
Epoch 9/10
Epoch 00009: saving model to best_model.hdf5
Epoch 10/10
  76/2205 [>.............................] - ETA: 20:18 - loss: 0.1580 - accuracy: 0.9403

KeyboardInterrupt: ignored

In [0]:
model.save('/content/drive/My Drive/MODEL_DL/model_4_SF_car_pred.hdf5')

In [0]:
scores = model.evaluate(test_generator, steps=len(test_generator), verbose=1)
print("Accuracy: %.2f%%" % (scores[1]*100))

Accuracy: 96.82%


In [0]:
train_datagen = ImageDataGenerator(rescale=1. / 255, 
                                    #rotation_range = 90,
                                    #shear_range=0.2,
                                    zoom_range=[0.75,1.25],
                                    #brightness_range=[0.5, 1.5],
                                    #width_shift_range=0.1,
                                    #height_shift_range=0.1,
                                    horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1. / 255)

train_generator = train_datagen.flow_from_directory(
    'train/',
    target_size=(400, 400),
    batch_size=6,
    class_mode='categorical',
    shuffle=True, seed=RANDOM_SEED,
    subset='training') # set as training data


test_sub_generator = test_datagen.flow_from_dataframe(
    dataframe=sample_submission,
    directory= 'test_upload/',
    x_col="Id",
    y_col=None,
    shuffle=False,
    class_mode=None,
    seed=RANDOM_SEED,
    target_size=(400, 400),
    batch_size=6,)

Found 15561 images belonging to 10 classes.
Found 6675 validated image filenames.


In [0]:
base_model = efn.EfficientNetB6(weights='imagenet', include_top=False, input_shape=(400, 400, 3))

LR=0.00001
model.compile(loss="categorical_crossentropy", optimizer=optimizers.Adam(lr=LR), metrics=["accuracy"])
model.load_weights('best_model.hdf5')

In [0]:
checkpoint = ModelCheckpoint('best_model.hdf5' , 
                             monitor = ['accuracy'] , 
                             verbose = 1, 
                             mode = 'max')
callbacks_list = [checkpoint]

history = model.fit(
        train_generator,
        steps_per_epoch = train_generator.samples//train_generator.batch_size,
        epochs = 3,
        callbacks = callbacks_list
)

Epoch 1/3
Epoch 00001: saving model to best_model.hdf5
Epoch 2/3
Epoch 00002: saving model to best_model.hdf5
Epoch 3/3
  21/2593 [..............................] - ETA: 24:03 - loss: 0.1183 - accuracy: 0.9603

KeyboardInterrupt: ignored

In [0]:
model.save('/content/drive/My Drive/MODEL_DL/model_5_SF_car_pred.hdf5')

In [0]:
test_sub_generator.samples

6675

In [0]:
test_sub_generator.reset()
predictions = model.predict_generator(test_sub_generator, steps=len(test_sub_generator), verbose=1) 
predictions = np.argmax(predictions, axis=-1) #multiple categories
label_map = (train_generator.class_indices)
label_map = dict((v,k) for k,v in label_map.items()) #flip k,v
predictions = [label_map[k] for k in predictions]



In [0]:
filenames_with_dir=test_sub_generator.filenames
submission = pd.DataFrame({'Id':filenames_with_dir, 'Category':predictions}, columns=['Id', 'Category'])
submission['Id'] = submission['Id'].replace('test_upload/','')
submission.to_csv('/content/drive/My Drive/submission_2.csv', index=False)
print('Save submit')