In [1]:
from keras_preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Model
from keras.layers import Dense, GlobalAveragePooling2D, Dropout
import os
import keras
import pandas as pd
from google.colab import drive
from keras.applications.inception_v3 import InceptionV3
from keras.applications.mobilenet_v2 import MobileNetV2
import numpy as np
import matplotlib.pyplot as plt
from keras.callbacks import ModelCheckpoint


In [2]:
# Mount Google Drive inside the Colab runtime at /content/drive.
drive.mount('/content/drive')
# Switch to the notebooks folder: the dataset, checkpoint and results paths used
# by the later cells are all relative to this working directory.
os.chdir("/content/drive/My Drive/Colab Notebooks/") 

Mounted at /content/drive


In [3]:
def append_ext(fn):
    """Return `fn` with the ".jpg" extension appended to it."""
    jpg_suffix = ".jpg"
    return fn + jpg_suffix

In [4]:
# Read the training labels, then rewrite the "id" column as image file names:
# each id is zero-padded to six characters and given the .jpg extension.
label_path = 'cs-t0828-2020-hw1/training_labels.csv'
y_train = pd.read_csv(label_path).assign(
    id=lambda labels: labels['id'].astype(str).str.zfill(6).map(append_ext)
)
print(y_train)

               id                                label
0      009350.jpg          Ford F-150 Regular Cab 2007
1      002645.jpg                      BMW X6 SUV 2012
2      002267.jpg              BMW 1 Series Coupe 2012
3      008553.jpg              Fisker Karma Sedan 2012
4      006990.jpg  Dodge Ram Pickup 3500 Crew Cab 2010
...           ...                                  ...
11180  000184.jpg                  Acura TL Sedan 2012
11181  005863.jpg          Chevrolet Malibu Sedan 2007
11182  002482.jpg        BMW 6 Series Convertible 2007
11183  014926.jpg            Suzuki Kizashi Sedan 2012
11184  002927.jpg              BMW M6 Convertible 2010

[11185 rows x 2 columns]


In [5]:
# os.listdir() returns directory entries in arbitrary order; sort them so the
# test frame (and therefore the order of the predictions) is reproducible.
test_filenames = sorted(os.listdir('cs-t0828-2020-hw1/testing_data/testing_data/'))
# Single-column frame consumed by the test iterator built in generator().
x_test = pd.DataFrame({'id':test_filenames})

In [6]:
def generator(IMG_SIZE, batch_size):
  """Build the training, validation and test image iterators.

  Training and validation images are listed in the module-level `y_train`
  frame (columns "id" and "label"); test images are listed in the module-level
  `x_test` frame (column "id"). Every image is resized to IMG_SIZE x IMG_SIZE
  and passed through the InceptionV3 `preprocess_input` function.

  Args:
    IMG_SIZE: side length, in pixels, of the square images fed to the model.
    batch_size: batch size of the training and validation iterators. The test
      iterator always uses a batch size of 1.

  Returns:
    A `(train_generator, valid_generator, test_generator)` tuple.
  """
  validation_split = 0.1
  preprocess = keras.applications.inception_v3.preprocess_input
  train_dir = "cs-t0828-2020-hw1/training_data/training_data/"
  test_dir = 'cs-t0828-2020-hw1/testing_data/testing_data/'
  target_size = (IMG_SIZE, IMG_SIZE)

  # Random augmentation is applied to the training subset only.
  train_datagen = ImageDataGenerator(
      preprocessing_function=preprocess,
      validation_split=validation_split,
      shear_range=0.2,
      zoom_range=0.5,
      rotation_range=45,
      width_shift_range=.15,
      height_shift_range=.15
  )

  # Same split fraction as the training generator so the two subsets are
  # complementary; no augmentation.
  valid_datagen = ImageDataGenerator(
      preprocessing_function=preprocess,
      validation_split=validation_split
  )

  test_datagen = ImageDataGenerator(
      preprocessing_function=preprocess
  )

  # Arguments shared by the two labelled iterators. The deprecated `has_ext`
  # keyword is no longer passed: the "id" column already holds the exact file
  # names on disk.
  labelled_flow_args = dict(
      dataframe=y_train,
      directory=train_dir,
      batch_size=batch_size,
      x_col="id",
      y_col="label",
      class_mode="categorical",
      target_size=target_size
  )

  train_generator = train_datagen.flow_from_dataframe(
      subset="training",
      **labelled_flow_args
  )
  # shuffle=False: validation is run for a whole number of batches, so a fixed
  # order makes every epoch score the same images and keeps the monitored
  # val_loss comparable from one epoch to the next.
  valid_generator = valid_datagen.flow_from_dataframe(
      subset="validation",
      shuffle=False,
      **labelled_flow_args
  )

  # Unlabelled, unshuffled, one image per batch: predictions line up one-to-one
  # with `test_generator.filenames`.
  test_generator = test_datagen.flow_from_dataframe(
      dataframe=x_test,
      directory=test_dir,
      x_col='id',
      y_col=None,
      class_mode=None,
      target_size=target_size,
      batch_size=1,
      shuffle=False
  )

  return train_generator, valid_generator, test_generator


In [7]:
def inceptionv3(IMG_SIZE, num_classes=None):
  """Build and compile an InceptionV3-based classifier.

  The ImageNet-pretrained convolutional base is followed by global average
  pooling, a 1024-unit ReLU layer and a softmax output layer. All layers,
  including the pretrained base, are trainable.

  Args:
    IMG_SIZE: side length, in pixels, of the square RGB input images.
    num_classes: number of output classes. Defaults to the number of distinct
      labels in the module-level `y_train` frame.

  Returns:
    The compiled Keras `Model` (SGD optimizer, categorical cross-entropy loss,
    accuracy metric).
  """
  if num_classes is None:
    # Derive the output width from the data instead of hard-coding it.
    num_classes = int(y_train['label'].nunique())

  # include_top=False drops the built-in classifier, so the `classes` and
  # `classifier_activation` arguments would have no effect and are not passed.
  base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(IMG_SIZE,IMG_SIZE,3))

  # Classification head: global spatial average pooling, one fully-connected
  # layer, then one softmax unit per class.
  x = GlobalAveragePooling2D()(base_model.output)
  x = Dense(1024, activation='relu')(x)
  predictions = Dense(num_classes, activation='softmax')(x)

  # this is the model we will train
  model = Model(inputs=base_model.input, outputs=predictions)

  # Fine-tune the whole network: every InceptionV3 layer is left trainable
  # (nothing is frozen).
  for layer in base_model.layers:
    layer.trainable = True

  model.compile(optimizer='sgd',loss='categorical_crossentropy', metrics=['accuracy'])
  return model

In [8]:
def mobilenet_v2(IMG_SIZE=None):
  """Build and compile a MobileNetV2 classifier trained from scratch.

  Args:
    IMG_SIZE: optional side length, in pixels, of the square RGB input images.
      When omitted, the library's default input shape is used, as before. Pass
      the same value given to `generator` so the model input matches the
      batches it receives.

  Returns:
    The compiled Keras model (Adam optimizer, categorical cross-entropy loss,
    accuracy metric) with one softmax output per distinct label in the
    module-level `y_train` frame.
  """
  input_shape = None if IMG_SIZE is None else (IMG_SIZE, IMG_SIZE, 3)
  model = MobileNetV2(
      weights=None,
      include_top=True,
      input_shape=input_shape,
      classes=y_train['label'].nunique(),
      classifier_activation='softmax'
  )
  model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
  return model

In [9]:
# Parameters setup: training settings read by this cell and the cells below.
IMG_SIZE = 800  # side length passed to both the model builder and generator()
batch_size = 8  # batch size of the training and validation iterators
epochs = 50
weights_path = '800_best_model.hdf5'  # checkpoint file, relative to the working directory

model = inceptionv3(IMG_SIZE)
#model = mobilenet_v2()
# Resume from a previously saved checkpoint when the file already exists.
if os.path.isfile(weights_path):
  model.load_weights(weights_path)

# Build the three iterators used for training, validation and prediction.
train_generator, valid_generator, test_generator = generator(IMG_SIZE, batch_size)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
Found 10067 validated image filenames belonging to 196 classes.
Found 1118 validated image filenames belonging to 196 classes.
Found 5000 validated image filenames.


In [10]:
# Multiply the learning rate by 0.1 once val_loss has not improved for 3 epochs.
# `min_delta` is the current name of the deprecated `epsilon` keyword; the
# threshold value is unchanged.
reduce_lr = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=3, verbose=0, mode='auto', min_delta=0.0001, cooldown=0, min_lr=0)
# Write the model to `weights_path` only when val_loss improves. The deprecated
# `period=1` keyword is dropped: checking once per epoch is the default.
checkpoint = ModelCheckpoint(weights_path, monitor='val_loss', verbose=1, save_best_only=True, mode='auto')



In [None]:
# Number of whole batches per pass; floor division leaves out an incomplete
# final batch.
STEP_SIZE_TRAIN = train_generator.n // train_generator.batch_size
STEP_SIZE_VALID = valid_generator.n // valid_generator.batch_size

# Model.fit accepts generators directly; fit_generator is deprecated (see the
# "Please use Model.fit" notice in the recorded output of this cell).
model.fit(
    train_generator,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_data=valid_generator,
    validation_steps=STEP_SIZE_VALID,
    epochs=epochs,
    callbacks=[reduce_lr, checkpoint],
    verbose=1)

Instructions for updating:
Please use Model.fit, which supports generators.
Epoch 1/50
Epoch 00001: val_loss improved from inf to 0.79452, saving model to 800_best_model.hdf5
Epoch 2/50
Epoch 00002: val_loss improved from 0.79452 to 0.77205, saving model to 800_best_model.hdf5
Epoch 3/50
Epoch 00003: val_loss improved from 0.77205 to 0.62357, saving model to 800_best_model.hdf5
Epoch 4/50
Epoch 00004: val_loss improved from 0.62357 to 0.55393, saving model to 800_best_model.hdf5
Epoch 5/50
Epoch 00005: val_loss improved from 0.55393 to 0.51906, saving model to 800_best_model.hdf5
Epoch 6/50
Epoch 00006: val_loss improved from 0.51906 to 0.45074, saving model to 800_best_model.hdf5
Epoch 7/50
Epoch 00007: val_loss improved from 0.45074 to 0.41051, saving model to 800_best_model.hdf5
Epoch 8/50
Epoch 00008: val_loss did not improve from 0.41051
Epoch 9/50
Epoch 00009: val_loss did not improve from 0.41051
Epoch 10/50
Epoch 00010: val_loss improved from 0.41051 to 0.40877, saving model to

In [None]:
# Batches needed to cover every test image. Rounded up so a trailing partial
# batch is never skipped, which would leave fewer predictions than file names.
STEP_SIZE_TEST = -(-test_generator.n // test_generator.batch_size)

# Start from the first image so predictions align with test_generator.filenames.
test_generator.reset()
# Model.predict accepts generators directly; predict_generator is deprecated.
pred = model.predict(test_generator, steps=STEP_SIZE_TEST, verbose=1)

# Index of the highest-scoring class for each image.
predicted_class_indices = np.argmax(pred, axis=1)

# Invert the name -> index mapping of the training iterator to recover names.
labels = (train_generator.class_indices)
labels = dict((v,k) for k,v in labels.items())
predictions = [labels[k] for k in predicted_class_indices]

filenames = test_generator.filenames
# File name without its extension, as an integer (leading zeros are dropped).
# Named `image_ids` rather than `id` so the builtin id() is not shadowed.
image_ids = [int(os.path.splitext(name)[0]) for name in filenames]
results = pd.DataFrame({"id": image_ids,
                        "label": predictions}).sort_values(by='id')
results.to_csv("results.csv", index=False)