In [0]:
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
from tensorflow import keras
from tensorflow.keras.applications.resnet50 import ResNet50
import tensorflow.keras.backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import optimizers
from tensorflow.keras.models import Sequential,Model,load_model
from tensorflow.keras.layers import Dense, Flatten, Dropout,BatchNormalization
import os
import pandas as pd
import tarfile

In [2]:
# Colab-only step: mount Google Drive at /content/drive so files stored there
# can be read through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [0]:
# Drive folder that holds the assignment archives and the label file.
path = "/content/drive/My Drive/comp5329assignment2/"

# Unpack the training archive onto the runtime's local disk.
# The context manager closes the archive even if extraction fails part-way,
# and extractall replaces the manual per-member loop.
# NOTE(review): extractall trusts the member paths stored in the archive;
# only use it on archives from a trusted source.
with tarfile.open(path + "train.tar.gz") as tar:
    names = tar.getnames()
    tar.extractall(path="/content/train_data/")

In [0]:
# Read the whitespace-separated label table; every field is kept as raw bytes
# and decoded later.
label_file = np.loadtxt(path+"train.txt", dtype=bytes)

# Shuffle the rows with a dedicated, seeded generator so that the positional
# train/validation split taken from this table is the same on every run,
# without touching NumPy's global random state.
np.random.RandomState(42).shuffle(label_file)

In [0]:
# Decode the two byte columns of the label table into text.
label_0 = label_file[:, 0].astype(str)
label_1 = label_file[:, 1].astype(str)

# Place the two columns side by side and wrap them in a DataFrame.
new_arr = np.column_stack((label_0, label_1))
df = pd.DataFrame(new_arr, columns=['Filenames', 'labels'])

# Each label cell is a comma-separated string of integers; turn it into a
# list of ints, one entry per label attached to the image.
df["labels"] = df["labels"].apply(lambda x: [int(token) for token in x.split(",")])

In [6]:
# Square input resolution, mini-batch size and the folder with the extracted
# training images.
image_size = 224
batch_size = 32
train_dir = "/content/train_data/train2014"

# Training images: multiply pixel values by 1/255 and apply light geometric
# augmentation (rotation, shifts, zoom, horizontal flip).
train_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1. / 255,
    zca_epsilon=1e-06,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.,
    zoom_range=0.05,
    horizontal_flip=True,
    vertical_flip=False,
    fill_mode='nearest',
    cval=0.,
)

# Validation images: rescaling only, no augmentation.
validation_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1. / 255)

# Arguments shared by the training and validation iterators.
flow_options = dict(
    directory=train_dir,
    x_col='Filenames',
    y_col='labels',
    batch_size=batch_size,
    seed=42,
    class_mode='categorical',
    target_size=(image_size, image_size),
)

# The first 25540 rows are used for training ...
train_generator = train_datagen.flow_from_dataframe(
    dataframe=df[:25540],
    shuffle=True,
    **flow_options
)

# ... and the remaining rows for validation.
val_generator = validation_datagen.flow_from_dataframe(
    dataframe=df[25540:],
    **flow_options
)

Found 25540 images belonging to 20 classes.
Found 6385 images belonging to 20 classes.


In [7]:
# Alternative ImageNet backbones kept for reference; uncomment one of them
# (and comment out the active line) to switch.
# base_model = keras.applications.vgg16.VGG16(weights='imagenet',include_top=False, input_shape=(224,224,3))
# base_model = keras.applications.resnet50.ResNet50(weights='imagenet',include_top=False, input_shape=(224,224,3))
# base_model = keras.applications.xception.Xception(weights='imagenet',include_top=False, input_shape=(224,224,3))
# base_model = keras.applications.densenet.DenseNet121(weights='imagenet',include_top=False, input_shape=(224,224,3))
# base_model = keras.applications.densenet.DenseNet169(weights='imagenet',include_top=False, input_shape=(224,224,3))
base_model = keras.applications.densenet.DenseNet201(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)

# Freeze the pretrained backbone; set this to True instead to fine-tune it.
base_model.trainable = False


Instructions for updating:
Colocations handled automatically by placer.
Downloading data from https://github.com/keras-team/keras-applications/releases/download/densenet/densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5


In [8]:
# Classification head stacked on top of the backbone: pooled features go
# through two Dense -> BatchNorm -> ReLU -> Dropout stages and end in 20
# independent sigmoid outputs.
classifier_head = [
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Flatten(),

    keras.layers.Dense(units=512),
    keras.layers.BatchNormalization(),
    keras.layers.ReLU(),
    keras.layers.Dropout(rate=0.5),

    keras.layers.Dense(units=512),
    keras.layers.BatchNormalization(),
    keras.layers.ReLU(),
    keras.layers.Dropout(rate=0.5),

    keras.layers.Dense(units=20, activation='sigmoid'),
]

model = tf.keras.Sequential([base_model, *classifier_head])


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [9]:
# Print the layer-by-layer table of output shapes and parameter counts.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
densenet201 (Model)          (None, 7, 7, 1920)        18321984  
_________________________________________________________________
global_average_pooling2d (Gl (None, 1920)              0         
_________________________________________________________________
flatten (Flatten)            (None, 1920)              0         
_________________________________________________________________
dense (Dense)                (None, 512)               983552    
_________________________________________________________________
batch_normalization_v1 (Batc (None, 512)               2048      
_________________________________________________________________
re_lu (ReLU)                 (None, 512)               0         
_________________________________________________________________
dropout (Dropout)            (None, 512)               0         
__________

In [0]:
def metric(y_true,y_pred):
    """Per-sample hit indicator used as a Keras metric.

    The predictions are rounded, multiplied element-wise with the targets and
    the maximum along axis 1 is returned, giving one value per sample.
    Assuming `y_true` is a multi-hot 0/1 matrix and `y_pred` holds
    probabilities in [0, 1] (TODO confirm), that value is 1 when at least one
    true label is predicted and 0 otherwise.

    The deprecated `reduction_indices` / `keep_dims` keyword arguments were
    previously passed as None; they are omitted so the call also works on
    TensorFlow versions that no longer accept them.
    """
    hits = tf.multiply(y_true, K.round(y_pred))
    return tf.math.reduce_max(hits, axis=1)
# https://github.com/mkocabas/focal-loss-keras/blob/master/focal_loss.py
def focal_loss(gamma=2., alpha=.25):
    """Build a focal-loss function closed over `gamma` and `alpha`.

    Args:
        gamma: exponent applied to the modulating factor.
        alpha: weight of the positive term; the negative term uses 1 - alpha.

    Returns:
        A function `focal_loss_fixed(y_true, y_pred)` that returns the summed
        (not averaged) loss over every element of the batch.
    """
    def focal_loss_fixed(y_true, y_pred):
        # Predictions at positive positions; other positions are filled with 1
        # so their log term is (close to) zero.
        pt_1 = tf.where(tf.equal(y_true, 1), y_pred, tf.ones_like(y_pred))
        # Predictions at negative positions; other positions are filled with 0.
        pt_0 = tf.where(tf.equal(y_true, 0), y_pred, tf.zeros_like(y_pred))

        # Keep both away from exactly 0 and 1: a saturated sigmoid would
        # otherwise produce log(0) and an inf/NaN loss.
        epsilon = K.epsilon()
        pt_1 = K.clip(pt_1, epsilon, 1. - epsilon)
        pt_0 = K.clip(pt_0, epsilon, 1. - epsilon)

        positive_term = K.sum(alpha * K.pow(1. - pt_1, gamma) * K.log(pt_1))
        negative_term = K.sum((1 - alpha) * K.pow(pt_0, gamma) * K.log(1. - pt_0))
        return -positive_term - negative_term
    return focal_loss_fixed


# Binary cross-entropy treats each of the sigmoid outputs as an independent
# yes/no decision.
model.compile(optimizer = 'adam',loss="binary_crossentropy",metrics=[metric] )
# model.compile(optimizer = 'adam',loss=focal_loss(),metrics=[metric] )

steps_per_epoch = train_generator.n // batch_size
# Round up so the final, smaller validation batch is evaluated as well;
# flooring silently skipped those images on every epoch.
validation_steps = int(np.ceil(val_generator.n / batch_size))

history = model.fit_generator(
    train_generator,
    epochs = 20,
    steps_per_epoch = steps_per_epoch,
    validation_data = val_generator,
    validation_steps = validation_steps,
)



In [12]:
# Unshuffled iterator over the hold-out rows, so that predictions come back in
# the same order as the rows of the DataFrame slice.
holdout_rows = df[25540:]
test_generator = validation_datagen.flow_from_dataframe(
    dataframe=holdout_rows,
    directory=train_dir,
    x_col='Filenames',
    y_col='labels',
    has_ext=True,
    class_mode='categorical',
    target_size=(image_size, image_size),
    batch_size=32,
    shuffle=False,
    seed=42,
)
# Rewind the iterator before it is used for prediction.
test_generator.reset()


Found 6385 images belonging to 20 classes.


In [0]:
y_pre_cat = model.predict_generator( test_generator )

# argmax returns a column position in the network output, and the columns
# follow the class ordering of the iterator the model was trained with.
# Translate the position back to the original label id before comparing it
# with the raw labels, instead of assuming both numberings coincide.
index_to_label = {index: label for label, index in train_generator.class_indices.items()}
y_pred = np.array([index_to_label[index] for index in np.argmax(y_pre_cat, axis=1)])
y_true = df[25540:]["labels"].values

assert y_true.shape[0] == y_pred.shape[0]

# Top-1 hit rate: a sample counts when its single most likely label is one of
# its true labels.
count = sum(
    1 for predicted, true_labels in zip(y_pred, y_true) if predicted in true_labels
)
print(count/y_pred.shape[0])


In [0]:
# Training and validation loss per epoch, drawn on an explicit Axes object.
fig, ax = plt.subplots()
ax.plot(history.history['loss'])
ax.plot(history.history['val_loss'])
ax.set_title('Model loss')
ax.set_xlabel('Epoch')
ax.set_ylabel('Loss')
ax.legend(['Train', 'Test'], loc='upper left')
plt.show()

# Retrain the model on all of the labelled data

In [15]:
# Same rescaling and augmentation settings as before, now applied to every
# labelled row instead of a training subset.
augmentation_options = dict(
    rescale=1. / 255,
    zca_epsilon=1e-06,
    rotation_range=15,
    width_shift_range=0.1,
    height_shift_range=0.1,
    shear_range=0.,
    zoom_range=0.05,
    horizontal_flip=True,
    vertical_flip=False,
    fill_mode='nearest',
    cval=0.,
)
train_datagen = keras.preprocessing.image.ImageDataGenerator(**augmentation_options)

# Shuffled iterator over the complete DataFrame.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=df,
    directory=train_dir,
    x_col='Filenames',
    y_col='labels',
    class_mode='categorical',
    target_size=(image_size, image_size),
    batch_size=batch_size,
    shuffle=True,
    seed=42,
)

Found 31925 images belonging to 20 classes.


In [0]:
# Fresh ImageNet-pretrained backbone, frozen so only the new head is trained.
base_model = keras.applications.densenet.DenseNet201(
    weights='imagenet',
    include_top=False,
    input_shape=(224, 224, 3),
)
base_model.trainable = False

# Head: pooled features, two Dense -> BatchNorm -> ReLU -> Dropout stages and
# 20 independent sigmoid outputs.
classifier_head = [
    keras.layers.GlobalAveragePooling2D(),
    keras.layers.Flatten(),

    keras.layers.Dense(units=512),
    keras.layers.BatchNormalization(),
    keras.layers.ReLU(),
    keras.layers.Dropout(rate=0.5),

    keras.layers.Dense(units=512),
    keras.layers.BatchNormalization(),
    keras.layers.ReLU(),
    keras.layers.Dropout(rate=0.5),

    keras.layers.Dense(units=20, activation='sigmoid'),
]
model = tf.keras.Sequential([base_model, *classifier_head])

# No validation data this time: every labelled image is used for training.
model.compile(optimizer='adam', loss="binary_crossentropy")
steps_per_epoch = train_generator.n // batch_size

history = model.fit_generator(
    train_generator,
    epochs=20,
    steps_per_epoch=steps_per_epoch,
)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


In [0]:
# Persist the retrained model next to the data on Drive.
model_file = path + "my_model.h5"
model.save(model_file)


In [0]:
# Unpack the test archive onto the runtime's local disk.
# The context manager closes the archive even if extraction fails part-way,
# and extractall replaces the manual per-member loop.
# NOTE(review): extractall trusts the member paths stored in the archive;
# only use it on archives from a trusted source.
with tarfile.open(path + "test.tar.gz") as tar:
    names = tar.getnames()
    tar.extractall(path="/content/test_data/")

# Drop the first archive entry.
# NOTE(review): presumably that entry is the top-level folder rather than an
# image - confirm against the archive listing.
names = names[1:]

In [18]:
# Drop the first 8 characters of every archive entry.
# NOTE(review): presumably this removes a leading "val2014/" folder prefix so
# only the file name remains - confirm against the archive listing.
test_files_name = [entry[8:] for entry in names]

# 15516
len(test_files_name)

15516

In [0]:
# Guard: the number of collected test file names must be exactly 15516.
assert len(test_files_name) == 15516

In [0]:
# Single-column frame of test file names, the input expected by
# flow_from_dataframe.
test_df = pd.DataFrame({'test_filenames': test_files_name})

In [21]:
# Test images are only rescaled, never augmented.
validation_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1. / 255)
test_dir = "/content/test_data/val2014"

# Unlabelled (class_mode=None), unshuffled iterator so that predictions come
# back in the same order as the rows of test_df.
test_generator = validation_datagen.flow_from_dataframe(
    dataframe=test_df,
    directory=test_dir,
    x_col='test_filenames',
    class_mode=None,
    target_size=(image_size, image_size),
    batch_size=32,
    shuffle=False,
    seed=42,
)
# Rewind the iterator before it is used for prediction.
test_generator.reset()


Found 15516 images.


In [0]:
y_pre_cat = model.predict_generator( test_generator )

# argmax returns a column position in the network output, and the columns
# follow the class ordering of the iterator the model was trained with.
# Translate the position back to the original label id so the saved
# predictions use the dataset's own label numbering even if it does not
# coincide with the column order.
index_to_label = {index: label for label, index in train_generator.class_indices.items()}
y_pred = np.array([index_to_label[index] for index in np.argmax(y_pre_cat, axis=1)])

# model.predict_generator( test_generator )

In [31]:
# Display the shape of the prediction vector (one entry per predicted row).
y_pred.shape

(15516,)

# Save the predicted labels

In [0]:
# Guard: there must be exactly 15516 predictions.
assert y_pred.shape[0] == 15516

# Two-column table: file name next to its predicted label.
test_file = np.array(test_files_name)
res = np.column_stack((test_file, y_pred))

In [0]:
# Write the table as tab-separated text without a header row or index column.
predictions_frame = pd.DataFrame(res)
predictions_frame.to_csv(
    path+"Prediected_labels.txt",
    sep='\t',
    header=None,
    index=None,
)


# Load the saved test predictions

In [0]:
# Read the tab-separated predictions back; the file has no header row.
predicted_test_data = pd.read_csv(
    path+'Prediected_labels.txt',
    header=None,
    sep="\t",
)