# Download dataset from google drive

In [None]:
data_dir = '/home/projects/flickr90k/'

with open(os.path.join(data_dir,'train_test_split.json'), 'r') as f:
    split_info = json.load(f)

train_val_df = info_to_df(split_info['train'])
test_df = info_to_df(split_info['test'])

train_df, val_df = train_test_split(train_val_df,
                                    test_size=0.10,
                                    stratify=train_val_df['class'],
                                    random_state=42)

print("Training data size: {} \nValidation data size: {} \nTest data size: {}" \
      .format(len(train_df), len(val_df), len(test_df)))

bird  cat  flower  jumbojet  sportscar		    tree
boat  dog  frog    mushroom  train_test_split.json


# Prepare metadata for Keras

In [3]:
import json
import os
import pandas as pd

In [4]:
data_dir = '/home/projects/flickr90k/'

with open(os.path.join(data_dir,'train_test_split.json'), 'r') as f:
    split_info = json.load(f)

In [5]:
def info_to_df(paths: list, x_col='filename', y_col='class') -> pd.DataFrame:
    info_ds = pd.DataFrame(paths, columns=[x_col])
    info_ds[y_col] = info_ds[x_col].apply(lambda x: x.split('/')[0])

    return info_ds.sample(frac=1)

In [6]:
train_df = info_to_df(split_info['train'])
test_df = info_to_df(split_info['test'])

# Calculate class weights

In [7]:
from sklearn.utils import class_weight
import numpy as np

class_weights = class_weight.\
                compute_class_weight('balanced',
                                      np.unique(train_df['class'].values),
                                      train_df['class'].values)
class_weights = dict(enumerate(class_weights))

# Extract high dimensional features

In [8]:
from keras.applications.vgg16 import VGG16
from keras.applications.vgg16 import preprocess_input
from keras.preprocessing import image
from keras.models import Model

Using TensorFlow backend.


In [10]:
train_generator = image.ImageDataGenerator()\
                       .flow_from_dataframe(train_df, data_dir,
                                            target_size=(224, 224),
                                            batch_size=85,
                                            class_mode='sparse')
test_generator = image.ImageDataGenerator()\
                       .flow_from_dataframe(test_df, data_dir,
                                            target_size=(224, 224),
                                            batch_size=50,
                                            class_mode='sparse')

Found 85000 images belonging to 10 classes.
Found 5000 images belonging to 10 classes.


In [11]:
vgg16_model = VGG16()

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5


In [12]:
vgg_last_fc = Model(inputs=vgg16_model.input, 
outputs=vgg16_model.get_layer('fc2').output)

In [13]:
training_x = np.zeros((85000, 4096))
training_y = np.zeros((85000, 1))

In [15]:
from tqdm import tqdm
for i in tqdm(range(0, 85000, 85)):
    data = next(train_generator)
    x = preprocess_input(data[0])
    vgg_features =  vgg_last_fc.predict(x)
    training_x[i:i+85] = vgg_features
    
    training_y[i:i+85] = np.expand_dims(data[1], axis=1)

100%|██████████| 1000/1000 [23:07<00:00,  1.35s/it]


# Train SVM

In [16]:
from sklearn.model_selection import train_test_split
from sklearn.svm import LinearSVC
from sklearn.metrics import accuracy_score

In [17]:
(x_train, x_val, y_train, y_val) = train_test_split(training_x, training_y,
                                                    test_size=0.2)

In [18]:
clf = LinearSVC()

In [19]:
clf.fit(x_train, y_train)

  y = column_or_1d(y, warn=True)


LinearSVC(C=1.0, class_weight=None, dual=True, fit_intercept=True,
     intercept_scaling=1, loss='squared_hinge', max_iter=1000,
     multi_class='ovr', penalty='l2', random_state=None, tol=0.0001,
     verbose=0)

In [20]:
preds = clf.predict(x_val)

In [21]:
accuracy_score(y_true=y_val, y_pred=preds)

0.8925882352941177

# Train  a model

In [22]:
from keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input
from keras.preprocessing import image
from keras.models import Model
from keras.layers import Input, Lambda, Dense, GlobalAveragePooling2D
from keras.layers.normalization import BatchNormalization
from keras.utils.vis_utils import model_to_dot
from IPython.display import SVG

In [23]:
def custom_model(input_shape, n_classes, resnet_model):
    inputs = Input(shape=input_shape, name='input_images')
    x = Lambda(preprocess_input,
               arguments={'data_format':'channels_last'},
               name='preprocess')(inputs)
    x = BatchNormalization()(x)
    features = resnet_model(x)
    features.trainable = False
    pool = GlobalAveragePooling2D(data_format='channels_last',
                                  name='global_pooling')(features)
    mlp = Dense(300, activation='relu',
                kernel_initializer='glorot_normal',
                name='MLP')(pool)
    mlp = BatchNormalization()(mlp)
    predictions = Dense(n_classes, activation='softmax',
                       name='predictions')(mlp)
    model = Model(inputs=inputs, outputs=predictions)
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model

In [31]:
train_generator = image.ImageDataGenerator()\
                       .flow_from_dataframe(train_df, data_dir,
                                            target_size=(224, 224),
                                            batch_size=60)
test_generator = image.ImageDataGenerator()\
                       .flow_from_dataframe(test_df, data_dir,
                                            target_size=(224, 224),
                                            batch_size=50)

Found 85000 images belonging to 10 classes.
Found 5000 images belonging to 10 classes.


In [28]:
resnet_model = ResNet50(weights='imagenet', include_top=False)

In [29]:
model = custom_model((224, 224, 3), 10, resnet_model)

In [27]:
SVG(model_to_dot(model).create(prog='dot', format='svg'))

ImportError: Failed to import `pydot`. Please install `pydot`. For example with `pip install pydot`.

In [33]:
model.fit_generator(train_generator, steps_per_epoch=1400, verbose=1,
                    class_weight=class_weights, epochs=10,
                    validation_data=test_generator,
                    validation_steps=100)

Epoch 1/10


ResourceExhaustedError: OOM when allocating tensor with shape[60,512,28,28] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node training/Adam/gradients/zeros_133}} = Fill[T=DT_FLOAT, _class=["loc:@training/Adam/gradients/resnet50_1/res3c_branch2c/BiasAdd_grad/BiasAddGrad"], index_type=DT_INT32, _device="/job:localhost/replica:0/task:0/device:GPU:0"](training/Adam/gradients/Shape_134, training/Adam/gradients/zeros_133/Const)]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[{{node metrics_1/acc/Mean/_7297}} = _Recv[client_terminated=false, recv_device="/job:localhost/replica:0/task:0/device:CPU:0", send_device="/job:localhost/replica:0/task:0/device:GPU:0", send_device_incarnation=1, tensor_name="edge_20329_metrics_1/acc/Mean", tensor_type=DT_FLOAT, _device="/job:localhost/replica:0/task:0/device:CPU:0"]()]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.
