# Transfer Learning with Keras

Code snippets

In [None]:
import os

import tensorflow as tf

from tensorflow.keras.applications import resnet50
from tensorflow.keras.applications import vgg16
from tensorflow.keras.applications import inception_v3
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout, Flatten, Input
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.regularizers import l1_l2


from skimage.io import imread

from sklearn.model_selection import train_test_split


import numpy as np

import random

### Create a list of pathnames with associated labels based on folder structure

In [None]:
def create_image_list(img_dir):
    """Pair every image path under ``img_dir`` with an integer class label.

    Each immediate sub-directory of ``img_dir`` is treated as one class.
    Classes are numbered ``0..N-1`` in the sorted order of the folder names.

    Args:
        img_dir: Root directory that contains one sub-directory per class.

    Returns:
        The iterator ``zip(*pairs)``. It yields the tuple of path names and
        then the tuple of matching labels, so
        ``paths, labels = create_image_list(img_dir)`` works. The iterator is
        empty when no image is found.
    """
    # Only directories are classes. A stray file next to the class folders
    # (e.g. ``.DS_Store``) would otherwise shift the label indexes and make
    # the nested ``os.listdir`` call fail.
    folders = sorted(
        entry for entry in os.listdir(img_dir)
        if os.path.isdir(os.path.join(img_dir, entry))
    )
    label_map = {folder: index for index, folder in enumerate(folders)}

    labeled_image_list = []
    for folder in folders:
        folder_path = os.path.join(img_dir, folder)
        # ``os.listdir`` order is arbitrary; sorting makes the list (and any
        # seeded split computed from it) reproducible.
        for filename in sorted(os.listdir(folder_path)):
            pathname = os.path.join(folder_path, filename)
            # Skip nested directories such as ``.ipynb_checkpoints``.
            if os.path.isfile(pathname):
                labeled_image_list.append((pathname, label_map[folder]))

    return zip(*labeled_image_list)

### Define image generator to feed Keras `Model.predict_generator()`

In [None]:
class ImageGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that serves batches of images loaded from disk.

    Batches contain images only (no labels). Images are served in the order
    of ``image_list``.
    """

    def __init__(self, image_list, preprocess_fn=None, batch_size=64,
                 target_size=(224, 224)):
        """Store the generator configuration.

        Args:
            image_list: Sliceable sequence of image path names.
            preprocess_fn: Optional callable applied to each stacked batch
                array; its return value is what the generator yields.
            batch_size: Maximum number of images per batch.
            target_size: ``(height, width)`` every image is resized to on load.
        """
        super().__init__()
        self.image_list = image_list
        self.batch_size = batch_size
        self.preprocess_fn = preprocess_fn
        self.target_size = target_size

    def __len__(self):
        """Return the number of batches, counting a trailing partial batch."""
        # Ceiling division: flooring here would silently leave up to
        # ``batch_size - 1`` images at the end of the list unprocessed.
        return -(-len(self.image_list) // self.batch_size)

    def __getitem__(self, index):
        """Load and return batch number ``index`` as a single array."""
        start = index * self.batch_size
        pathnames = self.image_list[start:start + self.batch_size]
        return self.__load_images(pathnames)

    def __load_images(self, pathnames):
        """Load ``pathnames`` into one stacked array and preprocess it."""
        images = np.asarray([
            image.img_to_array(
                image.load_img(pathname, target_size=self.target_size))
            for pathname in pathnames
        ])
        if self.preprocess_fn is not None:
            images = self.preprocess_fn(images)

        return images
    
        

### Define image featurizer based on ResNet50

In [None]:
class ResNetFeaturizer():
    """Turns images into feature vectors with an ImageNet-pretrained ResNet50.

    The network is built without its classification head and with global
    average pooling, so each image is reduced to one flat feature vector.
    """

    def __init__(self):
        """Build the headless ResNet50 with ImageNet weights."""
        self.base_model = resnet50.ResNet50(
            weights='imagenet',
            input_shape=(224, 224, 3),
            include_top=False,
            pooling='avg')

    def extract(self, image_list):
        """Compute features for the images named in ``image_list``.

        Args:
            image_list: Sliceable sequence of image path names.

        Returns:
            The array returned by the model, one row per image served by
            ``ImageGenerator``, in the order of ``image_list``.
        """
        image_generator = ImageGenerator(image_list, resnet50.preprocess_input)

        # ``predict_generator`` is deprecated and missing from recent Keras
        # releases, where ``predict`` accepts a ``Sequence`` directly. Keep
        # using it where it exists so older installs behave as before.
        predict = getattr(self.base_model, 'predict_generator', None)
        if predict is None:
            predict = self.base_model.predict
        features = predict(image_generator, verbose=1)

        return features

### Define a function that configures a TF dataset on top of NumPy tensors with image features and labels

In [None]:
def create_dataset(data, labels, batch_size, train=True, buffer_size=10000,
                   num_classes=6):
    """Build a batched, endlessly repeating ``tf.data.Dataset`` from in-memory data.

    Args:
        data: Samples, sliced along their first axis (one slice per example).
        labels: Integer class ids, one per sample; they are one-hot encoded.
        batch_size: Number of examples per batch.
        train: When true, examples are shuffled before batching.
        buffer_size: Size of the shuffle buffer (used only when ``train``).
        num_classes: Depth of the one-hot label encoding.

    Returns:
        A dataset of ``(data, one_hot_label)`` batches. It repeats forever,
        so callers must bound iteration themselves (e.g. ``steps_per_epoch``).
    """
    labels = tf.one_hot(labels, num_classes)
    dataset = tf.data.Dataset.from_tensor_slices((data, labels))
    if train:
        dataset = dataset.shuffle(buffer_size)
    dataset = dataset.batch(batch_size)
    dataset = dataset.repeat()
    return dataset

### Define a small fully connected network (FCN) to layer on top of the featurizer

In [None]:
def classifier(input_shape=(2048,), units=512, classes=6,  l1=0.01, l2=0.01):
    """Build and compile a dense classification head for precomputed features.

    The network is: input -> Dense(units, relu) -> Dropout(0.5) ->
    Dense(classes, softmax). Only the output layer's kernel carries the
    L1/L2 penalty.

    Args:
        input_shape: Shape of one feature vector.
        units: Width of the hidden dense layer.
        classes: Number of output classes.
        l1: L1 regularization factor for the output layer kernel.
        l2: L2 regularization factor for the output layer kernel.

    Returns:
        The compiled Keras ``Model`` (Adadelta optimizer, categorical
        cross-entropy loss, accuracy metric).
    """
    inputs = Input(shape=input_shape)

    # Apply the layers one after another, in the order they are listed.
    tensor = inputs
    for make_layer in (
        lambda: Dense(units, activation='relu'),
        lambda: Dropout(0.5),
        lambda: Dense(classes, activation='softmax',
                      kernel_regularizer=l1_l2(l1=l1, l2=l2)),
    ):
        tensor = make_layer()(tensor)

    net = Model(inputs=inputs, outputs=tensor)
    net.compile(
        optimizer='adadelta',
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net
    
   

### Prepare training and validation datasets

The images should be organized under `img_dir` with one subfolder per class; the subfolder an image sits in determines its label.

In [None]:
img_dir = '../../../Datasets/aerialimages/train'

# Path names and integer labels, aligned element by element.
img_list, label_list = create_image_list(img_dir)

# Hold out 15% of the images for validation, keeping class proportions.
train_imgs, valid_imgs, \
train_labels, valid_labels = train_test_split(img_list, label_list,
                                           test_size=0.15,
                                           random_state=0,
                                           stratify=label_list)

featurizer = ResNetFeaturizer()

train_features = featurizer.extract(train_imgs)
valid_features = featurizer.extract(valid_imgs)

# The featurizer may return fewer rows than there are images (a generator
# that drops a trailing partial batch), so keep only the labels of the images
# that were actually featurized. Each split must be cut from its OWN labels:
# slicing ``train_labels`` for validation pairs features with wrong labels.
train_labels = train_labels[0:len(train_features)]
valid_labels = valid_labels[0:len(valid_features)]


    

### Set up and start training



In [None]:
# Classification head trained on the precomputed 2048-value feature vectors.
model = classifier(input_shape=(2048,), units=1024, l1=0.006, l2=0.006)
model.summary()

batch_size = 64
# The datasets batch first and then repeat forever, so one full pass over n
# samples is ceil(n / batch_size) batches. Rounding up (instead of flooring)
# includes the trailing partial batch and avoids 0 steps when a split holds
# fewer than ``batch_size`` samples.
steps_per_epoch = -(-len(train_features) // batch_size)
validation_steps = -(-len(valid_features) // batch_size)

train_dataset = create_dataset(train_features, train_labels, batch_size=batch_size, train=True)
valid_dataset = create_dataset(valid_features, valid_labels, batch_size=batch_size, train=False)

model.fit(train_dataset,
          epochs=100,
          steps_per_epoch = steps_per_epoch,
          validation_data=valid_dataset,
          validation_steps = validation_steps
         )


In [None]:
def create_dataset(files, batch_size, train=True, buffer_size=10000,
                   image_shape=(224, 224, 3), num_classes=6):
    """Build a batched, endlessly repeating dataset from TFRecord files.

    Each record is parsed as an example with an int64 ``label`` feature and a
    string ``image`` feature holding raw ``uint8`` pixel bytes.

    NOTE: this function has the same name as the in-memory ``create_dataset``
    defined earlier in this file; executing this definition replaces it.

    Args:
        files: TFRecord file name(s) passed to ``tf.data.TFRecordDataset``.
        batch_size: Number of examples per batch.
        train: When true, examples are shuffled before batching.
        buffer_size: Size of the shuffle buffer (used only when ``train``).
        image_shape: Shape each decoded image is reshaped to.
        num_classes: Depth of the one-hot label encoding.

    Returns:
        A dataset of ``(image, one_hot_label)`` batches. Images are float32,
        rescaled from [0, 255] to [-1, 1]. The dataset repeats forever, so
        callers must bound iteration themselves (e.g. ``steps_per_epoch``).
    """

    def _scale_image(pixels):
        # Map pixel values from [0, 255] to [-1, 1].
        pixels = pixels / 127.5
        pixels = pixels - 1.
        return pixels

    def _parse(example_proto):
        # ``tf.io.*`` endpoints are used because the top-level aliases
        # (``tf.parse_single_example`` etc.) do not exist in TensorFlow 2.
        features = {'label': tf.io.FixedLenFeature((), tf.int64, default_value=0),
                    'image': tf.io.FixedLenFeature((), tf.string, default_value="")}
        parsed_features = tf.io.parse_single_example(example_proto, features)
        label = tf.one_hot(parsed_features['label'], num_classes)
        pixels = tf.io.decode_raw(parsed_features['image'], tf.uint8)
        pixels = tf.cast(pixels, tf.float32)
        pixels = _scale_image(pixels)
        pixels = tf.reshape(pixels, image_shape)
        return pixels, label

    dataset = tf.data.TFRecordDataset(files)
    dataset = dataset.map(_parse)
    if train:
        dataset = dataset.shuffle(buffer_size)
    dataset = dataset.batch(batch_size)
    dataset = dataset.repeat()
    return dataset
  