In [0]:
# Mount Google Drive at /content/gdrive (requires the Colab runtime).
# NOTE(review): later cells use relative 'gdrive/My Drive/...' paths, which
# presumably rely on /content being the working directory — TODO confirm.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [0]:
import csv
import functools
import glob
import operator
import os.path
import random
import sys
import threading
import time

import numpy as np
from keras.applications.inception_v3 import InceptionV3
from keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping, CSVLogger
from keras.layers import Dense, GlobalAveragePooling2D
from keras.models import Model
from keras.optimizers import SGD
from keras.preprocessing.image import ImageDataGenerator
from keras.preprocessing.image import img_to_array, load_img
from keras.utils import to_categorical

Using TensorFlow backend.


In [0]:
def process_image(image, target_shape):
    """Load an image file and return it as a float32 array scaled by 1/255.

    image: path (or whatever ``load_img`` accepts) of the image to read.
    target_shape: 3-item sequence (height, width, channels); only the first
        two values are used, as the resize target.
    """
    height, width, _channels = target_shape

    # Read and resize in one step, then convert to a numpy array.
    pixels = img_to_array(load_img(image, target_size=(height, width)))

    # Divide by 255 and store as float32.
    return (pixels / 255.).astype(np.float32)

In [0]:
class threadsafe_iterator:
    """Iterator wrapper that takes a lock around every ``next()`` call.

    The wrapped iterator is advanced only while ``self.lock`` is held, so
    calls coming from several threads are serialized.
    """

    def __init__(self, iterator):
        self.lock = threading.Lock()
        self.iterator = iterator

    def __iter__(self):
        # The wrapper is its own iterator.
        return self

    def __next__(self):
        self.lock.acquire()
        try:
            return next(self.iterator)
        finally:
            # Always release, including when StopIteration propagates.
            self.lock.release()



In [0]:
def threadsafe_generator(func):
    """Decorator: wrap the iterator returned by ``func`` in a threadsafe_iterator.

    func: a callable (typically a generator function) returning an iterator.

    Returns a function with the same call signature that forwards all
    arguments to ``func`` and returns the result wrapped in
    ``threadsafe_iterator``.
    """
    # functools.wraps keeps func's __name__/__doc__ on the wrapper; without it
    # every decorated generator would show up as "gen" with no docstring.
    @functools.wraps(func)
    def gen(*a, **kw):
        return threadsafe_iterator(func(*a, **kw))
    return gen

In [0]:
class DataSet():
    """Index of the samples listed in data_file.csv, plus a batch generator.

    CSV rows are read positionally by the methods below: column 0 is the
    split ('train' or anything else, treated as test), column 1 the class
    name, column 2 the sample filename stem and column 3 the frame count.
    """

    # Root of the project data on the mounted drive (change for your setup).
    DATA_DIR = "gdrive/My Drive/ADBI_Capstone_Video_Classification/data/"

    def __init__(self, seq_length=40, class_limit=None, image_shape=(224, 224, 3)):
        """Constructor.
        seq_length = (int) the number of frames to consider
        class_limit = (int) number of classes to limit the data to.
            None = no limit.
        image_shape = shape stored on the instance as ``self.image_shape``
        """
        self.seq_length = seq_length
        self.class_limit = class_limit
        self.image_shape = image_shape
        self.sequence_path = os.path.join(self.DATA_DIR, 'features')
        self.max_frames = 300  # max number of frames a video can have for us to use it

        # Get the data.
        self.data = self.get_data()

        # Get the classes (must happen before cleaning, which filters on them).
        self.classes = self.get_classes()

        # Now do some minor data cleaning.
        self.data = self.clean_data()

    @staticmethod
    def get_data():
        """Load the rows of data_file.csv, dropping empty rows."""
        csv_path = os.path.join(DataSet.DATA_DIR, 'data_file.csv')
        # newline='' is what the csv module documents for reader input files.
        with open(csv_path, 'r', newline='') as fin:
            return [row for row in csv.reader(fin) if row != []]

    def clean_data(self):
        """Keep rows whose frame count lies in [seq_length, max_frames] and
        whose class is one of ``self.classes``."""
        wanted_classes = set(self.classes)
        data_clean = []
        for item in self.data:
            frame_count = int(item[3])
            if self.seq_length <= frame_count <= self.max_frames \
                    and item[1] in wanted_classes:
                data_clean.append(item)

        return data_clean

    def get_classes(self):
        """Return the sorted unique class names found in the data, truncated
        to the first ``class_limit`` entries when a limit is set."""
        classes = sorted({item[1] for item in self.data})

        if self.class_limit is not None:
            return classes[:self.class_limit]
        return classes

    def get_class_one_hot(self, class_str):
        """Given a class as a string, return its one-hot encoding based on
        its position in ``self.classes``.

        Raises ValueError (from ``list.index``) if the class is unknown.
        """
        # Encode it first.
        label_encoded = self.classes.index(class_str)

        # Now one-hot it.
        label_hot = to_categorical(label_encoded, len(self.classes))

        assert len(label_hot) == len(self.classes)

        return label_hot

    def get_extracted_sequence(self, data_type, sample):
        """Load the saved feature array for ``sample``.

        The file is ``<sequence_path>/<filename>-<seq_length>-<data_type>.npy``.
        Returns the loaded array, or None (after printing the path) when the
        file does not exist.
        """
        filename = sample[2]
        path = os.path.join(self.sequence_path, filename + '-' + str(self.seq_length) + \
            '-' + data_type + '.npy')
        if os.path.isfile(path):
            return np.load(path)
        else:
            print("file path: ", path)
            return None

    def split_train_test(self):
        """Split the data into train and test groups.

        Rows whose first column is 'train' go to train; every other row goes
        to test. Returns ``(train, test)``.
        """
        train = []
        test = []
        for item in self.data:
            if item[0] == 'train':
                train.append(item)
            else:
                test.append(item)
        return train, test

    @threadsafe_generator
    def frame_generator(self, batch_size, train_test, data_type):
        """Return a generator that we can use to train on.

        batch_size: number of samples per yielded batch.
        train_test: 'train' selects the train split; anything else the test split.
        data_type: 'images' builds sequences from frames; any other value is
            used as the suffix of the saved feature file to load.

        Yields ``(np.array(X), np.array(y))`` with ``batch_size`` randomly
        chosen samples (with replacement) per batch, forever.
        Raises ValueError when a saved feature sequence cannot be found.
        """
        # Get the right dataset for the generator.
        train, test = self.split_train_test()
        data = train if train_test == 'train' else test

        print("Creating %s generator with %d samples." % (train_test, len(data)))

        while 1:
            X, y = [], []

            # Generate batch_size samples.
            for _ in range(batch_size):
                # Get a random sample.
                sample = random.choice(data)

                # Compare by value: `is` on a string literal is an identity
                # check and is not guaranteed to match equal strings.
                if data_type == "images":
                    # NOTE(review): get_frames_for_sample, rescale_list and
                    # build_image_sequence are not defined on this class in
                    # this notebook; this branch raises AttributeError until
                    # they are added.
                    frames = self.get_frames_for_sample(sample)
                    frames = self.rescale_list(frames, self.seq_length)

                    # Build the image sequence
                    sequence = self.build_image_sequence(frames)
                else:
                    # Get the sequence from disk.
                    sequence = self.get_extracted_sequence(data_type, sample)

                    if sequence is None:
                        raise ValueError("Can't find sequence. Did you generate them?")

                # Collect the sample for both branches.
                X.append(sequence)
                y.append(self.get_class_one_hot(sample[1]))

            # Emit one full batch per outer-loop iteration.
            yield np.array(X), np.array(y)
          

In [0]:
# Dataset index shared by the cells below (they read ``data.classes``).
data = DataSet()

# Checkpoint files are named after the epoch number and validation loss.
checkpointer = ModelCheckpoint(
    verbose=1,
    save_best_only=True,
    filepath="gdrive/My Drive/ADBI_Capstone_Video_Classification/data/models/inception.{epoch:03d}-{val_loss:.2f}.hdf5")

early_stopper = EarlyStopping(patience=10)

# NOTE(review): TensorBoard logs go to the relative 'data/meta' directory,
# not to the Drive folder used by the other callbacks — confirm intended.
tensorboard = TensorBoard(log_dir=os.path.join('data', 'meta'))

# One CSV training log per run, distinguished by the start timestamp.
timestamp = time.time()
csv_logger = CSVLogger(
    os.path.join(
        "gdrive/My Drive/ADBI_Capstone_Video_Classification/data/",
        'meta',
        'cnn-training-' + str(timestamp) + '.log'))

In [0]:
def get_generators():
    """Build the training and validation directory iterators.

    Training images are rescaled by 1/255 and augmented (shear, horizontal
    flip, rotation, width/height shifts); validation images are only
    rescaled. Both read 299x299 images in batches of 16, with categorical
    labels restricted to ``data.classes``.

    Returns ``(train_generator, validation_generator)``.
    """
    data_root = "gdrive/My Drive/ADBI_Capstone_Video_Classification/data/"

    # Options common to both directory iterators.
    flow_options = dict(
        target_size=(299, 299),
        batch_size=16,
        classes=data.classes,
        class_mode='categorical')

    augmenting_datagen = ImageDataGenerator(
        rescale=1./255,
        shear_range=0.2,
        horizontal_flip=True,
        rotation_range=10.,
        width_shift_range=0.2,
        height_shift_range=0.2)
    plain_datagen = ImageDataGenerator(rescale=1./255)

    train_generator = augmenting_datagen.flow_from_directory(
        os.path.join(data_root, "train"), **flow_options)
    validation_generator = plain_datagen.flow_from_directory(
        os.path.join(data_root, 'test'), **flow_options)

    return train_generator, validation_generator

In [0]:
def get_model(weights='imagenet'):
    """Build InceptionV3 (without its top) plus a new classification head.

    weights: forwarded to ``InceptionV3(weights=...)``.

    The head is global average pooling, a 1024-unit ReLU dense layer and a
    softmax layer with one unit per entry in ``data.classes``.
    Returns the (uncompiled) ``Model``.
    """
    # Pre-trained convolutional base.
    backbone = InceptionV3(weights=weights, include_top=False)

    # New head stacked on the base output.
    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(1024, activation='relu')(pooled)
    class_probs = Dense(len(data.classes), activation='softmax')(hidden)

    return Model(inputs=backbone.input, outputs=class_probs)

In [0]:
def freeze_all_but_top(model):
    """Freeze every layer except the last two, then compile with rmsprop.

    The model is modified in place and also returned.
    """
    trainable_tail = 2

    # Everything before the last two layers becomes non-trainable.
    for frozen_layer in model.layers[:-trainable_tail]:
        frozen_layer.trainable = False

    # Compile after changing the trainable flags.
    model.compile(
        loss='categorical_crossentropy',
        optimizer='rmsprop',
        metrics=['accuracy'])

    return model


In [0]:
def freeze_all_but_mid_and_top(model):
    """Freeze the first 172 layers, unfreeze the rest, and recompile with SGD.

    The model is modified in place and also returned. It is compiled with
    SGD (lr=0.0001, momentum=0.9), categorical cross-entropy, and the
    accuracy and top-k categorical accuracy metrics.
    """
    first_trainable = 172

    # Layers at index >= 172 are trainable; earlier ones are frozen.
    for position, layer in enumerate(model.layers):
        layer.trainable = position >= first_trainable

    # Recompile so the new trainable flags take effect.
    low_rate_sgd = SGD(lr=0.0001, momentum=0.9)
    model.compile(
        optimizer=low_rate_sgd,
        loss='categorical_crossentropy',
        metrics=['accuracy', 'top_k_categorical_accuracy'])

    return model

In [0]:
def train_model(model, nb_epoch, generators, callbacks=None):
    """Fit ``model`` from the given generators and return it.

    model: object exposing ``fit_generator`` (a compiled Keras model).
    nb_epoch: number of epochs, passed as ``epochs``.
    generators: ``(train_generator, validation_generator)`` pair.
    callbacks: list of callbacks to pass to ``fit_generator``. When None,
        the notebook's ``tensorboard``, ``early_stopper``, ``csv_logger`` and
        ``checkpointer`` globals are used, looked up at call time so that
        re-running the setup cell takes effect.

    Each epoch runs 100 training steps and 10 validation steps.
    """
    if callbacks is None:
        callbacks = [tensorboard, early_stopper, csv_logger, checkpointer]

    train_generator, validation_generator = generators
    model.fit_generator(
        train_generator,
        steps_per_epoch=100,
        validation_data=validation_generator,
        validation_steps=10,
        epochs=nb_epoch,
        # Honour the caller's callbacks; they used to be silently ignored.
        callbacks=callbacks)
    return model

In [0]:
def main(weights_file):
    """Run the two-stage fine-tuning.

    weights_file: path of saved weights to load, or None. With None the new
        top layers are trained first for 10 epochs; otherwise the saved
        weights are loaded and that stage is skipped. In both cases the mid
        and top layers are then trained for 10 epochs.
    """
    model = get_model()
    generators = get_generators()
    stage_callbacks = [tensorboard, early_stopper,  csv_logger, checkpointer]

    if weights_file is not None:
        print("Loading saved model: %s." % weights_file)
        model.load_weights(weights_file)
    else:
        print("Loading network from ImageNet weights.")
        # Stage 1: train only the top layers.
        model = train_model(freeze_all_but_top(model), 10, generators, stage_callbacks)

    # Stage 2: train the mid and top layers.
    model = train_model(freeze_all_but_mid_and_top(model), 10, generators, stage_callbacks)

In [0]:
# Entry point. weights_file = None makes main() take its "train the top layers
# first" branch; set it to a saved weights path to load those weights instead.
if __name__ == '__main__':
    weights_file = None
    main(weights_file)