In [0]:
# Mount Google Drive inside the Colab runtime at /content/gdrive.
# force_remount=True asks for a fresh mount even if one is already present.
# NOTE(review): later cells use the relative path 'gdrive/My Drive/...', which
# presumably relies on the working directory being /content - confirm.
from google.colab import drive
drive.mount('/content/gdrive', force_remount=True)

Mounted at /content/gdrive


In [0]:
import csv
import functools
import glob
import os
import os.path
import threading

import numpy as np
from keras.applications.inception_v3 import InceptionV3, preprocess_input
from keras.layers import Input
from keras.models import Model, load_model
from keras.preprocessing import image as IMAGE
from keras.preprocessing.image import img_to_array, load_img
from tqdm import tqdm

In [0]:
class Extractor():
    """Turns one image file into a feature vector using an InceptionV3-style model.

    The wrapped Keras model is truncated so that ``predict`` yields the
    activations of the pooling layer instead of class predictions.
    """

    def __init__(self, weights=None):
        """Either load pretrained from imagenet, or load our saved
        weights from our own training.

        Args:
            weights: ``None`` to use InceptionV3 with ImageNet weights, or
                whatever ``load_model`` accepts (e.g. a path to a saved model)
                to use a model from our own training.
        """
        self.weights = weights

        if weights is None:
            # Get model with pretrained weights; features come from the
            # final pooling layer, which InceptionV3 names 'avg_pool'.
            base_model = InceptionV3(
                weights='imagenet',
                include_top=True
            )
            feature_layer = base_model.get_layer('avg_pool')
        else:
            # Load the model first.
            base_model = load_model(weights)

            # Skip the top so we get features, not predictions. The previous
            # implementation popped two layers off ``model.layers`` and
            # patched ``outputs`` by hand; that trick silently does nothing on
            # Keras versions where ``model.layers`` is a read-only copy.
            # Selecting the third layer from the end targets the same layer.
            # Assumes the saved model has exactly two layers after the pooling
            # layer, as the original two-pop code did - TODO confirm.
            feature_layer = base_model.layers[-3]

        # A fresh functional model that stops at the feature layer.
        self.model = Model(
            inputs=base_model.input,
            outputs=feature_layer.output
        )

    def extract(self, image_path):
        """Return the feature vector for the image stored at ``image_path``.

        The image is loaded at 299x299, converted to an array, given a
        leading batch axis of size one and passed through
        ``preprocess_input`` before prediction.

        Args:
            image_path: path of the image file to load.

        Returns:
            The first (and only) row of the prediction batch, i.e. the
            features of this single image. (Presumably 2048 values for the
            stock InceptionV3 'avg_pool' layer - not checked here.)
        """
        img = IMAGE.load_img(image_path, target_size=(299, 299))
        x = IMAGE.img_to_array(img)
        x = np.expand_dims(x, axis=0)
        x = preprocess_input(x)

        # Get the prediction.
        features = self.model.predict(x)

        # The batch holds exactly one image; hand back its row. Previously
        # nothing was returned, so callers received None.
        return features[0]

In [0]:
class threadsafe_iterator:
    """Iterator wrapper that lets only one thread at a time advance the source.

    Every ``next()`` call on the wrapper takes a lock before calling
    ``next()`` on the wrapped iterator.
    """

    def __init__(self, iterator):
        """Wrap ``iterator``.

        Args:
            iterator: an iterator or any iterable. ``iter()`` is applied so
                that plain iterables (lists, ranges, ...) work too; for an
                object that is already an iterator ``iter()`` returns that
                same object, so existing callers see no change.
        """
        self.iterator = iter(iterator)
        # Relies on the module-level ``import threading``.
        self.lock = threading.Lock()

    def __iter__(self):
        """Return the wrapper itself, as the iterator protocol requires."""
        return self

    def __next__(self):
        """Return the next item of the wrapped iterator while holding the lock.

        ``StopIteration`` from the wrapped iterator propagates unchanged.
        """
        with self.lock:
            return next(self.iterator)

In [0]:
def threadsafe_generator(func):
    """Decorator: wrap the result of ``func`` in a ``threadsafe_iterator``.

    Args:
        func: a callable returning an iterator (typically a generator
            function).

    Returns:
        A function with the same call signature as ``func`` that passes its
        arguments through and returns ``threadsafe_iterator(func(...))``.
        ``functools.wraps`` keeps the name and docstring of ``func`` on the
        returned function.
    """
    @functools.wraps(func)
    def gen(*a, **kw):
        return threadsafe_iterator(func(*a, **kw))
    return gen

In [0]:
class DataSet():
    """Index of the videos listed in ``data_file.csv`` plus frame helpers.

    Each CSV row is used as ``row[0]``/``row[1]`` = the two folder levels
    under the data directory, ``row[1]`` = class name, ``row[2]`` = file name
    prefix of the video's frames and ``row[3]`` = frame count.
    """

    # Default data directory: holds data_file.csv, the frame folders and the
    # 'features' folder. It is relative, so it resolves against the current
    # working directory.
    DEFAULT_DATA_DIR = 'gdrive/My Drive/ADBI_Capstone_Video_Classification/data/'

    def __init__(self, seq_length=40, class_limit=None, image_shape=(224, 224, 3),
                 data_dir=None):
        """Constructor.
        seq_length = (int) the number of frames to consider
        class_limit = (int) number of classes to limit the data to.
            None = no limit.
        image_shape = stored on the instance; not used by the code in this
            class.
        data_dir = directory containing data_file.csv; None means
            DEFAULT_DATA_DIR.

        After construction ``self.classes`` holds the sorted class names
        (truncated to class_limit) and ``self.data`` holds only the rows whose
        frame count lies in [seq_length, max_frames] and whose class is in
        ``self.classes``.
        """
        self.seq_length = seq_length
        self.class_limit = class_limit
        self.data_dir = self.DEFAULT_DATA_DIR if data_dir is None else data_dir
        self.sequence_path = os.path.join(self.data_dir, 'features')
        self.max_frames = 300  # max number of frames a video can have for us to use it
        self.image_shape = image_shape

        # Get the data, dropping the empty lists csv.reader yields for blank
        # lines. newline='' is what the csv module asks for on open().
        with open(os.path.join(self.data_dir, 'data_file.csv'), 'r', newline='') as fin:
            rows = [row for row in csv.reader(fin) if row]

        # get_classes reads self.data, so the unfiltered rows go in first.
        self.data = rows
        self.classes = self.get_classes(self)

        # Clean up the data: keep rows with a usable frame count and class.
        allowed = set(self.classes)
        self.data = [
            row for row in rows
            if self.seq_length <= int(row[3]) <= self.max_frames
            and row[1] in allowed
        ]

    @staticmethod
    def get_classes(self):
        """Extract the classes from the data.

        Declared as a staticmethod that takes the DataSet explicitly, so it
        is called as ``dataset.get_classes(dataset)``.

        Returns the sorted, de-duplicated ``row[1]`` values of ``self.data``;
        only the first ``self.class_limit`` of them if a limit is defined.
        """
        classes = sorted({item[1] for item in self.data})

        if self.class_limit is not None:
            return classes[:self.class_limit]
        return classes

    @staticmethod
    def get_frames_for_sample(sample, data_dir=None):
        """Given a sample row from the data file, get all the corresponding frame
        filenames.

        Looks in ``<data_dir>/<sample[0]>/<sample[1]>/`` for files whose name
        starts with ``sample[2]`` and ends with ``jpg``, and returns them
        sorted.

        data_dir = directory to search under; None means DEFAULT_DATA_DIR.
            Because this is a staticmethod it does not see an instance's
            ``data_dir``; pass it explicitly when a non-default directory was
            given to the constructor.
        """
        if data_dir is None:
            data_dir = DataSet.DEFAULT_DATA_DIR
        prefix = os.path.join(data_dir, sample[0], sample[1], sample[2])
        # Escape the literal part so characters such as '[' in a directory or
        # file name are not read as glob syntax; only the '*' is a wildcard.
        return sorted(glob.glob(glob.escape(prefix) + '*jpg'))

    @staticmethod
    def rescale_list(input_list, size):
        """Given a list and a size, return a rescaled/sampled list. For example,
        if we want a list of size 5 and we have a list of size 25, return a new
        list of size five which is every 5th element of the original list.

        A size of zero or less yields an empty list. Asking for more elements
        than the list holds fails the assertion.
        """
        assert len(input_list) >= size

        # Nothing requested; also avoids dividing by zero below.
        if size <= 0:
            return []

        # Get the number to skip between iterations.
        skip = len(input_list) // size

        # Build our new output.
        output = [input_list[i] for i in range(0, len(input_list), skip)]

        # Cut off the last one if needed.
        return output[:size]


In [0]:
# Extraction settings: frames per sequence, and how many classes to keep
# (class_limit can be 1-101, or None for all of them).
seq_length, class_limit = 40, None

# The feature extractor and the dataset index do not depend on each other.
model = Extractor()
data = DataSet(seq_length=seq_length, class_limit=class_limit)

# One progress-bar tick per video kept in the dataset.
pbar = tqdm(total=len(data.data))




  0%|          | 0/12014 [00:00<?, ?it/s]

In [0]:
# np.save does not create directories, so make sure the target exists.
os.makedirs(data.sequence_path, exist_ok=True)

for video in data.data:

    # Get the path to the sequence for this video. data.sequence_path is the
    # 'features' folder inside the data directory.
    path = os.path.join(data.sequence_path, video[2] + '-' + str(seq_length) + \
        '-features')  # numpy will auto-append .npy

    # Check if we already have it.
    if os.path.isfile(path + '.npy'):
        pbar.update(1)
        continue

    # Get the frames for this video.
    frames = data.get_frames_for_sample(video)

    # Without frames we would save an empty array that every later run then
    # treats as "already extracted"; report it and move on instead.
    if not frames:
        tqdm.write('No frames found for ' + str(video[2]) + '; skipping.')
        pbar.update(1)
        continue

    # NOTE(review): downsampling via data.rescale_list(frames, seq_length) is
    # disabled, so every frame is used even though the file name carries
    # seq_length - confirm this is intended.

    # Now loop through and extract features to build the sequence.
    sequence = []
    for f in frames:
        features = model.extract(f)
        sequence.append(features)

    # Save the sequence.
    np.save(path, sequence)

    pbar.update(1)
    



In [0]:
# Close the progress bar that was created before the extraction loop.
pbar.close()