Copyright 2020, MIT Lincoln Laboratory

SPDX-License-Identifier: BSD-2-Clause

# Example of Moments in Time Model Inference

The following loads and parses a video from the "Skiing" class of the UCF101 dataset.  This notebook shows a minimal example of inference on this video, which the models have not previously seen.

View and (optionally) modify the TODOs below to try your own video on a selection of these models.

## Setup

### Get UCF101 Video Dataset

Go to https://www.crcv.ucf.edu/data/UCF101.php and download the dataset.  Locate the 'Skiing' class and choose a video.

### Import Packages and Methods

In [1]:
import os
import numpy as np
import re
import sys                   
import glob
import argparse
import functools
import subprocess
from PIL import Image
import time
import torch
from torchvision import transforms as trn
import h5py
import shutil
from tensorflow.python.keras.utils import to_categorical
from utils import *

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])






  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


### Enable Model File Reading

In [2]:
# Turn off HDF5 file locking so the .h5 model files can be read.
# Assigning through os.environ updates both Python's view of the environment and
# the process environment seen by C libraries; os.putenv alone leaves os.environ
# stale, and an `export` issued through os.system only affects a short-lived
# subshell, never this kernel.
# NOTE(review): h5py is imported before this cell runs -- confirm that the HDF5
# build in use reads this variable when a file is opened rather than at import.
os.environ["HDF5_USE_FILE_LOCKING"] = "FALSE"

0

### Create Moments in Time One-Hot Class Labels

In [3]:
set_dir = '/home/gridsan/groups/Moments_in_Time/data-copy/data/parsed/ValidationBatch_90'
# Every entry of set_dir is treated as one class; entries whose name contains
# '.ipynb' (extraneous notebook files/folders) are skipped.
# NOTE(review): os.listdir returns entries in arbitrary order, so the class index
# assigned below follows the directory listing order -- confirm that this matches
# the class order the models were trained with.
categories = [entry for entry in os.listdir(set_dir) if '.ipynb' not in entry]
# Map each class name to its one-hot label vector
categories_labels = {
    name: to_categorical(np.array(position), num_classes=len(categories))
    for position, name in enumerate(categories)
}
print(len(categories_labels), 'total classes')

339 total classes


## Parse Video

### Specify Input

In [4]:
# TODO:
# Specify input video and corresponding ground truth Moments in Time class.
# video_file is the path handed to parse(); mit_class_gt is used as a key into
# categories_labels, so it must be one of the class names found in set_dir.
video_file = 'example_video.avi' ### TODO: add your video file from the UCF101 dataset here ###
ucf101_class_gt = 'Skiing'  # UCF101 class the video was taken from
mit_class_gt = 'skiing'  # matching Moments in Time class name

### Load and Parse

In [5]:
def read_as_list(filename):
    """Read the file at filename and store its contents into a list.

    Args:
        filename: Path of the text file to read.

    Returns:
        A list with one string per line of the file; trailing whitespace
        (including the line terminator) is stripped from every line.
    """
    # The context manager closes the file, so no explicit close() is required.
    with open(filename) as f:
        return [line.rstrip() for line in f]

def load_frames(frame_paths, num_frames):
    """Load an evenly strided subset of frames from file as RGB images.

    Args:
        frame_paths: Image file paths, in the order the frames should appear.
        num_frames: Minimum number of paths required.  It also fixes the stride
            ceil(len(frame_paths) / num_frames) used to subsample the paths.

    Returns:
        A list of PIL images converted to 'RGB', one per selected path.  The
        list never holds more than num_frames images.

    Raises:
        ValueError: If fewer than num_frames paths are supplied.
    """
    frame_paths = list(frame_paths)
    # Validate before touching the disk so a too-short video fails fast.
    if len(frame_paths) < num_frames:
        raise ValueError('Video must have at least {} frames'.format(num_frames))
    stride = int(np.ceil(len(frame_paths) / float(num_frames)))
    # Decode only the frames that are actually returned.
    return [Image.open(path).convert('RGB') for path in frame_paths[::stride]]

def _clear_jpgs(folder):
    """Delete every '*.jpg' file directly inside folder (no-op if folder is missing)."""
    if not os.path.isdir(folder):
        return
    for name in os.listdir(folder):
        candidate = os.path.join(folder, name)
        if name.endswith('.jpg') and os.path.isfile(candidate):
            os.remove(candidate)


def _frame_number(path):
    """Return the integer frame index encoded in a '<number>.jpg' file name."""
    return int(os.path.splitext(os.path.basename(path))[0])


def extract_frames(video_file, framesFolder, framerate):
    """Takes a video and converts it into a list of frames.

    The video is decoded with the ffmpeg command-line tool into numbered JPEG
    files inside framesFolder, which are then loaded with load_frames and
    deleted again.

    Args:
        video_file: Path of the video to decode.
        framesFolder: Scratch directory for the JPEG frames; created if needed.
        framerate: Number of frames to sample per second of video.

    Returns:
        The list of frames returned by load_frames, in playback order.

    Raises:
        ValueError: If the duration cannot be found in ffmpeg's output, or if
            load_frames rejects the number of extracted frames.
    """
    # Start from a folder without stale frames of an earlier run.  (Handing a
    # literal '*.jpg' argument to `rm` does not work: no shell expands it.)
    _clear_jpgs(framesFolder)
    os.makedirs(framesFolder, exist_ok=True)

    # `ffmpeg -i <file>` without an output file reports the stream info on stderr.
    probe = subprocess.Popen(['ffmpeg', '-i', video_file],
                             stderr=subprocess.PIPE).communicate()
    stderr_text = probe[1].decode('utf-8', errors='replace')
    # Search and parse 'Duration: 00:05:24.13,' from ffmpeg stderr
    match = re.search(r'Duration: (.*?)\.', stderr_text)
    if match is None:
        raise ValueError('Could not determine the duration of {}'.format(video_file))
    # Fold 'HH:MM:SS' into whole seconds
    seconds = functools.reduce(lambda x, y: x * 60 + y,
                               map(int, match.group(1).split(':')))
    num_frames = int(seconds * framerate)

    subprocess.Popen(['ffmpeg', '-i', video_file,
                      '-vf', 'fps={}'.format(framerate),
                      '-vframes', str(num_frames),
                      '-loglevel', 'panic',
                      os.path.join(framesFolder, '%d.jpg')]).communicate()
    try:
        # '%d.jpg' names are not zero padded, so sort numerically; a plain string
        # sort would yield 1, 10, 100, ..., 2 and scramble the playback order.
        frame_paths = sorted(
            (os.path.join(framesFolder, name)
             for name in os.listdir(framesFolder)
             if name.endswith('.jpg') and name[:-4].isdigit()),
            key=_frame_number)
        return load_frames(frame_paths, num_frames)
    finally:
        # The frames are held in memory (or loading failed); drop the files.
        _clear_jpgs(framesFolder)

def remove(path):
    """Delete the file, symbolic link or directory tree found at path.

    path may be relative or absolute.  A ValueError is raised when path is
    neither a file, a link nor a directory.
    """
    file_like = os.path.isfile(path) or os.path.islink(path)
    if file_like:
        # plain file or link: unlink it
        os.remove(path)
        return
    if not os.path.isdir(path):
        raise ValueError("file {} is not a file or dir.".format(path))
    # directory: delete it together with everything it contains
    shutil.rmtree(path)

def parse(video_file):
    """Extracts a numpy array representation from a given video file.

    Frames are sampled at 25 fps into a scratch 'frames' folder below the
    current working directory, converted to tensors and stacked.

    Args:
        video_file: Path of the video to decode.

    Returns:
        A numpy array holding one entry per extracted frame along axis 0; each
        entry is the torchvision ToTensor conversion of that frame (values in
        [0, 1]).
    """
    framerate = 25  # frames per second (fps)
    # NOTE: the whole folder is deleted afterwards, including anything that was
    # already stored in a 'frames' folder of the working directory.
    framesFolder = os.path.join(os.getcwd(), 'frames')
    # Named to_tensor so it does not shadow any module-level `transform` helper.
    to_tensor = trn.ToTensor()
    try:
        frames = extract_frames(video_file, framesFolder, framerate)
        return torch.stack([to_tensor(frame) for frame in frames]).numpy()
    finally:
        # Drop the scratch folder even when extraction or stacking fails.
        shutil.rmtree(framesFolder, ignore_errors=True)

# Decode the selected video once; the resulting numpy array is the input that
# is later wrapped into `datum` for inference.
video_array = parse(video_file)
print('Video shape:', video_array.shape)

Video shape: (200, 3, 240, 320)


## Prepare Model(s)

### Select Trained Model(s)

In [6]:
# TODO:
# Select models from: ['DenseNet169', 'DenseNet201', 'InceptionV3', 'InceptionResNetV2', 'MobileNet', 
#                      'MobileNetV2', 'Resnet50', 'VGG19', 'Xception', 'I3D-InceptionV1', 'C3D', 'LRCN',
#                      'InceptionResNetV2-64avg']
# Names are matched exactly (case-sensitive) by get_model and transformer.

model_names = ['DenseNet169', 'DenseNet201', 'InceptionV3', 'InceptionResNetV2', 'MobileNet', 
               'MobileNetV2', 'Resnet50', 'VGG19', 'Xception', 'I3D-InceptionV1', 'C3D', 'LRCN',
               'InceptionResNetV2-64avg']

# TODO:
# Specify model directory.  Keep the trailing '/': get_model builds each file
# path by plain string concatenation (model_folder + file name).
model_folder = '/home/gridsan/groups/Moments_in_Time/Model-Zoo-v2/models/'

### Load and Compile Model(s)

In [7]:
# Saved-model file name for every supported model name
_MODEL_FILES = {
    'DenseNet169': 'D169-224x224x3-339-im.h5',
    'DenseNet201': 'D201-224x224x3-339-im.h5',
    'InceptionV3': 'Iv3-224x224x3-339-im.h5',
    'InceptionResNetV2': 'IRv2-224x224x3-339-im.h5',
    'MobileNet': 'M-224x224x3-339-im.h5',
    'MobileNetV2': 'Mv2-224x224x3-339-im.h5',
    'Resnet50': 'R50-224x224x3-339-im.h5',
    'VGG19': 'VGG19-224x224x3-339-im.h5',
    'Xception': 'X-224x224x3-339-im.h5',
    'C3D': 'C3D-16x224x224x3-339-m.h5',
    'I3D-InceptionV1': 'I3DIv1-16x224x224x3-339-ikm.h5',
    'LRCN': 'LRCN-16x224x224x3-339-m.h5',
    'InceptionResNetV2-64avg': 'IRv2avg-64x224x224x3-339-im.h5',
}


def get_model(model_name):
    """Load the saved model that belongs to model_name.

    Args:
        model_name: One of the supported model names (exact match).

    Returns:
        Whatever load_model returns for the matching file inside model_folder,
        or None (after printing a notice) when the name is not supported.
    """
    weights_file = _MODEL_FILES.get(model_name)
    if weights_file is None:
        print('Should not reach here')
        return None
    return load_model(model_folder + weights_file)

def transformer(model_name):
    """Return the transform number (1, 2 or 3) associated with model_name.

    The number is later passed to `transform` as transform_num.  Judging by
    the saved-model file names, 1 presumably denotes single-frame 224x224x3
    input, 2 a 16-frame clip and 3 a 64-frame clip -- TODO confirm against
    utils.  Unknown names print a notice and yield None.
    """
    groups = (
        (1, ('DenseNet169', 'DenseNet201', 'InceptionV3', 'InceptionResNetV2',
             'MobileNet', 'MobileNetV2', 'Resnet50', 'VGG19', 'Xception')),
        (2, ('I3D-InceptionV1', 'C3D', 'LRCN')),
        (3, ('InceptionResNetV2-64avg',)),
    )
    for transform_num, names in groups:
        if model_name in names:
            return transform_num
    print('Should not reach here')
    return None
    
# Metrics handed to every compiled model (get_metrics is presumably supplied by
# the utils star-import -- TODO confirm)
met = get_metrics()
# One loaded model and one transform number per selected name, in matching order
models = list(map(get_model, model_names))
transforms = list(map(transformer, model_names))
for model in models:
    model.compile(optimizer=SGD(), loss='categorical_crossentropy', metrics=met)
print('----------------------------')
print(len(models), 'model(s) loaded and compiled')

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
----------------------------
13 model(s) loaded and compiled


## Inference

Note that there is still randomness in the frame(s) being selected for inference.  Therefore, running this cell multiple times will give slightly different results.

In [8]:
# TODO:
# Specify number of top prediction classes to show
# (passed to top_classes as the largest prediction rank that is still printed)
k = 5

In [9]:
# Performs Inference and Displays Results

def label_to_cat(index, classes):
    """Return the class name whose one-hot label has its 1 at position index.

    Args:
        index: Integer class index (position within the one-hot vector).
        classes: Unused; kept so existing calls keep working.

    Returns:
        The class name stored at that index of the module-level `categories`.

    Raises:
        IndexError: If index lies outside the range of known classes.
    """
    # categories_labels gives categories[i] the one-hot vector with its 1 at
    # position i, so the name can be read directly instead of scanning every
    # label vector on each call.
    return categories[index]
def top_classes(tups, k=5):
    """Keep the tuples ranked k or better and return them ordered by rank.

    Args:
        tups: Sequence of tuples whose second element is the prediction rank.
        k: Largest rank that is kept.

    Returns:
        A new list with the kept tuples in ascending rank order; tuples with
        equal rank keep their original relative order.
    """
    shortlisted = [entry for entry in tups if entry[1] <= k]
    shortlisted.sort(key=lambda entry: entry[1])
    return shortlisted


# One-hot ground-truth label of the video and the datum tuple handed to
# `transform` (tuple layout presumably dictated by utils -- TODO confirm).
label = categories_labels[mit_class_gt]
datum = [video_array], 0, label

print('Model Name')
print('(class_name, prediction_rank, probability)')
print('------------------------------------------')
print()

num_samples = 5  # transformed samples whose predictions are averaged per model

for model_name, model, t in zip(model_names, models, transforms):
    # Sample averaging: predict on several transformed versions of the video
    y_hats = []
    for _ in range(num_samples):
        arr, _sample_label = transform(datum, transform_num=t)
        X = np.array([arr])
        y_hats.append(model.predict(X, batch_size=1))
    y_hat = np.average(y_hats, axis=0)
    # argsort of argsort yields every class's position in ascending probability
    # order; subtracting it from the class count turns it into a 1-based rank
    # where 1 is the most probable class.
    order = y_hat.argsort()
    rev_ranks = order.argsort()
    num_classes = len(y_hat[0])
    tups = [
        (label_to_cat(class_idx, num_classes),
         num_classes - rev_ranks[0][class_idx],
         y_hat[0][class_idx])
        for class_idx in range(num_classes)
    ]
    short_list = top_classes(tups, k)
    print(model_name)
    for tup in short_list:
        print(tup)
    print()

Model Name
(class_name, prediction_rank, probability)
------------------------------------------

DenseNet169
('skiing', 1, 0.9085125)
('boarding', 2, 0.025353933)
('sliding', 3, 0.0076259063)
('slipping', 4, 0.005177059)
('skating', 5, 0.0050613997)

DenseNet201
('skiing', 1, 0.58287513)
('sliding', 2, 0.05612815)
('slipping', 3, 0.043456513)
('skating', 4, 0.03366632)
('descending', 5, 0.018783813)

InceptionV3
('skiing', 1, 0.5834716)
('sliding', 2, 0.028677518)
('slipping', 3, 0.028165314)
('skating', 4, 0.023576682)
('boarding', 5, 0.021372344)

InceptionResNetV2
('skiing', 1, 0.6004424)
('boarding', 2, 0.053444456)
('jumping', 3, 0.034714162)
('skating', 4, 0.02377763)
('falling', 5, 0.023125973)

MobileNet
('skiing', 1, 0.2398886)
('officiating', 2, 0.14038408)
('slipping', 3, 0.10001288)
('boarding', 4, 0.095631294)
('sliding', 5, 0.038019247)

MobileNetV2
('skiing', 1, 0.16444364)
('sliding', 2, 0.07168311)
('boarding', 3, 0.06656089)
('slipping', 4, 0.06320612)
('skating', 5,

## Questions

Any questions can be directed to Matthew Hutchinson at <hutchinson@alum.mit.edu>.

Python license: https://docs.python.org/3/license.html

TensorFlow license: https://github.com/tensorflow/tensorflow/blob/master/LICENSE

NumPy license: https://numpy.org/doc/stable/license.html

PIL license: http://www.pythonware.com/products/pil/license.htm

PyTorch license: https://github.com/pytorch/pytorch/blob/master/LICENSE

H5Py license: https://docs.h5py.org/en/stable/licenses.html