In [2]:
# import dependencies

import re
import os
import tempfile
import ssl
import cv2
import numpy as np
import tensorflow as tf
import tensorflow_hub as hub
from tensorflow_docs.vis import embed
import imageio
from urllib.request import urlopen 


In [3]:
# get data labels
# Download the Kinetics label map and keep one stripped, UTF-8 decoded string
# per line. The `with` block closes the HTTP response as soon as the labels
# have been read instead of leaving the connection open for the whole session.
with urlopen(
    "https://raw.githubusercontent.com/deepmind/kinetics-i3d/master/data/label_map.txt"
) as data_labels:
    labels = [line.decode("utf-8").strip() for line in data_labels]

In [4]:
# Video clip to classify. The path is relative, so it is resolved against the
# notebook's current working directory.

video_path = "v_ApplyEyeMakeup_g01_c01.avi"

def crop_frame(frame):
    """Cut the largest centred square out of ``frame``.

    Args:
        frame: Array whose first two axes are (height, width); any further
            axes are carried through untouched.

    Returns:
        The slice of ``frame`` whose height and width both equal the shorter
        of the two original sides, centred on the longer side.
    """
    height, width = frame.shape[:2]
    side = min(height, width)

    # Offsets of the square's top-left corner; one of the two is always 0.
    top = height // 2 - side // 2
    left = width // 2 - side // 2

    rows = slice(top, top + side)
    cols = slice(left, left + side)
    return frame[rows, cols]


def get_frames(video_path, max_frames=0, resize=(224, 224)):
    """Decode a video file into an array of preprocessed frames.

    Each decoded frame is centre-cropped to a square with ``crop_frame``,
    resized to ``resize``, has its channel order reversed, and the stacked
    result is scaled by 1/255.

    Args:
        video_path: Path of the video file handed to ``cv2.VideoCapture``.
        max_frames: Stop after this many frames. ``0`` (the default) means
            no limit, i.e. read until the capture reports no more frames.
        resize: ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        A float array of the stacked frames divided by 255. If no frame
        could be read the array is empty.
    """
    cap = cv2.VideoCapture(video_path)
    frames = []
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            frame = crop_frame(frame)
            frame = cv2.resize(frame, resize)
            # Reverse the channel axis (OpenCV decodes frames as BGR).
            frame = frame[:, :, [2, 1, 0]]
            frames.append(frame)

            # The previous check (`len(frames) == 0`) sat right after the
            # append and could never fire; honour an explicit cap instead.
            if max_frames and len(frames) >= max_frames:
                break
    finally:
        # Always free the capture handle, even if decoding raised.
        cap.release()
    return np.array(frames) / 255


# Decode the clip at `video_path` into a single array of preprocessed frames.
video_frames = get_frames(video_path)

In [5]:
# Sanity check on the decoded clip: (num_frames, height, width, channels).
video_frames.shape

(164, 224, 224, 3)

In [6]:
# Load the I3D Kinetics-400 module from TensorFlow Hub and keep only its
# 'default' signature, which is what `predict` calls on the frame tensor.
module = hub.load("https://tfhub.dev/deepmind/i3d-kinetics-400/1").signatures['default']

In [7]:
def predict(module, video_frames, top_k=5):
    """Run the model on one clip and print its most probable actions.

    Args:
        module: Callable model signature. It is called with a float32 tensor
            and must return a mapping whose ``'default'`` entry holds the
            logits, indexed first by batch item.
        video_frames: Frames of a single clip; a leading batch axis of size
            one is added before the model is called.
        top_k: How many of the highest-probability actions to print
            (defaults to 5).

    Note:
        Class names are looked up in the module-level ``labels`` list, so the
        label-download cell must have been run first.
    """
    # Add the batch axis and cast to the float32 input passed to the model.
    model_input = tf.cast(tf.convert_to_tensor(video_frames[None, :]), tf.float32)

    logits = module(model_input)['default'][0]
    # Convert once so the loop below indexes a NumPy array, not a tensor.
    probabilities = np.asarray(tf.nn.softmax(logits))

    print("Actions:")
    # argsort is ascending, so reverse it to list the best classes first.
    for i in np.argsort(probabilities)[::-1][:top_k]:
        print(f"  {labels[i]:22}: {probabilities[i] * 100:5.2f}%")

In [8]:
predict(module, video_frames)

Actions:
  filling eyebrows      : 98.13%
  applying cream        :  1.57%
  waxing eyebrows       :  0.17%
  playing harmonica     :  0.07%
  brush painting        :  0.04%
