# Import libraries

In [1]:
import os, sys, re

sys.path.append("./")

import torch
from utils_cv.action_recognition.dataset import VideoDataset
from utils_cv.action_recognition.model import VideoLearner 
from utils_cv.common.data import data_path
from IPython.display import clear_output
import json
# Ensure edits to libraries are loaded and plotting is shown in the notebook.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

# Load the pretrained model - PyTorch

In [2]:
# --- Notebook configuration -------------------------------------------------

# Samples per batch. Lower this value if the run exhausts memory.
BATCH_SIZE = 16

# How many consecutive frames are fed to the network at once.
# 32 favours accuracy, 8 favours inference speed.
MODEL_INPUT_SIZE = 8

# Dataset folder, located under the directory returned by data_path().
DATA_PATH = data_path() / "./kinetics700_2020/"

In [3]:
# Build the dataset object from the configured path, batch size and clip length.
data = VideoDataset(
    DATA_PATH,
    batch_size=BATCH_SIZE,
    sample_length=MODEL_INPUT_SIZE,
)

In [4]:
# Create the learner on top of the dataset, configured for two output classes.
actionModel = VideoLearner(
    data,
    num_classes=2,
)

Loading r2plus1d_34_8_ig65m model


Using cache found in /home/yangze2065/.cache/torch/hub/moabitcoin_ig65m-pytorch_master


In [5]:
# Load the model stored under the name "R21D8_sharpening_knives" into the learner.
actionModel.load(model_name="R21D8_sharpening_knives")

# Print the learner as a quick check that the object is available.
print(actionModel)

<utils_cv.action_recognition.model.VideoLearner object at 0x7fe1397bb550>


# Loading unknown videos for action prediction

In [10]:
# Folder holding the videos that still have to be analysed, and the folder
# that videos are moved into once they have been processed.
video_file_path = data_path()/"video_clips/"
predicted_file_path = data_path()/"video_clips/predicted/"

# Count regular files only: the listing also contains sub-folders (for example
# the "predicted" folder above), which the processing loop skips as well.
pending_videos = [
    entry for entry in os.listdir(video_file_path)
    if os.path.isfile(os.path.join(video_file_path, entry))
]
print("Number of Youtube Videos for Testing = " + str(len(pending_videos)) + " videos")

Number of Youtube Videos for Testing = 2 videos


In [15]:
# Slide a fixed-length window over every video file in `video_file_path`, run
# the action model on each window, and append a JSON record for every window
# in which the target action is predicted with sufficient confidence. Videos
# that have been scanned are moved to `predicted_file_path`.

# Label and minimum confidence a prediction needs in order to be recorded.
TARGET_LABEL = "sharpening_knives"
MIN_CONFIDENCE = 0.8
# Observer tag written into every record; also the prefix of the results file.
OBSERVER = "CSCE636-Spring2021-CochiLocoYang-v10"
RESULTS_FILE = OBSERVER + "_unsorted.json"
# Length of each analysed window, in seconds.
CLIP_SECONDS = 2
# Scratch file that holds the window currently being classified.
# NOTE(review): hard-coded relative to the working directory; presumably the
# same folder as `video_file_path` -- confirm.
TEMP_CLIP = "./data/video_clips/temp-TRIM.mp4"

# The shell commands below read their arguments from environment variables.
os.environ['clipFile'] = TEMP_CLIP
os.environ['clipSeconds'] = str(CLIP_SECONDS)

# The final os.rename fails if the destination folder is missing.
os.makedirs(predicted_file_path, exist_ok=True)

# Videos whose duration could not be read; reported after the loop because
# clear_output() wipes anything printed while windows are being processed.
skipped_videos = []

for video in os.listdir(video_file_path):
    # Ignore sub-folders such as the destination folder for processed videos.
    if not os.path.isfile(os.path.join(video_file_path, video)):
        continue

    os.environ['inputFile'] = os.path.join(video_file_path, video)
    video_duration = !ffprobe -v error -show_entries format=duration -of default=noprint_wrappers=1:nokey=1 "$inputFile"

    # Whole-second duration taken from the last numeric line ffprobe printed.
    # Reset for every video so a failed probe can never reuse the duration of
    # the previous one.
    duration_s = None
    for line in video_duration:
        try:
            duration_s = int(float(line))
        except (ValueError, OverflowError):
            # Not a usable number (e.g. "N/A" or an error message).
            continue

    if duration_s is None:
        # Leave the file in place so it can be inspected and retried.
        skipped_videos.append(video)
        continue

    # Extract Youtube video ID (file name without its extension).
    YoutubeID = os.path.splitext(video)[0]

    # Parse the video for trimming and action recognition.
    # NOTE(review): windows start at second 1 and stop before the last
    # CLIP_SECONDS of the video, so the first second and the tail are never
    # analysed -- kept as in the original; confirm this is intended.
    for i in range(1, duration_s - CLIP_SECONDS, CLIP_SECONDS):
        print("video name: " + video)
        print("start time at " , i , "second")
        print("end time at " , i + CLIP_SECONDS , "second")
        os.environ['start'] = str(i)

        # Trim the window into the scratch clip. -y overwrites a clip left
        # behind by an interrupted run instead of classifying that stale file.
        !ffmpeg -hide_banner -loglevel error -y -i "$inputFile" -ss "$start" -t "$clipSeconds" -c:a copy "$clipFile"
        # Run the model on the trimmed window.
        top_result = actionModel.predict_video(TEMP_CLIP)
        # Remove the trimmed clip before the next iteration.
        os.remove(TEMP_CLIP)
        clear_output()

        # Go through the returned (label, confidence) pairs.
        for predict_label, test_acc in top_result:
            # Only keep results for the target action with enough confidence.
            if predict_label != TARGET_LABEL or test_acc < MIN_CONFIDENCE:
                continue

            video_data = [
                {
                    "videoId": str(YoutubeID),
                    "type": "segment",
                    "startTime": float(i),
                    "endTime": float(i + CLIP_SECONDS),
                    "observer": OBSERVER,
                    "isHuman": "false",
                    "confirmedBySomeone": "false",
                    "rejectedBySomeone": "false",
                    "observation": {
                        "label": predict_label,
                        "labelConfidence": test_acc
                    }
                }
            ]

            # Write only when this prediction matched; previously the dump ran
            # for every prediction and re-wrote stale (or undefined) data.
            # NOTE(review): each match is appended as its own JSON array, so
            # the file is a concatenation of arrays rather than one JSON
            # document; format kept as-is, presumably post-processed elsewhere.
            with open(RESULTS_FILE, 'a') as video_json_file:
                json.dump(video_data, video_json_file)

    # Move predicted video to another folder.
    os.rename(os.path.join(video_file_path, video), os.path.join(predicted_file_path, video))

if skipped_videos:
    print("Skipped (duration could not be read): " + ", ".join(skipped_videos))