In [1]:
import cv2
import numpy as np
import pandas as pd
import scipy
from scipy.misc import imread
import cPickle as pickle
import random
import os
import matplotlib.pyplot as plt
from moviepy.video.io.ffmpeg_tools import ffmpeg_extract_subclip
from moviepy.editor import VideoFileClip
import shutil
from PIL import Image
import av
from tqdm import tqdm
%matplotlib inline

In [2]:
from util import *

In [3]:
def get_video_rows(path, feature_extractor, preprocessor, write_frames_dir=None):
    frames_path = None
    if write_frames_dir:
        frames_path = os.path.join(write_frames_dir, os.path.basename(path)+'_frames')
        if os.path.exists(frames_path):
            shutil.rmtree(frames_path)
        os.makedirs(frames_path)
    
    rows = []
    video_name = os.path.basename(path)

    container = av.open(path)

    stream = container.streams.video[0]
    stream.codec_context.skip_frame = 'NONKEY'
    for frame in container.decode(stream):
        img = frame.to_image()
        img = preprocessor(img)
        features = feature_extractor(rgb_to_bgr(np.array(img)))
        row = [path, frame.time] #metadata, features
        for i in range(len(features)):
            row.append(features[i])
        rows.append(tuple(row))
        if frames_path:
            fpath = os.path.join(frames_path, '{}.jpg'.format(str(frame.pts)))
            Image.fromarray(img).save(fpath)
    return rows

def get_dataframe(fpaths, feature_extractor, preprocessor=preprocess_image_load, write_frames_dir=None):
    rows = []
    i = 0
    for fpath in fpaths:
        video_rows = get_video_rows(fpath, feature_extractor=feature_extractor, preprocessor=preprocessor, write_frames_dir=write_frames_dir)
        rows += video_rows
        i+=1
        print 'Done ' + str(round(float(i)/len(fpaths), 2))
    
    cols = ['video_path', 'frame_time']
    for i in range(len(rows[0])-2):
        cols.append('x_'+str(i))
    df = pd.DataFrame(rows, columns=cols, index=range(len(rows)))    
    return df

In [4]:
DATASET_NAME = 'LOCALHIST_CORRUPT'
video_folder = '../data'
frame_dir_path = './frames'
tmp_dir_path = './tmp'

if os.path.exists(frame_dir_path):
    shutil.rmtree(frame_dir_path)
os.makedirs(frame_dir_path)
    
if os.path.exists(tmp_dir_path):
    shutil.rmtree(tmp_dir_path)
os.makedirs(tmp_dir_path)

FEATURE_EXTRACTOR = local_histogram_features

In [5]:
%%time
fpaths = [os.path.join(video_folder, fpath) for fpath in os.listdir(video_folder)]
df = get_dataframe(fpaths, feature_extractor=FEATURE_EXTRACTOR)

Done 0.05
Done 0.1
Done 0.15
Done 0.2
Done 0.25
Done 0.3
Done 0.35
Done 0.4
Done 0.45
Done 0.5
Done 0.55
Done 0.6
Done 0.65
Done 0.7
Done 0.75
Done 0.8
Done 0.85
Done 0.9
Done 0.95
Done 1.0
CPU times: user 26min 47s, sys: 2min 51s, total: 29min 39s
Wall time: 26min 3s


In [6]:
df.head()

Unnamed: 0,video_path,frame_time,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,...,x_890,x_891,x_892,x_893,x_894,x_895,x_896,x_897,x_898,x_899
0,../data/1943 - Victory Through Air Power.avi,0.033367,0.0,0.0,0.0,0.0,0.0,0.140162,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,../data/1943 - Victory Through Air Power.avi,2.035369,0.000108,0.0,0.0,0.0,4.4e-05,0.007993,0.0,0.000355,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,../data/1943 - Victory Through Air Power.avi,2.068735,5.8e-05,0.0,0.0,0.000105,3.6e-05,0.009339,8e-06,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,../data/1943 - Victory Through Air Power.avi,2.102102,1.7e-05,0.0,0.0,0.000147,0.000103,0.008468,1.9e-05,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,../data/1943 - Victory Through Air Power.avi,2.135469,6e-06,0.0,0.0,0.000203,5e-05,0.008313,1.1e-05,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
df.shape

(28087, 902)

In [8]:
def make_subclips(in_path, out_dir, number_subclips=1, subclip_duration=10):
    actual_video_name = os.path.basename(in_path)
    
    subclips_dir = os.path.join(out_dir, actual_video_name+'_subclips')
    if os.path.exists(subclips_dir):
        shutil.rmtree(subclips_dir)
    os.makedirs(subclips_dir)
        
        
    container = VideoFileClip(in_path)
    
    duration = container.duration
    
    fnames = []
    for i in range(number_subclips):
        subclip_start_time = np.random.randint(0, duration-subclip_duration-1)
        subclip_range = (subclip_start_time, subclip_start_time+subclip_duration)
        subclip_fname = os.path.join(subclips_dir, str(subclip_range)+'.'+actual_video_name.split('.')[-1])
        ffmpeg_extract_subclip(in_path, subclip_range[0], subclip_range[1], targetname=subclip_fname)
        fnames.append(subclip_fname)
    return fnames

In [9]:
def preprocess_corrupt(image, params):
    image = image.resize((600, 600))
    image = restore_image(corrupt(image, *params))
    return image

def get_corrupted_dataframe(fpaths, feature_extractor, preprocessor=preprocess_image, write_frames_dir=None):
    rows = []
    i = 0
    for fpath in fpaths:
        cparams = random_corrupt_params()
        def video_frame_corrupt(img):
            return preprocess_corrupt(img, cparams)
        video_rows = get_video_rows(fpath, feature_extractor=feature_extractor, preprocessor=video_frame_corrupt, write_frames_dir=write_frames_dir)
        rows += video_rows
        i+=1
        print 'Done ' + str(round(float(i)/len(fpaths), 2))
    
    cols = ['video_path', 'frame_time']
    for i in range(len(rows[0])-2):
        cols.append('x_'+str(i))
    df = pd.DataFrame(rows, columns=cols, index=range(len(rows)))    
    return df

In [10]:
def get_subclip_dataframe(fpaths, feature_extractor, subclip_dir, remove_subclips=True, number_subclips=10, subclip_duration=10):
    dfs = []
    for fpath in fpaths:
        subclip_corruption = random_corrupt_params()
        subclip_fnames = make_subclips(fpath, subclip_dir, number_subclips=number_subclips, subclip_duration=subclip_duration)
        
        subclip_df = get_corrupted_dataframe(subclip_fnames, feature_extractor=feature_extractor)
        subclip_df['source_fpath'] = fpath
        dfs.append(subclip_df)
        if remove_subclips:
            for fname in subclip_fnames:
                if os.path.exists(fname):
                    os.remove(fname)
            os.rmdir(os.path.dirname(fname))
                
    df = pd.concat(dfs, axis=0)
    return df

In [11]:
fpaths = [os.path.join(video_folder, fpath) for fpath in os.listdir(video_folder)]
subclip_df = get_subclip_dataframe(fpaths, feature_extractor=FEATURE_EXTRACTOR, subclip_dir=tmp_dir_path, number_subclips=10)

Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Done 0.1
Done 0.2
Done 0.3
Done 0.4
Done 0.5
Done 0.6
Done 0.7
Done 0.8
Done 0.9
Done 1.0
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Run

No handlers could be found for logger "libav.avi"


Done 0.2
Done 0.3
Done 0.4
Done 0.5
Done 0.6
Done 0.7
Done 0.8
Done 0.9
Done 1.0
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Done 0.1
Done 0.2
Done 0.3
Done 0.4
Done 0.5
Done 0.6
Done 0.7
Done 0.8
Done 0.9
Done 1.0
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
M

Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Done 0.1
Done 0.2
Done 0.3
Done 0.4
Done 0.5
Done 0.6
Done 0.7
Done 0.8
Done 0.9
Done 1.0
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Command successful
Moviepy - Running:
>>> "+ " ".join(cmd)
Moviepy - Com

In [12]:
subclip_df.head()

Unnamed: 0,video_path,frame_time,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,...,x_891,x_892,x_893,x_894,x_895,x_896,x_897,x_898,x_899,source_fpath
0,./tmp/1943 - Victory Through Air Power.avi_sub...,0.0,3e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,../data/1943 - Victory Through Air Power.avi
1,./tmp/1943 - Victory Through Air Power.avi_sub...,7.173841,0.00038,0.001302,0.000158,0.000158,0.0,0.0,0.000278,0.001946,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,../data/1943 - Victory Through Air Power.avi
2,./tmp/1943 - Victory Through Air Power.avi_sub...,7.207207,3e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,../data/1943 - Victory Through Air Power.avi
3,./tmp/1943 - Victory Through Air Power.avi_sub...,7.807808,3e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,../data/1943 - Victory Through Air Power.avi
4,./tmp/1943 - Victory Through Air Power.avi_sub...,7.874541,3e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,../data/1943 - Victory Through Air Power.avi


In [13]:
subclip_df = subclip_df.rename({'video_path': 'clip_path', 'source_fpath': 'video_path'}, axis=1)
subclip_df.head()

Unnamed: 0,clip_path,frame_time,x_0,x_1,x_2,x_3,x_4,x_5,x_6,x_7,...,x_891,x_892,x_893,x_894,x_895,x_896,x_897,x_898,x_899,video_path
0,./tmp/1943 - Victory Through Air Power.avi_sub...,0.0,3e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,../data/1943 - Victory Through Air Power.avi
1,./tmp/1943 - Victory Through Air Power.avi_sub...,7.173841,0.00038,0.001302,0.000158,0.000158,0.0,0.0,0.000278,0.001946,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,../data/1943 - Victory Through Air Power.avi
2,./tmp/1943 - Victory Through Air Power.avi_sub...,7.207207,3e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,../data/1943 - Victory Through Air Power.avi
3,./tmp/1943 - Victory Through Air Power.avi_sub...,7.807808,3e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,../data/1943 - Victory Through Air Power.avi
4,./tmp/1943 - Victory Through Air Power.avi_sub...,7.874541,3e-06,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,../data/1943 - Victory Through Air Power.avi


In [14]:
subclip_df.video_path.value_counts()

../data/1948 - Melody Time.avi                               244
../data/1943 - Victory Through Air Power.avi                 184
../data/1945 - The Three Caballeros.avi                      135
../data/1940 - Pinocchio.avi                                  50
../data/1948 - So Dear to My Heart.mp4                        46
../data/1947 - Fun and Fancy Free.avi                         43
../data/1937 - Snow White and the Seven Dwarves.avi           41
../data/1949 - The Adventures Of Ichabod And Mr. Toad.m4v     37
../data/The.Young.Pope.S01E02.HDTVRip.Jaskier.avi             24
../data/The.Young.Pope.S01E01.HDTVRip.Jaskier.avi             22
../data/1941 - Dumbo.avi                                      22
../data/The.Young.Pope.S01E04.HDTVRip.Jaskier.avi             19
../data/1942 - Bambi.avi                                      19
../data/The.Young.Pope.S01E03.HDTVRip.Jaskier.avi             18
../data/1946 - Make Mine Music.avi                            18
../data/1928 - Mickey Mou

In [15]:
df.to_csv(DATASET_NAME+'_df.csv', index=False)
subclip_df.to_csv(DATASET_NAME+'_subclips.csv', index=False)