In [8]:
import cv2
import faiss
import os
import numpy as np
import pandas as pd
import scipy
from scipy.spatial.distance import cosine
import random
from sklearn.metrics import classification_report, accuracy_score
import seaborn as sns
from vzam.searcher import FaissRhashVideoSearcher
%matplotlib inline
sns.set_style('white')

In [9]:
from vzam import *

In [10]:
TMP_DIR = 'tmp'
FRAMES_DIR = 'tmp/frames'
TRAIN_VIDEOS_DIR = 'data/train_videos'
TEST_VIDEOS_DIR = 'data/test_videos'

In [11]:
fpaths = [os.path.join(TRAIN_VIDEOS_DIR, fname) for fname in os.listdir(TRAIN_VIDEOS_DIR)]

In [12]:
processed_paths = [os.path.join(TMP_DIR, '_'+fname) for fname in os.listdir(TRAIN_VIDEOS_DIR)]

In [23]:
for source, processed in zip(fpaths, processed_paths):
    try:
        preprocess_video(source, processed, resize=(256, 256), target_framerate=5)
    except Exception as e:
        print(e)
    print(source)

ffmpeg version 4.0.2 Copyright (c) 2000-2018 the FFmpeg developers
  built with gcc 4.8.2 (GCC) 20140120 (Red Hat 4.8.2-15)
  configuration: --prefix=/home/boris/anaconda3/envs/prototype --disable-doc --disable-openssl --enable-shared --enable-static --extra-cflags='-Wall -g -m64 -pipe -O3 -march=x86-64 -fPIC' --extra-cxxflags='-Wall -g -m64 -pipe -O3 -march=x86-64 -fPIC' --extra-libs='-lpthread -lm -lz' --enable-zlib --enable-pic --enable-pthreads --enable-gpl --enable-version3 --enable-hardcoded-tables --enable-avresample --enable-libfreetype --enable-gnutls --enable-libx264 --enable-libopenh264
  libavutil      56. 14.100 / 56. 14.100
  libavcodec     58. 18.100 / 58. 18.100
  libavformat    58. 12.100 / 58. 12.100
  libavdevice    58.  3.100 / 58.  3.100
  libavfilter     7. 16.100 /  7. 16.100
  libavresample   4.  0.  0 /  4.  0.  0
  libswscale      5.  1.100 /  5.  1.100
  libswresample   3.  1.100 /  3.  1.100
  libpostproc    55.  1.100 / 55.  1.100
Input #0, mov,mp4,m4a,3gp,

In [24]:
processed_paths

['tmp/_1948 - So Dear to My Heart.mp4',
 'tmp/_1928 - Mickey Mouse -  Steamboat Willie.mp4',
 'tmp/_1947 - Fun and Fancy Free.avi',
 'tmp/_1929 - Mickey Mouse - Plane Crazy.avi',
 'tmp/_1949 - The Adventures Of Ichabod And Mr. Toad.m4v']

In [25]:

train_df = get_rhash_df(processed_paths)

tmp/_1948 - So Dear to My Heart.mp4
10000
20000
tmp/_1928 - Mickey Mouse -  Steamboat Willie.mp4
tmp/_1947 - Fun and Fancy Free.avi
10000
20000
tmp/_1929 - Mickey Mouse - Plane Crazy.avi
tmp/_1949 - The Adventures Of Ichabod And Mr. Toad.m4v
10000
20000


moov atom not found
 (repeated 2 more times)


In [26]:
train_df.head()

Unnamed: 0,feature,ts,id
0,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",2.8,_1948 - So Dear to My Heart.mp4
1,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, ...",5.8,_1948 - So Dear to My Heart.mp4
2,"[0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, ...",8.8,_1948 - So Dear to My Heart.mp4
3,"[1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, ...",11.8,_1948 - So Dear to My Heart.mp4
4,"[0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, ...",14.8,_1948 - So Dear to My Heart.mp4


In [27]:
train_df.shape

(4675, 3)

In [28]:
frame_times = train_df['ts']
labels = train_df.id.values
frame_vectors = np.vstack(train_df['feature'].values)
frame_vectors

array([[1, 1, 1, ..., 1, 1, 1],
       [0, 0, 0, ..., 1, 1, 0],
       [0, 1, 1, ..., 0, 1, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [29]:
clf = FaissRhashVideoSearcher(frame_vectors, labels, frame_times)

In [30]:
train_df.head()

Unnamed: 0,feature,ts,id
0,"[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...",2.8,_1948 - So Dear to My Heart.mp4
1,"[0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, ...",5.8,_1948 - So Dear to My Heart.mp4
2,"[0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, ...",8.8,_1948 - So Dear to My Heart.mp4
3,"[1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, ...",11.8,_1948 - So Dear to My Heart.mp4
4,"[0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 1, 0, ...",14.8,_1948 - So Dear to My Heart.mp4


In [32]:
moc, votes, timestamps, min_dists, min_indices = clf.lookup_mv([train_df.loc[0].feature])
print(moc)

_1948 - So Dear to My Heart.mp4


In [33]:
test_videos = [os.path.join(TEST_VIDEOS_DIR, f) for f in os.listdir(TEST_VIDEOS_DIR)]

In [34]:
processed_test_paths = [os.path.join(TMP_DIR, '_'+fname) for fname in os.listdir(TEST_VIDEOS_DIR)]

In [35]:
for source, processed in zip(test_videos, processed_test_paths):
    try:
        preprocess_video(source, processed, resize=(256, 256), target_framerate=10)
    except Exception as e:
        print(e)
    print(source)

ffmpeg version 4.0.2 Copyright (c) 2000-2018 the FFmpeg developers
  built with gcc 4.8.2 (GCC) 20140120 (Red Hat 4.8.2-15)
  configuration: --prefix=/home/boris/anaconda3/envs/prototype --disable-doc --disable-openssl --enable-shared --enable-static --extra-cflags='-Wall -g -m64 -pipe -O3 -march=x86-64 -fPIC' --extra-cxxflags='-Wall -g -m64 -pipe -O3 -march=x86-64 -fPIC' --extra-libs='-lpthread -lm -lz' --enable-zlib --enable-pic --enable-pthreads --enable-gpl --enable-version3 --enable-hardcoded-tables --enable-avresample --enable-libfreetype --enable-gnutls --enable-libx264 --enable-libopenh264
  libavutil      56. 14.100 / 56. 14.100
  libavcodec     58. 18.100 / 58. 18.100
  libavformat    58. 12.100 / 58. 12.100
  libavdevice    58.  3.100 / 58.  3.100
  libavfilter     7. 16.100 /  7. 16.100
  libavresample   4.  0.  0 /  4.  0.  0
  libswscale      5.  1.100 /  5.  1.100
  libswresample   3.  1.100 /  3.  1.100
  libpostproc    55.  1.100 / 55.  1.100
[mpeg4 @ 0xc9fc40] Failed 

In [38]:
# FUN LOOKUP
for source_path, test_path in zip(test_videos, processed_test_paths):
    test_df = get_rhash_df([test_path])
    if test_df.empty:
        print('df empty')
        continue
    true_label = os.path.basename(source_path)
    
    features = np.vstack(test_df.feature.values)
    predictions = clf.lookup(features)
    predicted_label = predictions if predictions else 'miss'
    print('True label: \t', true_label)
    print('Predictions and confidences: \t', predicted_label)
    print('Final prediction', predicted_label[0])
    print('')


tmp/_1947 - Fun and Fancy Free_shortey.avi
True label: 	 1947 - Fun and Fancy Free_shortey.avi
Predictions and confidences: 	 [('_1947 - Fun and Fancy Free.avi', 1.0, 3.0)]
Final prediction ('_1947 - Fun and Fancy Free.avi', 1.0, 3.0)

tmp/_the_fun_and_fancy_tree_6.mp4
True label: 	 the_fun_and_fancy_tree_6.mp4
Predictions and confidences: 	 [('_1947 - Fun and Fancy Free.avi', 1.0, 4.285714285714286), ('_1949 - The Adventures Of Ichabod And Mr. Toad.m4v', 0.7142857142857143, 2.0)]
Final prediction ('_1947 - Fun and Fancy Free.avi', 1.0, 4.285714285714286)

tmp/_the_fun_and_fancy_tree_4.mp4
True label: 	 the_fun_and_fancy_tree_4.mp4
Predictions and confidences: 	 [('_1947 - Fun and Fancy Free.avi', 0.8333333333333334, 9.2), ('_1948 - So Dear to My Heart.mp4', 0.8333333333333334, 9.2)]
Final prediction ('_1947 - Fun and Fancy Free.avi', 0.8333333333333334, 9.2)

tmp/_miss.mp4
True label: 	 miss.mp4
Predictions and confidences: 	 miss
Final prediction m

tmp/_the_fun_and_fancy_tree_2.mp4


In [37]:
for source_path, test_path in zip(test_videos, processed_test_paths):
    test_df = get_rhash_df([test_path])
    if test_df.empty:
        print('df empty')
        continue
    true_label = os.path.basename(source_path)
    
    features = np.vstack(test_df.feature.values)
    moc, votes, timestamps, min_dists, min_indices = clf.lookup_mv(features)
    predicted_label = moc
    print('True label: \t', true_label)
    print('Predicted label: \t', predicted_label)

tmp/_1947 - Fun and Fancy Free_shortey.avi
True label: 	 1947 - Fun and Fancy Free_shortey.avi
Predicted label: 	 _1947 - Fun and Fancy Free.avi
tmp/_the_fun_and_fancy_tree_6.mp4
True label: 	 the_fun_and_fancy_tree_6.mp4
Predicted label: 	 _1949 - The Adventures Of Ichabod And Mr. Toad.m4v
tmp/_the_fun_and_fancy_tree_4.mp4
True label: 	 the_fun_and_fancy_tree_4.mp4
Predicted label: 	 _1948 - So Dear to My Heart.mp4
tmp/_miss.mp4
True label: 	 miss.mp4
Predicted label: 	 miss
tmp/_the_fun_and_fancy_tree_2.mp4
True label: 	 the_fun_and_fancy_tree_2.mp4
Predicted label: 	 _1948 - So Dear to My Heart.mp4
tmp/_the_fun_and_fancy_tree_3.mp4
True label: 	 the_fun_and_fancy_tree_3.mp4
Predicted label: 	 _1948 - So Dear to My Heart.mp4
tmp/_the_fun_and_fancy_tree_5.mp4
True label: 	 the_fun_and_fancy_tree_5.mp4
Predicted label: 	 miss
tmp/_the_fun_and_fancy_tree_1.mp4
True label: 	 the_fun_and_fancy_tree_1.mp4
Predicted label: 	 _1948 - So Dear to My Heart.mp4
