In [1]:
import cv2
import faiss
import os
import numpy as np
import pandas as pd
import scipy
from scipy.spatial.distance import cosine
import random
from sklearn.metrics import classification_report, accuracy_score
import seaborn as sns
%matplotlib inline
sns.set_style('white')

In [2]:
from vzam import *

In [3]:
TMP_DIR = 'tmp'
FRAMES_DIR = 'tmp/frames'
TRAIN_VIDEOS_DIR = 'data/train_videos'
TEST_VIDEOS_DIR = 'data/test_videos'

In [4]:
fpaths = [os.path.join(TRAIN_VIDEOS_DIR, fname) for fname in os.listdir(TRAIN_VIDEOS_DIR)]

In [5]:
processed_paths = [os.path.join(TMP_DIR, '_'+fname) for fname in os.listdir(TRAIN_VIDEOS_DIR)]

In [654]:
for source, processed in zip(fpaths, processed_paths):
    try:
        preprocess_video(source, processed, resize=(256, 256), target_framerate=3)
    except Exception as e:
        print(e)
    print(source)

ffmpeg version 4.0.2 Copyright (c) 2000-2018 the FFmpeg developers
  built with gcc 4.8.2 (GCC) 20140120 (Red Hat 4.8.2-15)
  configuration: --prefix=/home/boris/anaconda3/envs/prototype --disable-doc --disable-openssl --enable-shared --enable-static --extra-cflags='-Wall -g -m64 -pipe -O3 -march=x86-64 -fPIC' --extra-cxxflags='-Wall -g -m64 -pipe -O3 -march=x86-64 -fPIC' --extra-libs='-lpthread -lm -lz' --enable-zlib --enable-pic --enable-pthreads --enable-gpl --enable-version3 --enable-hardcoded-tables --enable-avresample --enable-libfreetype --enable-gnutls --enable-libx264 --enable-libopenh264
  libavutil      56. 14.100 / 56. 14.100
  libavcodec     58. 18.100 / 58. 18.100
  libavformat    58. 12.100 / 58. 12.100
  libavdevice    58.  3.100 / 58.  3.100
  libavfilter     7. 16.100 /  7. 16.100
  libavresample   4.  0.  0 /  4.  0.  0
  libswscale      5.  1.100 /  5.  1.100
  libswresample   3.  1.100 /  3.  1.100
  libpostproc    55.  1.100 / 55.  1.100
Input #0, mov,mp4,m4a,3gp,

In [655]:
processed_paths

['tmp/_1948 - So Dear to My Heart.mp4',
 'tmp/_1928 - Mickey Mouse -  Steamboat Willie.mp4',
 'tmp/_1947 - Fun and Fancy Free.avi',
 'tmp/_1929 - Mickey Mouse - Plane Crazy.avi',
 'tmp/_1949 - The Adventures Of Ichabod And Mr. Toad.m4v']

In [6]:

def rHash(image, hash_size=64):# words=12, bits=9):
    image = np.asarray(image)
    
    n_blocks_w = np.sqrt(hash_size)
    n_blocks_h = np.sqrt(hash_size)
    
    block_width = int(len(image)//n_blocks_w)
    block_height = int(len(image)//n_blocks_h)
    
    assert image.shape[0] % n_blocks_w == 0
    assert image.shape[1] % n_blocks_h == 0
    
    block_means = []
    for i in range(0, len(image), block_height):
        for j in range(0, image.shape[1], block_width):
            mean = np.mean(image[i:i+block_height,j:j+block_width])
            block_means.append(mean)
    fingerprint = np.array(block_means) >= np.median(block_means)
    return fingerprint.astype(int)


def quandrant_rHash(image, hash_size=64):
    image = np.asarray(image)
    mid_x = len(image) // 2
    mid_y = image.shape[1] // 2

    hsize = hash_size // 4
    fingerprints = []
    fingerprints.append(rHash(image[0:mid_x,0:mid_y],  hsize))
    fingerprints.append(rHash(image[mid_x:,0:mid_y],  hsize))
    fingerprints.append(rHash(image[0:mid_x,mid_y:],  hsize))
    fingerprints.append(rHash(image[mid_x:,mid_y:],  hsize))
    return np.hstack(fingerprints)


def extract_tiri_rhashes(video_fpath,
                 buffer_size=15,
                 gamma=1.65):
    """
    :param video_fpath: str, path to video file
    :param buffer_size: int, amount of images to average
    :param gamma: float, exponential weighting parameter
    :return:
    """
    container = av.open(video_fpath)

    stream = container.streams.video[0]
    frame_idx = 0
    rhashes = []
    timestamps = []

    buffer_images = []
    for frame in container.decode(stream):
        frame_idx+=1
        img = np.array(frame.to_image().convert('L')) / 256
        buffer_images.append(img)
        
        if len(buffer_images) == buffer_size:
            tiri = exponentially_weighted_average(buffer_images, gamma)
            rhashes.append(quandrant_rHash(tiri))
            timestamps.append(round(frame.time,3))
            buffer_images = []
        if frame_idx % 10000 == 0:
            print(frame_idx)
    return rhashes, timestamps

def get_rhash_df(fpaths):
    df = None
    for video_path in fpaths:
        print(video_path)
        video_rhashes, video_timestamps = extract_tiri_rhashes(video_path)
        video_id = os.path.basename(video_path)
        
        video_df = pd.DataFrame({'feature': video_rhashes, 'ts': video_timestamps})
        video_df['id'] = video_id
        if df is None:
            df = video_df
        else:
            df = pd.concat([df, video_df], axis=0,  ignore_index=True)
    return df

In [7]:
train_df = get_rhash_df(processed_paths)

tmp/_1948 - So Dear to My Heart.mp4
10000
tmp/_1928 - Mickey Mouse -  Steamboat Willie.mp4
tmp/_1947 - Fun and Fancy Free.avi
10000
tmp/_1929 - Mickey Mouse - Plane Crazy.avi
tmp/_1949 - The Adventures Of Ichabod And Mr. Toad.m4v
10000


In [8]:
train_df.head()

Unnamed: 0,feature,ts,id
0,"[0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, ...",4.667,_1948 - So Dear to My Heart.mp4
1,"[0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, ...",9.667,_1948 - So Dear to My Heart.mp4
2,"[0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, ...",14.667,_1948 - So Dear to My Heart.mp4
3,"[0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, ...",19.667,_1948 - So Dear to My Heart.mp4
4,"[1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, ...",24.667,_1948 - So Dear to My Heart.mp4


In [11]:
train_df.shape

(2805, 3)

In [12]:
frame_times = train_df['ts']
labels = train_df.id.values
frame_vectors = np.vstack(train_df['feature'].values)
frame_vectors

array([[0, 0, 0, ..., 1, 1, 0],
       [0, 0, 0, ..., 0, 1, 0],
       [0, 1, 1, ..., 0, 0, 0],
       ...,
       [1, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [13]:
def clear_label_df(label_df):
    bad_indices = []
    
    current_query_pos = None
    earliest_query_ts = {}
    for index, row in label_df.iterrows():
        if not current_query_pos:
            current_query_pos = row.query_vec
        if row.query_vec > current_query_pos and row.ts < earliest_query_ts[current_query_pos]:
            bad_indices.append(index)
            continue
            
        if not earliest_query_ts.get(row.query_vec) or row.ts < earliest_query_ts[row.query_vec]:
            earliest_query_ts[row.query_vec] = row.ts
            
        if row.query_vec > current_query_pos+1:
            current_query_pos = row.query_vec
            
    clean_label_df = label_df.drop(bad_indices)
    
    ts = clean_label_df.ts
    clean_label_df = clean_label_df[(ts.median() - 3 * ts.std() <= ts) & (ts <= ts.median() + 3 * ts.std())]
    clean_label_df = clean_label_df.groupby(['query_vec'])['dist', 'label', 'ts'].min()
    return clean_label_df

class FaissRhashVideoSearcher:
    
    def __init__(self, vectors, labels, timestamps, dist_threshold=10):
        self.d = d = vectors.shape[1]
        self.vectors = vectors.astype('uint8')
        self.labels = labels
        self.timestamps = timestamps
        self.dist_threshold = dist_threshold
        self.index = faiss.IndexBinaryFlat(self.vectors.shape[1]*8)
        # self.index = faiss.IndexIVFFlat(self.quantizer,
        #                                 self.vectors.shape[1],
        #                                 int(ncells * len(self.vectors)))
        # self.index = faiss.index_binary_factory(d, "BIVF32")
        
        self.index.train(self.vectors)
        self.index.add(self.vectors)

    def lookup(self, vectors):
        vectors = np.asarray(vectors).astype('uint8')
        D, I = self.index.search(vectors, 1)
        min_indices, min_dists = I.flatten(), D.flatten()
        votes = self.labels[min_indices]
        timestamps = self.timestamps[min_indices]
        miss_mask = min_dists > self.dist_threshold
        votes[miss_mask] = 'miss'

        moc = max([(list(votes).count(chr), chr) for chr in set(votes)])
        moc = moc[1]
        return moc, votes, timestamps, min_dists, min_indices
    
    def lookup_fun(self, vectors, conf_threshold=0.7):
        vectors = np.asarray(vectors).astype('uint8')
        D, I = self.index.search(vectors, 10)
        
        labels = self.labels[I]
        timestamps = self.timestamps.values[I]
        candidates = []
        for i in range(len(vectors)):
            for j in range(I.shape[1]):
                dist = D[i][j]
                if dist > self.dist_threshold:
                    continue
                candidate = {'query_vec': i,
                             'dist': D[i][j],
                             'label': labels[i][j],
                             'ts': timestamps[i][j],
                             }
                candidates.append(candidate)
        df = pd.DataFrame(candidates, columns=['query_vec', 'dist', 'label', 'ts']).sort_values(by=['label', 'query_vec', 'ts'])
        potential_labels = df.label.unique()    
        predictions = []
        for label in potential_labels:
            label_df = df[df.label == label]
            clean_label_df = clear_label_df(label_df)
            confidence = len(clean_label_df)/len(vectors)
            if confidence < conf_threshold:
                continue
            mean_dist = clean_label_df.dist.mean()
            predictions.append((label, confidence, mean_dist))
        predictions = sorted(predictions, key=lambda x: (-x[1], x[2]))
        return predictions
    

In [14]:
clf = FaissRhashVideoSearcher(frame_vectors, labels, frame_times)

In [15]:
train_df.head()

Unnamed: 0,feature,ts,id
0,"[0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, ...",4.667,_1948 - So Dear to My Heart.mp4
1,"[0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 0, 1, 1, ...",9.667,_1948 - So Dear to My Heart.mp4
2,"[0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 1, ...",14.667,_1948 - So Dear to My Heart.mp4
3,"[0, 0, 0, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, ...",19.667,_1948 - So Dear to My Heart.mp4
4,"[1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, ...",24.667,_1948 - So Dear to My Heart.mp4


In [16]:
moc, votes, timestamps, min_dists, min_indices = clf.lookup([train_df.loc[0].feature])
print(moc)

_1948 - So Dear to My Heart.mp4


In [17]:
test_videos = [os.path.join(TEST_VIDEOS_DIR, f) for f in os.listdir(TEST_VIDEOS_DIR)]

In [18]:
processed_test_paths = [os.path.join(TMP_DIR, '_'+fname) for fname in os.listdir(TEST_VIDEOS_DIR)]

In [19]:
for source, processed in zip(test_videos, processed_test_paths):
    try:
        preprocess_video(source, processed, resize=(256, 256), target_framerate=10)
    except Exception as e:
        print(e)
    print(source)

ffmpeg version 4.0.2 Copyright (c) 2000-2018 the FFmpeg developers
  built with gcc 4.8.2 (GCC) 20140120 (Red Hat 4.8.2-15)
  configuration: --prefix=/home/boris/anaconda3/envs/prototype --disable-doc --disable-openssl --enable-shared --enable-static --extra-cflags='-Wall -g -m64 -pipe -O3 -march=x86-64 -fPIC' --extra-cxxflags='-Wall -g -m64 -pipe -O3 -march=x86-64 -fPIC' --extra-libs='-lpthread -lm -lz' --enable-zlib --enable-pic --enable-pthreads --enable-gpl --enable-version3 --enable-hardcoded-tables --enable-avresample --enable-libfreetype --enable-gnutls --enable-libx264 --enable-libopenh264
  libavutil      56. 14.100 / 56. 14.100
  libavcodec     58. 18.100 / 58. 18.100
  libavformat    58. 12.100 / 58. 12.100
  libavdevice    58.  3.100 / 58.  3.100
  libavfilter     7. 16.100 /  7. 16.100
  libavresample   4.  0.  0 /  4.  0.  0
  libswscale      5.  1.100 /  5.  1.100
  libswresample   3.  1.100 /  3.  1.100
  libpostproc    55.  1.100 / 55.  1.100
[mpeg4 @ 0x1c97c40] Failed

In [20]:
# FUN LOOKUP
for source_path, test_path in zip(test_videos, processed_test_paths):
    test_df = get_rhash_df([test_path])
    if test_df.empty:
        print('df empty')
        continue
    true_label = os.path.basename(source_path)
    
    features = np.vstack(test_df.feature.values)
    predictions = clf.lookup_fun(features)
    predicted_label = predictions if predictions else 'miss'
    print('True label: \t', true_label)
    print('Predictions and confidences: \t', predicted_label)
    print('Final prediction', predicted_label[0])


tmp/_1947 - Fun and Fancy Free_shortey.avi
True label: 	 1947 - Fun and Fancy Free_shortey.avi
Predictions and confidences: 	 miss
Final prediction m
tmp/_the_fun_and_fancy_tree_6.mp4
True label: 	 the_fun_and_fancy_tree_6.mp4
Predictions and confidences: 	 [('_1947 - Fun and Fancy Free.avi', 1.0, 2.2857142857142856), ('_1949 - The Adventures Of Ichabod And Mr. Toad.m4v', 0.8571428571428571, 4.0)]
Final prediction ('_1947 - Fun and Fancy Free.avi', 1.0, 2.2857142857142856)
tmp/_the_fun_and_fancy_tree_4.mp4
True label: 	 the_fun_and_fancy_tree_4.mp4
Predictions and confidences: 	 miss
Final prediction m
tmp/_miss.mp4
True label: 	 miss.mp4
Predictions and confidences: 	 miss
Final prediction m
tmp/_the_fun_and_fancy_tree_2.mp4
True label: 	 the_fun_and_fancy_tree_2.mp4
Predictions and confidences: 	 [('_1947 - Fun and Fancy Free.avi', 0.7142857142857143, 6.4), ('_1949 - The Adventures Of Ichabod And Mr. Toad.m4v', 0.7142857142857143, 8.4)]
Final prediction ('_1947 - Fun and Fancy Free.a

In [669]:
for source_path, test_path in zip(test_videos, processed_test_paths):
    test_df = get_rhash_df([test_path])
    if test_df.empty:
        print('df empty')
        continue
    true_label = os.path.basename(source_path)
    
    features = np.vstack(test_df.feature.values)
    moc, votes, timestamps, min_dists, min_indices = clf.lookup(features)
    predicted_label = moc
    print('True label: \t', true_label)
    print('Predicted label: \t', predicted_label)

tmp/_1947 - Fun and Fancy Free_shortey.avi
True label: 	 1947 - Fun and Fancy Free_shortey.avi
Predicted label: 	 _1947 - Fun and Fancy Free.avi
tmp/_the_fun_and_fancy_tree_6.mp4
True label: 	 the_fun_and_fancy_tree_6.mp4
Predicted label: 	 _1947 - Fun and Fancy Free.avi
tmp/_the_fun_and_fancy_tree_4.mp4
True label: 	 the_fun_and_fancy_tree_4.mp4
Predicted label: 	 _1948 - So Dear to My Heart.mp4
tmp/_miss.mp4
True label: 	 miss.mp4
Predicted label: 	 miss
tmp/_the_fun_and_fancy_tree_2.mp4
True label: 	 the_fun_and_fancy_tree_2.mp4
Predicted label: 	 _1948 - So Dear to My Heart.mp4
tmp/_the_fun_and_fancy_tree_3.mp4
True label: 	 the_fun_and_fancy_tree_3.mp4
Predicted label: 	 _1949 - The Adventures Of Ichabod And Mr. Toad.m4v
tmp/_the_fun_and_fancy_tree_5.mp4
True label: 	 the_fun_and_fancy_tree_5.mp4
Predicted label: 	 _1948 - So Dear to My Heart.mp4
tmp/_the_fun_and_fancy_tree_1.mp4
True label: 	 the_fun_and_fancy_tree_1.mp4
Predicted label: 	 _1947 - Fun and Fancy Free.avi
