In [64]:
import sys
sys.path.append("../")

In [65]:
import os
import torch
import numpy as np
import pywt
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from moviepy.editor import *
from moviepy.video.fx.all import blackwhite
import librosa
import matplotlib.pyplot as plt
from libs.Lsh import *

In [141]:
class Audio():
  def __init__(self, sr, samples):
    self.sr = sr
    self.samples = samples
class Video():
  def __init__(self, clip:VideoFileClip, fps,duration):
    self.clip = clip
    self.fps = fps
    self.duration = duration
    
  def _get_top_wavelet(self, frame, top=200):
    coeffs2 = pywt.dwt2(self.clip.get_frame(frame)[:,:,0], 'haar')
    LL, (LH, HL, HH) = coeffs2
    img = LL + LH + HL + HH
    img = np.where(img >= top,1,0)
    return img
    
  def fingerprint(self, frame, top=200):
    return self._get_top_wavelet(frame, top)

class Content:
  def __init__(self, name:str, video:Video, audio:Audio):
    self.name = name
    self.video = video
    self.audio = audio


    
  
    

In [143]:
class ContentDataset(Dataset):
    def __init__(self, file_path, cache_into_memory=False):
        self.file_path = file_path
        self.cache_into_memory = cache_into_memory
        self.name_list = self._get_file_names()

    def _get_file_names(self):
        return os.listdir(self.file_path)

    def __getitem__(self, index, frame_size=(256, 144),target_sr=5512):
        video_name = os.path.join(self.file_path, self.name_list[index])
        video_clip = VideoFileClip(video_name).fx(blackwhite).resize(frame_size)
        video = Video(video_clip, video_clip.fps, video_clip.duration)
        
        audio_clip = video_clip.audio
        sr = audio_clip.fps
        audio_clip = audio_clip.to_soundarray()
        audio_clip = librosa.to_mono(audio_clip.T)
        audio_clip = audio_clip.reshape(1, -1)
        audio_clip = librosa.resample(audio_clip, orig_sr=sr, target_sr=5512)
        audio = Audio(target_sr, audio_clip)
        return Content(video_name,video,audio) 
    

    def __len__(self):
        return len(self.name_list)

In [144]:
dataset = ContentDataset("../data/compressed_index/")

In [145]:
content = dataset[0]
content2 = dataset[198] 

In [148]:
fingerprint1 = []
i = 0
supply = 2
while i < content.video.duration:
  frame = content.video.clip.get_frame(i)[:,:,0]
  fingerprint1.append(frame.flatten())
  i+=supply


In [149]:
fingerprint2 = []
i = 0 
supply = 2
while i < content2.video.duration:
  frame = content2.video.clip.get_frame(i)[:,:,0]
  fingerprint2.append(frame.flatten())
  i+=supply

In [150]:
len(fingerprint1),len(fingerprint2)

(818, 1369)

In [151]:
lsh = LSH(threshold=0.81)

In [152]:
lsh.clear()

In [153]:
for i,f in enumerate(fingerprint1):
  lsh.add(f, content.name,i)

In [154]:
sims = {}
for i,f in enumerate(fingerprint2):
  s = lsh.search(f, content2.name,i,add_to_bucket=False)
  if(s is not None):
    # print("This time: ", i*1.48)
    # print(s[1:])
    if i in sims:
      sims[i].append(s)
    else:
      sims[i] = [s]

In [155]:
sims

{0: [([1,
    106,
    45,
    42,
    31,
    252,
    126,
    31,
    120,
    195,
    219,
    222,
    153,
    108,
    125,
    223,
    57,
    21,
    48,
    176,
    57,
    146,
    164,
    137,
    112,
    225,
    165,
    90,
    140,
    127,
    218,
    244,
    16,
    5,
    79,
    92,
    10,
    162,
    65,
    10,
    192,
    213,
    230,
    203,
    235,
    55,
    125,
    167,
    8,
    43,
    147,
    100,
    145,
    98,
    198,
    12,
    30,
    118,
    98,
    205,
    67,
    229,
    217,
    77,
    121,
    130,
    168,
    168,
    140,
    30,
    238,
    243,
    86,
    211,
    64,
    169,
    83,
    79,
    157,
    231,
    151,
    106,
    137,
    26,
    237,
    237,
    140,
    182,
    235,
    56,
    166,
    245,
    72,
    254,
    67,
    231,
    46,
    60,
    216,
    180],
   '../data/compressed_index/023114a7952a10d21c3d5c177b5a8351.mp4',
   2,
   1.0)],
 1: [([1,
    106,
    45,
    42,
    31,
    252,


In [156]:
sotred = dict(sorted(sims.items(), key=lambda x: x[1][0][3], reverse=True))

In [157]:
for key,value in sotred.items():
  print(key, value[0][2:])

0 (2, 1.0)
1 (2, 1.0)
2 (2, 1.0)
3 (2, 1.0)
4 (2, 1.0)
5 (2, 1.0)
6 (2, 1.0)
7 (2, 1.0)
8 (2, 1.0)
9 (2, 1.0)
10 (2, 1.0)
11 (2, 1.0)
12 (2, 1.0)
13 (2, 1.0)
14 (2, 1.0)
15 (2, 1.0)
16 (2, 1.0)
17 (2, 1.0)
18 (2, 1.0)
20 (2, 1.0)
21 (2, 1.0)
22 (2, 1.0)
23 (2, 1.0)
24 (2, 1.0)
25 (2, 1.0)
26 (2, 1.0)
27 (2, 1.0)
28 (2, 1.0)
29 (2, 1.0)
30 (2, 1.0)
31 (2, 1.0)
32 (2, 1.0)
33 (2, 1.0)
34 (2, 1.0)
35 (2, 1.0)
36 (2, 1.0)
37 (2, 1.0)
38 (2, 1.0)
39 (2, 1.0)
40 (2, 1.0)
41 (2, 1.0)
42 (2, 1.0)
43 (2, 1.0)
44 (2, 1.0)
45 (2, 1.0)
46 (2, 1.0)
47 (2, 1.0)
48 (2, 1.0)
49 (2, 1.0)
50 (2, 1.0)
51 (2, 1.0)
52 (2, 1.0)
53 (2, 1.0)
54 (2, 1.0)
55 (2, 1.0)
56 (2, 1.0)
57 (2, 1.0)
58 (2, 1.0)
59 (2, 1.0)
60 (2, 1.0)
61 (2, 1.0)
62 (2, 1.0)
63 (2, 1.0)
64 (2, 1.0)
65 (2, 1.0)
66 (2, 1.0)
67 (2, 1.0)
68 (2, 1.0)
69 (2, 1.0)
70 (2, 1.0)
71 (2, 1.0)
72 (2, 1.0)
73 (2, 1.0)
74 (2, 1.0)
75 (2, 1.0)
76 (2, 1.0)
77 (2, 1.0)
78 (2, 1.0)
79 (2, 1.0)
80 (2, 1.0)
81 (2, 1.0)
82 (2, 1.0)
83 (2, 1.0)
84 (2, 1.0)
85

In [79]:
def build_fingerprint_from_frame(content, from_frame,to_frame):
  fingerprint = []
  supply = 2
  for i in range(from_frame,to_frame):
    frame = content.video.clip.get_frame(i)[:,:,0]
    fingerprint.append(frame.flatten())
    i = i + supply
  return fingerprint
    
  

In [80]:
fingerprints_to_check = {}
for key,value in sotred.items():
  m = lsh.get_min_hash()
  fingerprint = content2.video.clip.get_frame(key)[:,:,0].flatten()
  fingerprint = m.hash(fingerprint)
  if len(fingerprint) == 100:
    jac = m.jaccard_similarity(fingerprint, value[0][0])
    if(jac > 0.98):
      fingerprints_to_check[key] = build_fingerprint_from_frame(content2, key, key+60)
  break

In [81]:
len(fingerprints_to_check)

0

In [82]:
#get max time range based on synced fingerprints
def get_max_time(sync_fingerprint,sync_frame):
  ranges = []
  range_i = []
  for i,f in enumerate(sync_fingerprint):
    s = lsh.search(f,"Second video",i+sync_frame,add_to_bucket=False)
    if(s is not None):
      print(i,f,s[0])
      # this_min, this_sec = divmod((i+sync_frame//128)*1.48, 60)
      # this_time_str = '{:02d}:{:02d}'.format(int(this_min), int(this_sec)) 
      # target_min, target_sec = divmod(s[2]*1.48, 60)
      # target_time_str = '{:02d}:{:02d}'.format(int(target_min), int(target_sec))
      if(len(range_i) == 0):
        range_i = [i+sync_frame, s[2]]
      else:
        #430 - 5 sec
        if(s[2] - range_i[1]< 4):
          range_i[1] = s[2]
        else:
          ranges.append(range_i)
          range_i = [i+sync_frame, s[2]]

  if(len(range_i) > 0):
    ranges.append(range_i)

  return ranges

In [83]:
for key,value in fingerprints_to_check.items():
  print(key)
  for v in value:
    print(np.count_nonzero(v))
  # max_ranges = get_max_time(value, key)
  # for i in max_ranges:
  #   print(key, i[0]*2/60, i[1]*2/60)

In [84]:
#build fingerprint based on sync frames
sync_fingerprint = []
supply = 2
max_ranges = []
for key in sync_frames:
  i = key
  while i < i + 30:
    frame = content2.video.clip.get_frame(i)[:,:,0]
    frame_normalized = (frame - frame.min()) / (frame.max() - frame.min())
    sync_fingerprint.append(content2.video.fingerprint(i).flatten())
    i+=supply
  max_ranges.append(get_max_time(sync_fingerprint,i))
  sync_fingerprint = []


KeyboardInterrupt: 

In [None]:
for key,value in sotred.items():
  fr = get_sync_frame(key, value[0][0])
  if(fr != -1):
    print(key*2/60,fr)

10.9 327
10.933333333333334 328
