In [52]:
import sys
sys.path.append("../")

In [53]:
import os
import torch
import numpy as np
import pywt
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from moviepy.editor import *
from moviepy.video.fx.all import blackwhite
import librosa
import matplotlib.pyplot as plt
from libs.Lsh import *

In [71]:
class Content:
  def __init__(self, name:str, video:Video, audio:Audio):
    self.name = name
    self.video = None
    self.audio = None

class Audio():
  def __init__(self, sr, samples):
    self.sr = sr
    self.samples = samples
class Video():
  def __init__(self, clip, fps,duration):
    self.clip = clip
    self.fps = fps
    self.duration = duration
    
  def _get_top_wavelet(self, frame, top=200):
    coeffs2 = pywt.dwt2(self.clip.get_frame(frame)[:,:,0], 'haar')
    LL, (LH, HL, HH) = coeffs2
    LL = np.where(LL >= np.sort(LL.flatten())[-top], 1, 0)
    LH = np.where(LH >= np.sort(LH.flatten())[-top], 1, 0)
    HL = np.where(HL >= np.sort(HL.flatten())[-top], 1, 0)
    HH = np.where(HH >= np.sort(HH.flatten())[-top], 1, 0)
    img = LL + LH + HL + HH
    img = np.where(img != 0,1,0)
    return img
    
  def fingerprint(self, frame, top=200):
    return self._get_top_wavelet(frame, top)
    

In [55]:
class ContentDataset(Dataset):
    def __init__(self, file_path, cache_into_memory=False):
        self.file_path = file_path
        self.cache_into_memory = cache_into_memory
        self.name_list = self._get_file_names()

    def _get_file_names(self):
        return os.listdir(self.file_path)

    def __getitem__(self, index, frame_size=(256, 144),target_sr=5512):
        video_name = os.path.join(self.file_path, self.name_list[index])
        video_clip = VideoFileClip(video_name).fx(blackwhite).resize(frame_size)
        video = Video(video_clip, video_clip.fps, video_clip.duration)
        
        audio_clip = video_clip.audio
        sr = audio_clip.fps
        audio_clip = audio_clip.to_soundarray()
        audio_clip = librosa.to_mono(audio_clip.T)
        audio_clip = audio_clip.reshape(1, -1)
        audio_clip = librosa.resample(audio_clip, orig_sr=sr, target_sr=5512)
        audio = Audio(target_sr, audio_clip)
        return Content(video_name,video,audio) 
    

    def __len__(self):
        return len(self.name_list)

In [56]:
dataset = ContentDataset("../data/compressed_index/")

In [57]:
clip = dataset[0]
clip2 = dataset[198] 

In [58]:
fingerprint1 = []
i = 0
supply = 2
while i < clip.duration:
  fingerprint1.append(clip.fingerprint(i).flatten())
  i+=supply


In [59]:
fingerprint2 = []
i = 0
while i < clip.duration:
  fingerprint2.append(clip.fingerprint(i).flatten())
  i+=supply


In [60]:
lsh = LSH(threshold=0.81)

In [61]:
for i,f in enumerate(fingerprint1):
  lsh.search(f, clip.name, i)

In [62]:
sims = {}
for i,f in enumerate(fingerprint2):
  s = lsh.search(f, clip2.name,i,add_to_bucket=False)
  if(s is not None):
    # print("This time: ", i*1.48)
    # print(s[1:])
    if i in sims:
      sims[i].append(s)
    else:
      sims[i] = [s]

In [63]:
sotred = dict(sorted(sims.items(), key=lambda x: x[1][0][3]))

In [70]:
for key,value in sotred.items():
  print(key, value[0][2:])

801 (797, 0.9333735468304453)
803 (797, 0.9500661565897717)
0 (0, 1.0)
1 (0, 1.0)
3 (3, 1.0)
5 (5, 1.0)
6 (6, 1.0)
7 (7, 1.0)
8 (8, 1.0)
9 (9, 1.0)
11 (11, 1.0)
12 (12, 1.0)
13 (13, 1.0)
14 (14, 1.0)
15 (15, 1.0)
16 (16, 1.0)
17 (17, 1.0)
18 (18, 1.0)
19 (19, 1.0)
20 (20, 1.0)
21 (21, 1.0)
22 (22, 1.0)
23 (23, 1.0)
24 (24, 1.0)
25 (25, 1.0)
26 (26, 1.0)
27 (27, 1.0)
28 (28, 1.0)
29 (29, 1.0)
30 (30, 1.0)
31 (31, 1.0)
32 (32, 1.0)
33 (33, 1.0)
34 (34, 1.0)
35 (35, 1.0)
36 (36, 1.0)
37 (37, 1.0)
38 (38, 1.0)
40 (40, 1.0)
41 (41, 1.0)
42 (42, 1.0)
43 (43, 1.0)
44 (44, 1.0)
45 (45, 1.0)
46 (46, 1.0)
47 (47, 1.0)
49 (49, 1.0)
50 (50, 1.0)
52 (52, 1.0)
53 (53, 1.0)
54 (54, 1.0)
57 (57, 1.0)
58 (58, 1.0)
59 (59, 1.0)
61 (61, 1.0)
62 (62, 1.0)
63 (63, 1.0)
64 (64, 1.0)
65 (65, 1.0)
66 (66, 1.0)
67 (67, 1.0)
68 (68, 1.0)
69 (69, 1.0)
70 (70, 1.0)
71 (71, 1.0)
72 (72, 1.0)
73 (73, 1.0)
74 (74, 1.0)
75 (75, 1.0)
76 (76, 1.0)
77 (77, 1.0)
78 (78, 1.0)
79 (79, 1.0)
80 (80, 1.0)
81 (81, 1.0)
82 (82,

In [65]:
def get_sync_frame(key, minhash):
  sec_30 = 30
  check_frame = key//1.48
  minHash = lsh.get_min_hash()
  supply = 2
  i = check_frame - sec_30
  while i < check_frame + sec_30:
    fingerprint = clip2.clip.get_frame(i)[:,:,0].flatten()
    fingerprint = minHash.hash(fingerprint)
    if len(fingerprint) == 100:
      jac = minHash.jaccard_similarity(fingerprint, minhash)
      print(jac)
      if(jac > 0.98):
        return i
    i+=supply
  return -1

In [66]:
for key,value in sotred.items():
  frame = get_sync_frame(key, value[0][0])
  if(frame != -1):
    print(frame)

0.13420323169494963
0.07256986440893755
0.09825043930017573
0.14522821576763487
0.11429598346661875
0.10466266569939986
0.11141283850041488
0.1112326768781911
0.07109442679757436
0.14805613375411267
0.08932270916334661
0.1146265560165975
0.19377385880961343
0.08411065933189826
0.10708389024294362
0.2192630426851514
0.0782421549569564
0.0989056887478072
0.18245676170237884
0.08241800445502727
0.11362884551769765
0.19760693558787706
0.14652014652014653
0.14157902997959504
0.09676432341467146
0.152143539966174
0.18541033434650456
0.323686428772127
0.07184430702073481
0.178431961550015
0.10852025173471035
0.060163973957077406
0.06960175374497625
0.31200913894045407
0.09265734265734266
0.1535784711750377
0.07337622549019608
0.1403924043974541
0.05783132530120482
0.11747643219724438
0.10686085278118969
0.30249360613810744
0.06926822071407494
0.17499114417286574
0.08930932763706642
0.12134845309996302
0.09951802927525884
0.14991051039930878
0.08285229202037352


KeyboardInterrupt: 