# Keyshots

In [1]:
import os
import subprocess
import datetime
import time
import cv2
import pickle
from tqdm import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage import measure

In [2]:
def ping_telegram(text):
    """Send `text` as a Telegram message through a bot.

    Credentials are read from the environment instead of being stored in the
    notebook: ``TELEGRAM_BOT_TOKEN`` holds the bot token and ``TELEGRAM_CHAT_ID``
    the destination chat id.

    Args:
        text: message body to send.

    Raises:
        RuntimeError: if either environment variable is missing or empty.
    """
    # NOTE(review): a token that was previously committed in this notebook should be revoked.
    token = os.environ.get("TELEGRAM_BOT_TOKEN")
    chat_id = os.environ.get("TELEGRAM_CHAT_ID")
    if not token or not chat_id:
        raise RuntimeError(
            "Set TELEGRAM_BOT_TOKEN and TELEGRAM_CHAT_ID in the environment "
            "before calling ping_telegram()"
        )
    # Imported lazily so the rest of the notebook runs without the telegram package.
    import telegram
    bot = telegram.Bot(token=token)
    bot.send_message(chat_id=chat_id, text=text)

## Pixel difference

In [3]:
def similarity(frame1, frame2):
    """Compute the SSIM (structural similarity) score between two images.

    Both images are handed to ``measure.compare_ssim`` with ``multichannel=True``
    and the resulting score is returned unchanged.
    """
    # measure.compare_mse(frame1, frame2) is the alternative metric that was tried here.
    # NOTE(review): newer scikit-image releases expose SSIM as
    # skimage.metrics.structural_similarity — confirm against the installed version.
    return measure.compare_ssim(frame1, frame2, multichannel=True)

In [4]:
def similarities(vid_path):
    """Returns list of similarities of consecutive frames in the video and its fps.

    Every frame is compared with the frame that follows it using `similarity`.
    The scores are then plotted against video time, or against the frame index
    when OpenCV reports no usable FPS.

    Args:
        vid_path: path of the video file opened with ``cv2.VideoCapture``.

    Returns:
        (sims, fps): the list of similarity scores (one per consecutive frame
        pair, empty when fewer than two frames can be read) and the FPS value
        reported by OpenCV.
    """
    vidcap = cv2.VideoCapture(vid_path)
    try:
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        print("FPS of the video: {}".format(fps))
        sims = []
        success, prev_frame = vidcap.read()
        while success:
            success, next_frame = vidcap.read()
            if success:
                sim = similarity(prev_frame, next_frame)
                # len(sims) is the index of the frame pair being reported.
                print("At frame {}: similarity = {}".format(len(sims), sim))
                sims.append(sim)
                prev_frame = next_frame
    finally:
        # Always free the capture handle, even if a comparison raises.
        vidcap.release()
    #Plotting time vs similarities
    if fps > 0:
        x_axis = [idx / fps for idx in range(len(sims))]
    else:
        # FPS is reported as 0 when the video could not be opened/probed; avoid dividing by it.
        x_axis = list(range(len(sims)))
    plt.plot(x_axis, sims)
    plt.show()
    return sims, fps

In [None]:
# Keep fps as well: the plotting cell further down divides frame indices by it.
sims, fps = similarities("../data/SAA_clip.mp4")

In [None]:
# Difference = 1 - similarity
sims_inv = [1 - sim for sim in sims]

In [None]:
# Video time vs frame difference (frame index divided by fps gives the time axis)
plt.plot([idx / fps for idx in range(len(sims))], sims_inv)

## PySceneDetect

In [5]:
def time_to_milsec(timestr):
    """Converts scenedetect's output time string into milliseconds.

    Args:
        timestr: timecode of the form ``HH:MM:SS`` optionally followed by
            ``.fraction`` (e.g. ``00:01:02.345``). The hour field is not
            limited to 0-23.

    Returns:
        The timecode as a float number of milliseconds.

    Raises:
        ValueError: if the string is not a ``HH:MM:SS[.fraction]`` timecode.
    """
    hms, _, fraction = timestr.strip().partition('.')
    fields = hms.split(':')
    if len(fields) != 3:
        raise ValueError("Expected HH:MM:SS[.fraction], got {!r}".format(timestr))
    hours, minutes, seconds = (int(field) for field in fields)
    # Scale the fraction by its digit count so ".5" means 500 ms rather than 5 ms.
    msec = int(fraction) * 1000 / 10 ** len(fraction) if fraction else 0.0
    return (hours * 3600 + minutes * 60 + seconds) * 1000.0 + msec

In [6]:
def shot_boundaries(vid_path):
    """Returns list of shot boundaries in video_time (milliseconds).

    Runs the ``scenedetect`` command-line tool on the video with the
    ``detect-content`` detector and ``list-scenes``, then reads the timecodes
    from the first row of the ``<video name>-Scenes.csv`` file it writes into
    the current working directory. A ``<video name>.stats.csv`` stats file is
    written as well.

    Args:
        vid_path: path of the video file to analyse.

    Returns:
        List of shot boundary times converted with `time_to_milsec`.

    Raises:
        subprocess.CalledProcessError: if ``scenedetect`` exits with an error.
        FileNotFoundError: if ``scenedetect`` is not installed or the scene
            list was not written.
    """
    # NOTE(review): scenedetect names its output after the video file name without
    # its extension, so only the last suffix is stripped — confirm against the CLI docs.
    file_name = os.path.splitext(os.path.basename(vid_path))[0]
    # Argument list (no shell) so paths containing spaces or shell metacharacters are safe.
    command = [
        "scenedetect",
        "-i", vid_path,
        "-s", "{}.stats.csv".format(file_name),
        "detect-content",
        "list-scenes",
    ]
    # check=True: fail here instead of silently reading a stale scene list.
    subprocess.run(command, check=True)
    with open("{}-Scenes.csv".format(file_name)) as scenes_file:
        # First row is a label followed by the timestamps of the shot boundaries.
        first_row = scenes_file.readline().strip().split(',')
    timecodes = [field.strip() for field in first_row[1:] if field.strip()]
    shot_bounds = [time_to_milsec(timecode) for timecode in timecodes]
    return shot_bounds

In [7]:
def get_key_frames(shot_bounds, vid_path, store = False):
    """Returns list of key_frames in numpy array format, one for each shot
       and the timestamps of these key_frames.

    The key frame of a shot is the frame at the midpoint (in milliseconds)
    between the shot's two boundaries. The first shot starts at 0 and the last
    shot ends at the position OpenCV reports for the end of the video.

    Args:
        shot_bounds: shot boundary times in milliseconds, in increasing order.
        vid_path: path of the video file.
        store: when true, each key frame is also written as a JPEG into the
            directory ``<video name>_key_frames`` (created if needed).

    Returns:
        (key_frames, timestamps): two lists with ``len(shot_bounds) + 1``
        entries each. A key frame is ``None`` when OpenCV could not read a
        frame at that position.
    """
    # Taking centre frame of each shot - creating a list of such
    # centre frames from the extracted shot boundaries
    file_name = os.path.basename(vid_path).split('.')[0]
    out_dir = file_name + '_key_frames'
    if store:
        # Create the output directory once, not on every iteration.
        os.makedirs(out_dir, exist_ok=True)
    key_frames = []
    timestamps = []
    bound1 = 0
    vidcap = cv2.VideoCapture(vid_path)
    try:
        for i in tqdm(range(len(shot_bounds) + 1)):
            if i != len(shot_bounds): #not last boundary
                bound2 = shot_bounds[i]
            else: #last boundary
                # Seek to the end of the video and read back its position as max duration.
                vidcap.set(cv2.CAP_PROP_POS_AVI_RATIO, 1)
                bound2 = vidcap.get(cv2.CAP_PROP_POS_MSEC)

            key_frame_msec = (bound1 + bound2) / 2 #Average of 2 boundaries
            timestamps.append(key_frame_msec)
            vidcap.set(cv2.CAP_PROP_POS_MSEC, key_frame_msec)
            success, image = vidcap.read() #Reading frame at key_frame_msec
            if not success:
                print("Could not read a frame at {} ms (shot {})".format(key_frame_msec, i))
            elif store:
                cv2.imwrite("{}/frame_{}_{}.jpg".format(out_dir, i, key_frame_msec), image)
            # Appended even when the read failed so indices stay aligned with timestamps.
            key_frames.append(image) #storing frame as np array
            bound1 = bound2
    finally:
        # Always free the capture handle, even if reading or writing raises.
        vidcap.release()

    return key_frames,timestamps

## Clustering

In [8]:
def store_key_frames(vid_path):
    """Detect the shots of a video and save one key frame per shot to disk.

    Returns the shot boundaries together with the ``(key_frames, timestamps)``
    pair produced by `get_key_frames`.
    """
    boundaries = shot_boundaries(vid_path)
    frames_and_times = get_key_frames(boundaries, vid_path, store=True)
    return boundaries, frames_and_times

In [9]:
# Detect shots in the clip and write one key frame per shot into SAA_clip_key_frames/.
# op holds (shot_bounds, (key_frames, timestamps)).
op = store_key_frames("../data/SAA_clip.mp4")

100%|██████████| 75/75 [00:03<00:00, 22.85it/s]


In [9]:
from imagecluster import main

Using TensorFlow backend.


In [None]:
# Cluster the images stored in 2006_clipped_key_frames/ with imagecluster.
# NOTE(review): sim is presumably the similarity threshold and vis toggles the plot — confirm against imagecluster's docs.
main.main('2006_clipped_key_frames/',sim=0.65,vis=True)

loading fingerprints database 2006_clipped_key_frames/imagecluster/fingerprints.pk ...
clustering ...
#images : #clusters
2 : 75
3 : 24
4 : 12
5 : 4
6 : 4
7 : 5
8 : 4
9 : 3
10 : 3
11 : 1
13 : 3
14 : 1
15 : 1
16 : 1
18 : 1
21 : 1
25 : 2
26 : 1
35 : 1
#images in clusters total:  683
cluster dir: 2006_clipped_key_frames/imagecluster/clusters
plot array (uint8) size: 738.5888671875 MiB


In [19]:
!tree SAA_clip_key_frames/imagecluster/clusters/

[01;34mSAA_clip_key_frames/imagecluster/clusters/[00m
├── [01;34mcluster_with_11[00m
│   └── [01;34mcluster_0[00m
│       ├── [01;36mframe_0_4721.5.jpg[00m -> [01;35m/home/eon/Desktop/ShowSegmentation/Week 2 frame compare/SAA_clip_key_frames/frame_0_4721.5.jpg[00m
│       ├── [01;36mframe_10_102018.5.jpg[00m -> [01;35m/home/eon/Desktop/ShowSegmentation/Week 2 frame compare/SAA_clip_key_frames/frame_10_102018.5.jpg[00m
│       ├── [01;36mframe_1_10961.0.jpg[00m -> [01;35m/home/eon/Desktop/ShowSegmentation/Week 2 frame compare/SAA_clip_key_frames/frame_1_10961.0.jpg[00m
│       ├── [01;36mframe_2_13813.5.jpg[00m -> [01;35m/home/eon/Desktop/ShowSegmentation/Week 2 frame compare/SAA_clip_key_frames/frame_2_13813.5.jpg[00m
│       ├── [01;36mframe_3_16499.5.jpg[00m -> [01;35m/home/eon/Desktop/ShowSegmentation/Week 2 frame compare/SAA_clip_key_frames/frame_3_16499.5.jpg[00m
│       ├── [01;36mframe_4_22839.5.jpg[00m -> [01;35m/home/eon/Desktop/ShowSegment

## Similarities

In [None]:
def find_scenes(vid_path, threshold, lookback=15):
    """Groups shots into scenes.

    Each shot is represented by its key frame. A shot joins the most recent
    scene whose key shot has a similarity to it above `threshold`; only the key
    shots of the last `lookback` scenes are tried, newest first. A shot that
    matches none of them starts a new scene and becomes that scene's key shot.

    Args:
        vid_path: path of the video file.
        threshold: similarity a shot must exceed to join an existing scene.
        lookback: how many of the most recent scenes to compare against.

    Returns:
        (scenes, key_shots, scene_count, kf_timestamps) where
        ``scenes[i]`` is the scene index of shot ``i``, ``key_shots[s]`` is the
        shot index of the key shot of scene ``s``, ``scene_count`` is the index
        of the last scene (number of scenes minus one) and ``kf_timestamps``
        are the key frame times returned by `get_key_frames`.

    Raises:
        ValueError: if `lookback` is negative.
    """
    if lookback < 0:
        raise ValueError("lookback must be non-negative, got {}".format(lookback))
    print('Finding shot boundaries...')
    shot_bounds = shot_boundaries(vid_path)
    print('Extracting key frames...')
    key_frames, kf_timestamps = get_key_frames(shot_bounds,vid_path)
    scene_count = 0
    n_shots = len(key_frames)
    scenes = [0] #What scene is each shot - length = no. of shots [Maps shots to scenes]
    key_shots = [0] #Key shot for each scene [Maps scenes to their key shots]
    print('Classifying shots into scenes...')
    for i in tqdm(range(1, n_shots)): #For each shot
        curr_shot = key_frames[i]
        # Walk backwards over the key shots of the last `lookback` scenes.
        for key_shot in key_shots[-1:-(lookback + 1):-1]:
            if similarity(key_frames[key_shot], curr_shot) > threshold:
                scenes.append(scenes[key_shot]) #Mark the shot as belonging to this scene
                break
        else: #No matching scene is found
            scene_count += 1
            scenes.append(scene_count) #This shot belongs to the new scene
            key_shots.append(i) #Mark this shot as key shot of the new scene
    return (scenes, key_shots, scene_count, kf_timestamps)

In [44]:
# Group the shots of the full recording into scenes with a similarity threshold of 0.6.
# output is the tuple (scenes, key_shots, scene_count, kf_timestamps).
output = find_scenes("../../2006-01-02_0000_US_00001057_V11_M2_VHS10_H4_JA.mp4", 0.6)

Finding shot boundaries...


  exec(code_obj, self.user_global_ns, self.user_ns)


Extracting key frames...


100%|██████████| 4731/4731 [04:35<00:00, 17.20it/s]
  0%|          | 0/4730 [00:00<?, ?it/s]

Classifying shots into scenes...


  cropped = ar[slices]
100%|██████████| 4730/4730 [1:41:52<00:00,  1.04it/s]


In [47]:
# Send a Telegram notification once the long-running scene search has finished.
ping_telegram('Finding scenes done!')

### Pickling output

In [58]:
#Pickling data as a binary stream
# The context manager closes the file even if pickle.dump raises.
with open('dump_binary','wb') as file:
    pickle.dump(output, file)

In [66]:
#Loading pickle
# NOTE: pickle.load can execute arbitrary code; only load dumps created by this notebook.
with open('dump_binary','rb') as infile:
    pp = pickle.load(infile)