In [1]:
import os
import subprocess
import cv2
import datetime
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from skimage import measure
from tqdm import tqdm

## Pixel difference

In [2]:
def similarity(frame1, frame2):
    """Return the structural similarity (SSIM) score of two colour frames.

    Args:
        frame1: First image; its last axis is treated as the colour channel.
        frame2: Second image, compared against ``frame1``.

    Returns:
        The SSIM score computed by scikit-image (higher means more similar).
    """
    import inspect

    # ``measure.compare_ssim`` was removed in scikit-image 0.18; prefer its
    # replacement and only fall back to the legacy name on old installs.
    try:
        from skimage.metrics import structural_similarity as ssim
    except ImportError:
        ssim = measure.compare_ssim

    # Both spellings accept ``**kwargs``, so passing the wrong channel keyword
    # would be swallowed silently; pick the keyword from the signature instead.
    if "channel_axis" in inspect.signature(ssim).parameters:
        return ssim(frame1, frame2, channel_axis=-1)
    return ssim(frame1, frame2, multichannel=True)

In [3]:
def find_boundaries(vid_path):
    """Score the similarity of every pair of consecutive frames in a video.

    Reads the video frame by frame, compares each frame with its predecessor
    using ``similarity``, prints each score and plots the scores over time.

    Args:
        vid_path: Path of the video file to open with OpenCV.

    Returns:
        A ``(sims, fps)`` tuple: the list of consecutive-frame similarity
        scores and the frame rate reported by OpenCV.
    """
    vidcap = cv2.VideoCapture(vid_path)
    sims = []
    try:
        fps = vidcap.get(cv2.CAP_PROP_FPS)
        print("FPS of the video: {}".format(fps))
        success, frame1 = vidcap.read()
        while success:
            success, frame2 = vidcap.read()
            if success:
                sim = similarity(frame1, frame2)
                # len(sims) is the index of the pair being scored.
                print("At frame {}: similarity = {}".format(len(sims), sim))
                sims.append(sim)
                frame1 = frame2
    finally:
        # Always free the decoder handle, even if a comparison fails.
        vidcap.release()

    # Plotting time vs similarities; fall back to frame indices when the
    # container reports no frame rate, to avoid dividing by zero.
    if fps > 0:
        x_values = [x / fps for x in range(len(sims))]
        x_label = "time (s)"
    else:
        x_values = list(range(len(sims)))
        x_label = "frame index"
    plt.plot(x_values, sims)
    plt.xlabel(x_label)
    plt.ylabel("similarity to previous frame")
    plt.show()
    return sims, fps

In [None]:
# Keep the frame rate: the cells below convert frame indices to seconds with it.
sims, fps = find_boundaries("../data/SAA_clip.mp4")

In [None]:
# Dissimilarity of each consecutive frame pair (1 - similarity).
sims_inv = [1 - score for score in sims]

In [None]:
# Dissimilarity plotted against time in seconds (frame index / fps).
plt.plot(
    [frame_idx / fps for frame_idx in range(len(sims))],
    sims_inv,
)

In [None]:
# Print the time (in seconds) of every frame pair that reaches the peak
# dissimilarity. The peak is computed once instead of on every iteration.
peak = max(sims_inv, default=None)
for i, value in enumerate(sims_inv):
    if value == peak:
        print(i/fps)

## PySceneDetect

In [18]:
def time_to_milsec(timestr):
    """Convert an ``HH:MM:SS[.fraction]`` timecode to milliseconds.

    The fraction is read as a decimal fraction of a second, so ``.5`` means
    500 ms; digits beyond millisecond precision are truncated. A timecode
    without a fractional part is accepted, and the hour field is not limited
    to 0-23.

    Args:
        timestr: Timecode string such as ``"00:01:02.345"``.

    Returns:
        The timecode expressed in milliseconds, as a float.

    Raises:
        ValueError: If the string is not of the form ``HH:MM:SS[.fraction]``.
    """
    hms, _, fraction = timestr.strip().partition('.')
    hours, minutes, secs = (int(part) for part in hms.split(':'))
    seconds = datetime.timedelta(hours=hours, minutes=minutes, seconds=secs).total_seconds()
    # Right-pad to three digits so that ".5" is 500 ms rather than 5 ms.
    millis = int((fraction + '000')[:3]) if fraction else 0
    return seconds*1000 + millis

In [19]:
def shot_boundaries(vid_path):
    """Run the PySceneDetect CLI on a video and return its shot boundaries.

    Invokes ``scenedetect ... detect-content list-scenes`` and then reads the
    ``<video name>-Scenes.csv`` file from the current working directory. The
    timecodes are taken from the CSV header row, skipping its first cell.

    Args:
        vid_path: Path of the video file to analyse.

    Returns:
        List of shot-boundary timestamps in milliseconds.

    Raises:
        subprocess.CalledProcessError: If ``scenedetect`` exits with a
            non-zero status.
    """
    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact, and check=True surfaces a failed run instead of
    # silently reading a stale or missing CSV.
    command = [
        "scenedetect", "-i", vid_path, "-s", "video.stats.csv",
        "detect-content", "list-scenes",
    ]
    subprocess.run(command, check=True)
    # Strip only the final extension so names such as "clip.v2.mp4" map to
    # "clip.v2-Scenes.csv".
    # NOTE(review): assumes scenedetect derives the name the same way - confirm.
    file_name = os.path.splitext(os.path.basename(vid_path))[0]
    # Only the header row is needed, so no data rows are parsed.
    results = pd.read_csv("{}-Scenes.csv".format(file_name), nrows=0)
    results = results.columns.tolist()[1:] #List of timestamps of shot boundaries
    shot_bounds = [time_to_milsec(x) for x in results]
    return shot_bounds

In [35]:
def get_key_frames(shot_bounds, vid_path):
    """Extract the frame at the temporal centre of every shot.

    Shots are the intervals between consecutive boundaries, starting at 0 ms
    and ending at the video's duration, so ``len(shot_bounds) + 1`` key
    frames are produced.

    Args:
        shot_bounds: Shot-boundary timestamps in milliseconds, ascending.
        vid_path: Path of the video file to open with OpenCV.

    Returns:
        A ``(key_frames, timestamps)`` tuple: the decoded centre frame of
        each shot and the timestamp (ms) each frame was requested at. An
        entry of ``key_frames`` is ``None`` if that frame could not be read;
        it is kept so that list positions still correspond to shot indices.

    Raises:
        IOError: If the video cannot be opened.
    """
    key_frames = []
    timestamps = []
    bound1 = 0
    vidcap = cv2.VideoCapture(vid_path)
    try:
        if not vidcap.isOpened():
            raise IOError("Could not open video: {}".format(vid_path))

        for i in tqdm(range(len(shot_bounds) + 1)):
            if i != len(shot_bounds): #not last boundary
                bound2 = shot_bounds[i]
            else: #last boundary
                # Seek to the end and read back the position as the duration.
                vidcap.set(cv2.CAP_PROP_POS_AVI_RATIO, 1)
                bound2 = vidcap.get(cv2.CAP_PROP_POS_MSEC) #max duration
                if bound2 <= bound1:
                    # The seek-based query gave nothing usable; estimate the
                    # duration from the frame count and frame rate instead.
                    fps = vidcap.get(cv2.CAP_PROP_FPS)
                    frame_total = vidcap.get(cv2.CAP_PROP_FRAME_COUNT)
                    if fps > 0 and frame_total > 0:
                        estimate = frame_total / fps * 1000
                        if estimate > bound1:
                            bound2 = estimate

            key_frame_msec = (bound1 + bound2)/2 #Average of 2 boundaries
            timestamps.append(key_frame_msec)
            vidcap.set(cv2.CAP_PROP_POS_MSEC, key_frame_msec)
            _, image = vidcap.read() #Reading frame at key_frame_msec
            key_frames.append(image) #storing frame as np array
            bound1 = bound2
    finally:
        # Always free the decoder handle, even if reading fails.
        vidcap.release()

    return key_frames,timestamps

In [37]:
def find_scenes(vid_path, threshold, max_lookback=15):
    """Group the shots of a video into scenes by key-frame similarity.

    Each shot's key frame is compared with the key frame of the most recent
    ``max_lookback`` scenes, oldest first. The shot joins the first scene
    whose similarity exceeds ``threshold``; otherwise it starts a new scene
    and becomes that scene's key shot.

    Args:
        vid_path: Path of the video file to analyse.
        threshold: Similarity a shot must exceed to join an existing scene.
        max_lookback: How many of the most recent scenes are candidates.

    Returns:
        A tuple ``(scenes, key_shots, scene_count, key_frames,
        kf_timestamps)`` where ``scenes[i]`` is the scene id of shot ``i``,
        ``key_shots[s]`` is the shot index representing scene ``s``,
        ``scene_count`` is the highest scene id, and the last two items are
        the values returned by ``get_key_frames``.
    """
    print('Finding shot boundaries...')
    shot_bounds = shot_boundaries(vid_path)
    print('Extracting key frames...')
    key_frames, kf_timestamps = get_key_frames(shot_bounds,vid_path)
    scene_count = 0
    scenes = [0] #What scene is each shot - length = no. of shots [Maps shots to scenes]
    key_shots = [0] #Key shot for each scene [Maps scenes to their key shots]
    print('Classifying shots into scenes...')
    for i in tqdm(range(1, len(key_frames))): #For each shot
        curr_shot = key_frames[i]
        # Iterate over absolute scene ids: indices relative to the look-back
        # window would record the wrong scene once older scenes fall out of it.
        first_candidate = max(0, len(key_shots) - max_lookback)
        for scene_id in range(first_candidate, len(key_shots)):
            if similarity(key_frames[key_shots[scene_id]], curr_shot) > threshold:
                scenes.append(scene_id) #Mark the shot as belonging to this scene
                break
        else: #No candidate matched => this shot opens a new scene
            scene_count += 1
            scenes.append(scene_count) #This shot belongs to the new scene
            key_shots.append(i) #Mark this shot as key shot of the new scene
    return (scenes, key_shots, scene_count, key_frames, kf_timestamps)

In [None]:
# Full scene-detection run; the result is the tuple returned by find_scenes.
output = find_scenes(
    "../../2006-01-02_0000_US_00001057_V11_M2_VHS10_H4_JA.mp4",
    threshold=0.6,
)

Finding shot boundaries...
