# Image Similarity

## Break Video into Frames

In [None]:
def video_to_frames(input_loc, output_loc):
    """Extract the frames of a video file and save them as JPEG images.

    Frames are written into ``output_loc`` as ``00001.jpg``, ``00002.jpg``,
    ... in decoding order. Extraction stops when the decoder has no more
    frames to deliver or when the frame limit derived from the video
    metadata is reached, whichever comes first.

    Args:
        input_loc: Path of the input video file.
        output_loc: Directory that receives the extracted frames. It is
            created, including missing parent directories, if necessary.
    """
    import time
    import cv2
    import os

    # Best effort: report a directory problem and keep going rather than abort.
    try:
        os.makedirs(output_loc, exist_ok=True)
    except OSError as e:
        print(e)

    # Log the time
    time_start = time.time()

    # Start capturing the feed
    cap = cv2.VideoCapture(input_loc)

    # Frame count reported by the video metadata, minus one; used as the
    # upper bound on the number of frames written.
    video_length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) - 1
    print ("Quantidade de frames: ", video_length)

    count = 0
    print ("Convertendo Video..\n")

    try:
        # Start converting the video
        while cap.isOpened():
            ret, frame = cap.read()
            # The reported frame count can exceed what the decoder delivers;
            # stop instead of handing an empty frame to imwrite.
            if not ret:
                break
            # Write the frame to the output location.
            cv2.imwrite(output_loc + "/%#05d.jpg" % (count+1), frame)
            count = count + 1
            # Stop once the metadata-based limit has been reached.
            if count > (video_length - 1):
                break
    finally:
        # Release the feed even when reading or writing fails.
        cap.release()

    # Log the time again and print stats
    time_end = time.time()
    print ("Finalizado a extração dos frames.\n%d frames extraidos" % count)
    print ("Demorou %d segundos para converter." % (time_end-time_start))
            
# Run the extraction; both arguments are placeholder paths to be replaced
# with the real video file and the directory that will hold all frames.
video_to_frames('YOUR_VIDEO_PATH', 'YOUR_PATH/Frames_Video/all_frames')

## Similarity

In [None]:
# import the necessary packages
from skimage.measure import compare_ssim as ssim
import numpy as np
import cv2
from shutil import copy
from datetime import datetime
import os
import json

def mse(imageA, imageB):
    """Return the mean squared error between two images.

    The squared pixel differences are summed and divided by the number of
    pixel positions (rows * columns) of ``imageA``. Lower values mean the
    images are more alike; identical images give 0.

    NOTE: both images must have the same dimensions.
    """
    # Convert to float first so unsigned pixel types cannot wrap around
    # when subtracted.
    difference = imageA.astype("float") - imageB.astype("float")
    squared_total = np.sum(difference ** 2)
    rows, cols = imageA.shape[0], imageA.shape[1]
    return squared_total / float(rows * cols)
 
def compare_images(imageA, imageB, title):
    """Return ``(mse, ssim)`` for two images, each rounded to 2 decimals.

    ``title`` is accepted for interface compatibility and is not used.
    """
    # Mean squared error first, then the structural similarity index.
    scores = (mse(imageA, imageB), ssim(imageA, imageB))
    return tuple(round(score, 2) for score in scores)
    
    
def check_similarity(pre, nex, simila_check_file,
                     selected_frames='YOUR_PATH/Frames_Video/selected_frames',
                     mse_threshold=20.0):
    """Compare two consecutive frames and keep the second if it differs enough.

    Both frames are loaded and converted to grayscale, then their MSE and
    SSIM are computed. When the MSE exceeds ``mse_threshold`` the ``nex``
    frame is copied into ``selected_frames``. The scores of every
    comparison are appended to the similarity log.

    Args:
        pre: Path of the previous frame image.
        nex: Path of the next frame image.
        simila_check_file: Log dictionary holding a ``"similarity"`` list;
            it is updated and written to disk by ``gera_similarity_log``.
        selected_frames: Directory that receives the frames considered
            different; created if it does not exist.
        mse_threshold: MSE value above which ``nex`` is treated as a
            different frame and copied.
    """
    # read and convert the images to grayscale
    img_pre = cv2.cvtColor(cv2.imread(pre), cv2.COLOR_BGR2GRAY)
    img_nex = cv2.cvtColor(cv2.imread(nex), cv2.COLOR_BGR2GRAY)

    # compare the images
    m, s = compare_images(img_pre, img_nex, "Previous vs. Next")

    # Create the destination, including missing parents, if needed.
    os.makedirs(selected_frames, exist_ok=True)

    # keep the next frame only when it differs enough from the previous one
    if m > mse_threshold:
        copy(nex, selected_frames)

    gera_similarity_log(fill_similarity_log(pre, nex, m, s), simila_check_file)
    
    
def fill_similarity_log(pre,nex,m, s):
    """Build one log record describing the comparison of two frames.

    Returns a dictionary with the previous and next frame identifiers and
    their MSE and SSIM scores.
    """
    return {
        "Previous Frame": pre,
        "Next Frame": nex,
        "MSE": m,
        "SSIM": s,
    }
    
def gera_similarity_log(similarity_log, simila_check_file):
    """Append one record to the log and rewrite the JSON log file.

    ``similarity_log`` is added to the ``"similarity"`` list of
    ``simila_check_file`` (mutated in place), after which the whole
    dictionary is serialized to ``similarity_log.json`` in the current
    working directory, replacing any previous content.
    """
    entries = simila_check_file["similarity"]
    entries.append(similarity_log)

    with open('similarity_log.json', 'w') as log_file:
        json.dump(simila_check_file, log_file)

# -- main
# Accumulates one record per compared pair of frames.
simila_check_file = {"similarity":[]}

images_path = "YOUR_PATH/Frames_Video/all_frames/"
selected_path = 'YOUR_PATH/Frames_Video/selected_frames/'

start = datetime.now()
# Load the frames from path, ordered by file name
files = [os.path.join(images_path, p) for p in sorted(os.listdir(images_path)) if p.endswith(".jpg")]

if not files:
    raise FileNotFoundError("No .jpg frames found in {}".format(images_path))

# The destination must exist before the first copy; copying to a missing
# directory would fail.
os.makedirs(selected_path, exist_ok=True)

# The first frame is always kept: it is the baseline for the comparisons.
copy(files[0], selected_path)

# Compare every frame with its successor, starting with the first pair so
# that the transition out of the baseline frame is evaluated as well.
for prev, nex in zip(files, files[1:]):
    check_similarity(prev, nex, simila_check_file)

print("tempo processamento: " + str(datetime.now() - start))