In [None]:
# These are the initial, and slower, preprocessing functions that were focused
# more on using Numpy. As it turns out OpenCV is superior wrt matrix operations.

# Given a video name pair: pc_pair[0] a deepfake, pc_pair[1] its original/parent, 
# this function creates one "fakerframe" JPEG image (a difference blend mode
# image) per video frame pair (expect 300 per video), stores these at datapath.
def create_fakerframes(pc_pair, datapath):
#{
    """Write one difference-blend JPEG ("fakerframe") per frame pair.

    Args:
        pc_pair: Indexable pair of video paths; pc_pair[0] is opened as the
            deepfake and pc_pair[1] as its original/parent video.
        datapath: Directory that receives the fakerframe<N>.jpg files, where
            N is the zero-based index of the frame pair.

    Both videos are read in lockstep; processing stops as soon as either
    capture is not open or fails to deliver a frame. Returns None.
    """
    video = cv2.VideoCapture(pc_pair[0])
    orig = cv2.VideoCapture(pc_pair[1])

    # try/finally so both captures are released even when a NumPy operation
    # (e.g. mismatched frame shapes) or the image write raises part-way.
    try:
    #{
        count, vsuccess, osuccess = 0, True, True
        while video.isOpened() and orig.isOpened() and vsuccess and osuccess:
        #{
            vsuccess, videoframe = video.read()
            osuccess, origframe = orig.read()

            if vsuccess and osuccess:
            #{
                # Each fakerframe created similarly to the Photoshop
                # difference blend-mode b/w the faked frame and its original
                # https://helpx.adobe.com/photoshop/using/blending-modes.htm
                #
                # Always subtract the darker value from the brighter one.
                # For unsigned frame data the subtraction in the branch that
                # is not selected wraps around, but np.where discards it.
                brtmask = np.greater(videoframe, origframe)
                fakerframe = np.where(brtmask,
                                      videoframe - origframe,
                                      origframe - videoframe)
                cv2.imwrite(f"{datapath}/fakerframe{count}.jpg", fakerframe)
            #}

            count += 1
        #}
    #}
    finally:
    #{
        video.release()
        orig.release()
    #}
#}

# Given a datapath to a collection of fakerframe images associated with a single
# deepfake/original pair, this fcn creates a fakerprint (ahem, like a fingerprint)
# by summing the fakerframes and scaling the result into pixel range of [0,255].
def encode_fakerprint(datapath, nimages=300):
#{
    """Sum the fakerframe JPEGs under datapath into one "fakerprint" image.

    Args:
        datapath: Directory holding the fakerframe<i>.jpg files; the result
            is written there as fakerprint.jpg.
        nimages: Number of frame indices (0 .. nimages-1) to look for.
            Indices whose file is missing or cannot be decoded are skipped.

    Nothing is written when no fakerframe could be loaded. Returns None.
    """
    # Note: use of unsigned ints required to avoid sign bit funkiness.
    # 32 bits are needed for the summation: 300 frames x 255 = 76500 already
    # exceeds the uint16 maximum of 65535. Back to 8 bits for [0,255] at the end.

    fakerprint = None
    for i in range(nimages):
    #{
        framepath = f"{datapath}/fakerframe{i}.jpg"
        if not file_exists(framepath):
            continue

        fakerframe = cv2.imread(framepath)
        if fakerframe is None:
            # cv2.imread reports an unreadable/corrupt image by returning None.
            continue

        if fakerprint is None:
            fakerprint = fakerframe.astype(np.uint32)
        else:
            fakerprint += fakerframe
    #}

    if fakerprint is not None:
        fkmax = np.amax(fakerprint)
        # Only scale down when the sum left the 8-bit range. A sum that already
        # fits is kept as is (dividing it by its own maximum would squash the
        # image into [0,1], and an all-zero sum would divide by zero).
        fscale = 1 if fkmax <= 255 else fkmax / 255
        fakerprint = np.divide(fakerprint, fscale).astype(np.uint8)
        cv2.imwrite(f"{datapath}/fakerprint.jpg", fakerprint)
#}

# This one tried to speed things up by creating a large data-block of the entire 
# video for numpy to perform calculations on in one fell swoop. Turned out to be
# slower even than original Numpy versions.
def create_fakerframes_fast(pc_pair, datapath):
#{
    """Compute the difference-blend of two whole videos as one NumPy block.

    Args:
        pc_pair: Indexable pair of video paths; pc_pair[0] is opened as the
            deepfake and pc_pair[1] as its original/parent video.
        datapath: Directory that receives the saved array. The file name is
            the second-to-last piece of pc_pair[0] after splitting on '/' and
            '.' (np.save adds the .npy extension).

    The saved array has shape (nframes + 1, height, width, 3) and dtype uint8,
    with the dimensions taken from the properties reported for pc_pair[0].
    Slots for frames that could not be read stay zero. A warning is printed
    when the two videos report different frame counts or frame sizes.
    Timing information for each stage is printed. Returns None.
    """
    initial = time.time()
    video = cv2.VideoCapture(pc_pair[0])
    orig = cv2.VideoCapture(pc_pair[1])
    print(f"CV2 video open: {time.time()-initial:.3f} sec")

    # try/finally so both captures are released even if allocation or a
    # frame copy raises (e.g. the two videos have different frame sizes).
    try:
        step = time.time()
        nframes = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        fwidth  = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        fheight = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))

        if (nframes != int(orig.get(cv2.CAP_PROP_FRAME_COUNT)) or
            fwidth  != int(orig.get(cv2.CAP_PROP_FRAME_WIDTH)) or
            fheight != int(orig.get(cv2.CAP_PROP_FRAME_HEIGHT))):
            print("WARNING: fake/original video dimension mismatch:", pc_pair)

        # Load all video frames into np.arrays
        count = 0
        differenceblock = np.zeros((nframes+1, fheight, fwidth, 3), dtype=np.uint8)
        originalblock = np.zeros((nframes+1, fheight, fwidth, 3), dtype=np.uint8)
        while video.isOpened() and orig.isOpened() and count < nframes:
            # Read into temporaries first: a failed read() hands back None as
            # the frame, which must not be written into the uint8 blocks.
            vsuccess, videoframe = video.read()
            osuccess, origframe = orig.read()
            if not (vsuccess and osuccess):
                break

            differenceblock[count, :] = videoframe
            originalblock[count, :] = origframe
            count += 1
    finally:
        video.release()
        orig.release()

    print(f"CV2 video read: {time.time()-step:.3f} sec")
    step = time.time()

    # Each fakerframe created similarly to the Photoshop
    # difference blend-mode b/w the faked frame and its original
    # https://helpx.adobe.com/photoshop/using/blending-modes.htm
    #
    # Where the fake is brighter store fake - original, elsewhere store
    # original - fake, so the uint8 subtraction never goes negative.
    brtmask = np.greater(differenceblock, originalblock)
    differenceblock[brtmask] = differenceblock[brtmask] - originalblock[brtmask]

    brtmask = np.logical_not(brtmask)
    differenceblock[brtmask] = originalblock[brtmask] - differenceblock[brtmask]
    print(f"Numpy difference operation: {time.time()-step:.3f} sec")

    step = time.time()
    np.save(f"{datapath}/{re.split(r'[/.]', pc_pair[0])[-2]}", differenceblock)
    print(f"Numpy IO write to file: {time.time()-step:.3f} sec")
    print(f"Total create_fakerframes_fast time: {time.time()-initial:.3f} sec")
#}
