In [None]:
from math import log10, sqrt
import numpy as np
import cv2
from skimage.metrics import structural_similarity as ssim_frame
import imageio
import matplotlib.pyplot as plt
from PIL import Image
import cpbd
from deepface import DeepFace as DF
import pandas as pd
import dlib
import imutils
from imutils import face_utils
from scipy.spatial import distance as dist

def check(true, pred):
    """Sanity-check a reference / predicted video pair before scoring.

    Args:
        true: Reference video; must be exactly an
            ``imageio.plugins.ffmpeg.FfmpegFormat.Reader``.
        pred: Predicted video. Its type is not checked here, but it must
            provide ``count_frames()`` and ``get_data(index)``.

    Raises:
        AssertionError: If ``true`` has the wrong type, or the first frames
            of the two videos differ in shape.
    """
    # Only the reference video's type is validated.
    reader_cls = imageio.plugins.ffmpeg.FfmpegFormat.Reader
    assert type(true) == reader_cls

    # Both frame counts are printed; equality is intentionally not enforced
    # (the assertion below is disabled).
    n_true = true.count_frames()
    n_pred = pred.count_frames()
    for n_frames in (n_true, n_pred):
        print(n_frames)
    #assert l1 == l2 # "Different frame numbers!"

    # The first frame of each video must have the same array shape.
    first_true = np.array(true.get_data(0))
    first_pred = np.array(pred.get_data(0))
    assert first_true.shape == first_pred.shape
    
def check_v2(pred):
    """Assert that ``pred`` is exactly an imageio ffmpeg reader.

    Raises:
        AssertionError: If ``type(pred)`` is not
            ``imageio.plugins.ffmpeg.FfmpegFormat.Reader``.
    """
    actual_cls = type(pred)
    expected_cls = imageio.plugins.ffmpeg.FfmpegFormat.Reader
    assert actual_cls == expected_cls
    
def to_grayscale(im):
    """Collapse a 3-D image array to one channel with fixed channel weights.

    Channels 0, 1 and 2 along the last axis are weighted by 0.2989, 0.5870
    and 0.1140 respectively and summed.

    Args:
        im: Array indexable as ``im[:, :, k]`` for k in 0..2.

    Returns:
        A new numpy array holding the weighted sum of the three channels.
    """
    channel_0 = im[:, :, 0]
    channel_1 = im[:, :, 1]
    channel_2 = im[:, :, 2]
    weighted = channel_0 * 0.2989 + channel_1 * 0.5870 + channel_2 * 0.1140
    return np.array(weighted)

# metric 1: PSNR, Peak Signal to Noise Ratio
# range: usually 25-50 dB; the higher the score, the better the prediction
def eval_psnr(true, pred, offset = 0, n_frames = 30):
    """Average the per-frame PSNR over the first ``n_frames`` frames.

    Args:
        true: Reference video; must provide ``get_data(index)``.
        pred: Predicted video; must provide ``get_data(index)``.
        offset: Accepted for backward compatibility; currently unused.
        n_frames: Number of leading frames to compare (default 30, the
            value that used to be hard-coded).

    Returns:
        Mean PSNR in dB. A frame pair with zero error contributes 100.

    Raises:
        ValueError: If ``n_frames`` is not positive, or if a reference frame
            is entirely zero while the prediction differs (log10 of 0).
        Any error ``get_data`` raises when a video has fewer than
        ``n_frames`` frames is propagated unchanged.
    """
    #check(true, pred)
    if n_frames <= 0:
        raise ValueError("n_frames must be positive")

    def psnr_frame(true_frame, pred_frame):
        # Convert to float before subtracting: on integer frames (e.g. uint8)
        # both the difference and the square wrap around, corrupting the MSE.
        reference = np.asarray(true_frame, dtype=np.float64)
        estimate = np.asarray(pred_frame, dtype=np.float64)
        mse = np.mean((reference - estimate) ** 2)
        if mse == 0:
            return 100
        # The peak value is the maximum of the reference frame, as before.
        return 20 * log10(np.max(reference) / sqrt(mse))

    psnrs = [
        psnr_frame(true.get_data(i), pred.get_data(i))
        for i in range(n_frames)
    ]
    return sum(psnrs) / n_frames

# metric 2: SSIM, Structural Similarity Index
# range: 0-1; the higher the score, the more similar the images
def eval_ssim(true, pred, offset = 0, n_frames = 30):
    """Average the per-frame SSIM over the first ``n_frames`` frames.

    Each frame pair is scored with ``ssim_frame(..., channel_axis=2)``.

    Args:
        true: Reference video; must provide ``get_data(index)``.
        pred: Predicted video; must provide ``get_data(index)``.
        offset: Accepted for backward compatibility; currently unused.
        n_frames: Number of leading frames to compare (default 30, the
            value that used to be hard-coded in both the loop and the
            divisor).

    Returns:
        Mean of the per-frame SSIM scores.

    Raises:
        ValueError: If ``n_frames`` is not positive.
        Any error ``get_data`` raises when a video has fewer than
        ``n_frames`` frames is propagated unchanged.
    """
    #check(true, pred)
    if n_frames <= 0:
        raise ValueError("n_frames must be positive")

    ssims = [
        ssim_frame(true.get_data(i), pred.get_data(i), channel_axis=2)
        for i in range(n_frames)
    ]
    return sum(ssims) / n_frames

# metric 3: CPBD, Cumulative Probability Blur Detection
# range: 0-1, higher the score, sharper (less blur) the image
def eval_cpbd(pred):
    """Average the CPBD score over every frame of ``pred``.

    Each frame is converted with ``to_grayscale`` and scored with
    ``cpbd.compute``.

    Args:
        pred: Video accepted by ``check_v2``.

    Returns:
        Sum of the per-frame scores divided by the frame count.
    """
    check_v2(pred)

    frame_total = pred.count_frames()
    sharpness_scores = [
        cpbd.compute(to_grayscale(pred.get_data(idx)))
        for idx in range(frame_total)
    ]
    return sum(sharpness_scores) / frame_total

# metric 4.1: ACD-I, Average Content Distance-Identity
def eval_acdi(pred):
    """Mean squared grayscale difference between consecutive frames.

    Every frame is converted with ``to_grayscale``. For each adjacent pair
    the mean squared pixel difference is taken, and those values are
    averaged over the ``frame_count - 1`` pairs.

    Args:
        pred: Video accepted by ``check_v2``.

    Returns:
        The average of the per-pair mean squared differences.
    """
    check_v2(pred)

    frame_total = pred.count_frames()
    squared_diffs = []
    previous = to_grayscale(pred.get_data(0))
    for idx in range(1, frame_total):
        current = to_grayscale(pred.get_data(idx))
        delta = current - previous
        squared_diffs.append(np.mean(delta ** 2))
        # The frame just processed becomes the baseline for the next pair.
        previous = current
    pair_total = frame_total - 1
    return sum(squared_diffs) / pair_total

# metric 4.2: ACD-C, Average Content Distance-Context
def eval_acdc(pred):
    """Mean squared embedding difference between consecutive frames.

    Each frame is written to ``im3.jpg`` and passed to ``DF.represent`` with
    the "OpenFace" model and ``enforce_detection`` off. For each adjacent
    pair, the ``'embedding'`` of the first returned entry is compared by
    mean squared difference, and the results are averaged over the
    ``frame_count - 1`` pairs.

    Args:
        pred: Video accepted by ``check_v2``.

    Returns:
        The average of the per-pair mean squared embedding differences.
    """
    check_v2(pred)
    filename = "im3.jpg"

    def embed_frame(index):
        # The frame is saved first because ``DF.represent`` is handed a path.
        Image.fromarray(pred.get_data(index)).save(filename)
        return DF.represent(filename, enforce_detection = False, model_name = "OpenFace")

    frame_total = pred.count_frames()
    distances = []
    previous = np.array(embed_frame(0))
    for index in range(1, frame_total):
        current = embed_frame(index)
        gap = np.array(current[0]['embedding']) - np.array(previous[0]['embedding'])
        distances.append(np.mean(gap ** 2))
        previous = current
    pair_total = frame_total - 1
    return sum(distances) / pair_total

# metric 5: Blinks/sec, Blink duration based on EAR (eye aspect ratio)
# Path of the landmark model file handed to dlib.shape_predictor below.
FACIAL_LANDMARK_PREDICTOR = "shape_predictor_68_face_landmarks.dat"
# NOTE(review): not referenced anywhere in the visible code; eval_ear compares
# against the literals 0.2 and 0.25 instead. Confirm whether this is still needed.
EAR_TH_FOR_BLINK = 0.1

# Created once at module level and reused by eval_ear for every frame.
faceDetector = dlib.get_frontal_face_detector()
landmarkFinder = dlib.shape_predictor(FACIAL_LANDMARK_PREDICTOR)

# (start, end) index pairs that eval_ear uses to slice each eye's points out of
# the landmark array.
(leftEyeStart, leftEyeEnd) = face_utils.FACIAL_LANDMARKS_IDXS["left_eye"]
(rightEyeStart, rightEyeEnd) = face_utils.FACIAL_LANDMARKS_IDXS["right_eye"]
def eye_aspect_ratio(eye):
    """Compute the eye aspect ratio (EAR) from six eye points.

    The ratio is the sum of the two distances ``eye[1]``-``eye[5]`` and
    ``eye[2]``-``eye[4]``, divided by twice the distance
    ``eye[0]``-``eye[3]``.

    Args:
        eye: Indexable sequence of at least six points.

    Returns:
        The aspect ratio as a float.
    """
    first_gap, second_gap = (
        dist.euclidean(eye[a], eye[b]) for a, b in ((1, 5), (2, 4))
    )
    width = dist.euclidean(eye[0], eye[3])
    return (first_gap + second_gap) / (2.0 * width)
def eval_ear(testVid, video_duration = 1.22, closed_threshold = 0.2, open_threshold = 0.25):
    """Estimate blink rate and mean blink duration from the eye aspect ratio.

    Frames are read sequentially from ``testVid``. For each frame the first
    detected face's landmarks give a left and a right EAR, which are
    averaged. A blink is counted when the EAR drops below
    ``closed_threshold`` while the eye is considered open. Its duration is
    recorded when the EAR next rises above ``open_threshold``. Values
    between the two thresholds leave the open/closed state unchanged.

    Args:
        testVid: Object providing ``get(prop)`` and ``read()`` as used
            below (the caller passes a ``cv2.VideoCapture``).
        video_duration: Clip length in seconds used to convert frame spans
            and blink counts into seconds. Defaults to 1.22, the value that
            used to be hard-coded. Pass ``None`` to derive it from the
            video's reported frame count and FPS.
        closed_threshold: EAR below which the eye counts as closed.
        open_threshold: EAR above which the eye counts as open.

    Returns:
        ``(blinks_per_second, mean_blink_duration_seconds)``. When no blink
        completes (the eye never re-opens after closing, or never closes)
        the result is ``(0, nan)``, even if a closure was counted.

    Raises:
        ValueError: If ``video_duration`` is ``None`` and the video reports
            no positive FPS.
    """
    # A frame with no detected face gets this EAR, i.e. the eye counts as open.
    no_face_ear = 0.35

    # Looked up once instead of on every recorded blink.
    frame_count = int(testVid.get(cv2.CAP_PROP_FRAME_COUNT))
    if video_duration is None:
        fps = testVid.get(cv2.CAP_PROP_FPS)
        if not fps > 0:
            raise ValueError("video reports no usable FPS; pass video_duration explicitly")
        video_duration = frame_count / fps

    last_open_frame_idx = 0
    blink_ct = 0
    closed = False
    durations = []

    for i in range(frame_count):
        (status, image) = testVid.read()
        # The reported frame count can exceed what is actually decodable;
        # stop cleanly instead of crashing on a missing frame.
        if not status or image is None:
            break
        image = imutils.resize(image, width=800)
        grayImage = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)

        faces = faceDetector(grayImage, 0)

        if len(faces) == 0:
            ear = no_face_ear
        else:
            # Only the first detected face is used.
            faceLandmarks = landmarkFinder(grayImage, faces[0])
            faceLandmarks = face_utils.shape_to_np(faceLandmarks)
            leftEye = faceLandmarks[leftEyeStart:leftEyeEnd]
            rightEye = faceLandmarks[rightEyeStart:rightEyeEnd]
            leftEAR = eye_aspect_ratio(leftEye)
            rightEAR = eye_aspect_ratio(rightEye)
            ear = (leftEAR + rightEAR) / 2.0

        if ear < closed_threshold:
            if not closed:
                blink_ct += 1
                closed = True
        elif ear > open_threshold:
            if closed:
                closed = False
                # Frames since the last open frame, scaled to seconds.
                durations.append((i - last_open_frame_idx) / frame_count * video_duration)
            last_open_frame_idx = i

    if len(durations) == 0:
        return (0, np.nan)
    return blink_ct / video_duration, sum(durations) / len(durations)


    #except:
        #0.39 blinks/sec, 0.41sec / blink
        #print(blink_ct / 1.22, sum(durations)/len(durations))
        #return 0.39, 0.41

In [None]:
# Directory of reference videos; the evaluation loop lists it and opens the
# file with the same name in each experiment's output directory.
original_path = "results_comparison_resized/"

In [None]:
# Imported once, ahead of the loop, rather than on every iteration.
import os, time

# For each experiment, score every reference video's counterpart and print
# the mean blink rate and mean blink duration.
for i in [3,5,7,9,10,11,12,14]:
    model_path = f"results_exp{i}/"
    original_files = os.listdir(original_path)
    model_files = os.listdir(model_path)
    print(len(model_files),len(original_files))

    # NOTE(review): `s` and `model_evals` are not read in this cell; they are
    # kept in case a later cell relies on them.
    s = time.time()
    model_evals = {}
    missing_files = []  # videos that could not be opened for this experiment
    count = 0
    psnr_sum = 0
    ssim_sum = 0
    cpbd_sum = 0
    acdi_sum = 0
    acdc_sum = 0
    ear_sum1 = 0
    ear_sum2 = 0
    count2 = 0
    for video_name in original_files:
        orig_file = os.path.join(original_path, video_name)
        model_file = os.path.join(model_path, video_name)
        try:
            orig_vid = imageio.get_reader(orig_file, 'ffmpeg')
        except Exception as err:
            print(f"could not open {orig_file} ({err}), skipping!")
            missing_files.append(video_name)
            continue
        #model_vid = imageio.get_reader(model_file, 'ffmpeg')
        model_vid = cv2.VideoCapture(model_file)
        try:
            # VideoCapture does not raise for a missing/unreadable file, so
            # the failure has to be detected explicitly.
            if not model_vid.isOpened():
                print(f"could not open {model_file}, skipping!")
                missing_files.append(video_name)
                continue
            count+=1
            # Disabled metrics. They call get_data()/count_frames() on
            # model_vid, so re-enabling them needs the imageio reader above
            # instead of cv2.VideoCapture.
            # # 1. eval psnr
            # psnr_model = eval_psnr(orig_vid, model_vid, offset=1)
            # psnr_sum += psnr_model
            # # 2. eval ssim
            # ssim_model = eval_ssim(orig_vid, model_vid, offset=1)
            # ssim_sum += ssim_model
            # # 3. eval cpbd
            # cpbd_model = eval_cpbd(model_vid)
            # cpbd_sum += cpbd_model
            # # 4.1. eval acdi
            # acdi_model = eval_acdi(model_vid)
            # acdi_sum += acdi_model
            # # 4.2. eval acdc
            # acdc_model = eval_acdc(model_vid)
            # acdc_sum += acdc_model
            ear_model = eval_ear(model_vid)
            ear_sum1 += ear_model[0]
            # `x != np.nan` is always True, so NaN must be tested with isnan;
            # otherwise one blink-free video turns the whole sum into NaN.
            if not np.isnan(ear_model[1]):
                print(ear_model[1])
                count2+=1
                ear_sum2 += ear_model[1]
        finally:
            # Release the underlying readers for every video.
            model_vid.release()
            orig_vid.close()
    #print(f"Experiment {i} PSNR: {psnr_sum/count} SSIM: {ssim_sum/count} CPBD: {cpbd_sum/count} ACDI: {acdi_sum/count} ACDC: {acdc_sum/count}")
    # Report NaN instead of dividing by zero when nothing was scored.
    mean_blink_rate = ear_sum1/count if count else float("nan")
    mean_blink_duration = ear_sum2/count2 if count2 else float("nan")
    print(f"Experiment {i} EAR: {mean_blink_rate} {mean_blink_duration}")

In [None]:
64 64
Experiment 1 EAR: 0.0 nan
64 64
Experiment 2 EAR: 0.0 nan
64 64
Experiment 3 EAR: 0.10245901639344264 nan
64 64
Experiment 4 EAR: 0.0 nan
64 64
Experiment 5 EAR: 0.07684426229508198 nan
64 64
Experiment 6 EAR: 0.0 nan
64 64
Experiment 7 EAR: 0.025614754098360656 nan
64 64
Experiment 8 EAR: 0.0 nan
64 64
Experiment 9 EAR: 0.012807377049180328 nan
64 64
Experiment 10 EAR: 0.025614754098360656 nan
64 64
Experiment 11 EAR: 0.012807377049180328 nan
64 64
Experiment 12 EAR: 0.025614754098360656 nan
64 64
Experiment 13 EAR: 0.0 nan
64 64
Experiment 14 EAR: 0.025614754098360656 nan