**Import packages**

In [1]:
import cv2 
import numpy as np 
import torch 
import imageio
from face_enhancer import enhancer_generator_with_len, enhancer_list

  from .autonotebook import tqdm as notebook_tqdm


**Define a helper function for reading video frames**

In [2]:
def read_mp4(input_fn, to_rgb=False, to_gray=False, to_nchw=False):
    """Decode every frame of a video file into a single NumPy array.

    Args:
        input_fn: Path of the video, passed straight to ``cv2.VideoCapture``.
        to_rgb: Convert each decoded BGR frame to RGB.
        to_gray: Convert each decoded BGR frame to single-channel grayscale.
            Takes precedence over ``to_rgb``: the gray conversion must be fed
            the BGR frame, otherwise ``COLOR_BGR2GRAY`` would weight the red
            and blue channels the wrong way round.
        to_nchw: Move the channel axis in front of the spatial axes.
            Grayscale frames have no channel axis, so a singleton one is
            inserted instead.

    Returns:
        The stacked frames: ``(N, H, W, C)`` for colour, ``(N, H, W)`` for
        grayscale, or ``(N, C, H, W)`` when ``to_nchw`` is set.

    Raises:
        ValueError: If no frame could be read (missing, unreadable, or empty
            video).
    """
    frames = []
    cap = cv2.VideoCapture(input_fn)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            if to_gray:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            elif to_rgb:
                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
            frames.append(frame)
    finally:
        # Release the capture even if a colour conversion raises.
        cap.release()

    if not frames:
        raise ValueError(f'no frames could be read from {input_fn!r}')

    frames = np.stack(frames)
    if to_nchw:
        if frames.ndim == 3:
            # (N, H, W) grayscale -> (N, 1, H, W)
            frames = frames[:, np.newaxis]
        else:
            frames = np.transpose(frames, (0, 3, 1, 2))
    return frames

**Set the original and enhanced video paths**

In [3]:
# Source clip that is fed to the face enhancer.
original_video_path = './data/faafeee171254fbfb66b42154dda35e4.speaker.mp4' # just an example
# Where the enhanced clip is written.
# NOTE(review): 'enahnced' in the filename looks like a typo of 'enhanced'; it is
# left untouched here because it is a runtime path.
enhanced_video_path = './data/faafeee171254fbfb66b42154dda35e4.speaker.enahnced.mp4'

**Enhance images**

In [4]:
enhancer = 'gfpgan'
background_enhancer = None

# Write the enhanced clip at the source clip's frame rate so its timing is
# preserved. The train/test videos are said to be 25 fps, so a hard-coded rate
# does not fit every input.
_fps_probe = cv2.VideoCapture(original_video_path)
try:
    source_fps = _fps_probe.get(cv2.CAP_PROP_FPS)
finally:
    _fps_probe.release()
if not source_fps or source_fps <= 0:
    # The container did not report a usable rate; fall back to 30 fps.
    source_fps = 30.0

try:
    # Preferred path: a generator with a known length, so frames are streamed to
    # the writer instead of being held in memory all at once.
    enhanced_images_gen_with_len = enhancer_generator_with_len(original_video_path, method=enhancer, bg_upsampler=background_enhancer)
    imageio.mimsave(enhanced_video_path, enhanced_images_gen_with_len, fps=float(source_fps))
except Exception as err:
    # `except Exception` (not a bare `except`) lets Ctrl-C / SystemExit through.
    # Fall back to the list-based enhancer and rewrite the output file.
    print('generator-based enhancement failed (%r); falling back to enhancer_list' % (err,))
    enhanced_images_gen_with_len = enhancer_list(original_video_path, method=enhancer, bg_upsampler=background_enhancer)
    imageio.mimsave(enhanced_video_path, enhanced_images_gen_with_len, fps=float(source_fps))

face enhancer....
model_path /hy-tmp/checkpoints/gfpgan/weights/GFPGANv1.4.pth


Face Enhancer:: 100%|██████████| 30/30 [00:09<00:00,  3.24it/s]


**Compare the FID between the original video and the enhanced video**

In [11]:
from piq import FID

# Number of frames per feature-extraction batch; lower it if memory is tight.
FID_BATCH_SIZE = 64

fid_metric = FID()

# Load both clips as RGB frames in NCHW layout and scale them to [0, 1] floats.
gt_frames = read_mp4(original_video_path, True, False, True)
pd_frames = read_mp4(enhanced_video_path, True, False, True)

gt_frames = torch.from_numpy(gt_frames).float() / 255.
pd_frames = torch.from_numpy(pd_frames).float() / 255.

# Track each clip's own frame count: the two videos are not guaranteed to have
# the same number of frames, so the features are split with the real sizes.
num_gt = gt_frames.size(0)
num_pd = pd_frames.size(0)

# Batch each clip separately rather than concatenating them first, so clips of
# different resolution or length still work. All batches go through a single
# compute_feats call.
image_batches = [
    {'images': batch}
    for clip in (gt_frames, pd_frames)
    for batch in torch.split(clip, FID_BATCH_SIZE, 0)
]
total_feats = fid_metric.compute_feats(image_batches).detach().cpu()
gt_feats, pd_feats = torch.split(total_feats, (num_gt, num_pd), 0)

# The two self-comparisons are a sanity check and should come out close to zero.
print('fid (enhanced-origin):', fid_metric.compute_metric(pd_feats, gt_feats).item())
print('fid (enhanced-enhanced):', fid_metric.compute_metric(pd_feats, pd_feats).item())
print('fid (origin-origin):', fid_metric.compute_metric(gt_feats, gt_feats).item())





fid (enhanced-origin): 15.791654402745792
fid (enhanced-enhanced): 0.0340112991321746
fid (origin-origin): 0.028985544586831224


**Compare the CPBD between the original video and the enhanced video**

In [18]:
import cpbd
# cpbd can fail on its imread import; this was worked around following
# https://github.com/0x64746b/python-cpbd/issues/2#issuecomment-1271743131
import tqdm


def mean_cpbd(video_path):
    """Return the mean per-frame CPBD score of a video.

    Args:
        video_path: Path of the video; its frames are read as grayscale
            images with ``read_mp4``.

    Returns:
        The mean of ``cpbd.compute(frame)`` over all frames.
    """
    gray_frames = read_mp4(video_path, False, True, False)  # read gray image
    per_frame_scores = [cpbd.compute(frame) for frame in gray_frames]
    return np.array(per_frame_scores).mean()


cpbd_origin = mean_cpbd(original_video_path)
print('cpbd origin:', cpbd_origin)

cpbd_enhanced = mean_cpbd(enhanced_video_path)
print('cpbd enahnced:', cpbd_enhanced)

cpbd origin: 0.13508757209224612
cpbd enahnced: 0.18875182889610936
