In [None]:
# Environment setup: install the OpenMMLab tool chain through its `mim`
# installer, then fetch the anime-face-detector sources.
#!pip install -U torch torchvision opencv-python-headless
!pip install openmim
!mim install mmcv-full
!mim install mmdet
!mim install mmpose
# The package is not pip-installed here; the repository is cloned instead.
#!pip install anime-face-detector
!git clone https://github.com/hysts/anime-face-detector

# Wipe the installation logs from this cell's output area.
from IPython.display import clear_output
clear_output()

If you encounter the following error in Colab, restart the runtime and then run the cells below again; they should then execute correctly.

```
xtcocotools/_mask.pyx in init xtcocotools._mask()
ValueError: numpy.ndarray size changed, may indicate binary incompatibility. Expected 88 from C header, got 80 from PyObject
```

In [None]:
# Move into the cloned repository; the relative paths used by later cells
# (downloads, video files) are resolved from here.
# NOTE(review): presumably this is also what makes `import anime_face_detector`
# work, since the package is cloned rather than pip-installed — confirm.
%cd anime-face-detector

In [None]:
#@title visualize function

import cv2
import numpy as np
import anime_face_detector


def detect_vis(img, face_score_threshold: float,
               landmark_score_threshold: float,
               detector: anime_face_detector.LandmarkDetector):
    """Draw face boxes and landmark dots on a copy of ``img``.

    Args:
        img: Image array handed to ``detector``; drawing happens on a copy.
        face_score_threshold: Faces whose box score is below this value are
            skipped entirely (no box, no landmarks).
        landmark_score_threshold: Landmarks scoring below this value get the
            low-confidence color, the others the high-confidence color.
        detector: Callable returning an iterable of dicts that carry a
            ``'bbox'`` (four box coordinates followed by a score) and a
            ``'keypoints'`` sequence (coordinates followed by a score).

    Returns:
        The annotated copy of ``img``.
    """
    detections = detector(img)
    canvas = img.copy()

    for face in detections:
        bbox = face['bbox']
        if bbox[4] < face_score_threshold:
            continue
        corners = np.round(bbox[:4]).astype(int)

        # Scale the stroke width with the longer box side, never below 2 px.
        thickness = max(2, int(3 * (corners[2:] - corners[:2]).max() / 256))

        cv2.rectangle(canvas, tuple(corners[:2]), tuple(corners[2:]),
                      (0, 255, 0), thickness)

        for *xy, confidence in face['keypoints']:
            # Color tuples are presumably BGR (input comes from OpenCV):
            # yellow for low confidence, red otherwise.
            dot_color = ((0, 255, 255)
                         if confidence < landmark_score_threshold else
                         (0, 0, 255))
            center = np.round(xy).astype(int)
            cv2.circle(canvas, tuple(center), thickness, dot_color,
                       cv2.FILLED)

    return canvas


# Landmark index groups (28 points, indices 0-27). Reference picture:
# https://github.com/hysts/anime-face-detector/blob/main/assets/landmarks.jpg
FACE_BOTTOM_OUTLINE = [*range(5)]
LEFT_EYEBROW = [*range(5, 8)]
RIGHT_EYEBROW = [*range(8, 11)]
LEFT_EYE_TOP = [*range(11, 14)]
LEFT_EYE_BOTTOM = [*range(14, 17)]
RIGHT_EYE_TOP = [*range(17, 20)]
RIGHT_EYE_BOTTOM = [*range(20, 23)]
NOSE = [23]
MOUTH_OUTLINE = [*range(24, 28)]

# Every group, in the order the outlines are drawn.
FACE_ALL_LIST = [
    FACE_BOTTOM_OUTLINE,
    MOUTH_OUTLINE,
    LEFT_EYEBROW,
    RIGHT_EYEBROW,
    LEFT_EYE_TOP,
    RIGHT_EYE_TOP,
    LEFT_EYE_BOTTOM,
    RIGHT_EYE_BOTTOM,
    NOSE,
]

# Per-part collections used to look up the drawing color of a group.
FACE_OUTLINE_LIST = [FACE_BOTTOM_OUTLINE, LEFT_EYEBROW, RIGHT_EYEBROW]
LEFT_EYE_LIST = [LEFT_EYE_TOP, LEFT_EYE_BOTTOM]
RIGHT_EYE_LIST = [RIGHT_EYE_TOP, RIGHT_EYE_BOTTOM]
MOUTH_OUTLINE_LIST = [MOUTH_OUTLINE]
NOSE_LIST = [NOSE]


def detect_vis2(img, face_score_threshold: float,
                landmark_score_threshold: float,
                detector: anime_face_detector.LandmarkDetector):
    """Draw face boxes with scores, landmark dots and per-part outlines.

    Args:
        img: Image array handed to ``detector``; drawing happens on a copy.
        face_score_threshold: Faces whose box score is below this value are
            skipped entirely, matching ``detect_vis``.
        landmark_score_threshold: Landmarks scoring below this value are drawn
            in the low-confidence color, and any part containing such a
            landmark gets no outline.
        detector: Callable returning an iterable of dicts that carry a
            ``'bbox'`` (four box coordinates followed by a score) and a
            ``'keypoints'`` sequence of at least 28 entries (coordinates
            followed by a score).

    Returns:
        The annotated copy of ``img``.

    Raises:
        Exception: If a group in ``FACE_ALL_LIST`` belongs to none of the
            per-part lists.
    """
    preds = detector(img)
    res = img.copy()

    for pred in preds:
        # vis box
        box = pred['bbox']
        box, score = box[:4], box[4]
        # Honor the face threshold instead of silently ignoring the argument.
        if score < face_score_threshold:
            continue
        box = np.round(box).astype(int)
        # Line thickness scales with the longer box side, never below 2 px.
        lt = max(2, int(3 * (box[2:] - box[:2]).max() / 256))

        cv2.rectangle(res, tuple(box[:2]), tuple(box[2:]), (0, 255, 0), lt)
        # Face score as a percentage, placed just above the box's first corner.
        cv2.putText(res,
                    str(round(score * 100, 2)) + '%', (box[0], box[1] - 2),
                    0,
                    lt / 2, [225, 255, 255],
                    thickness=max(lt, 1),
                    lineType=cv2.LINE_AA)

        # vis landmark
        pred_pts = pred['keypoints']
        # One entry per landmark: the integer point, or None when its score is
        # below the landmark threshold.
        th_pred_pts = []
        # points num = 28
        for i in range(28):
            # Separate name so the face score above is not shadowed.
            *pt, pt_score = pred_pts[i]
            pt = tuple(np.round(pt).astype(int))
            if pt_score < landmark_score_threshold:
                color = (0, 255, 255)
                th_pred_pts.append(None)
            else:
                color = (0, 0, 255)
                th_pred_pts.append(np.array(pt, np.int32))
            cv2.circle(res, pt, lt, color, cv2.FILLED)

        # Outline each part whose landmarks all passed the threshold.
        for points in FACE_ALL_LIST:
            pts = [th_pred_pts[idx] for idx in points]

            # Skip only this part when it has a low-confidence landmark; the
            # remaining parts of the face are still drawn.
            if any(p is None for p in pts):
                continue

            closed = False
            if points in FACE_OUTLINE_LIST:
                color = (0, 170, 255)
            elif points in NOSE_LIST:
                color = (255, 30, 30)
            elif points in LEFT_EYE_LIST:
                color = (50, 220, 255)
            elif points in RIGHT_EYE_LIST:
                color = (50, 220, 255)
            elif points in MOUTH_OUTLINE_LIST:
                color = (255, 30, 30)
                # The mouth is the only part drawn as a closed polygon.
                closed = True
            else:
                raise (Exception(f'unknow points {points}'))
            cv2.polylines(res, np.array([pts], np.int32), closed, color, lt)

    return res

In [None]:
#@title ArgumentParser

# Colab form fields: the `#@param` markers turn these assignments into
# drop-down selectors.
device = 'cpu'  #@param ['cuda:0', 'cpu']
model = 'yolov3'  #@param ['yolov3', 'faster-rcnn']
visualize_func = 'detect_vis2'  #@param ['detect_vis2', 'detect_vis']

# Build the detector from the selected face-detection model (previously the
# form value was ignored and 'yolov3' was always used).
detector = anime_face_detector.create_detector(model, device=device)
# Pick the drawing routine used by the image and video cells below.
detect = detect_vis2 if visualize_func == 'detect_vis2' else detect_vis

# image test

In [None]:
# Download the sample image from the anime-face-detector repository and save
# it as input.jpg in the current directory (-q suppresses wget's output).
!wget -q https://raw.githubusercontent.com/hysts/anime-face-detector/main/assets/input.jpg -O input.jpg

In [None]:
import matplotlib.pyplot as plt

image = cv2.imread('input.jpg')
# cv2.imread signals failure by returning None rather than raising, so fail
# here with a clear message instead of deeper inside the detector.
if image is None:
    raise FileNotFoundError("could not read image file 'input.jpg'")

res = detect(image,
             face_score_threshold=0.5,
             landmark_score_threshold=0.3,
             detector=detector)

plt.figure(figsize=(30, 30))
# Reverse the channel axis: OpenCV loads images as BGR, matplotlib expects RGB.
plt.imshow(res[:, :, ::-1])
plt.axis('off')
plt.show()

# video test

In [None]:
#@title detect_from_video function
# from https://github.com/aim-uofa/AdelaiDet/blob/master/demo/predictor.py
def detect_from_video(video,
                      face_score_threshold,
                      landmark_score_threshold,
                      detector=detector):
    """Lazily yield an annotated image for every frame read from ``video``.

    Args:
        video: Object exposing ``isOpened()`` and ``read()``; ``read()`` must
            return a ``(success, frame)`` pair.
        face_score_threshold: Forwarded to the notebook-level ``detect``.
        landmark_score_threshold: Forwarded to the notebook-level ``detect``.
        detector: Detector forwarded to ``detect``; defaults to the detector
            that exists when this function is defined.

    Yields:
        The result of ``detect`` for each successfully read frame. Iteration
        ends when the video is no longer open or a read fails.
    """
    while video.isOpened():
        ok, frame = video.read()
        if not ok:
            return
        yield detect(frame,
                     face_score_threshold,
                     landmark_score_threshold,
                     detector=detector)

In [None]:
# Sample video, source page:
#https://www.sakugabooru.com/post/show/43401
# Download it and save it as input_vid.mp4 in the current directory.
!wget -q https://www.sakugabooru.com/data/f47f699b9c5afc5a849be4b974f40975.mp4 -O input_vid.mp4

In [None]:
from moviepy.editor import VideoFileClip

# skip frame
# Speed-up factor applied to the clip; the video-writing cell reads it again
# to derive the output frame rate.
speedx = 2
# NOTE(review): presumably speeding the clip up shortens it, so writing it back
# leaves fewer frames for the detector to process — confirm against moviepy.
clip = VideoFileClip('input_vid.mp4').subfx(lambda c: c.speedx(speedx))
# Persist the sped-up clip; the next cell opens this file with OpenCV.
clip.write_videofile('input_vid_clip.mp4')
clip.close()

In [None]:
from tqdm.auto import tqdm

# Open the sped-up clip written by the previous cell.
video = cv2.VideoCapture('input_vid_clip.mp4')
try:
    # VideoCapture does not raise on a missing/unreadable file; without this
    # check the loop would simply process zero frames.
    if not video.isOpened():
        raise IOError("could not open video file 'input_vid_clip.mp4'")

    width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
    # Slow playback back down by the speed-up factor. True division keeps
    # fractional rates (e.g. 23.976 / 2) instead of flooring them.
    frames_per_second = video.get(cv2.CAP_PROP_FPS) / speedx
    num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
    output_file = cv2.VideoWriter(
        filename='/content/anime-face-detector/output_vid.mp4',
        # some installation of opencv may not support x264 (due to its license),
        # you can try other format (e.g. MPEG)
        fourcc=cv2.VideoWriter_fourcc(*'MPEG'),
        fps=float(frames_per_second),
        frameSize=(width, height),
        isColor=True,
    )
    try:
        # colabCPU 3.27s/it
        for vis_frame in tqdm(detect_from_video(video,
                                                face_score_threshold=0.5,
                                                landmark_score_threshold=0.3,
                                                detector=detector),
                              total=num_frames):
            output_file.write(vis_frame)
    finally:
        # Always finalize the output file, even if detection fails midway.
        output_file.release()
finally:
    # Always free the capture handle.
    video.release()