# SimSwap
Reference: https://github.com/neuralchen/SimSwap

## Prepare code

In [None]:
# Clone the SimSwap fork into /content and check out its `upgrade-insightface` branch.
# Note: `git clone` refuses to clone into an existing non-empty directory, so on a re-run
# the clone step reports an error while the following `%cd` / checkout still run.
%cd /content
!git clone https://github.com/woctezuma/SimSwap.git
%cd /content/SimSwap/
!git checkout upgrade-insightface

In [None]:
# Install the Python dependencies. stdout is redirected to /dev/null to keep the cell quiet;
# anything pip writes to stderr is still displayed.
# insightface and imageio are pinned; onnxruntime, moviepy and googledrivedownloader are not.
!pip install insightface==0.7.3 onnxruntime moviepy > /dev/null
!pip install googledrivedownloader > /dev/null
!pip install imageio==2.34.0 > /dev/null

## Prepare models

In [None]:
# Work from the repository root: the download and unzip cells below use paths relative to it
# (./arcface_model, ./parsing_model/checkpoint, ./checkpoints, ./insightface_func/models/).
%cd /content/SimSwap

In [None]:
# NOTE(review): GoogleDriveDownloader is imported but not used by any cell visible in this
# notebook — confirm whether the import (and its pip install) can be dropped.
from google_drive_downloader import GoogleDriveDownloader

# ArcFace checkpoint, stored under ./arcface_model (later passed to the scripts via --Arc_path).
!wget -P ./arcface_model https://github.com/woctezuma/SimSwap-colab/releases/download/1.0/arcface_checkpoint.tar
# checkpoints.zip, saved in the current directory and unzipped into ./checkpoints by a later cell.
!wget https://github.com/neuralchen/SimSwap/releases/download/1.0/checkpoints.zip
# 79999_iter.pth, stored under ./parsing_model/checkpoint.
!wget -P ./parsing_model/checkpoint https://github.com/neuralchen/SimSwap/releases/download/1.0/79999_iter.pth
# 512.zip from the `512_beta` release, also unzipped into ./checkpoints by a later cell.
!wget https://github.com/neuralchen/SimSwap/releases/download/512_beta/512.zip

In [None]:
# Download antelope.zip (unzipped into ./insightface_func/models/ by the next cell).
# NOTE(review): --no-check-certificate disables TLS certificate verification for this download,
# and the URL looks like a pre-signed OneDrive link that may expire — confirm it still resolves
# and consider hosting the archive somewhere that can be fetched with verification enabled.
!wget --no-check-certificate \
 https://sh23tw.dm.files.1drv.com/y4mmGiIkNVigkSwOKDcV3nwMJulRGhbtHdkheehR5TArc52UjudUYNXAEvKCii2O5LAmzGCGK6IfleocxuDeoKxDZkNzDRSt4ZUlEt8GlSOpCXAFEkBwaZimtWGDRbpIGpb_pz9Nq5jATBQpezBS6G_UtspWTkgrXHHxhviV2nWy8APPx134zOZrUIbkSF6xnsqzs3uZ_SEX_m9Rey0ykpx9w \
 -O antelope.zip

In [None]:
!unzip ./checkpoints.zip  -d ./checkpoints

!unzip 512.zip -d ./checkpoints

!unzip antelope.zip -d ./insightface_func/models/

## Prepare data

### Download

In [None]:
# Fetch two sample images into /content, saved under fixed names via `-O`.
%cd /content

!wget https://i.imgur.com/QYJOzy7.jpeg -O cpc_ackboo.jpg
!wget https://i.imgur.com/l5MGOws.jpeg -O starwars_meme.jpg

In [None]:
# Paths of the two images used by the swap cells: after the JPG conversion step,
# input_fname is passed as --pic_a_path and output_fname as --pic_b_path.
# NOTE(review): no cell visible in this notebook creates these two files — presumably they are
# uploaded to /content by hand; confirm, or switch to the sample images in the next cell.
input_fname = '/content/person5.JPG'
output_fname = '/content/image3.jpg'

In [None]:
# Alternative: uncomment both lines to use the sample images fetched in the "Download" cell.
# input_fname = '/content/cpc_ackboo.jpg'
# output_fname = '/content/starwars_meme.jpg'

### Convert to JPG

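Images should not be too large, hence the (arbitrary) limit of 1024 pixels on the longest side. Resizing is opt-in: it is only applied when `allow_resize` is set to `True` in the conversion cell below.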

In [None]:
def get_new_size(img_size,
                 max_allowed_length = 1024):
  """Return `img_size` scaled down so that no side exceeds `max_allowed_length`.

  Args:
    img_size: sequence of side lengths, e.g. the `(width, height)` tuple of a PIL image.
    max_allowed_length: maximum length allowed for any side.

  Returns:
    A tuple of ints. If every side already fits, the sizes are returned unchanged
    (converted to int). Otherwise all sides are multiplied by the same ratio, chosen so
    that the longest side equals `max_allowed_length`; the aspect ratio is preserved up
    to integer truncation.
  """
  longest_side = max(img_size, default=0)

  if longest_side <= max_allowed_length:
    # Nothing to shrink: same result as scaling by 1.0.
    return tuple(int(sz) for sz in img_size)

  ratio = max_allowed_length / longest_side

  # int() truncates, so the short side of a very elongated image could collapse to 0,
  # which is not a usable image dimension. Keep at least one pixel per side.
  return tuple(max(1, int(ratio * sz)) for sz in img_size)

In [None]:
from PIL import Image

# Set to True to shrink images whose longest side exceeds 1024 pixels before saving.
allow_resize = False

for fname in [input_fname, output_fname]:
  # Only a literal '.png' is rewritten; any other name is saved back to the same path.
  jpg_fname = fname.replace('.png', '.jpg')

  try:
    src_img = Image.open(fname)
  except FileNotFoundError:
    # Best effort: a missing file is skipped, but say so instead of failing silently.
    print(f'Skipping missing file {fname}')
    continue

  # convert() loads the pixel data and returns a new image, so the source file can be
  # closed before it is (possibly) overwritten by save().
  with src_img:
    rgb_img = src_img.convert('RGB')

  if allow_resize:
    new_size = get_new_size(rgb_img.size, max_allowed_length = 1024)
    if new_size != rgb_img.size:
      print(f'Resizing from {rgb_img.size} to {new_size}')
      # Image.resize returns a resized copy; the result has to be kept for the resize
      # to have any effect.
      rgb_img = rgb_img.resize(new_size)

  print(f'Saving to {jpg_fname}')
  rgb_img.save(jpg_fname)

# Paths handed to the SimSwap scripts (--pic_a_path / --pic_b_path).
jpg_input = input_fname.replace('.png', '.jpg')
jpg_output = output_fname.replace('.png', '.jpg')

In [None]:
# NOTE(review): facenet-pytorch is not imported by any cell visible in this notebook — confirm
# whether it is still needed. Neither package is version-pinned.
!pip install facenet-pytorch opencv-python

## Run

### Single

In [None]:
# Run test_wholeimage_swapsingle.py from the repository root.
# {jpg_input} / {jpg_output} are interpolated from the Python variables set in the
# "Convert to JPG" cell; results are written to /content/output/single/.
# stdout is redirected to /dev/null, so only what the script writes to stderr is displayed.
%cd /content/SimSwap
%mkdir -p /content/output/single/

!python test_wholeimage_swapsingle.py \
 --no_simswaplogo \
 --use_mask \
 --crop_size 512 \
 --isTrain false  --name people \
 --Arc_path arcface_model/arcface_checkpoint.tar \
 --pic_a_path {jpg_input} \
 --pic_b_path {jpg_output} \
 --output_path /content/output/single/ > /dev/null


In [None]:
!pip install insightface opencv-python matplotlib

In [None]:
from ultralytics import YOLO
import cv2
import mediapipe as mp
import matplotlib.pyplot as plt

def _landmark_to_pixel(face_landmarks, indices, roi_width, roi_height, x_offset, y_offset):
    """Average the normalized landmarks at `indices` and map the result to full-image pixels."""
    mean_x = sum(face_landmarks.landmark[idx].x for idx in indices) / len(indices)
    mean_y = sum(face_landmarks.landmark[idx].y for idx in indices) / len(indices)
    # Landmarks are relative to the cropped region, hence the scale by the crop size and
    # the shift by the crop's top-left corner.
    return int(mean_x * roi_width) + x_offset, int(mean_y * roi_height) + y_offset


def face_landmark(image_path):
    """Locate five facial landmarks in an image, draw them, and show the annotated image.

    YOLOv8 (`yolov8n.pt`) is used to find detections of class 'person'; each person crop is
    then passed to MediaPipe Face Mesh, and five points are derived from the mesh.

    Args:
        image_path: path of the image to read with `cv2.imread`.

    Returns:
        A list of ten ints in full-image pixel coordinates:
        `[left_eye_x, left_eye_y, right_eye_x, right_eye_y, nose_x, nose_y,
        left_mouth_x, left_mouth_y, right_mouth_x, right_mouth_y]`.
        When several persons yield a face mesh, the values of the last one are returned.
        An empty list is returned when no landmarks could be found.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f'Could not read image: {image_path}')

    # YOLOv8 (the weights are loaded on every call).
    model = YOLO('yolov8n.pt')

    # ID of the 'person' class in the YOLOv8 model
    person_class_id = [key for key, value in model.names.items() if value == 'person'][0]

    # Face Mesh indices of the five output points, in output order. Points given by two
    # indices are averaged.
    # NOTE(review): 'left'/'right' follow the original naming — confirm whether they refer
    # to the image side or to the subject's side.
    landmark_groups = [
        [133, 33],     # corners of the left eye
        [362, 263],    # corners of the right eye
        [1],           # nose tip
        [61],          # left corner of the mouth
        [291],         # right corner of the mouth
    ]

    landmarks = []

    # Person detection with YOLOv8
    detections = model(image)

    # MediaPipe Face Mesh; the context manager releases its resources when done.
    mp_face_mesh = mp.solutions.face_mesh
    with mp_face_mesh.FaceMesh(static_image_mode=True, max_num_faces=1, min_detection_confidence=0.5) as face_mesh:
        for box in detections[0].boxes:
            if int(box.cls[0]) != person_class_id:  # only persons
                continue

            x1, y1, x2, y2 = map(int, box.xyxy[0])

            # Crop the detected person (the face mesh is searched inside this region).
            person_roi = image[y1:y2, x1:x2]
            if person_roi.size == 0:
                # Degenerate box: nothing to analyse.
                continue

            # MediaPipe expects RGB input, OpenCV images are BGR.
            rgb_roi = cv2.cvtColor(person_roi, cv2.COLOR_BGR2RGB)
            mesh_result = face_mesh.process(rgb_roi)
            if not mesh_result.multi_face_landmarks:
                continue

            roi_height, roi_width = person_roi.shape[:2]
            for face_landmarks in mesh_result.multi_face_landmarks:
                points = [
                    _landmark_to_pixel(face_landmarks, indices, roi_width, roi_height, x1, y1)
                    for indices in landmark_groups
                ]
                for point in points:
                    cv2.circle(image, point, 3, (0, 255, 0), -1)
                # Flatten [(x, y), ...] into [x, y, x, y, ...]; later faces replace earlier ones.
                landmarks = [coord for point in points for coord in point]

    if not landmarks:
        print(f'No face landmarks found in {image_path}')

    # Show the image with the landmarks drawn on it
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.axis('off')
    plt.show()

    return landmarks


# NOTE(review): no cell visible in this notebook creates /content/image1.png — presumably it
# is uploaded by hand; confirm before running.
image_path = '/content/image1.png'
landmarks = face_landmark(image_path)

In [None]:
# Mount Google Drive: the following cells read the yolov8-face-landmarks-opencv-dnn folder
# (script and ONNX weights) from /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')
# Presumably the source of that Drive folder, kept for reference:
# !git clone https://github.com/hpc203/yolov8-face-landmarks-opencv-dnn

In [None]:
# Run the main.py stored on Google Drive on /content/image2.png with the yolov8n-face ONNX weights.
# Requires the Drive mount from the previous cell.
# NOTE(review): no cell visible in this notebook creates /content/image2.png — confirm it is uploaded first.
!python /content/drive/MyDrive/yolov8-face-landmarks-opencv-dnn/main.py --imgpath '/content/image2.png' --modelpath '/content/drive/MyDrive/yolov8-face-landmarks-opencv-dnn/weights/yolov8n-face.onnx'

In [None]:
import cv2
import numpy as np
import math
from google.colab.patches import cv2_imshow

class YOLOv8_face:
    """Face detector that runs a YOLOv8-face network through OpenCV's DNN module.

    The network is loaded with `cv2.dnn.readNet`. `detect` returns, for every face kept
    after non-maximum suppression, a box in `(x, y, w, h)` form, a confidence, a class id
    and 15 keypoint values (five `(x, y, score)` triplets), all in source-image pixels.
    """

    def __init__(self, path, conf_thres=0.2, iou_thres=0.5):
        """Load the network and precompute the anchor grids.

        Args:
            path: model file passed to `cv2.dnn.readNet`.
            conf_thres: minimum confidence for a detection to be kept.
            iou_thres: overlap threshold passed to non-maximum suppression.
        """
        self.conf_threshold = conf_thres
        self.iou_threshold = iou_thres
        self.class_names = ['face']
        self.num_classes = len(self.class_names)
        # Initialize model
        self.net = cv2.dnn.readNet(path)
        self.input_height = 640
        self.input_width = 640
        # Number of bins used for each of the four box-side distance distributions.
        self.reg_max = 16

        # Bin values 0..reg_max-1; the expected distance is softmax(logits) . project.
        self.project = np.arange(self.reg_max)
        self.strides = (8, 16, 32)
        self.feats_hw = [
            (math.ceil(self.input_height / stride), math.ceil(self.input_width / stride))
            for stride in self.strides
        ]
        self.anchors = self.make_anchors(self.feats_hw)

    def make_anchors(self, feats_hw, grid_cell_offset=0.5):
        """Generate anchor points from feature-map sizes.

        Args:
            feats_hw: one `(height, width)` pair per entry of `self.strides`.
            grid_cell_offset: offset added to every grid index (0.5 = cell centre).

        Returns:
            Dict mapping each stride to an `(h * w, 2)` array of `(x, y)` anchor points in
            grid units, ordered row by row.
        """
        anchor_points = {}
        for i, stride in enumerate(self.strides):
            h, w = feats_hw[i]
            x = np.arange(0, w) + grid_cell_offset  # shift x
            y = np.arange(0, h) + grid_cell_offset  # shift y
            sx, sy = np.meshgrid(x, y)
            anchor_points[stride] = np.stack((sx, sy), axis=-1).reshape(-1, 2)
        return anchor_points

    def softmax(self, x, axis=1):
        """Softmax of `x` along `axis`.

        The per-slice maximum is subtracted before exponentiation; this leaves the result
        mathematically unchanged but prevents `exp` from overflowing on large logits.
        """
        shifted = x - np.max(x, axis=axis, keepdims=True)
        x_exp = np.exp(shifted)
        return x_exp / np.sum(x_exp, axis=axis, keepdims=True)

    def resize_image(self, srcimg, keep_ratio=True):
        """Resize `srcimg` to the network input size, letterboxing when `keep_ratio` is set.

        Returns:
            `(img, newh, neww, top, left)`: the resized (and, if needed, black-padded)
            image, the size of the image content inside it, and the padding added on the
            top and left.
        """
        top, left, newh, neww = 0, 0, self.input_height, self.input_width
        if keep_ratio and srcimg.shape[0] != srcimg.shape[1]:
            hw_scale = srcimg.shape[0] / srcimg.shape[1]
            if hw_scale > 1:
                # Taller than wide: fill the height, pad left and right.
                newh, neww = self.input_height, int(self.input_width / hw_scale)
                img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
                left = int((self.input_width - neww) * 0.5)
                img = cv2.copyMakeBorder(img, 0, 0, left, self.input_width - neww - left, cv2.BORDER_CONSTANT, value=(0, 0, 0))
            else:
                # Wider than tall: fill the width, pad top and bottom.
                newh, neww = int(self.input_height * hw_scale), self.input_width
                img = cv2.resize(srcimg, (neww, newh), interpolation=cv2.INTER_AREA)
                top = int((self.input_height - newh) * 0.5)
                img = cv2.copyMakeBorder(img, top, self.input_height - newh - top, 0, 0, cv2.BORDER_CONSTANT, value=(0, 0, 0))
        else:
            img = cv2.resize(srcimg, (self.input_width, self.input_height), interpolation=cv2.INTER_AREA)
        return img, newh, neww, top, left

    def detect(self, srcimg):
        """Run the detector on a BGR image.

        Returns:
            `(boxes, confidences, class_ids, landmarks)` as produced by `post_process`.
        """
        input_img, newh, neww, padh, padw = self.resize_image(cv2.cvtColor(srcimg, cv2.COLOR_BGR2RGB))
        # Factors that map letterboxed-input coordinates back to the source image.
        scale_h, scale_w = srcimg.shape[0] / newh, srcimg.shape[1] / neww
        input_img = input_img.astype(np.float32) / 255.0

        blob = cv2.dnn.blobFromImage(input_img)
        self.net.setInput(blob)
        outputs = self.net.forward(self.net.getUnconnectedOutLayersNames())

        det_bboxes, det_conf, det_classid, landmarks = self.post_process(outputs, scale_h, scale_w, padh, padw)
        return det_bboxes, det_conf, det_classid, landmarks

    def post_process(self, preds, scale_h, scale_w, padh, padw):
        """Decode the raw network outputs into final detections.

        Args:
            preds: network outputs, one per stride.
                NOTE(review): assumed to be laid out as (1, C, H, W) with
                C = 4 * reg_max box channels + class channels + 15 keypoint channels,
                as implied by the transpose and slicing below — confirm against the model.
            scale_h, scale_w: factors mapping network-input pixels to source pixels.
            padh, padw: letterbox padding (top, left) to remove before scaling.

        Returns:
            `(boxes, confidences, class_ids, landmarks)` for the detections kept by NMS,
            with boxes in `(x, y, w, h)` form; four empty arrays when nothing is detected.
        """
        bboxes, scores, landmarks = [], [], []
        for i, pred in enumerate(preds):
            stride = int(self.input_height / pred.shape[2])
            pred = pred.transpose((0, 2, 3, 1))

            box = pred[..., :self.reg_max * 4]
            # Sigmoid over the class logits.
            cls = 1 / (1 + np.exp(-pred[..., self.reg_max * 4:-15])).reshape((-1, 1))
            kpts = pred[..., -15:].reshape((-1, 15))

            # Expected value of each side's distance distribution.
            tmp = box.reshape(-1, 4, self.reg_max)
            bbox_pred = self.softmax(tmp, axis=-1)
            bbox_pred = np.dot(bbox_pred, self.project).reshape((-1, 4))

            bbox = self.distance2bbox(self.anchors[stride], bbox_pred, max_shape=(self.input_height, self.input_width)) * stride
            # Keypoints: x and y are decoded relative to the anchor cell, the third value is
            # passed through a sigmoid.
            kpts[:, 0::3] = (kpts[:, 0::3] * 2.0 + (self.anchors[stride][:, 0].reshape((-1, 1)) - 0.5)) * stride
            kpts[:, 1::3] = (kpts[:, 1::3] * 2.0 + (self.anchors[stride][:, 1].reshape((-1, 1)) - 0.5)) * stride
            kpts[:, 2::3] = 1 / (1 + np.exp(-kpts[:, 2::3]))

            # Undo the letterbox: remove the padding, then scale back to source pixels.
            bbox -= np.array([[padw, padh, padw, padh]])
            bbox *= np.array([[scale_w, scale_h, scale_w, scale_h]])
            kpts -= np.tile(np.array([padw, padh, 0]), 5).reshape((1, 15))
            kpts *= np.tile(np.array([scale_w, scale_h, 1]), 5).reshape((1, 15))

            bboxes.append(bbox)
            scores.append(cls)
            landmarks.append(kpts)

        bboxes = np.concatenate(bboxes, axis=0)
        scores = np.concatenate(scores, axis=0)
        landmarks = np.concatenate(landmarks, axis=0)

        bboxes_wh = bboxes.copy()
        bboxes_wh[:, 2:4] = bboxes[:, 2:4] - bboxes[:, 0:2]  # xywh
        classIds = np.argmax(scores, axis=1)
        confidences = np.max(scores, axis=1)

        mask = confidences > self.conf_threshold
        bboxes_wh = bboxes_wh[mask]
        confidences = confidences[mask]
        classIds = classIds[mask]
        landmarks = landmarks[mask]

        if len(bboxes_wh) > 0:
            nms_result = cv2.dnn.NMSBoxes(bboxes_wh.tolist(), confidences.tolist(), self.conf_threshold, self.iou_threshold)
            # Depending on the OpenCV version the result is an (N, 1) array, a flat array,
            # or an empty tuple; normalise all of these to a flat array.
            indices = np.array(nms_result).reshape(-1)
        else:
            indices = np.array([], dtype=int)

        if len(indices) > 0:
            mlvl_bboxes = bboxes_wh[indices]
            confidences = confidences[indices]
            classIds = classIds[indices]
            landmarks = landmarks[indices]
            return mlvl_bboxes, confidences, classIds, landmarks
        else:
            print('nothing detect')
            return np.array([]), np.array([]), np.array([]), np.array([])

    def distance2bbox(self, points, distance, max_shape=None):
        """Turn anchor points plus (left, top, right, bottom) distances into boxes.

        Args:
            points: `(N, 2)` array of `(x, y)` anchor points.
            distance: `(N, 4)` array of distances from each point to the box sides.
            max_shape: optional `(height, width)`; coordinates are clipped to it.

        Returns:
            `(N, 4)` array of `(x1, y1, x2, y2)` boxes.
        """
        x1 = points[:, 0] - distance[:, 0]
        y1 = points[:, 1] - distance[:, 1]
        x2 = points[:, 0] + distance[:, 2]
        y2 = points[:, 1] + distance[:, 3]
        if max_shape is not None:
            x1 = np.clip(x1, 0, max_shape[1])
            y1 = np.clip(y1, 0, max_shape[0])
            x2 = np.clip(x2, 0, max_shape[1])
            y2 = np.clip(y2, 0, max_shape[0])
        return np.stack([x1, y1, x2, y2], axis=-1)

    def draw_detections(self, image, boxes, scores, kpts):
        """Draw boxes, confidence labels and the five keypoints of each detection on `image`.

        The image is modified in place and also returned.
        """
        for box, score, kp in zip(boxes, scores, kpts):
            x, y, w, h = box.astype(int)
            # Draw rectangle
            cv2.rectangle(image, (x, y), (x + w, y + h), (0, 0, 255), thickness=3)
            cv2.putText(image, "face:" + str(round(score, 2)), (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 0, 255), thickness=2)
            # Keypoints are stored as (x, y, score) triplets.
            for i in range(5):
                cv2.circle(image, (int(kp[i * 3]), int(kp[i * 3 + 1])), 4, (0, 255, 0), thickness=-1)
        return image


def run_face_detection(img_path, model_path):
    """Detect faces in an image and display the annotated result in the notebook.

    Args:
        img_path: path of the image to read with `cv2.imread`.
        model_path: model file handed to `YOLOv8_face`.

    Raises:
        FileNotFoundError: if the image cannot be read.
    """
    image = cv2.imread(img_path)
    if image is None:
        # cv2.imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f'Could not read image: {img_path}')

    yolo_face_detector = YOLOv8_face(model_path)
    boxes, scores, _, landmarks = yolo_face_detector.detect(image)
    # Draw on a copy so the image that was read stays untouched.
    output_image = yolo_face_detector.draw_detections(image.copy(), boxes, scores, landmarks)
    # cv2_imshow is Colab's notebook replacement for cv2.imshow; no HighGUI window is
    # opened here, so there is nothing to wait on or to destroy afterwards.
    cv2_imshow(output_image)

# Run the detector on /content/image2.png with the ONNX weights stored on Google Drive
# (requires the Drive mount from an earlier cell).
# NOTE(review): no cell visible in this notebook creates /content/image2.png — confirm it is uploaded first.
img_path = '/content/image2.png'
model_path = '/content/drive/MyDrive/yolov8-face-landmarks-opencv-dnn/weights/yolov8n-face.onnx'
run_face_detection(img_path, model_path)

### Multi

In [None]:
# Run test_wholeimage_swapmulti.py from the repository root, with the same options as the
# "Single" run. {jpg_input} / {jpg_output} are interpolated from the Python variables set in
# the "Convert to JPG" cell; results are written to /content/output/multi/.
# stdout is redirected to /dev/null, so only what the script writes to stderr is displayed.
%cd /content/SimSwap
%mkdir -p /content/output/multi/

!python test_wholeimage_swapmulti.py \
 --no_simswaplogo \
 --use_mask \
 --crop_size 512 \
 --isTrain false  --name people \
 --Arc_path arcface_model/arcface_checkpoint.tar \
 --pic_a_path {jpg_input} \
 --pic_b_path {jpg_output} \
 --output_path /content/output/multi/ > /dev/null
