In [1]:
import cv2
import torch
import time
import os

from utils.inference.image_processing import crop_face
from utils.inference.video_processing import read_video, get_final_video_frame, add_audio_from_another_video, face_enhancement, face_enhancement_multi, get_final_video_multi
from utils.inference.core import model_inference, model_inference_multi

from network.AEI_Net import AEI_Net
from coordinate_reg.image_infer import Handler
from insightface_func.face_detect_crop_multi import Face_detect_crop
from arcface_model.iresnet import iresnet100
from models.pix2pix_model import Pix2PixModel
from models.config_sr import TestOptions



### Load Models

In [14]:
# Face detector / cropper; the same `app` object is later handed to crop_face
# and model_inference_multi.
app = Face_detect_crop(name='antelope', root='./insightface_func/models')
app.prepare(ctx_id=0, det_thresh=0.6, det_size=(640, 640))

# main model for generation
# The checkpoint is deserialised onto the CPU first, then the network is moved
# to the GPU and cast to half precision.
G = AEI_Net(c_id=512)
G.eval()
G.load_state_dict(torch.load('weights/G_0_035000_init_arch_arcface2.pth', map_location=torch.device('cpu')))
G = G.cuda()
G = G.half()

# arcface model to get face embedding
# map_location mirrors the generator load above: without it torch.load tries to
# restore the tensors onto the device they were saved from, which fails when
# that device is not available on this machine.
netArc = iresnet100(fp16=False)
netArc.load_state_dict(torch.load('arcface_model/backbone.pth', map_location=torch.device('cpu')))
netArc = netArc.cuda()
netArc.eval()

# model to get face landmarks
handler = Handler('./coordinate_reg/model/2d106det', 0, ctx_id=0, det_size=640)

# model to make superres of face, set use_sr=True if you want to use super resolution or use_sr=False if you don't
use_sr = False

# `model` is only defined when use_sr is True; later cells guard its use with
# the same flag.
if use_sr:
    # NOTE(review): CUDA is already in use at this point (see the .cuda() calls
    # above), so setting CUDA_VISIBLE_DEVICES here most likely has no effect on
    # this process -- confirm, and move it before the first CUDA call if needed.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    torch.backends.cudnn.benchmark = True
    opt = TestOptions()
    #opt.which_epoch ='10_7'
    model = Pix2PixModel(opt)
    # NOTE(review): train() mode in an inference notebook is unusual;
    # presumably intentional for this network -- confirm before changing.
    model.netG.train()

  and should_run_async(code)


input mean and std: 127.5 127.5
find model: ./insightface_func/models/antelope/glintr100.onnx recognition
find model: ./insightface_func/models/antelope/scrfd_10g_bnkps.onnx detection
set det-size: (640, 640)
loading ./coordinate_reg/model/2d106det 0
input mean and std: 127.5 127.5
find model: ./insightface_func/models/antelope/glintr100.onnx recognition
find model: ./insightface_func/models/antelope/scrfd_10g_bnkps.onnx detection
set det-size: (640, 640)


### Set the paths to the source images and the video for the face swap

In [24]:
"""
Choose a fairly short video, because processing can otherwise take a long time.
Choose a photo as the source image -- preferably a selfie of the person.
"""

# Crop size handed to crop_face and model_inference_multi -- don't change this.
crop_size = 224

# File the generated video is saved to.
OUT_VIDEO_NAME = "examples/results/testing_multi.mp4"

# Video to process. An alternative single-source setup is kept for reference:
# path_to_video = 'examples/videos/video5_mod.mp4'
# source = cv2.imread('examples/images/elon_musk.jpg')
path_to_video = 'examples/videos/2person.mp4'

  and should_run_async(code)


In [25]:
# Check that a face can be detected in every source image and collect the
# resulting crops in `source`.

source_paths = ['examples/images/zak.jpg', 'examples/images/mark.jpeg']

# cv2.imread returns None (it does not raise) when a file is missing or cannot
# be decoded, so fail right here with the offending path instead of passing
# None on to crop_face.
source_images = []
for source_path in source_paths:
    source_image = cv2.imread(source_path)
    if source_image is None:
        raise FileNotFoundError(f"Could not read source image: {source_path}")
    source_images.append(source_image)

# Kept so that code referring to the individual images still works.
source1, source2 = source_images
# source_images = [source]

# NOTE(review): the TypeError handler presumably covers the "no face detected"
# case inside crop_face -- confirm. If it triggers, `source` keeps only the
# crops collected before the failing image.
source = []
try:
    for source_image in source_images:
        source.append(crop_face(source_image, app, crop_size)[0])
except TypeError:
    print("Bad source images")

  and should_run_async(code)


In [26]:
# read video
# read_video returns the frames together with the frame rate, which is reused
# when the final video is assembled.
full_frames, fps = read_video(path_to_video)

# Reference images of the faces to replace. Alternative: take them from a frame
# of the video itself.
# target1 = full_frames[0]
# target_images = [target1]
target_paths = ['examples/images/target4.png', 'examples/images/target5.png']

# cv2.imread returns None (it does not raise) when a file is missing or cannot
# be decoded, so fail right here with the offending path instead of passing
# None on to crop_face.
target_images = []
for target_path in target_paths:
    target_image = cv2.imread(target_path)
    if target_image is None:
        raise FileNotFoundError(f"Could not read target image: {target_path}")
    target_images.append(target_image)

# Kept so that code referring to the individual images still works.
target1, target2 = target_images

# NOTE(review): the TypeError handler presumably covers the "no face detected"
# case inside crop_face -- confirm. If it triggers, `target` keeps only the
# crops collected before the failing image.
target = []
try:
    for target_image in target_images:
        target.append(crop_face(target_image, app, crop_size)[0])
except TypeError:
    print("Bad target images")

  and should_run_async(code)


### Model Inference

In [27]:
# Reference timestamp for the duration printed at the end of the notebook.
# Model loading and video reading happen in earlier cells, so they are not
# included in the measured time.
START_TIME = time.time()

  and should_run_async(code)


In [28]:
# Run the multi-face swap over the whole video.
# Inputs: the video frames, the cropped source and target faces, the arcface
# embedder, the generator and the face detector.
# Note that `full_frames` is rebound to the value returned by the call, so
# re-running this cell feeds the returned frames back in rather than the ones
# read from disk.
final_frames_list, crop_frames_list, full_frames, tfm_array_list = model_inference_multi(
    full_frames,
    source,
    target,
    netArc,
    G,
    app,
    crop_size=crop_size,
)

  and should_run_async(code)
100%|██████████| 264/264 [00:06<00:00, 38.09it/s]
264it [00:00, 5529.93it/s]
100%|██████████| 5/5 [00:01<00:00,  3.65it/s]
100%|██████████| 264/264 [00:00<00:00, 831928.07it/s]
264it [00:00, 5189.66it/s]
100%|██████████| 5/5 [00:01<00:00,  3.93it/s]
100%|██████████| 264/264 [00:00<00:00, 937592.09it/s]


In [29]:
# Optional super-resolution pass. `model` only exists when use_sr was True in
# the model-loading cell, hence the guard on the same flag.
# The result is stored back into final_frames_list, so re-running this cell
# applies the enhancement again to the already enhanced frames.
if use_sr:
    final_frames_list = face_enhancement_multi(final_frames_list, model)

  and should_run_async(code)


In [30]:
# Make sure the folder of the output file exists before the video is written;
# a missing folder would otherwise only show up as a failed or absent output.
# The dirname is empty when OUT_VIDEO_NAME has no folder part, and
# os.makedirs('') would raise, hence the check.
out_video_dir = os.path.dirname(OUT_VIDEO_NAME)
if out_video_dir:
    os.makedirs(out_video_dir, exist_ok=True)

# Assemble the final video from the swapped faces, the crops, the full frames
# and the transforms returned by model_inference_multi.
# presumably writes the result to OUT_VIDEO_NAME at `fps` frames per second,
# using `handler` for face landmarks -- TODO confirm against get_final_video_multi.
get_final_video_multi(
    final_frames_list,
    crop_frames_list,
    full_frames,
    tfm_array_list,
    OUT_VIDEO_NAME,
    fps,
    handler,
)

  and should_run_async(code)
100%|██████████| 264/264 [00:20<00:00, 12.59it/s]


In [31]:
# presumably copies the audio track of the input video into the generated
# video -- TODO confirm against add_audio_from_another_video.
# NOTE(review): the meaning of the third argument ("audio") is not visible
# from this notebook; check the helper before changing it.
add_audio_from_another_video(path_to_video, OUT_VIDEO_NAME, "audio")

  and should_run_async(code)


In [32]:
# Report the wall-clock time elapsed since START_TIME was set (the value is in
# seconds, as returned by time.time()) and where the result was saved.
elapsed_seconds = time.time() - START_TIME
print(f'Full pipeline took {elapsed_seconds}')
print(f"Video saved with path {OUT_VIDEO_NAME}")

Full pipeline took 34.964576721191406
Video saved with path examples/results/testing_multi.mp4


  and should_run_async(code)
