In [1]:
import cv2
import torch
import time
import os

from utils.inference.image_processing import crop_face
from utils.inference.video_processing import read_video, get_final_video_frame, add_audio_from_another_video, face_enhancement
from utils.inference.core import model_inference

from network.AEI_Net import AEI_Net
from coordinate_reg.image_infer import Handler
from insightface_func.face_detect_crop_single import Face_detect_crop
from arcface_model.iresnet import iresnet100
from models.pix2pix_model import Pix2PixModel
from models.config_sr import TestOptions

  parsed = re.search('spade(\D+)(\d)x\d', config_text)
  return [atoi(c) for c in re.split('(\d+)', text)]


### Load Models

In [2]:
# Face detection/cropping helper; it is passed to crop_face and model_inference in later cells.
app = Face_detect_crop(name='antelope', root='./insightface_func/models')
app.prepare(ctx_id=0, det_thresh=0.6, det_size=(640, 640))

# main model for generation
G = AEI_Net(c_id=512)
G.eval()
# Weights are deserialized on CPU first, then the whole network is moved to the GPU in half precision.
G.load_state_dict(torch.load('weights/G_0_035000_init_arch_arcface2.pth', map_location=torch.device('cpu')))
G = G.cuda().half()

# arcface model to get face embedding
netArc = iresnet100(fp16=False)
# Load on CPU as well (same as the generator above) so the checkpoint does not
# depend on the device it was saved from; the network is moved to the GPU right after.
netArc.load_state_dict(torch.load('arcface_model/backbone.pth', map_location=torch.device('cpu')))
netArc = netArc.cuda()
netArc.eval()

# model to get face landmarks
handler = Handler('./coordinate_reg/model/2d106det', 0, ctx_id=0, det_size=640)

# model to make superres of face, set use_sr=True if you want to use super resolution or use_sr=False if you don't
use_sr = False
model = None  # always defined, so later cells can reference it even when super resolution is off
if use_sr:
    # NOTE(review): CUDA has already been used above (G.cuda()), so setting
    # CUDA_VISIBLE_DEVICES at this point probably has no effect on this process -- confirm.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'
    torch.backends.cudnn.benchmark = True
    opt = TestOptions()
    #opt.which_epoch ='10_7'
    model = Pix2PixModel(opt)
    # NOTE(review): the generator is put in train mode (not eval) although it is only
    # used for inference; presumably intentional -- confirm before changing.
    model.netG.train()

  and should_run_async(code)


input mean and std: 127.5 127.5
find model: ./insightface_func/models/antelope/glintr100.onnx recognition
find model: ./insightface_func/models/antelope/scrfd_10g_bnkps.onnx detection
set det-size: (640, 640)
loading ./coordinate_reg/model/2d106det 0
input mean and std: 127.5 127.5
find model: ./insightface_func/models/antelope/glintr100.onnx recognition
find model: ./insightface_func/models/antelope/scrfd_10g_bnkps.onnx detection
set det-size: (640, 640)
Network [LIPSPADEGenerator] was created. Total number of parameters: 72.2 million. To see the architecture, do print(network).
Load checkpoint from path:  weights/10_net_G.pth


### Set the paths to the source image and the target video for the face swap

In [3]:
"""
choose short videos, because long ones can take a lot of time to process
choose a photo of a person as the source image -- preferably a selfie
"""

# Inputs and output of the face swap.
path_to_video = 'examples/videos/dora_short.mp4'
path_to_source_image = 'examples/images/elon_musk.jpg'
OUT_VIDEO_NAME = "examples/results/elon2dora.mp4"
crop_size = 224 # don't change this

# cv2.imread returns None instead of raising when the file is missing or cannot be
# decoded, so check explicitly and fail here with a clear message.
source_full = cv2.imread(path_to_source_image)
if source_full is None:
    raise FileNotFoundError(f"Could not read source image: {path_to_source_image}")

  and should_run_async(code)


In [4]:
# Check that a face can be detected on the source image.
# The result of crop_face is indexed with [0]; the TypeError handler covers the case
# where that result is not subscriptable (presumably None when no face is found --
# TODO confirm against crop_face).
try:
    source = crop_face(source_full, app, crop_size)[0]
    # Reverse the last (channel) axis; presumably BGR -> RGB since the image comes from cv2.imread.
    source = source[:, :, ::-1]
except TypeError as err:
    # Fail loudly instead of only printing: without `source` the inference cell
    # would later stop with an unrelated NameError.
    raise ValueError("Bad source image. Choose another one.") from err
else:
    print("Everything is ok!")

Everything is ok!


  and should_run_async(code)


In [5]:
# Read the target video. The call's result is unpacked into the frames and the
# fps value; fps is reused later when the output video is written.

full_frames, fps = read_video(path_to_video)

  and should_run_async(code)


### Model Inference

In [6]:
# Start of the timed section. Model loading and video reading happen in earlier
# cells, so they are not part of the time reported at the end of the notebook.
START_TIME = time.time()

  and should_run_async(code)


In [7]:
# Run model inference on the video frames with the cropped source face.
# The ArcFace network is passed wrapped in a list, and `full_frames` is rebound
# to one of the returned values.
final_frames, crop_frames, full_frames, tfm_array = model_inference(
    full_frames,
    source,
    [netArc],
    G,
    app,
    crop_size=crop_size,
)

  and should_run_async(code)
100%|██████████| 262/262 [00:02<00:00, 102.30it/s]
262it [00:00, 4874.78it/s]
100%|██████████| 5/5 [00:06<00:00,  1.39s/it]
100%|██████████| 262/262 [00:00<00:00, 542135.00it/s]


In [8]:
# Optional super-resolution pass; runs only when use_sr was enabled in the model-loading cell.
# Note: final_frames is rebound to the result, so re-running this cell would feed the
# already-processed frames through face_enhancement again.
if use_sr:
    final_frames = face_enhancement(final_frames, model)

  and should_run_async(code)
14it [00:07,  1.86it/s]


In [9]:
# Assemble the final frames from the inference results; presumably this writes the
# video to OUT_VIDEO_NAME (the last cell reports that path as the save location).
get_final_video_frame(
    final_frames, crop_frames, full_frames, tfm_array,
    OUT_VIDEO_NAME, fps, handler,
)

  and should_run_async(code)
  final = (mask_t*swap_t + (1-mask_t)*frame).type(torch.uint8).squeeze().permute(1,2,0).cpu().detach().numpy()
100%|██████████| 262/262 [00:05<00:00, 47.31it/s]


In [10]:
# Add the audio from the original video to the generated one.
# NOTE(review): the meaning of the third argument "audio" is not visible here
# (presumably a name for intermediate audio files) -- confirm in video_processing.
add_audio_from_another_video(path_to_video, OUT_VIDEO_NAME, "audio")

  and should_run_async(code)


In [11]:
# Report the wall-clock seconds elapsed since START_TIME was set, then the output location.
elapsed = time.time() - START_TIME
print(f'Full pipeline took {elapsed}')
print(f"Video saved with path {OUT_VIDEO_NAME}")

Full pipeline took 26.246086597442627
Video saved with path examples/results/elon2dora.mp4


  and should_run_async(code)
