In [None]:
%tensorflow_version 1.x
# !git clone https://github.com/justinpinkney/stylegan2
# %cd /content/stylegan2
# !nvcc test_nvcc.cu -o test_nvcc -run
# !mkdir aligned
# !mkdir raw
# !mkdir generated

# %cd /content/
# !git clone https://github.com/AliaksandrSiarohin/first-order-model

TensorFlow 1.x selected.


In [None]:
import argparse
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision.utils as vutils
from torchvision.utils import save_image
from torchvision import transforms
from PIL import Image as PILImage
from IPython.display import Image as IPImage
import glob
import cv2

%cd /content/stylegan2/
import os, os.path
import pretrained_networks # requires stylegan2 (https://github.com/justinpinkney/stylegan2)
import dnnlib
import dnnlib.tflib as tflib
from pathlib import Path

# requires first-order-model (https://github.com/AliaksandrSiarohin/first-order-model)

# Number of channels of the latent noise tensor fed to the generator
# (the noise is sampled with shape (1, nz, 1, 1)).
nz = 100
# Base generator feature-map count; the layer widths are multiples of it.
ngf = 64
# NOTE(review): presumably the side length of the generated images; it is not
# referenced by the code shown here -- confirm before relying on it.
IMG_SIZE = 128
# Number of channels produced by the generator's last layer.
IMG_CHANNELS = 3

class Generator(nn.Module):
    """DCGAN-style generator built from transposed convolutions.

    The input is a latent tensor of shape (N, nz, 1, 1).  The first stage
    expands it to 4x4 and every following stage doubles the spatial size
    (4 -> 8 -> 16 -> 32 -> 64 -> 128), so the output has shape
    (N, IMG_CHANNELS, 128, 128) with values in [-1, 1] (final Tanh).
    """

    def __init__(self):
        super().__init__()

        # Output widths of the hidden stages, in order.
        hidden_widths = (ngf * 16, ngf * 8, ngf * 4, ngf * 2, ngf)

        layers = []
        in_channels = nz
        for stage, out_channels in enumerate(hidden_widths):
            # Stage 0 maps 1x1 -> 4x4 (stride 1, no padding); the remaining
            # stages double the resolution (stride 2, padding 1).
            stride, padding = (1, 0) if stage == 0 else (2, 1)
            layers.append(
                nn.ConvTranspose2d(in_channels, out_channels, 4, stride, padding, bias=False)
            )
            layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.ReLU(True))
            in_channels = out_channels

        # Final upsampling stage: ngf x 64 x 64 -> IMG_CHANNELS x 128 x 128.
        layers.append(nn.ConvTranspose2d(in_channels, IMG_CHANNELS, 4, 2, 1, bias=False))
        layers.append(nn.Tanh())

        # Layer order is unchanged, so state-dict keys stay "net.<index>.*".
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Run the latent tensor `x` through the network and return the image batch."""
        return self.net(x)

# REQUIRES GPU TO RUN -> DEPLOY TO GPU SERVER.

class Cartoonizer():
    """Generate a face with the DCGAN `Generator` and turn it into a cartoon.

    Pipeline (see `generate_cartoonize_face`):
      1. sample a face from the DCGAN generator and save it as a JPEG,
      2. resize it and store it in the "aligned" directory,
      3. project the aligned image(s) into StyleGAN2 latent space with
         `project_images.py` and synthesise them with the blended (cartoon)
         StyleGAN2 network.

    The StyleGAN2 checkout is expected at /content/stylegan2; that path and
    its `aligned` / `generated` sub-directories are hard-coded below.
    """

    def __init__(self):
        """Create the DCGAN generator and load both StyleGAN2 networks."""
        self.device = ("cuda:0" if torch.cuda.is_available() else "cpu")
        self.netG = Generator().to(self.device)

        blended_url = "https://drive.google.com/uc?id=1H73TfV5gQ9ot7slSed_l-lim9X7pMRiU"
        ffhq_url = "http://d36zk2xti64re0.cloudfront.net/stylegan2/networks/stylegan2-ffhq-config-f.pkl"

        # load_networks returns three values; only the last one is kept.
        _, _, self.G_blended = pretrained_networks.load_networks(blended_url)  # cartoon generator model
        _, _, self.G = pretrained_networks.load_networks(ffhq_url)

    @staticmethod
    def _run_command(cmd):
        """Run `cmd` (argument list), echo its output line by line, raise on failure.

        Replaces the IPython `!` shell escape so the class also works outside
        a notebook, and so that a failing command is not silently ignored.

        Raises:
            subprocess.CalledProcessError: if the command exits non-zero.
        """
        import subprocess

        with subprocess.Popen(
            cmd,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            universal_newlines=True,
        ) as proc:
            for line in proc.stdout:
                print(line, end="")
        # The context manager has waited for the process, so returncode is set.
        if proc.returncode != 0:
            raise subprocess.CalledProcessError(proc.returncode, cmd)

    def generate_face(self, model_path, display_image=False,
                      output_path='/content/stylegan2/raw/raw_face.jpg'):
        """Sample one face from the DCGAN generator and save it as an image.

        Args:
            model_path: path of the generator state dict to load.
            display_image: when true, show the face with matplotlib.
            output_path: where the image is written (defaults to the
                previously hard-coded location).
        """
        print("generating face...")
        # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
        self.netG.load_state_dict(torch.load(model_path, map_location=self.device))
        noise = torch.randn(1, nz, 1, 1, device=self.device)
        # NOTE(review): the generator is never switched to eval(), so BatchNorm
        # uses batch statistics here -- confirm that this is intended.
        with torch.no_grad():
            # CHW -> HWC, the layout expected by matplotlib and PIL.
            face = self.netG(noise)[0].cpu().permute(1, 2, 0)

        # Min-max normalise to [0, 1].  This is what the former
        # vutils.make_grid(..., normalize=True) call did for a single image.
        low, high = float(face.min()), float(face.max())
        face = (face - low) / max(high - low, 1e-5)

        if display_image:
            plt.imshow(face)
            plt.show()

        im = PILImage.fromarray((face.numpy() * 255).astype(np.uint8))
        im.save(output_path)

    def resize_face(self, size, raw_face_path, aligned_dir):
        """Resize the image at `raw_face_path` to `size` and save and display it.

        The result is written to `<aligned_dir>/face_aligned.jpg`.

        Args:
            size: (width, height) passed to PIL's `resize`.
            raw_face_path: image to read.
            aligned_dir: directory receiving `face_aligned.jpg`.
        """
        # Imported explicitly so a global named `display` cannot shadow it.
        from IPython.display import display

        with PILImage.open(raw_face_path) as raw:
            img = raw.resize(size)
        print(img.size)

        aligned_path = f"{aligned_dir}/face_aligned.jpg"
        img.save(aligned_path)
        # display generated face.
        display(IPImage(filename=aligned_path))

    def cartoonize_face(self, source_img, display_image=False):
        """Project the aligned images and synthesise their cartoon versions.

        Runs `project_images.py` over /content/stylegan2/aligned, then feeds
        every `*.npy` latent found in /content/stylegan2/generated through the
        blended StyleGAN2 synthesis network, saving `<stem>-cartoon.jpg` next
        to each latent file.

        Args:
            source_img: path of the aligned source image; its file stem selects
                which cartoon is displayed.
            display_image: when true, display `<stem>-cartoon.jpg`.

        Raises:
            subprocess.CalledProcessError: if the projection script fails.
        """
        from IPython.display import display

        stylegan_dir = Path("/content/stylegan2")
        # The projection command uses paths relative to the StyleGAN2 checkout.
        os.chdir(str(stylegan_dir))
        # project face image into n dim latent vector (z)
        self._run_command(
            ["python", "project_images.py", "--num-steps", "500", "aligned", "generated"]
        )

        latent_dir = stylegan_dir / "generated"
        synthesis_kwargs = dict(
            output_transform=dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=False),
            minibatch_size=8,
        )
        for latent_file in latent_dir.glob("*.npy"):
            # pass latent vector generated from face image through stylegan2 Generator network.
            latent = np.expand_dims(np.load(latent_file), axis=0)
            images = self.G_blended.components.synthesis.run(
                latent, randomize_noise=False, **synthesis_kwargs
            )
            # NCHW -> NHWC, then take the single image of the batch.
            cartoon = PILImage.fromarray(images.transpose((0, 2, 3, 1))[0], 'RGB')
            cartoon.save(latent_file.parent / f"{latent_file.stem}-cartoon.jpg")

        if display_image:
            cartoon_path = latent_dir / f"{Path(source_img).stem}-cartoon.jpg"
            display(IPImage(filename=str(cartoon_path), width=256))

    def generate_cartoonize_face(self, model_path, raw_face_path, aligned_dir, display_image=False):
        """Run the whole pipeline: generate, resize, then cartoonize a face.

        Args:
            model_path: DCGAN generator state dict.
            raw_face_path: where the generated face is written and read back.
            aligned_dir: directory receiving the resized face.
            display_image: forwarded to `generate_face`.
        """
        # Write the generated face to the path that resize_face reads back.
        self.generate_face(model_path, display_image=display_image, output_path=raw_face_path)
        self.resize_face((256, 256), raw_face_path, aligned_dir)
        # NOTE(review): the cartoon is always displayed, regardless of
        # `display_image` -- confirm whether the flag should be forwarded.
        self.cartoonize_face(source_img=f"{aligned_dir}/face_aligned.jpg", display_image=True)

# if __name__ == "__main__":
# The instance gets its own lowercase name: rebinding `Cartoonizer` to the
# instance would overwrite the class and make this cell fail when re-run.
cartoonizer = Cartoonizer()
cartoonizer.generate_cartoonize_face(
    model_path="/content/models/netG_EPOCHS=12_IMGSIZE=128.pth",
    raw_face_path="/content/stylegan2/raw/raw_face.jpg",
    aligned_dir="/content/stylegan2/aligned",
    display_image=True,
)

%cd '/content/first-order-model'

from demo import load_checkpoints # demo is from the first-order-model
generator, kp_detector = load_checkpoints(config_path='/content/first-order-model/config/vox-256.yaml', checkpoint_path="/content/vox-cpk.pth.tar")

import imageio
import matplotlib.animation as animation
from skimage.transform import resize
from IPython.display import HTML

from demo import make_animation
from skimage import img_as_ubyte

import warnings
warnings.filterwarnings('ignore')

# Source cartoon image to animate, resized to 256x256 and cut to its first
# three channels.
sample_image = imageio.imread('/content/stylegan2/generated/face_aligned-cartoon.jpg')
sample_image = resize(sample_image, (256,256))[..., :3]

# Read every frame of the driving video.  The reader is closed in `finally`
# so it is released even when an unexpected error interrupts decoding.
reader = imageio.get_reader('/content/sample_video_30fps.mp4')
try:
    fps = reader.get_meta_data()['fps']
    sample_video = []
    try:
        for im in reader:
            # get every frame of the driver video
            sample_video.append(im)
    except RuntimeError as err:
        # Best effort: keep the frames decoded so far, but say so instead of
        # swallowing the error silently.
        print(f"stopped reading the driving video after {len(sample_video)} frames: {err}")
finally:
    reader.close()

# resize every frame in driver video
sample_video = [resize(frame, (256,256))[..., :3] for frame in sample_video]

# generate the animation -> array of every frame of animation.
predictions = make_animation(sample_image, sample_video, generator, kp_detector, relative=True)

def display_anim(sample_image, sample_video, generated=None, interval=33.333):
    """Build a matplotlib animation of the driving video next to the generated one.

    Each animation frame is the driving-video frame, concatenated horizontally
    with the matching generated frame when one is available.

    Args:
        sample_image: source image; accepted for interface compatibility but
            not drawn.
        sample_video: sequence of driving-video frames (arrays).
        generated: optional sequence of generated frames, indexed like
            `sample_video`; `None` (or a `None` entry) shows the driving
            frame alone.
        interval: delay between frames in milliseconds.

    Returns:
        The `matplotlib.animation.ArtistAnimation` for the frames.
    """
    # Figure is 12 inches wide, or 24 when generated frames are shown as well.
    fig = plt.figure(figsize=(12 + (12 * (generated is not None)), 6))

    # One single-artist list per animation frame.
    imgs = []

    for i, frame in enumerate(sample_video):
        cols = [frame]  # driving-video frame column
        # `generated` itself may be None (the default), so check it before
        # indexing; subscripting None raised a TypeError.
        if generated is not None and generated[i] is not None:
            cols.append(generated[i])  # generated frame column

        # Put the columns side by side in one image.
        img = plt.imshow(np.concatenate(cols, axis=1), animated=True)
        plt.axis('off')
        imgs.append([img])

    # generate animation with all frames
    anim = animation.ArtistAnimation(fig, imgs, interval=interval, repeat_delay=1000)
    plt.close()
    return anim

# Convert the animation to an HTML5 video and wrap it in an IPython HTML object.
HTML(display_anim(sample_image, sample_video, predictions).to_html5_video())


/content/stylegan2
/content/first-order-model
/content/first-order-model


In [None]:
import imageio
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from skimage.transform import resize
from IPython.display import HTML

from demo import make_animation
from skimage import img_as_ubyte

import warnings
warnings.filterwarnings('ignore')

# Source cartoon image to animate, resized to 256x256 and cut to its first
# three channels.
sample_image = imageio.imread('/content/stylegan2/generated/face_aligned-cartoon.jpg')
sample_image = resize(sample_image, (256,256))[..., :3]

# Read every frame of the driving video.  The reader is closed in `finally`
# so it is released even when an unexpected error interrupts decoding.
reader = imageio.get_reader('/content/sample_video_30fps.mp4')
try:
    fps = reader.get_meta_data()['fps']
    sample_video = []
    try:
        for im in reader:
            # get every frame of the driver video
            sample_video.append(im)
    except RuntimeError as err:
        # Best effort: keep the frames decoded so far, but say so instead of
        # swallowing the error silently.
        print(f"stopped reading the driving video after {len(sample_video)} frames: {err}")
finally:
    reader.close()

# resize every frame in driver video
sample_video = [resize(frame, (256,256))[..., :3] for frame in sample_video]

# generate the animation -> array of every frame of animation.
# `generator` and `kp_detector` are not defined in this cell; they must
# already exist in the session.
predictions = make_animation(sample_image, sample_video, generator, kp_detector, relative=True)

def display(sample_image, sample_video, generated=None, interval=33.333):
    """Build a matplotlib animation of the driving video next to the generated one.

    Each animation frame is the driving-video frame, concatenated horizontally
    with the matching generated frame when one is available.

    NOTE(review): a global function named `display` replaces the `display`
    helper that notebook code normally calls to show images; consider
    renaming this function (e.g. `display_anim`) together with its callers.

    Args:
        sample_image: source image; accepted for interface compatibility but
            not drawn.
        sample_video: sequence of driving-video frames (arrays).
        generated: optional sequence of generated frames, indexed like
            `sample_video`; `None` (or a `None` entry) shows the driving
            frame alone.
        interval: delay between frames in milliseconds.

    Returns:
        The `matplotlib.animation.ArtistAnimation` for the frames.
    """
    # Figure is 12 inches wide, or 24 when generated frames are shown as well.
    fig = plt.figure(figsize=(12 + (12 * (generated is not None)), 6))

    # One single-artist list per animation frame.
    imgs = []

    for i, frame in enumerate(sample_video):
        cols = [frame]  # driving-video frame column
        # `generated` itself may be None (the default), so check it before
        # indexing; subscripting None raised a TypeError.
        if generated is not None and generated[i] is not None:
            cols.append(generated[i])  # generated frame column

        # Put the columns side by side in one image.
        img = plt.imshow(np.concatenate(cols, axis=1), animated=True)
        plt.axis('off')
        imgs.append([img])

    # generate animation with all frames
    anim = animation.ArtistAnimation(fig, imgs, interval=interval, repeat_delay=1000)
    plt.close()
    return anim

# Convert the animation to an HTML5 video and wrap it in an IPython HTML object.
HTML(display(sample_image, sample_video, predictions).to_html5_video())


100%|██████████| 552/552 [00:29<00:00, 19.00it/s]
