Skip to content

Commit

Permalink
Merge pull request #8 from yzhou359/main
Browse files Browse the repository at this point in the history
Fix bugs for cartoon animation + wine solution for linux
  • Loading branch information
yzhou359 committed Feb 23, 2021
2 parents 0a8d780 + 3600eb3 commit 5d367e9
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 66 deletions.
21 changes: 18 additions & 3 deletions README.md
Expand Up @@ -50,6 +50,15 @@ sudo apt-get install ffmpeg
```
pip install -r requirements.txt
```
- `winehq-stable` for cartoon face warping in Ubuntu (https://wiki.winehq.org/Ubuntu). Tested on Ubuntu16.04, wine==5.0.3.
```
sudo dpkg --add-architecture i386
wget -nc https://dl.winehq.org/wine-builds/winehq.key
sudo apt-key add winehq.key
sudo apt-add-repository 'deb https://dl.winehq.org/wine-builds/ubuntu/ xenial main'
sudo apt update
sudo apt install --install-recommends winehq-stable
```

## Pre-trained Models

Expand Down Expand Up @@ -96,9 +105,14 @@ to amply lip motion (in x/y-axis direction) and head motion displacements, defau
| Image | ![img](examples_cartoon/wilk_fullbody.jpg) | ![img](examples_cartoon/roy_full.png) | ![img](examples_cartoon/sketch.png) | ![img](examples_cartoon/color.jpg) | ![img](examples_cartoon/cartoonM.png) | ![img](examples_cartoon/danbooru1.jpg) |

```
python main_end2end_cartoon.py --jpg <cartoon_puppet_name>
python main_end2end_cartoon.py --jpg <cartoon_puppet_name_with_extension> --jpg_bg <puppet_background_with_extension>
```

- `--jpg_bg` takes an image of the same size to use as the fixed background of the animation, such as the puppet's body. If you want to use a background, make sure the puppet face image (i.e. the `--jpg` image) is in `png` format and is transparent in the non-face area. If you don't need any background, pass a placeholder image of the same size (e.g. a pure white image) to fill the argument.

- use additional args `--amp_lip_x <x> --amp_lip_y <y> --amp_pos <pos>`
to amplify lip motion (in the x/y-axis directions) and head motion displacements; default values are `<x>=2., <y>=2., <pos>=.5`

- create your own puppets (ToDo...)

## Train
Expand Down Expand Up @@ -130,8 +144,9 @@ Todo...

We would like to thank Timothy Langlois for the narration, and
[Kaizhi Qian](https://scholar.google.com/citations?user=uEpr4C4AAAAJ&hl=en)
for the help with the [voice conversion module](https://auspicious3000.github.io/icassp-2020-demo/). We
thank Daichi Ito for sharing the caricature image and Dave Werner
for the help with the [voice conversion module](https://auspicious3000.github.io/icassp-2020-demo/).
We thank [Jakub Fiser](https://research.adobe.com/person/jakub-fiser/) for implementing the real-time GPU version of the triangle morphing algorithm.
We thank Daichi Ito for sharing the caricature image and Dave Werner
for Wilk, the gruff but ultimately lovable puppet.

This research is partially funded by NSF (EAGER-1942069)
Expand Down
Binary file added examples_cartoon/wilk_bg.jpg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
88 changes: 57 additions & 31 deletions main_end2end_cartoon.py
Expand Up @@ -21,10 +21,12 @@
GEN_AUDIO = True
GEN_FLS = True

DEMO_CH = 'danbooru1'
DEMO_CH = 'wilk.png'

parser = argparse.ArgumentParser()
parser.add_argument('--jpg', type=str, default=DEMO_CH)
parser.add_argument('--jpg', type=str, required=True, help='Puppet image name to animate (with filename extension), e.g. wilk.png')
parser.add_argument('--jpg_bg', type=str, required=True, help='Puppet image background (with filename extension), e.g. wilk_bg.jpg')
parser.add_argument('--out', type=str, default='out.mp4')

parser.add_argument('--load_AUTOVC_name', type=str, default='examples/ckpt/ckpt_autovc.pth')
parser.add_argument('--load_a2l_G_name', type=str, default='examples/ckpt/ckpt_speaker_branch.pth') #ckpt_audio2landmark_g.pth') #
Expand All @@ -33,8 +35,8 @@

parser.add_argument('--amp_lip_x', type=float, default=2.0)
parser.add_argument('--amp_lip_y', type=float, default=2.0)
parser.add_argument('--amp_pos', type=float, default=0.8)
parser.add_argument('--reuse_train_emb_list', default=['45hn7-LXDX8']) # ['E_kmpT-EfOg']) # ['E_kmpT-EfOg']) # ['45hn7-LXDX8'])
parser.add_argument('--amp_pos', type=float, default=0.5)
parser.add_argument('--reuse_train_emb_list', type=str, nargs='+', default=[]) # ['E_kmpT-EfOg']) # ['E_kmpT-EfOg']) # ['45hn7-LXDX8'])


parser.add_argument('--add_audio_in', default=False, action='store_true')
Expand All @@ -61,17 +63,25 @@

opt_parser = parser.parse_args()

DEMO_CH = opt_parser.jpg
DEMO_CH = opt_parser.jpg.split('.')[0]

shape_3d = np.loadtxt('examples_cartoon/{}_face_close_mouth.txt'.format(opt_parser.jpg))
shape_3d = np.loadtxt('examples_cartoon/{}_face_close_mouth.txt'.format(DEMO_CH))

''' STEP 3: Generate audio data as input to audio branch '''
# Accumulators filled while iterating over the input wav files below.
au_data = []   # per-file AutoVC input features
au_emb = []    # per-file speaker embeddings

# Collect every wav under examples/, excluding the temporary resample target
# written by the ffmpeg step ('tmp.wav').
# BUG FIX: the original filter used `item is not 'tmp.wav'` — an identity
# comparison against a string literal, which is implementation-dependent and
# does not reliably exclude the file; equality (`!=`) is the correct test.
ains = glob.glob1('examples', '*.wav')
ains = [item for item in ains if item != 'tmp.wav']
ains.sort()
for ain in ains:
os.system('ffmpeg -y -loglevel error -i examples/{} -ar 16000 examples/tmp.wav'.format(ain))
shutil.copyfile('examples/tmp.wav', 'examples/{}'.format(ain))

# au embedding
from thirdparty.resemblyer_util.speaker_emb import get_spk_emb
me, ae = get_spk_emb('examples/{}'.format(ain))
au_emb.append(me.reshape(-1))

print('Processing audio file', ain)
c = AutoVC_mel_Convertor('examples')
au_data_i = c.convert_single_wav_to_autovc_input(audio_filename=os.path.join('examples', ain),
Expand Down Expand Up @@ -112,7 +122,10 @@
''' STEP 4: RUN audio->landmark network '''
from src.approaches.train_audio2landmark import Audio2landmark_model

# Build the audio-to-landmark model seeded with the puppet's closed-mouth
# face shape loaded above.
model = Audio2landmark_model(opt_parser, jpg_shape=shape_3d)

# BUG FIX: a stale unconditional `model.test()` call (left over from the
# pre-conditional version of this script) ran the network a second time
# without speaker embeddings; only the conditional call below is intended.
# When --reuse_train_emb_list is empty, feed the embeddings extracted from
# the input wavs; otherwise the model falls back to its stored embeddings.
if len(opt_parser.reuse_train_emb_list) == 0:
    model.test(au_emb=au_emb)
else:
    model.test(au_emb=None)
print('finish gen fls')

''' STEP 5: de-normalize the output to the original image scale '''
Expand Down Expand Up @@ -176,27 +189,40 @@

os.remove(os.path.join('examples_cartoon', fls_names[i]))

# # ==============================================
# # Step 4 : Vector art morphing (only work in WINDOWS)
# # ==============================================
# warp_exe = os.path.join(os.getcwd(), 'facewarp', 'facewarp.exe')
# import os
#
# if (os.path.exists(os.path.join(output_dir, 'output'))):
# shutil.rmtree(os.path.join(output_dir, 'output'))
# os.mkdir(os.path.join(output_dir, 'output'))
# os.chdir('{}'.format(os.path.join(output_dir, 'output')))
# print(os.getcwd())
#
# os.system('{} {} {} {} {} {}'.format(
# warp_exe,
# os.path.join('examples_cartoon', DEMO_CH+'.png'),
# os.path.join(output_dir, 'triangulation.txt'),
# os.path.join(output_dir, 'reference_points.txt'),
# os.path.join(output_dir, 'warped_points.txt'),
# # os.path.join(ROOT_DIR, 'puppets', sys.argv[6]),
# '-novsync -dump'))
# os.system('ffmpeg -y -r 62.5 -f image2 -i "%06d.tga" -i {} -shortest {}'.format(
# ain,
# os.path.join(output_dir, sys.argv[8])
# ))
# ==============================================
# Step 4 : Vector art morphing
# ==============================================
# Warp the puppet face image frame-by-frame with the facewarp binary,
# then mux the dumped frames with the audio into the final video.
# NOTE: `os` is already imported at the top of the file; the redundant
# mid-file `import os` has been removed.
warp_exe = os.path.join(os.getcwd(), 'facewarp', 'facewarp.exe')

# Recreate a clean output/ directory for the dumped frames and cd into it
# (facewarp writes its .tga frames into the current working directory).
if os.path.exists(os.path.join(output_dir, 'output')):
    shutil.rmtree(os.path.join(output_dir, 'output'))
os.mkdir(os.path.join(output_dir, 'output'))
os.chdir(os.path.join(output_dir, 'output'))
cur_dir = os.getcwd()
print(cur_dir)

# The Windows and Linux invocations are identical except that on Linux the
# .exe is run through wine (see README for the winehq setup).
facewarp_cmd = '{} {} {} {} {} {}'.format(
    warp_exe,
    os.path.join(cur_dir, '..', '..', opt_parser.jpg),
    os.path.join(cur_dir, '..', 'triangulation.txt'),
    os.path.join(cur_dir, '..', 'reference_points.txt'),
    os.path.join(cur_dir, '..', 'warped_points.txt'),
    os.path.join(cur_dir, '..', '..', opt_parser.jpg_bg),
    '-novsync -dump')
if os.name != 'nt':
    ''' linux '''
    facewarp_cmd = 'wine ' + facewarp_cmd
os.system(facewarp_cmd)

# Assemble the dumped frames (62.5 fps) with the audio track.
# NOTE(review): `ain` here is whatever wav the earlier loop processed last —
# presumably there is exactly one input wav per run; confirm with callers.
os.system('ffmpeg -y -r 62.5 -f image2 -i "%06d.tga" -i {} -pix_fmt yuv420p -vf "pad=ceil(iw/2)*2:ceil(ih/2)*2" -shortest -strict -2 {}'.format(
    os.path.join(cur_dir, '..', '..', '..', 'examples', ain),
    os.path.join(cur_dir, '..', 'out.mp4')
))
43 changes: 22 additions & 21 deletions quick_demo.ipynb
Expand Up @@ -6,7 +6,8 @@
"name": "quick_demo.ipynb",
"provenance": [],
"collapsed_sections": [],
"authorship_tag": "ABX9TyPYmqKJqHGxAbsAVY62zkIy",
"toc_visible": true,
"authorship_tag": "ABX9TyOYW4P15IPg+x69aFu7awQb",
"include_colab_link": true
},
"kernelspec": {
Expand Down Expand Up @@ -53,7 +54,7 @@
"cell_type": "code",
"metadata": {
"id": "yB-ixde4R3nO",
"outputId": "ec2f71e3-66c3-4af9-cd6c-6dce38958f3c",
"outputId": "3014143b-2a49-439a-ce4a-54e9aa9589e7",
"colab": {
"base_uri": "https://localhost:8080/"
}
Expand All @@ -68,15 +69,15 @@
{
"output_type": "stream",
"text": [
"Sat Nov 7 02:37:34 2020 \n",
"Tue Nov 10 19:18:06 2020 \n",
"+-----------------------------------------------------------------------------+\n",
"| NVIDIA-SMI 418.67 Driver Version: 418.67 CUDA Version: 10.1 |\n",
"|-------------------------------+----------------------+----------------------+\n",
"| GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
"| Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. |\n",
"|===============================+======================+======================|\n",
"| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n",
"| N/A 58C P8 11W / 70W | 0MiB / 15079MiB | 0% Default |\n",
"| 0 Tesla P4 Off | 00000000:00:04.0 Off | 0 |\n",
"| N/A 40C P8 7W / 75W | 0MiB / 7611MiB | 0% Default |\n",
"+-------------------------------+----------------------+----------------------+\n",
" \n",
"+-----------------------------------------------------------------------------+\n",
Expand Down Expand Up @@ -111,7 +112,7 @@
"source": [
"print(subprocess.getoutput('ffmpeg'))"
],
"execution_count": 2,
"execution_count": null,
"outputs": [
{
"output_type": "stream",
Expand Down Expand Up @@ -158,7 +159,7 @@
"source": [
"!git clone https://github.com/yzhou359/MakeItTalk"
],
"execution_count": 3,
"execution_count": null,
"outputs": [
{
"output_type": "stream",
Expand Down Expand Up @@ -193,7 +194,7 @@
"!pip install -r requirements.txt\n",
"!pip install tensorboardX"
],
"execution_count": 4,
"execution_count": null,
"outputs": [
{
"output_type": "stream",
Expand Down Expand Up @@ -279,7 +280,7 @@
"!gdown -O examples/ckpt/ckpt_116_i2i_comb.pth https://drive.google.com/uc?id=1i2LJXKp-yWKIEEgJ7C6cE3_2NirfY_0a\n",
"!gdown -O examples/dump/emb.pickle https://drive.google.com/uc?id=18-0CYl5E6ungS3H4rRSHjfYvvm-WwjTI"
],
"execution_count": 5,
"execution_count": null,
"outputs": [
{
"output_type": "stream",
Expand Down Expand Up @@ -348,14 +349,14 @@
"import torch\n",
"import pickle\n",
"import face_alignment\n",
"from thirdparty.autovc.AutoVC_mel_Convertor_retrain_version import AutoVC_mel_Convertor\n",
"from src.autovc.AutoVC_mel_Convertor_retrain_version import AutoVC_mel_Convertor\n",
"import shutil\n",
"import time\n",
"import util.utils as util\n",
"from scipy.signal import savgol_filter\n",
"from src.approaches.train_audio2landmark import Audio2landmark_model"
],
"execution_count": 6,
"execution_count": null,
"outputs": []
},
{
Expand All @@ -380,7 +381,7 @@
"AMP_LIP_SHAPE_Y = 2. # amplify the lip motion in vertical direction\n",
"AMP_HEAD_POSE_MOTION = 0.7 # amplify the head pose motion (usually smaller than 1.0, put it to 0. for a static head pose)"
],
"execution_count": 63,
"execution_count": null,
"outputs": []
},
{
Expand Down Expand Up @@ -435,7 +436,7 @@
"\n",
"opt_parser = parser.parse_args()"
],
"execution_count": 64,
"execution_count": null,
"outputs": []
},
{
Expand Down Expand Up @@ -464,7 +465,7 @@
"if(opt_parser.close_input_face_mouth):\n",
" util.close_input_face_mouth(shape_3d)"
],
"execution_count": 65,
"execution_count": null,
"outputs": []
},
{
Expand All @@ -491,7 +492,7 @@
"shape_3d[[37,38,43,44], 1] -=2. # larger eyes\n",
"shape_3d[[40,41,46,47], 1] +=2. # larger eyes"
],
"execution_count": 66,
"execution_count": null,
"outputs": []
},
{
Expand All @@ -511,7 +512,7 @@
"source": [
"shape_3d, scale, shift = util.norm_input_face(shape_3d)"
],
"execution_count": 67,
"execution_count": null,
"outputs": []
},
{
Expand Down Expand Up @@ -584,7 +585,7 @@
" gaze = {'rot_trans':rot_tran, 'rot_quat':rot_quat, 'anchor_t_shape':anchor_t_shape}\n",
" pickle.dump(gaze, fp)"
],
"execution_count": 68,
"execution_count": null,
"outputs": [
{
"output_type": "stream",
Expand Down Expand Up @@ -626,7 +627,7 @@
"else:\n",
" model.test(au_emb=None)"
],
"execution_count": 69,
"execution_count": null,
"outputs": [
{
"output_type": "stream",
Expand Down Expand Up @@ -770,7 +771,7 @@
" print('finish image2image gen')\n",
" os.remove(os.path.join('examples', fls[i]))"
],
"execution_count": 70,
"execution_count": null,
"outputs": [
{
"output_type": "stream",
Expand Down Expand Up @@ -822,7 +823,7 @@
" </video>\n",
" \"\"\" % data_url))"
],
"execution_count": 71,
"execution_count": null,
"outputs": [
{
"output_type": "stream",
Expand Down Expand Up @@ -859,7 +860,7 @@
"source": [
""
],
"execution_count": 71,
"execution_count": null,
"outputs": []
}
]
Expand Down

0 comments on commit 5d367e9

Please sign in to comment.