<a href="https://colab.research.google.com/github/georgedouzas/avatar-poc/blob/main/environment.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Setup & Installation

In [1]:
# Setup environment
!sudo apt-get update
!apt install software-properties-common
!sudo apt-get install python3.8 python3.8-distutils
!curl -sS https://bootstrap.pypa.io/pip/3.8/get-pip.py -o get-pip.py
!sudo python3.8 get-pip.py
!python3.8 -m pip install -U setuptools wheel
!sudo apt-get install python3.9 python3.9-distutils
!curl -sS https://bootstrap.pypa.io/get-pip.py -o get-pip.py
!sudo python3.9 get-pip.py
!python3.9 -m pip install -U setuptools wheel

# Install SadTalker
!git clone https://github.com/cedro3/SadTalker.git &> /dev/null
%cd SadTalker
!export PYTHONPATH=/content/SadTalker:$PYTHONPATH
!python3.8 -m pip install torch==1.12.1+cu113 torchvision==0.13.1+cu113 torchaudio==0.12.1 --extra-index-url https://download.pytorch.org/whl/cu113
!apt update
!apt install ffmpeg &> /dev/null
!python3.8 -m pip install -r requirements.txt
!rm -rf checkpoints
!bash scripts/download_models.sh

# Install piper
!python3.9 -m pip install piper-tts
!python3.9 -m piper.download_voices en_US-lessac-medium
!python3.9 -m piper.download_voices el_GR-rapunzelina-low

Get:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  InRelease [1,581 B]
Get:2 http://security.ubuntu.com/ubuntu jammy-security InRelease [129 kB]
Hit:3 http://archive.ubuntu.com/ubuntu jammy InRelease
Get:4 http://archive.ubuntu.com/ubuntu jammy-updates InRelease [128 kB]
Get:5 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease [3,632 B]
Get:6 https://r2u.stat.illinois.edu/ubuntu jammy InRelease [6,555 B]
Get:7 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64  Packages [1,806 kB]
Get:8 http://archive.ubuntu.com/ubuntu jammy-backports InRelease [127 kB]
Hit:9 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease
Hit:10 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease
Hit:11 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease
Get:12 http://security.ubuntu.com/ubuntu jammy-security/main amd64 Packages [3,103 kB]
Get:13 https://r2u.stat.illinois.edu/ubunt

# Gradio App

In [12]:
import gradio as gr
import subprocess
import os
import datetime
import shutil

# UI language name -> piper voice model passed to `piper -m`.
# Insertion order is the order in which the languages are listed in the dropdown.
MODELS_MAPPING = dict(
    [
        ('English', 'en_US-lessac-medium'),
        ('Greek', 'el_GR-rapunzelina-low'),
    ]
)

def _run_step(command, step_name):
    """Run one external pipeline step and report failures through Gradio.

    Args:
        command: Argument list handed to ``subprocess.run`` (no shell involved).
        step_name: Human-readable label used in the error message.

    Returns:
        The ``subprocess.CompletedProcess`` of the finished command.

    Raises:
        gr.Error: If the executable cannot be started or exits non-zero.
    """
    try:
        completed = subprocess.run(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
            errors='replace',  # never fail on undecodable tool output
        )
    except OSError as exc:
        raise gr.Error(f"{step_name} could not be started: {exc}")

    if completed.returncode != 0:
        # Only the tail of the output is shown; that is where tracebacks end.
        details = (completed.stderr or completed.stdout or '').strip()[-1000:]
        raise gr.Error(
            f"{step_name} failed (exit code {completed.returncode}): {details}"
        )
    return completed


def generate_video(language, text, image):
    """Generate a talking-head video from text and a portrait image.

    The text is synthesised to speech with piper (run under python3.9), and the
    resulting audio plus the image are fed to ``inference.py`` (run under
    python3.8). Relative paths are resolved against the current working
    directory.

    Args:
        language: Key of ``MODELS_MAPPING``; unknown values fall back to the
            English voice model.
        text: Text to be spoken.
        image: File path of the uploaded image, or ``None`` if nothing was
            uploaded.

    Returns:
        Path of the generated ``.mp4`` file.

    Raises:
        gr.Error: If an input is missing, a pipeline step fails, or no video
            was produced by this request.
    """
    if not image:
        raise gr.Error("Please upload an image.")
    if not text or not text.strip():
        raise gr.Error("Please enter some text.")

    language_model = MODELS_MAPPING.get(language, 'en_US-lessac-medium')

    # Set paths
    driven_audio_path = 'examples/driven_audio/input_audio.wav'
    source_image_path = 'examples/source_image/input_image.png'
    results_dir = './results'

    for directory in (
        os.path.dirname(driven_audio_path),
        os.path.dirname(source_image_path),
        results_dir,
    ):
        os.makedirs(directory, exist_ok=True)

    # Save uploaded image to source path
    shutil.copy(image, source_image_path)

    # Anything written before this instant belongs to an earlier request.
    started_at = datetime.datetime.now().timestamp()

    # Run TTS to generate audio
    _run_step(
        [
            'python3.9', '-m', 'piper',
            '-m', language_model,
            '-f', driven_audio_path,
            '--', text,
        ],
        'Text-to-speech',
    )

    # Run inference
    _run_step(
        [
            'python3.8', 'inference.py',
            '--driven_audio', driven_audio_path,
            '--source_image', source_image_path,
            '--result_dir', results_dir,
        ],
        'Video inference',
    )

    # Walk the result sub-directories newest-name-first and return the first
    # video written during this request, so a stale file is never served.
    # NOTE(review): the expected file name is kept from the original code;
    # confirm it matches what inference.py actually writes.
    for subdir in sorted(os.listdir(results_dir), reverse=True):
        path = os.path.join(results_dir, subdir, 'input_image##output.mp4')
        if os.path.exists(path) and os.path.getmtime(path) >= started_at:
            return path

    # A plain string returned to a gr.Video output would be treated as a file
    # path, so the failure is raised as a Gradio error instead.
    raise gr.Error("Error: No output video generated.")

# Gradio UI: one row of inputs, a trigger button and the resulting video.
with gr.Blocks() as demo:
    gr.Markdown("# Avatar PoC")

    # Inputs are laid out side by side in a single row.
    with gr.Row():
        language_input = gr.Dropdown(
            label="Select Language",
            choices=list(MODELS_MAPPING),
            value="English",
        )
        text_input = gr.Textbox(label="Enter Text", lines=4)
        # type="filepath" makes the callback receive a path on disk.
        image_input = gr.Image(type="filepath", label="Upload Image")

    generate_button = gr.Button("Generate Video")
    video_output = gr.Video(label="Generated Video")

    # Clicking the button runs the whole pipeline and shows the returned video.
    generate_button.click(
        generate_video,
        inputs=[language_input, text_input, image_input],
        outputs=video_output,
    )

# share=True additionally serves the app through a public URL.
demo.launch(share=True)

Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://84a1007c7fbb4fe43f.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


Keyboard interruption in main thread... closing server.


KeyboardInterrupt: 