<a href="https://colab.research.google.com/github/VinzentBuecheler/Deepfake/blob/main/Kopie_von_DeepFake_Generator_Gradio.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Deep Fake Video Generator (Run in Google Colab)

## 1. Text translation

In [None]:
from IPython.display import display, Audio, clear_output

#@title Text Translation - Setup
!pip install transformers torch espnet IPython espnet_model_zoo
!pip install sacremoses

from transformers import FSMTForConditionalGeneration, FSMTTokenizer
import time
import torch
from espnet2.bin.tts_inference import Text2Speech
from espnet2.utils.types import str_or_none

# Checkpoint identifier handed to both from_pretrained() calls below; the
# "de-en" suffix matches the notebook's German-input / English-output forms.
mname = "facebook/wmt19-de-en"
# Module-level tokenizer/model pair; translation_en_de() reads both as globals.
tokenizer = FSMTTokenizer.from_pretrained(mname)
model = FSMTForConditionalGeneration.from_pretrained(mname)

# Remove the install/download logs printed earlier in this cell so that only
# the status line below remains in the cell output.
clear_output()
print('Text translation - Setup completed')

Text translation - Setup completed


In [None]:
#@title Input a text in German { run: "auto" }
# NOTE(review): this global shadows Python's built-in input(). No cell visible
# in this notebook reads it (translation_en_de() receives its text as an
# argument) -- confirm nothing else depends on the name before renaming it.
input = "Deepfakes sind synthetische Medien, bei denen eine Person in einem bestehenden Bild oder Video durch das Ebenbild einer anderen Person ersetzt wird." #@param {type:"string"}

In [None]:
#@title Translated text in English { run: "auto" }

from IPython.display import Markdown as md
def translation_en_de(input):
  """Translate `input` with the module-level FSMT tokenizer/model pair.

  NOTE(review): despite the `en_de` suffix in the name, the checkpoint loaded
  in the setup cell is "facebook/wmt19-de-en", so the direction is presumably
  German -> English. The name is kept because other cells call it.

  Args:
    input: Text to translate (the parameter name shadows the built-in
      `input()` inside this function; kept for backward compatibility).

  Returns:
    The decoded translation as a string, with special tokens stripped.
  """
  input_ids = tokenizer.encode(input, return_tensors="pt")
  outputs = model.generate(input_ids)
  decoded = tokenizer.decode(outputs[0], skip_special_tokens=True)
  print('The translated text is: ')
  # A bare `md(...)` expression inside a function is discarded: only the last
  # expression of a *cell* is auto-rendered. Display it explicitly instead.
  display(md(f'#### "{decoded}"'))
  return decoded

## 2. Voice Cloning

In [None]:
#@title Voice Cloning - Setup

# Line magic requesting the TensorFlow 1.x runtime.
# NOTE(review): presumably needed by the cloned project's requirements --
# confirm the target runtime still supports this magic.
%tensorflow_version 1.x
import os
from os.path import exists, join, basename, splitext

git_repo_url = 'https://github.com/CorentinJ/Real-Time-Voice-Cloning.git'
# Checkout directory name derived from the URL: basename without ".git".
project_name = splitext(basename(git_repo_url))[0]
# Everything inside this guard runs only when the checkout directory does not
# exist yet; if it exists, cloning, installs and all downloads are skipped.
if not exists(project_name):
  # clone and install
  !git clone -q --recursive {git_repo_url}
  # install dependencies
  !cd {project_name} && pip install -q -r requirements.txt
  !pip install -q --upgrade gdown
  !apt-get install -qq libportaudio2
  !pip install -q https://github.com/tugstugi/dl-colab-notebooks/archive/colab_utils.zip

  # download pretrained model
  !cd {project_name} && wget https://github.com/blue-fish/Real-Time-Voice-Cloning/releases/download/v1.0/pretrained.zip && unzip -o pretrained.zip
  !cd {project_name} && mkdir -p saved_models/default/
  # Three Drive downloads into saved_models/default/. The model-loading code
  # later in this cell reads encoder.pt, synthesizer.pt and vocoder.pt from
  # that directory -- presumably these are those files; verify the Drive ids.
  # The trailing "#https://..." on each line is a shell comment and is not
  # executed (presumably an older link kept for reference).
  !cd {project_name}/saved_models/default/ && gdown https://drive.google.com/uc?id=1f9z6OHKwCRa7CteX6AV5XN68CCPHwCI1 #https://drive.google.com/uc?id=1q8mEGwCkFy23KZsinbuvdKAQLqNKbYf1
  !cd {project_name}/saved_models/default/ && gdown https://drive.google.com/uc?id=19Uqcr2an7ha0Xymur4AtXV7a9lZN7mqj #https://drive.google.com/uc?id=1EqFMIbvxffxtjiVrtykroF6_mUh-5Z3s
  !cd {project_name}/saved_models/default/ && gdown https://drive.google.com/uc?id=14qJzfTehtjvBwUBlWFWnvZnfYvT9m9aW #https://drive.google.com/uc?id=1cf2NO6FtI0jDuy8AV3Xgn6leO6dHjIgu

import sys
sys.path.append(project_name)

from IPython.utils import io
import ipywidgets as widgets
import numpy as np
from dl_colab_notebooks.audio import record_audio, upload_audio

from synthesizer.inference import Synthesizer
from encoder import inference as encoder
from vocoder import inference as vocoder
from pathlib import Path

# Directory listing left in for debugging; its output is wiped by the
# clear_output() call at the end of this cell.
!ls 
# project_name is a str; `str / Path` produces a Path through Path's reflected
# division, so each argument is "<project_name>/saved_models/default/<file>".
encoder.load_model(project_name / Path("saved_models/default/encoder.pt"))
# Module-level synthesizer instance; synthesize() reads it as a global.
synthesizer = Synthesizer(project_name / Path("saved_models/default/synthesizer.pt"))
vocoder.load_model(project_name / Path("saved_models/default/vocoder.pt"))


# Drop the install/download logs so only the status line remains.
clear_output()
print('Voice cloning - Setup completed')

Voice cloning - Setup completed


In [None]:
#@title Pre-Processing Audio 
#@markdown < The choice of personality should go here >

#Fetch the Input audio file of Trump
# Downloads into sample_data/ relative to the current working directory.
# _upload_audio() reads /content/sample_data/Trump_WEF_2018-trimmed.mp3 --
# presumably the file fetched here; verify the Drive id resolves to that name.
!cd sample_data && gdown https://drive.google.com/uc?id=1i0WhVsQh-7ptZQ5TTUfmsOo_yhnAqu3_

# Sample rate passed to encoder.preprocess_wav() and record_audio().
SAMPLE_RATE = 22050
# Disabled form fields. _record_audio() looks up `record_seconds`, which is
# only defined if the second line below is re-enabled.
# record_or_upload = "Upload (.mp3 or .wav)" #@param ["Record", "Upload (.mp3 or .wav)"]
# record_seconds =   10#@param {type:"number", min:1, max:10, step:1}

# Speaker embedding shared between cells: written by _compute_embedding() and
# read by voice_cloening(). None means no reference audio has been processed.
embedding = None
def _compute_embedding(audio):
  #display(Audio(audio, rate=SAMPLE_RATE, autoplay=True))
  global embedding
  embedding = None
  embedding = encoder.embed_utterance(encoder.preprocess_wav(audio, SAMPLE_RATE))
def _record_audio(b):
  clear_output()
  audio = record_audio(record_seconds, sample_rate=SAMPLE_RATE)
  _compute_embedding(audio)
def _upload_audio(b):
  clear_output()
  audio = "/content/sample_data/Trump_WEF_2018-trimmed.mp3" #upload_audio(sample_rate=SAMPLE_RATE)
  _compute_embedding(audio)

# Disabled widget-based record/upload UI, kept for reference only.
# if record_or_upload == "Record":
#   button = widgets.Button(description="Record Your Voice")
#   button.on_click(_record_audio)
#   display(button)
# else:
  # button = widgets.Button(description="Upload Voice File")
  # button.on_click(_upload_audio)
# Compute the embedding right away from the sample recording. The "" stands in
# for the button event argument, which _upload_audio() does not use.
_upload_audio("")



In [None]:
# @title Generated Audio { run: "auto" }

from scipy.io.wavfile import write
  
def synthesize(embed, text):
  """Synthesize `text` for the speaker `embed` and save it as a 16-bit WAV.

  Reads the module-level `synthesizer` and `vocoder`, and writes the result to
  'voiceClone_output.wav' in the current working directory.

  Args:
    embed: Speaker embedding passed to synthesizer.synthesize_spectrograms().
    text: Text to speak.

  Returns:
    None. The output is the file written to disk.
  """
  print("Synthesizing new audio...")
  specs = synthesizer.synthesize_spectrograms([text], [embed])
  generated_wav = vocoder.infer_waveform(specs[0])
  # Append `sample_rate` zero samples, i.e. one second of trailing silence.
  generated_wav = np.pad(generated_wav, (0, synthesizer.sample_rate), mode="constant")
  clear_output()

  # Peak-normalise to the int16 range. An all-zero waveform has peak 0; dividing
  # by it would produce NaNs, so write plain silence in that case instead.
  peak = np.max(np.abs(generated_wav))
  if peak > 0:
    scaled_audio = np.int16(generated_wav / peak * 32767)
  else:
    scaled_audio = np.zeros(generated_wav.shape, dtype=np.int16)
  write('voiceClone_output.wav', synthesizer.sample_rate, scaled_audio)

def voice_cloening(text):
  """Synthesize `text` with the module-level speaker `embedding`.

  When no embedding is available (the global is None), only an error message
  is printed and nothing is synthesized.

  Args:
    text: Text forwarded to synthesize().
  """
  if embedding is not None:
    synthesize(embedding, text)
    return
  print("Error fetching the reference audio file. Check the link to gdrive file")
    


## 3. Lip Syncing

In [None]:
# @title Lip sync - Setup { run: "auto" }

# Clone Wav2Lip into the current working directory.
# NOTE(review): unlike the voice-cloning setup there is no exists() guard, so
# re-running this cell re-issues the clone and every download below.
# The commands mix the relative path Wav2Lip/ with the absolute path
# /content/Wav2Lip, so they assume the working directory is /content.
!git clone https://github.com/Rudrabha/Wav2Lip.git
# lip_syncing() passes checkpoints/wav2lip.pth to inference.py -- presumably
# the file downloaded here; verify the Drive id.
!cd /content/Wav2Lip/checkpoints/ && gdown https://drive.google.com/uc?id=1by1m-0RCx5v34G0ejXy9Zt6wNueNaDpW

!cd /content/Wav2Lip && pip install -r requirements.txt

# Saved as s3fd.pth inside Wav2Lip's face_detection package (presumably the
# face-detector weights that inference.py expects at that location).
!wget "https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth" -O "Wav2Lip/face_detection/detection/sfd/s3fd.pth"

#Fetch reference video of Trump
# lip_syncing() reads ../sample_data/Trim.mp4 -- presumably the file fetched here.
!cd /content/sample_data && gdown https://drive.google.com/uc?id=1KgJd4Jix3U7lr2BYymb5u8uGD0rjHzDW

# Drop the clone/install/download logs so only the status line remains.
clear_output()
print('Lip Syncing - Setup completed')

Lip Syncing - Setup completed


In [None]:
# @title Create the Lip Syncing { run: "auto" }
def lip_syncing():
  # Runs Wav2Lip's inference.py from inside Wav2Lip/, pairing the reference
  # video ../sample_data/Trim.mp4 with ../voiceClone_output.wav (the file that
  # synthesize() writes). Everything after the first " #" on the command line
  # is a shell comment (older audio paths) and is not executed.
  # Other cells read Wav2Lip/results/result_voice.mp4 afterwards -- presumably
  # the script's default output location; confirm against inference.py.
  !cd Wav2Lip && python inference.py --checkpoint_path checkpoints/wav2lip.pth --face "../sample_data/Trim.mp4" --audio "../voiceClone_output.wav" #"../trump_input.wav" #"../sample_data/input_audio.wav"

  # NOTE(review): the shell command's exit status is not checked, so the
  # success message below is printed even when inference fails.
  clear_output()
  print('The deepfake fake video is successfully generated.')

In [None]:
# @title The generated Fake video { run: "auto" }

from IPython.display import HTML
from base64 import b64encode
# Read the generated video and embed it in the page as a base64 data URL.
# The file must already exist, i.e. lip_syncing() has to have been run first.
# A `with` block closes the file handle; the previous bare open(...).read()
# left it open until garbage collection.
with open('/content/Wav2Lip/results/result_voice.mp4','rb') as video_file:
  mp4 = video_file.read()
data_url = "data:video/mp4;base64," + b64encode(mp4).decode()

# Last expression of the cell, so the HTML player is rendered as its output.
HTML("""
<video width=700 controls>
      <source src="%s" type="video/mp4">
</video>
""" % data_url)

In [None]:
# NOTE(review): unpinned install. The recorded output of this cell shows
# gradio 3.0.15 being downloaded, and the cells below use the
# "playable_video" output shortcut -- consider pinning that version if a
# newer release no longer accepts it.
!pip install gradio

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting gradio
  Downloading gradio-3.0.15-py3-none-any.whl (5.1 MB)
[K     |████████████████████████████████| 5.1 MB 4.4 MB/s 
[?25hCollecting uvicorn
  Downloading uvicorn-0.17.6-py3-none-any.whl (53 kB)
[K     |████████████████████████████████| 53 kB 1.8 MB/s 
Collecting orjson
  Downloading orjson-3.7.2-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (256 kB)
[K     |████████████████████████████████| 256 kB 45.3 MB/s 
[?25hCollecting paramiko
  Downloading paramiko-2.11.0-py2.py3-none-any.whl (212 kB)
[K     |████████████████████████████████| 212 kB 19.4 MB/s 
[?25hCollecting python-multipart
  Downloading python-multipart-0.0.5.tar.gz (32 kB)
Collecting pydub
  Downloading pydub-0.25.1-py2.py3-none-any.whl (32 kB)
Collecting ffmpy
  Downloading ffmpy-0.3.0.tar.gz (4.8 kB)
Collecting markdown-it-py[linkify,plugins]
  Downloading markdown_it_py-2.1.0-py3-none-any.wh

In [None]:
import gradio as gr
def deepfake(name):
  """Run the full pipeline: translate, clone the voice, lip-sync the video.

  Args:
    name: Text entered in the Gradio textbox; passed to translation_en_de().

  Returns:
    The path of the generated video file. A Gradio video output expects a file
    path from the wrapped function; the previous version returned the file's
    raw bytes (and left the file handle open), which the component cannot
    render.
  """
  final_text = translation_en_de(name)
  voice_cloening(final_text)

  lip_syncing()
  return '/content/Wav2Lip/results/result_voice.mp4'

# "text" and "playable_video" are Gradio string shortcuts selecting the input
# and output components for deepfake().
demo = gr.Interface(fn=deepfake, inputs="text", outputs="playable_video")
# NOTE(review): the recorded output of this cell reports that, in Colab,
# errors are only shown when `debug=True` is passed to launch().
demo.launch(inbrowser=True,show_error=True)

Colab notebook detected. To show errors in colab notebook, set `debug=True` in `launch()`
Running on public URL: https://52724.gradio.app

This share link expires in 72 hours. For free permanent hosting, check out Spaces (https://huggingface.co/spaces)


(<gradio.routes.App at 0x7fa32c95c290>,
 'http://127.0.0.1:7861/',
 'https://52724.gradio.app')

Exception in callback None(<Task finishe...> result=None>)
handle: <Handle>
Traceback (most recent call last):
  File "/usr/lib/python3.7/asyncio/events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
TypeError: 'NoneType' object is not callable


In [None]:
import gradio as gr
def deepfake(name):
  """Return the already-generated video without re-running the pipeline.

  This definition rebinds the name `deepfake` in the notebook namespace, so it
  replaces any earlier function of the same name once this cell has run.

  Args:
    name: Text entered in the Gradio textbox; ignored.

  Returns:
    The path of the existing result video. The wrapped function must return
    the output *value* (a file path); the previous version returned a
    `gr.Video` component instance, which the output component cannot process.
  """
  return '/content/Wav2Lip/results/result_voice.mp4'

# Rebinds `demo` to a new interface around the deepfake() defined in this
# cell; the recorded outputs show it being served on a different local port
# than the interface launched by the previous cell.
demo = gr.Interface(fn=deepfake, inputs="text", outputs="playable_video")
# NOTE(review): the recorded output of this cell reports that, in Colab,
# errors are only shown when `debug=True` is passed to launch().
demo.launch(inbrowser=True,show_error=True)

Colab notebook detected. To show errors in colab notebook, set `debug=True` in `launch()`
Running on public URL: https://44195.gradio.app

This share link expires in 72 hours. For free permanent hosting, check out Spaces (https://huggingface.co/spaces)


(<gradio.routes.App at 0x7fa32cb50e50>,
 'http://127.0.0.1:7865/',
 'https://44195.gradio.app')

Exception in callback None(<Task finishe...> result=None>)
handle: <Handle>
Traceback (most recent call last):
  File "/usr/lib/python3.7/asyncio/events.py", line 88, in _run
    self._context.run(self._callback, *self._args)
TypeError: 'NoneType' object is not callable
