<a href="https://colab.research.google.com/github/hazael00/AndroidStudio/blob/master/Copia_de_RealTimeVoiceCloning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Real-Time Voice Cloning

This is a colab demo notebook using the open source project [CorentinJ/Real-Time-Voice-Cloning](https://github.com/CorentinJ/Real-Time-Voice-Cloning)
to clone a voice.

For other deep-learning Colab notebooks, visit [tugstugi/dl-colab-notebooks](https://github.com/tugstugi/dl-colab-notebooks).


Original issue: https://github.com/tugstugi/dl-colab-notebooks/issues/18

## Setup CorentinJ/Real-Time-Voice-Cloning

Install the required packages into the notebook's Python environment. Note that the cells below call `pip` directly in the running Colab runtime; they do not create a separate virtual environment.

Install the base packages:

In [7]:
# Install NumPy into the runtime and import it as `np`; the synthesis cell
# further down uses `np.pad`.
# NOTE(review): a later cell pins numpy==1.21. Because NumPy is already
# imported here, the kernel probably has to be restarted for that pin to take
# effect — confirm.
!pip install numpy
import numpy as np

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [11]:
# Upgrade a batch of scientific/ML packages to their latest releases.
# NOTE(review): presumably this resolves version conflicts in the Colab image;
# confirm which of these packages the notebook actually needs.
!pip install --upgrade xarray pandas astropy arviz sqlalchemy pydantic jaxlib jax chex bokeh librosa
# Pin NumPy and typing-extensions to fixed versions after the upgrade above
# (the upgrade may have pulled in newer releases of both).
!pip install numpy==1.21
!pip install typing-extensions==4.2.0

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting numpy>=1.21
  Using cached numpy-1.24.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.3 MB)
Collecting typing-extensions>=4.1.0
  Using cached typing_extensions-4.5.0-py3-none-any.whl (27 kB)
Collecting numpy>=1.21
  Using cached numpy-1.23.5-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (17.1 MB)
Installing collected packages: typing-extensions, numpy
  Attempting uninstall: typing-extensions
    Found existing installation: typing-extensions 3.7.4.3
    Uninstalling typing-extensions-3.7.4.3:
      Successfully uninstalled typing-extensions-3.7.4.3
  Attempting uninstall: numpy
    Found existing installation: numpy 1.19.5
    Uninstalling numpy-1.19.5:
      Successfully uninstalled numpy-1.19.5
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the followin

In [12]:
# !pip install -q tensorflow==2.6.0 tensorflow-io

import os
import sys
from os.path import exists, join, basename, splitext
from pathlib import Path

import gdown
import ipywidgets as widgets
import librosa
from IPython.display import Audio, clear_output, display

# Clone the project
git_repo_url = 'https://github.com/CorentinJ/Real-Time-Voice-Cloning.git'
# Directory git clones into: the last URL component with the ".git" suffix removed.
project_name = splitext(basename(git_repo_url))[0]
# The checkout directory doubles as the "already set up" marker: when it exists,
# the clone AND every install step below are skipped.
if not exists(project_name):
  !git clone -q --recursive {git_repo_url}
  # Install dependencies
  !cd {project_name} && pip install -q -r requirements.txt
  # NOTE(review): gdown is already imported at the top of this cell, so this
  # upgrade presumably only takes effect after a kernel restart — confirm.
  !pip install -q --upgrade gdown
  # System package providing the PortAudio shared library.
  !apt-get install -qq libportaudio2
  # Colab helper utilities from tugstugi/dl-colab-notebooks (colab_utils archive).
  !pip install -q https://github.com/tugstugi/dl-colab-notebooks/archive/colab_utils.zip

# Download pretrained models
# All three checkpoints live in <project>/saved_models/default. Files that are
# already present are not downloaded again.
project_dir = Path(project_name)
if not project_dir.is_dir():
  # Do not create the project directory here: its existence is what the clone
  # step uses to decide whether the repository still has to be cloned.
  raise FileNotFoundError(
      f"Project directory '{project_dir}' not found; the git clone step must succeed first.")

models_dir = project_dir / "saved_models" / "default"
# The download target directory must exist before gdown can write into it.
models_dir.mkdir(parents=True, exist_ok=True)


def _download_model(drive_id, target):
  """Download one Google Drive file to `target` unless it already exists.

  Args:
    drive_id: Google Drive file id of the checkpoint.
    target: `pathlib.Path` the checkpoint is stored at.

  Raises:
    FileNotFoundError: if the download did not produce `target` (for example
      because of a Drive quota or permission error).
  """
  if target.exists():
    return
  result = gdown.download(f'https://drive.google.com/uc?id={drive_id}', str(target), quiet=False)
  # gdown may report failure by returning None instead of raising, so check
  # both the return value and the file itself.
  if result is None or not target.exists():
    raise FileNotFoundError(
        f"Download of '{target.name}' (Google Drive id {drive_id}) failed; "
        f"'{target}' was not created.")


encoder_path = models_dir / "encoder.pt"
_download_model('1q8mEGwCkFy23KZsinbuvdKAQLqNKbYf1', encoder_path)

synthesizer_path = models_dir / "synthesizer.pt"
_download_model('1EqFMIbvxffxtjiVrtykroF6_mUh-5Z3s', synthesizer_path)

vocoder_path = models_dir / "vocoder.pt"
_download_model('1cf2NO6FtI0jDuy8AV3Xgn6leO6dHjIgu', vocoder_path)

# Initialize the voice cloning models
# Make the cloned repository importable. Guarded so that re-running this cell
# does not keep appending duplicate entries to sys.path.
if project_name not in sys.path:
  sys.path.append(project_name)

# These imports resolve against the cloned repository, so they must come after
# the sys.path change above.
from synthesizer.inference import Synthesizer
from encoder import inference as encoder
from vocoder import inference as vocoder

# Fail early with a message naming the missing checkpoint instead of an opaque
# error from deep inside a model loader.
for _model_path in (encoder_path, synthesizer_path, vocoder_path):
  if not _model_path.exists():
    raise FileNotFoundError(
        f"Pretrained model file '{_model_path}' is missing; re-run the download step.")

encoder.load_model(str(encoder_path))
synthesizer = Synthesizer(str(synthesizer_path))
vocoder.load_model(str(vocoder_path))

FileNotFoundError: ignored

# Mount Google Drive

In [13]:
# Mount Google Drive at /content/drive inside the Colab runtime.
# NOTE(review): no cell visible in this notebook reads from the mounted drive —
# confirm this step is still needed.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
#@title Record or Upload
#@markdown * Either record audio from microphone or upload audio from file (.mp3 or .wav)

# Audio helpers from the colab_utils package. Imported here rather than in the
# setup cell's import block because that package is only installed partway
# through the setup cell.
from dl_colab_notebooks.audio import record_audio, upload_audio

SAMPLE_RATE = 22050
record_or_upload = "Record" #@param ["Record", "Upload (.mp3 or .wav)"]
record_seconds =   10#@param {type:"number", min:1, max:10, step:1}

# Speaker embedding shared with the synthesis cell; None until a voice sample
# has been processed.
embedding = None


def _compute_embedding(audio):
  """Play back `audio` and store its speaker embedding in the global `embedding`.

  Args:
    audio: waveform returned by `record_audio` / `upload_audio` at SAMPLE_RATE.
  """
  global embedding
  # Reset first so a failure below does not leave a previous embedding in place.
  embedding = None
  display(Audio(audio, rate=SAMPLE_RATE, autoplay=True))
  embedding = encoder.embed_utterance(encoder.preprocess_wav(audio, SAMPLE_RATE))


def _record_audio(b):
  """Button callback: record `record_seconds` of microphone audio and embed it.

  Args:
    b: the clicked widget (unused).
  """
  clear_output()
  audio = record_audio(record_seconds, sample_rate=SAMPLE_RATE)
  _compute_embedding(audio)


def _upload_audio(b):
  """Ask the user for an audio file upload and embed it.

  Args:
    b: unused; kept so the function has the same shape as a button callback.
  """
  clear_output()
  audio = upload_audio(sample_rate=SAMPLE_RATE)
  _compute_embedding(audio)


if record_or_upload == "Record":
  button = widgets.Button(description="Record Your Voice")
  button.on_click(_record_audio)
  display(button)
else:
  # The upload dialog is opened immediately; no button is shown for this mode.
  _upload_audio("")

In [None]:
#@title Synthesize a text { run: "auto" }
text = "One of the two people who tested positive for the novel coronavirus in the United Kingdom is a student at the University of York in northern England." #@param {type:"string"}


def synthesize(embed, text):
  """Synthesize `text` with the voice described by `embed` and play the result.

  Args:
    embed: speaker embedding produced by the encoder.
    text: sentence to speak.
  """
  print("Synthesizing new audio...")
  specs = synthesizer.synthesize_spectrograms([text], [embed])
  generated_wav = vocoder.infer_waveform(specs[0])
  # Append `sample_rate` zero samples, i.e. one second of trailing silence.
  generated_wav = np.pad(generated_wav, (0, synthesizer.sample_rate), mode="constant")
  clear_output()
  display(Audio(generated_wav, rate=synthesizer.sample_rate, autoplay=True))


# Look the embedding up through globals() so that running this cell before the
# record/upload cell shows the hint below instead of raising NameError.
if globals().get("embedding") is None:
  print("first record a voice or upload a voice file!")
else:
  synthesize(embedding, text)