**This was trained using WSL Ubuntu 22.04.3 LTS**

In [None]:
%git clone https://github.com/myshell-ai/MeloTTS.git
%pip install -e MeloTTS/
%pip install beautifulsoup4
import sys
sys.path.append("MeloTTS")

In [1]:
## GLaDOS ##
# A clip is skipped when any of these substrings occurs in its URL.
blocklist = [
    "potato",
    "_ding_",
    "00_part1_entry-6",
    "_escape_",
]
# Wiki pages that are scraped for voice-line links.
sources = [
    "https://theportalwiki.com/wiki/GLaDOS_voice_lines_(Portal)",
    "https://theportalwiki.com/wiki/GLaDOS_voice_lines_(Portal_2)",
    "https://theportalwiki.com/wiki/GLaDOS_voice_lines_(Other)",
]
# Directory that receives metadata.list and the training config.json.
base_dir = "glados_out"

## Wheatley ##
# blocklist = ["jailbreakdooropens08", "jailbreakdooropens05", "jailbreakdooropens07", "jailbreakdooropens01", "sp_a1_intro7_pickupnags07", "sp_a1_wakeup_panic01", "sp_a1_wakeup_hacking09", "sp_a1_wakeup_hacking12", "sp_a1_wakeup_hacking10", "sp_a1_wakeup_hacking12", "sp_trust_flingalt08", "nanobotow03", "sp_a2_wheatley_ows", "bw_a4_2nd_first_test_solve_nags", "bw_sp_a2_core_actually05", "bw_sp_a2_core_actually01", "bw_sp_a2_core_potato04", "bw_sp_a4_tb_trust_drop_solve05", "bw_sp_a4_tb_trust_drop_impatient02", "bw_sp_a4_tb_wall_button_solve09", "bw_sp_a4_tb_polarity_solve04", "bw_a4_finale04_whitegel_break01", "bw_finale04_portal_opens_short05", "bw_finale04_portal_opens_short06", "bw_finale04_portal_opens_short07", "bw_finale04_portal_opens_short08", "bw_a4_finale04_wakeupa09", "bw_finale04_portal_opens17", "bw_finale04_portal_opens15", "bw_finale04_portal_opens13", "bw_finale04_portal_opens03", "openingwallhitone01", "demospherepowerup02"]
# sources = ["https://theportalwiki.com/wiki/wheatley_voice_lines"]
# base_dir="wheatley_out"

In [None]:
import requests
from multiprocessing import cpu_count
from multiprocessing.pool import ThreadPool
import shutil
import os
from bs4 import BeautifulSoup
import soundfile as sf
import string
import json
import re
import num2words
from tqdm.notebook import tqdm

class bcolors:
    """ANSI escape sequences used to colour and style terminal output."""

    # Colour codes.
    HEADER = "\033[95m"
    OKBLUE = "\033[94m"
    OKCYAN = "\033[96m"
    OKGREEN = "\033[92m"
    WARNING = "\033[93m"
    FAIL = "\033[91m"

    # Text attributes.
    BOLD = "\033[1m"
    UNDERLINE = "\033[4m"

    # Emitted after a styled fragment to switch styling off again.
    ENDC = "\033[0m"

# Directory the voice clips and the manifest are written into.
audio_dir = "audio"
# Number of worker threads used for the concurrent downloads.
download_threads = 64

# temp_path = "temp_audio"
# sampling_rate = 22050
   
def prep(args, overwrite=True):
    """Create a fresh ``audio_dir`` and download every clip into it.

    Args:
        args: Iterable of ``(url, filename)`` pairs, handed unchanged to
            ``download_parallel``.
        overwrite: When true (the default) an existing ``audio_dir`` is
            deleted and all clips are downloaded again. When false and the
            directory already exists, nothing is downloaded.
    """
    already_exists = os.path.exists(audio_dir)

    if already_exists and not overwrite:
        print("Data already downloaded")
        return

    if already_exists:
        print("Deleting previously downloaded audio")
        shutil.rmtree(audio_dir)

        # ``temp_path`` is an optional module-level setting (its definition is
        # commented out by default). Reading it directly raised NameError on
        # every re-run, so look it up defensively and only clean it up when
        # it has actually been configured.
        stale_temp = globals().get("temp_path")
        if stale_temp and os.path.exists(stale_temp):
            shutil.rmtree(stale_temp)

    os.mkdir(audio_dir)
    download_parallel(args)

def remove_punctuation(str):
    """Return ``str`` with every character of ``string.punctuation`` removed."""
    # Mapping a code point to None makes translate() drop that character.
    drop_table = dict.fromkeys(map(ord, string.punctuation))
    return str.translate(drop_table)
    
def audio_duration(fn):
    """Return the duration of the audio file ``fn`` in seconds.

    Args:
        fn: Path of an audio file that ``soundfile`` can open.

    Returns:
        Frame count divided by sample rate.

    Raises:
        Whatever ``soundfile.SoundFile`` raises when the file is missing or
        is not readable audio.
    """
    # Use the context manager so the handle is closed again; this function is
    # called once per clip and previously left every file open.
    with sf.SoundFile(fn) as f:
        return f.frames / f.samplerate

def download_file(args):
    """Download one clip into ``audio_dir``.

    Args:
        args: Sequence whose first item is the source URL and whose second
            item is the file name to store the clip under.

    Returns:
        ``(filename, ok)`` where ``ok`` is True only if the server answered
        with HTTP 200 and the body was written to disk.
    """
    url, filename = args[0], args[1]

    try:
        # A timeout keeps a stalled connection from blocking a worker forever.
        response = requests.get(url, allow_redirects=False, timeout=30)

        # Redirects are not followed, so anything other than 200 carries no
        # audio; saving such a body would leave a corrupt .wav behind.
        if response.status_code != 200:
            return filename, False

        with open(os.path.join(audio_dir, filename), "wb") as out:
            out.write(response.content)
    except (requests.RequestException, OSError):
        # Network failure or unwritable target: report it, keep the batch going.
        return filename, False

    return filename, True

def download_parallel(args):
    """Download all clips concurrently and print one status line per clip.

    Args:
        args: Iterable of ``(url, filename)`` pairs; each pair is passed to
            ``download_file`` on one of ``download_threads`` worker threads.
    """
    # Leaving the ``with`` block terminates the pool, so the results have to
    # be consumed inside it. Previously the pool was never shut down.
    with ThreadPool(download_threads) as pool:
        for filename, ok in pool.imap_unordered(download_file, args):
            if ok:
                print(f"{bcolors.OKGREEN}[\u2713] {bcolors.ENDC}{filename}")
            else:
                print(f"{bcolors.FAIL}[\u2715] {bcolors.ENDC}{filename}")

def _find_voice_lines(page_html, seen_urls):
    """Yield ``(url, filename, text)`` for every usable clip linked from a page.

    A link is used when its href contains both ``https:`` and ``.wav``, it
    sits inside an ``<li>`` that has an ``<i>`` element (the first one holds
    the transcript), the transcript contains none of ``[``, ``]`` or ``$``,
    the URL has not been yielded before, and no ``blocklist`` entry occurs in
    the URL. ``seen_urls`` is updated in place so duplicates are also
    rejected across pages.
    """
    # Non-ASCII characters are dropped from the page before parsing.
    soup = BeautifulSoup(page_html.encode('utf-8').decode('ascii', 'ignore'), 'html.parser')
    for link_item in soup.find_all('a'):
        url = link_item.get("href", None)
        if not url or "https:" not in url or ".wav" not in url:
            continue

        # A .wav link outside any list item has no transcript to pair with;
        # skip it instead of failing on a None parent.
        list_item = link_item.find_parent("li")
        if list_item is None:
            continue
        ital_item = list_item.find_all('i')
        if not ital_item:
            continue

        text = ital_item[0].text.replace('"', '')
        if "[" in text or "]" in text or "$" in text:
            continue
        if url in seen_urls or any(blocked in url for blocked in blocklist):
            continue

        seen_urls.add(url)
        yield url, url[url.rindex("/") + 1:], text.replace('*', '')


def _write_manifest(entries):
    """Write ``manifest.json`` into ``audio_dir`` and return the total seconds.

    One JSON object per line with ``audio_filepath``, ``text`` (digits spelled
    out, lower-cased) and ``duration``. Clips whose duration cannot be read
    are reported and left out of the manifest.
    """
    total_audio_time = 0
    # ``with`` guarantees the manifest is flushed and closed even on an error.
    with open(os.path.join(audio_dir, "manifest.json"), 'w') as out_file:
        print("Writing manifest file")
        for _url, filename, text in entries:
            item = {}
            item["audio_filepath"] = os.path.join(audio_dir, filename)
            item["text"] = re.sub(r"(\d+)", lambda x: num2words.num2words(int(x.group(0))), text).lower()
            try:
                duration = audio_duration(os.path.join(audio_dir, filename))
            except Exception:
                # Deliberately broad: a missing or unreadable download must
                # not abort the whole manifest.
                print("Error getting duration for " + filename)
                continue
            item["duration"] = duration
            total_audio_time = total_audio_time + duration
            out_file.write(json.dumps(item, ensure_ascii=True, sort_keys=True) + "\n")
    return total_audio_time


def main():
    """Scrape the voice-line pages, download the clips and write the manifest.

    Reads the module-level ``sources`` and ``blocklist``, downloads every
    selected clip into ``audio_dir`` (replacing any previous download) and
    prints the total audio length in minutes.
    """
    entries = []
    seen_urls = set()  # O(1) duplicate check shared across all source pages

    for source in sources:
        r = requests.get(source, allow_redirects=False)
        entries.extend(_find_voice_lines(r.text, seen_urls))

    print("Found " + str(len(entries)) + " urls")

    prep([(url, filename) for url, filename, _text in entries])

    total_audio_time = _write_manifest(entries)
    print("\n" + str(total_audio_time / 60.0) + " min\n")
main()

In [None]:
import nltk
# Download the NLTK resource 'averaged_perceptron_tagger_eng'.
# NOTE(review): presumably required by MeloTTS's English text preprocessing — confirm.
nltk.download('averaged_perceptron_tagger_eng')

In [None]:
# Speaker label and language tag written into every metadata row.
speaker_name = "gladosV2"
language_code = "EN"

# NOTE(review): main() writes its manifest under audio_dir ("audio"); nothing
# visible in this notebook creates normalized_audio/ — presumably a separate
# normalisation step produces it. Confirm before running.
manifest_file = "normalized_audio/manifest.json"

# The manifest holds one JSON object per line.
with open(manifest_file, 'r') as manifest:
    file_data = [json.loads(entry) for entry in manifest]

print("Writing metadata.list...")
os.makedirs(base_dir, exist_ok=True)
# Row format: audio path | speaker | language | transcript
with open(os.path.join(base_dir, "metadata.list"), 'w') as metadata:
    metadata.writelines(
        f"{record['audio_filepath']}|{speaker_name}|{language_code}|{record['text']}\n"
        for record in file_data
    )

In [None]:
import shutil

# Copy MeloTTS's stock config into base_dir; that copy is the one passed to
# the preprocessing and training commands.
shutil.copy("MeloTTS/melo/configs/config.json", f"{base_dir}/config.json")

In [None]:
# Run MeloTTS's text preprocessing on metadata.list using the config in base_dir.
!python MeloTTS/melo/preprocess_text.py --metadata {base_dir}/metadata.list --config_path {base_dir}/config.json

In [None]:
# Start training with the config in base_dir.
# NOTE(review): the trailing 1 is presumably the number of GPUs — confirm against train.sh.
!bash MeloTTS/melo/train.sh {base_dir}/config.json 1

In [None]:
# Synthesise a test sentence with a trained checkpoint.
# NOTE(review): the checkpoint path and output directory are hard-coded to
# "glados_out" and step 13000, while the preprocessing and training cells use
# {base_dir} — adjust both when training a different voice or checkpoint.
!python MeloTTS/melo/infer.py --text "This is version 2. Hello and, again, welcome to the Aperture Science computer-aided enrichment center. You will be testing with a partner. Please wave to your partner. And say 'hello partner'" -m "logs/glados_out/G_13000.pth" -o "glados_out"

In [None]:
import pygame
# Initialise pygame's mixer once so pygame.mixer.music can load and play files.
pygame.mixer.init()

def play_audio(file):
    """Play an audio file through pygame's mixer and block until it ends.

    Args:
        file: Whatever ``pygame.mixer.music.load`` accepts (here, a file path).
    """
    pygame.mixer.music.load(file)
    pygame.mixer.music.play()

    # One Clock is enough for the whole wait. tick(10) caps the loop at ten
    # iterations per second, i.e. playback is polled roughly every 100 ms.
    clock = pygame.time.Clock()
    while pygame.mixer.music.get_busy():
        clock.tick(10)

In [None]:
from melo.api import TTS


# Load the trained checkpoint for inference.
# NOTE(review): config and checkpoint paths are hard-coded to "glados_out" and
# step 13000 rather than derived from base_dir — confirm they match the run
# you want to use.
model = TTS(language="EN", config_path="glados_out/config.json", ckpt_path="logs/glados_out/G_13000.pth")

def tts_file(text: str, path: str):
    """Synthesise ``text`` with the loaded ``model`` and write it to ``path``."""
    # NOTE(review): second positional argument of tts_to_file, taken to be the
    # speaker index — confirm against the MeloTTS API.
    speaker_id = 0
    model.tts_to_file(text, speaker_id, path)
    
def tts(text: str):
    """Speak ``text``: synthesise it into ``temp.wav`` and play that file."""
    # The same scratch file is overwritten on every call.
    scratch_wav = "temp.wav"
    tts_file(text, scratch_wav)
    play_audio(scratch_wav)
    


In [None]:
# tts("Hello, and, again, welcome to the Aperture Science computer-aided enrichment center. You will be testing with a partner. Please wave to your partner. And say 'hello partner'")

# something that an assistant would say
# tts("Good morning sir, I'm GLaDOS, your personal assistant. How can I help you today?")

# Synthesise and play a sample line; blocks until playback has finished.
tts("Oh, it's you. It's been a long time. How have you been? I've been really busy being dead. You know")