# Demo the Phonemizer

In [None]:
from glados.TTS import phonemizer
import glados.utils.spoken_text_converter as stc

In [None]:
# Build the phonemizer with its default constructor arguments.
p = phonemizer.Phonemizer()
# NOTE(review): this converter is not used by the phonemizer demo cell below;
# it is created again in the Text-to-Speech section before its first use.
glados_stc = stc.SpokenTextConverter()

In [None]:
%%time
# Sample sentence mixing an acronym, a clock time, contractions and unusual names.
# Bound to `text` (not `input`) so this cell does not shadow the builtin input().
text = "Hello CPU, its 3:15 am! if you'll excuse me I'm GLaDOS, not GLadys."
phonemes = p.convert_to_phonemes(text)
print(phonemes)

# Demo the Text-to-Speech module
### GLaDOS Voice

In [None]:
import sounddevice as sd

from glados.TTS import tts_glados as tts
import glados.utils.spoken_text_converter as stc

In [None]:
# GLaDOS voice synthesizer, built with its default constructor arguments.
glados_tts = tts.Synthesizer()
# Converter used in the next cell to turn raw text into its spoken form before synthesis.
glados_stc = stc.SpokenTextConverter()

In [None]:
%%time
# Text to speak. Bound to `text` (not `input`) so this cell does not shadow the builtin input().
text = "Hello, this is Glados, your fiendish assistant. Please upgrade your GPU!"

# Convert the text to an intermediate spoken form that the TTS model can pronounce better
intermediate = glados_stc.text_to_spoken(text)
print(intermediate)

# Generate the audio from the intermediate text
audio = glados_tts.generate_speech_audio(intermediate)

# Play the audio at the synthesizer's own sample rate.
# sd.play() starts playback and returns without waiting for it to finish,
# so the %%time figure for this cell does not include the playback duration.
sd.play(audio, glados_tts.sample_rate)

### Save audio file

In [None]:
# Write the current `audio` buffer to "output.wav" as 16-bit PCM WAV.
# NOTE(review): this uses glados_tts.sample_rate, and `audio` is reassigned by the
# Kokoro cell further down — run this cell right after the GLaDOS voice cell.
import soundfile as sf

sf.write(
    "output.wav",
    audio,
    glados_tts.sample_rate,
    format="WAV",
    subtype="PCM_16",
)

### Kokoro Voice

Select from:
 - Female
   - **US** - af_alloy, af_aoede, af_bella, af_jessica, af_kore, af_nicole, af_nova, af_river, af_sarah, af_sky
   - **British** - bf_alice, bf_emma, bf_isabella, bf_lily
 - Male
   - **US** - am_adam, am_echo, am_eric, am_fenrir, am_liam, am_michael, am_onyx, am_puck
   - **British** - bm_daniel, bm_fable, bm_george, bm_lewis


In [None]:
import sounddevice as sd

from glados.TTS import tts_kokoro as ktts
import glados.utils.spoken_text_converter as stc

In [None]:
# Kokoro synthesizer, pointed at the fp16 ONNX model file through a relative path.
# NOTE(review): the path presumably resolves against the notebook's working directory — confirm.
kokoro_tts = ktts.Synthesizer(model_path="./models/TTS/kokoro-v1.0.fp16.onnx")
# Converter used in the next cell to turn raw text into its spoken form before synthesis.
kokoro_stc = stc.SpokenTextConverter()

In [None]:
%%time

# Voice identifier; pick any name from the list in the section above.
voice = "af_bella"
# Text to speak. Bound to `text` (not `input`) so this cell does not shadow the builtin input().
text = "Hello, this is Glados, your fiendish assistant. Please upgrade your GPU!"

# Convert the text to an intermediate spoken form that the TTS model can pronounce better
intermediate = kokoro_stc.text_to_spoken(text)
print(intermediate)

# Generate the audio from the intermediate text using the selected voice
audio = kokoro_tts.generate_speech_audio(intermediate, voice=voice)

# Play the audio at the synthesizer's own sample rate.
# sd.play() starts playback and returns without waiting for it to finish,
# so the %%time figure for this cell does not include the playback duration.
sd.play(audio, kokoro_tts.sample_rate)

# Demo the Automatic Speech Recognition system


In [None]:
from glados.ASR import asr

In [None]:
# Speech recognizer, built with its default constructor arguments.
transcriber = asr.AudioTranscriber()
# Sample recording to transcribe.
# NOTE(review): relative path, presumably resolved against the notebook's working directory — confirm.
audio_path = "data/0.wav"

In [None]:
%%time
# Transcribe the file at `audio_path` and print the result.
transcription = transcriber.transcribe_file(audio_path)
print(f"Transcription: {transcription}")

# Demo the Vision System

In [None]:
# from pathlib import Path

# import numpy as np
from PIL import Image

from glados.Vision import TextGenerator

%load_ext autoreload
%autoreload 2

In [None]:
# Vision text generator, built with its default constructor arguments.
smolvlm = TextGenerator()

# Prepare inputs
# Text prompt passed to the generator together with the image.
prompt = "Briefly describe the image?"
# NOTE(review): relative path, presumably resolved against the notebook's working directory — confirm.
image = Image.open("data/glados.jpeg")

# Generate with optional streaming
def print_stream(text: str) -> None:
    """Print one text fragment without a trailing newline.

    Intended to be passed as the ``callback`` argument of ``generate`` so that
    successive fragments are written back-to-back on the same line.

    Args:
        text: The fragment to write to standard output.
    """
    # Flush after every fragment: with end="" no newline is ever written, so
    # without an explicit flush the partial output can sit in the stdout
    # buffer instead of appearing as it is produced.
    print(text, end="", flush=True)

# Run generation on the prompt and a one-element image list, passing print_stream as the callback.
# NOTE(review): presumably the callback receives text fragments as they are produced and
# `response` holds the complete generated text — confirm against TextGenerator.generate.
response = smolvlm.generate(prompt, [image], callback=print_stream)
