# CMPE490 - Text to 3D animation using available technology and services

## Text to Viseme 

### Amazon Polly (requires AWS credentials and a default region to be configured already)

In [None]:
# Install boto3, a Python library for AWS, if it is not already installed
!pip install boto3


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.0[0m[39;49m -> [0m[32;49m25.0.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


A small test to check that Polly works, using MP3 output.

In [None]:
import os
from contextlib import closing

import boto3

# Initialize Polly client
polly = boto3.client('polly')

# Text to synthesize
text = "Hello, this is a test of Amazon Polly's speech output by Batuhan Yildiz.!"

# Request speech synthesis
response = polly.synthesize_speech(
    Text=text,
    OutputFormat='mp3',
    VoiceId='Joanna'  # Change to other voices like 'Matthew', 'Brian', etc.
)

# Save the audio file
audio_file = os.path.join("src", "speech.mp3")

# Create the output folder on first run so open() does not fail on a fresh checkout
os.makedirs(os.path.dirname(audio_file), exist_ok=True)

# closing() releases the underlying HTTP stream once the audio has been read
with closing(response['AudioStream']) as stream, open(audio_file, "wb") as file:
    file.write(stream.read())

print(f"Speech saved as {audio_file}")

Speech saved as speech.mp3


In [None]:
from IPython.display import Audio

# Location of the MP3 written by the synthesis cell
audio_file = os.path.join("src", "speech.mp3")

# Leaving the Audio object as the cell's last expression renders an inline player
Audio(audio_file)


Check Polly with JSON output (viseme speech marks)

In [None]:
import json
from contextlib import closing

# Initialize Polly client
polly = boto3.client('polly')

# The text to synthesize
text = "Hello, this is a test for Amazon Polly's viseme output."

# Request viseme speech marks from Polly
response = polly.synthesize_speech(
    Text=text,
    OutputFormat='json',         # Output as JSON when requesting speech marks
    VoiceId='Joanna',            # Choose the desired voice
    SpeechMarkTypes=['viseme']   # Request only viseme marks; you can also include 'word', 'sentence', etc.
)

# Save the viseme marks to a file
speech_marks_file = os.path.join("src", "viseme_marks.json")

# Create the output folder on first run so open() does not fail on a fresh checkout
os.makedirs(os.path.dirname(speech_marks_file), exist_ok=True)

# closing() releases the underlying HTTP stream once the marks have been read
with closing(response['AudioStream']) as stream, open(speech_marks_file, "wb") as file:
    file.write(stream.read())

print(f"Viseme speech marks saved as {speech_marks_file}")

# (Optional) Read and print each JSON object from the file.
# The file holds one JSON object per line, so it is parsed line by line.
print("\nParsed viseme marks:")
# Explicit encoding keeps the read-back independent of the platform's default locale
with open(speech_marks_file, "r", encoding="utf-8") as file:
    for line in file:
        # Skip blank lines instead of reporting them as JSON errors
        if not line.strip():
            continue
        try:
            mark = json.loads(line)
            print(mark)
        except json.JSONDecodeError as e:
            print("Error parsing JSON:", e)


Viseme speech marks saved as viseme_marks.json

Parsed viseme marks:
{'time': 6, 'type': 'viseme', 'value': 'k'}
{'time': 49, 'type': 'viseme', 'value': '@'}
{'time': 78, 'type': 'viseme', 'value': 't'}
{'time': 178, 'type': 'viseme', 'value': 'o'}
{'time': 513, 'type': 'viseme', 'value': 'sil'}
{'time': 700, 'type': 'viseme', 'value': 'T'}
{'time': 759, 'type': 'viseme', 'value': 'i'}
{'time': 811, 'type': 'viseme', 'value': 's'}
{'time': 898, 'type': 'viseme', 'value': 'i'}
{'time': 940, 'type': 'viseme', 'value': 's'}
{'time': 1017, 'type': 'viseme', 'value': '@'}
{'time': 1058, 'type': 'viseme', 'value': 't'}
{'time': 1189, 'type': 'viseme', 'value': 'E'}
{'time': 1280, 'type': 'viseme', 'value': 's'}
{'time': 1353, 'type': 'viseme', 'value': 't'}
{'time': 1382, 'type': 'viseme', 'value': 'f'}
{'time': 1421, 'type': 'viseme', 'value': 'O'}
{'time': 1478, 'type': 'viseme', 'value': 'r'}
{'time': 1555, 'type': 'viseme', 'value': 'a'}
{'time': 1686, 'type': 'viseme', 'value': 'p'}
{'t

### Azure TTS

### General Method to get visemes

In [None]:
import boto3

def get_viseme(text, service_name="polly", file_name = "untitled"):
    """
    Our general method to get visemes from a text and save them as a JSON file.

    Input:
    text: the text to get visemes from
    service_name (optional): the name of the service to use (default: polly);
        "polly" is the only service implemented so far
    file_name (optional): base name (without extension) of the file to save the viseme marks

    Output:
    speech_marks_file: path of the saved file, i.e. "src/<file_name>.json"

    Raises:
    ValueError: if service_name is not a supported service
    """
    # Function-scope imports keep this cell runnable on its own
    import os
    from contextlib import closing

    # Validate before doing any I/O: previously an unknown service opened (and
    # truncated) the output file and then crashed on the undefined `response`.
    if service_name != "polly":
        raise ValueError(
            f"Unsupported viseme service: {service_name!r} (supported: 'polly')"
        )

    # Initialize Polly client
    polly = boto3.client('polly')

    # Request viseme speech marks from Polly
    response = polly.synthesize_speech(
        Text=text,
        OutputFormat='json',         # Output as JSON when requesting speech marks
        VoiceId='Joanna',            # Choose the desired voice
        SpeechMarkTypes=['viseme']   # Request only viseme marks; you can also include 'word', 'sentence', etc.
    )

    # Save the viseme marks to a file
    speech_marks_file = f"src/{file_name}.json"

    # Create the output folder on first use so open() does not fail
    os.makedirs(os.path.dirname(speech_marks_file), exist_ok=True)

    # closing() releases the underlying HTTP stream once the marks have been read
    with closing(response['AudioStream']) as stream, open(speech_marks_file, "wb") as file:
        file.write(stream.read())

    print(f"Viseme speech marks saved as {speech_marks_file}")

    return speech_marks_file

## 3D Model 

## Animation using Visemes