## Imports

In [1]:
import re
import os
import json
from tqdm import tqdm
from bs4 import BeautifulSoup

import nest_asyncio
nest_asyncio.apply()

from Story.get_board import Board
from Audio.get_voices import Voices
from Audio.gen_audio import generate_audio, process_output

In [2]:
import config
import settings

# Credentials are read from the local `config` module rather than hard-coded here.
# NOTE(review): neither credential is referenced again in the visible cells —
# presumably the Audio helpers read them on their own; confirm before removing.
PLAY_HT_API_KEY = config.PLAY_HT_API_KEY
PLAY_HT_USER_ID =  config.PLAY_HT_USER_ID

# Story source and on-disk locations, taken from `settings`.
# BOARDS_PATH is the folder whose JSON files update_blocks() rewrites.
BOARD_URL = settings.BOARD_URL
JSON_PATH = settings.JSON_PATH
BOARDS_PATH = settings.BOARDS_PATH

# Maps a speaker name to a voice id; prepare_text_groups() indexes it with
# voice['name'] and check_red() tests character names against its keys.
VOICE_DICT = settings.VOICE_DICT

## Helpers

## Get Voices

In [3]:
# Helper object from Audio.get_voices; the cells below use it to list and
# filter the available voices.
playht = Voices()

# NOTE(review): `voices` is not used again in the visible cells; presumably
# get_voices() also stores the list on `playht` for filter_voices() — confirm.
voices = playht.get_voices()

In [4]:
# Define your filter. Each active key names a voice attribute and lists the
# values to keep (presumably a voice must match every active key — confirm in
# Voices.filter_voices). Commented-out keys are not filtered on; the trailing
# comment on each line lists the known values for that attribute.
filter_dict = {
    # "name": ""
    "accent": ["american", "canadian"],  # "british", "american", "canadian"
    "age": ["youth", "adult"],  # "youth", "adult", "old"
    "gender": ["male"],  # "male", "female"
    # "loudness": ["low", "neutral", "high"],  # "low", "neutral", "high"
    # "style": ["narrative"],  # "narrative", "videos", "training", "advertising"
    # "tempo": ["fast"],  # "slow", "neutral", "fast"
    # "texture": ["smooth", "round", "thick", "gravelly"]  # "smooth", "round", "thick", "gravelly"
}

# Filter voices
filtered_voices = playht.filter_voices(filter_dict)

# Print filtered voices
playht.print_voices(filtered_voices)

# IDs that prepare_text_groups() later uses to validate VOICE_DICT entries.
# NOTE(review): it is not visible here whether get_attribute_list('id') covers
# all voices or only the filtered ones — confirm, since that decides which
# voice ids count as valid.
voice_ids = playht.get_attribute_list('id')

id       | name     | sample                                                                   | accent   | age   | gender | language     | language_code | loudness | style       | tempo   | texture  | is_cloned | 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
adolfo   | Adolfo   | https://peregrine-samples.s3.amazonaws.com/editor-samples/adolfo.wav     | american | adult | male   | English (US) | en-US         | neutral  | narrative   | fast    | thick    | False     | 
alfonso  | Alfonso  | https://peregrine-samples.s3.amazonaws.com/editor-samples/alfonso.wav    | american | adult | male   | English (US) | en-US         | neutral  | videos      | neutral | gravelly | False     | 
Anthony  | Anthony  | https://peregrine-samples.s3.amazonaws.com/editor-samples/Anthony.wav    | american | adult | male   | English (US) | 

## Generate Files

### Get Story

In [5]:
# Build the story object for the board at BOARD_URL (project class from Story.get_board).
board = Board(BOARD_URL)

# Save the story to disk in chunks (presumably 100 blocks per file — confirm
# in Board.save_chunks).
# NOTE(review): the target folder is hard-coded here while later cells read
# from BOARDS_PATH — confirm both point at the same directory.
board.save_chunks("data/board_chunks", chunk_size=100)

Extracting threads: 100%|██████████| 13/13 [01:21<00:00,  6.29s/it]


In [6]:
def check_red(content, character):
    """
    Decide whether a block should be marked as 'red'.

    A block is marked when any of the following holds:

    * the raw content contains a ``$...$`` span (simple LaTeX heuristic),
    * the HTML contains one of the tags listed in ``tags_to_check``,
    * the block has no character (narrator only) but its text contains a
      double quote,
    * the block has a character that is not a key of ``VOICE_DICT``.

    Args:
        content: HTML source of the block.
        character: Name of the block's character, or None for a
            narrator-only block.

    Returns:
        True if the block is marked as 'red', False otherwise.
    """
    # Check if content contains math equations
    if re.search(r'\$[^$]*\$', content):  # Simple check for LaTeX equations
        return True

    # Parse once; the same tree serves both the tag check and the text check.
    soup = BeautifulSoup(content, 'html.parser')

    # Check if content contains certain HTML tags (find() accepts a list of names)
    tags_to_check = ['img', 'big', 'small', 'details', 'hr', 'span']
    if soup.find(tags_to_check) is not None:
        return True

    if character is None:
        # No authors (so only narrator) yet there are quotes in the block
        if '"' in soup.get_text():
            return True
    elif character not in VOICE_DICT:
        # The lookup is skipped for None: a narrator-only block has no
        # character to look up, and testing None against the dict would mark
        # every such block regardless of the quote check above.
        return True

    # [Add other checks here]

    return False

In [7]:
def clean_text(text):
    """
    Normalise spaces in a piece of text.

    Non-breaking spaces (U+00A0) become ordinary spaces and every run of two
    or more spaces is collapsed to a single space. Newlines are left alone
    and the text is not stripped.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string.
    """
    # '\u00a0' and '\xa0' are the same character, so one replace covers both.
    without_nbsp = text.replace('\u00a0', ' ')
    # A single str.replace('  ', ' ') pass leaves a double space behind for
    # runs of three or more spaces; the regex collapses a run of any length.
    return re.sub(r' {2,}', ' ', without_nbsp)

def contains_english_word(s):
    """Return True when ``s`` holds at least one standalone run of ASCII letters."""
    word_match = re.search(r'\b[A-Za-z]+\b', s)
    # A match object is always truthy, so bool() is True exactly when a word was found.
    return bool(word_match)

def separate_speech(block_text, character_name, narrator_name):
    """
    Split a block's plain text into narrator and character lines.

    Text inside double quotes is attributed to ``character_name``; everything
    else is attributed to ``narrator_name``. Each piece is further split on
    newlines, and lines without any English word are dropped. A quoted span
    never crosses a newline, because ``.`` in the split pattern does not
    match ``'\\n'``.

    Args:
        block_text: Plain text of the block.
        character_name: Name of the speaking character, or None when the
            block is narration only (quotes are then not treated as speech).
        narrator_name: Name to use for narration.

    Returns:
        A list of ``{'name': ..., 'text': ...}`` dicts in reading order.
    """
    if character_name is None:
        # Narrator-only block: every paragraph is narration.
        return [
            {'name': narrator_name, 'text': clean_text(line)}
            for line in block_text.split('\n')
            if contains_english_word(line)
        ]

    results = []

    # Split text into speech sections, delimited by quotes. The capturing
    # group keeps the quoted parts, which therefore land at the odd indices.
    speeches = re.split(r'(".*?")', block_text)

    for i, speech in enumerate(speeches):
        speech = speech.strip()
        if not speech:
            continue

        # Decide by position rather than by comparing names: if the character
        # happens to be called the same as the narrator, a name comparison
        # would treat narration as quoted and chop off its first and last
        # characters.
        if i % 2 == 1:
            name = character_name
            text = clean_text(speech[1:-1])  # drop the surrounding quotes
        else:
            name = narrator_name
            text = clean_text(speech)

        # Split text by paragraph
        for t in text.split('\n'):
            if contains_english_word(t):
                results.append({'name': name, 'text': clean_text(t)})
    return results

def html_to_plain_text(html):
    """Parse ``html`` with the built-in 'html.parser' backend and return its text content."""
    return BeautifulSoup(html, 'html.parser').get_text()

In [8]:
def update_blocks(folder_path):
    """
    Recompute the 'voices' and 'red' fields of every block in a folder.

    Each ``*.json`` file in ``folder_path`` is loaded, updated and written
    back in place. Files are handled in the order of the first number in
    their name; names without a number come last. Any existing 'voices'
    entry of a block is replaced, not merged.

    Args:
        folder_path: Directory containing the JSON files. Each file is
            expected to hold a list of blocks with 'content' and
            'character' keys.

    Raises:
        json.JSONDecodeError: If a file is not valid JSON (the file name is
            printed first).
    """
    def extract_number(filename):
        # First run of digits in the name; names without digits sort last.
        match = re.search(r'\d+', filename)
        return int(match.group()) if match else float('inf')

    json_files = sorted(
        (f for f in os.listdir(folder_path) if f.endswith('.json')),
        key=extract_number,
    )

    for json_file in tqdm(json_files):
        file_path = os.path.join(folder_path, json_file)

        # Explicit UTF-8 so reading does not depend on the platform locale.
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except json.JSONDecodeError:
            print(f"Error decoding JSON from file: {json_file}")
            raise

        for block in data:
            plain_text = html_to_plain_text(block['content'])
            # Keep only the 'name' and 'text' keys of each separated line.
            block['voices'] = [
                {'name': voice['name'], 'text': voice['text']}
                for voice in separate_speech(plain_text, block['character'], "Narrator")
            ]
            block['red'] = check_red(block['content'], block['character'])

        with open(file_path, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=4)


In [9]:
# Rewrites every .json file under BOARDS_PATH in place, replacing each block's
# 'voices' and 'red' fields.
update_blocks(BOARDS_PATH)

100%|██████████| 203/203 [00:31<00:00,  6.34it/s]



## Generate Audio

### Audio helpers

In [5]:
def prepare_text_groups(data):
    """
    Build the input list for generate_audio from a list of blocks.

    Every entry of every block's 'voices' list becomes one
    ``{'text': ..., 'voice': ...}`` dict, in order, where 'voice' is the id
    that ``VOICE_DICT`` maps the entry's 'name' to.

    Raises:
        KeyError: If a name is missing from ``VOICE_DICT``.
        ValueError: If the mapped id is not in ``voice_ids`` (the offending
            entry and id are printed first).
    """
    text_groups = []
    every_voice = (entry for block in data for entry in block['voices'])
    for voice in every_voice:
        voice_id = VOICE_DICT[voice['name']]
        if voice_id in voice_ids:
            text_groups.append({'text': voice['text'], 'voice': voice_id})
            continue
        # Unknown id: report the entry, then abort.
        print(f"Invalid voice: \"{voice}\"")
        print(f"Voice ID: {voice_id}")
        raise ValueError(f"Invalid voice ID: {voice_id}")
    return text_groups

def update_json(original_data, processed_output):
    """
    Copy generated audio details onto the voice entries, in order.

    The n-th voice entry (counting across all blocks of ``original_data``)
    receives 'audio' and 'duration' from the 'url' and 'duration' keys of
    ``processed_output[n]``. The blocks are modified in place and the same
    list object is returned.

    Raises:
        IndexError: If ``processed_output`` has fewer items than there are
            voice entries.
    """
    every_voice = (entry for block in original_data for entry in block['voices'])
    for position, entry in enumerate(every_voice):
        result = processed_output[position]
        entry['audio'] = result['url']
        entry['duration'] = result['duration']
    return original_data

### Generate Audio

In [6]:
async def generate_audio_for_file(filepath, chunk_size=50):
    """
    Generate audio for every voice entry in one JSON file of blocks.

    The blocks are processed ``chunk_size`` at a time. After each chunk the
    whole file is rewritten, so results already obtained are on disk if a
    later chunk fails.

    Args:
        filepath: Path of the JSON file; it is read and then overwritten in place.
        chunk_size: Number of blocks sent to generate_audio per request.
            Defaults to 50.

    Raises:
        ValueError: If ``chunk_size`` is smaller than 1, or (from
            prepare_text_groups) if a voice id is not valid.
    """
    if chunk_size < 1:
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")

    # Explicit UTF-8 so reading and writing do not depend on the platform locale.
    with open(filepath, 'r', encoding='utf-8') as file:
        data = json.load(file)

    for start in range(0, len(data), chunk_size):
        chunk = data[start:start + chunk_size]
        text_groups = prepare_text_groups(chunk)

        # generate_audio must be awaited, which is why this function is async.
        audio_output = await generate_audio(text_groups)
        processed_output = process_output(audio_output)

        # Save updated data back to the file after every chunk
        data[start:start + chunk_size] = update_json(chunk, processed_output)
        with open(filepath, 'w', encoding='utf-8') as file:
            json.dump(data, file, indent=4)

In [13]:
# Trial run on a single chunk file.
path = f'{BOARDS_PATH}/planecrash_chunk_1_of_203.json'

# Load JSON data, keeping only the first 10 blocks. UTF-8 is given explicitly
# to match the cell that writes this same file back with encoding='utf-8'.
with open(path, 'r', encoding='utf-8') as file:
    data = json.load(file)[:10]

# Prepare text groups for generate_audio function
text_groups = prepare_text_groups(data)

In [14]:
# Top-level `await` is valid in an IPython/Jupyter cell; this would not run as
# a plain Python script.
audio_output = await generate_audio(text_groups)
# update_json() later reads 'url' and 'duration' from each item of this result.
processed_output = process_output(audio_output)

In [15]:
# update_json() modifies the blocks in `data` in place and returns that same
# list, so `updated_data` and `data` refer to the same object.
updated_data = update_json(data, processed_output)

In [16]:
# `updated_data` holds only the first blocks of the chunk file (the load cell
# slices with [:10]), so dumping it directly would drop every other block.
# Re-read the file and splice the updated blocks back into the full list.
with open(path, 'r', encoding='utf-8') as file:
    full_data = json.load(file)
full_data[:len(updated_data)] = updated_data

with open(path, 'w', encoding='utf-8') as file:
    json.dump(full_data, file, indent=4)

## App

In [7]:
# Print the local address before starting the app (presumably the address
# app.py serves on — confirm the port in app.py).
print("http://localhost:5000/")
# IPython magic: executes app.py from this notebook; the recorded output shows
# it starting a Flask server.
%run app.py

 * Serving Flask app "app" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on
