In [None]:
import os
import io
import requests
import uuid
import json
import wave
import warnings
import pandas as pd

import numpy as np
from scipy.io.wavfile import read, write
from IPython.display import Audio
import scipy.signal as sg
import matplotlib.pyplot as plt
from tqdm import tqdm
import yaml

In [None]:
# Load the API settings (endpoint, token, folder id) from a local YAML file.
yaml_path = 'yandex_api.yaml'
# Explicit UTF-8, consistent with the other open() calls in this notebook;
# without it open() falls back to the platform's locale encoding.
with open(yaml_path, 'r', encoding='utf-8') as file:
    config = yaml.safe_load(file)

In [None]:
# Pull the individual settings out of the 'yandex_tts' section of the config.
tts_settings = config['yandex_tts']
url = tts_settings['api_url']
# Stored under the 'iam_token' key, but sent as the OAuth token when the
# IAM token is requested.
oauth_token = tts_settings['iam_token']
folder_id = tts_settings['folder_id']

In [None]:
# Exchange the OAuth token for an IAM token used by the following requests.
headers = {'Content-Type': 'application/json'}
# json.dumps escapes the value properly; hand-formatted JSON would become
# invalid if the token ever contained a quote or a backslash.
data = json.dumps({"yandexPassportOauthToken": oauth_token})

response = requests.post(
    "https://iam.api.cloud.yandex.net/iam/v1/tokens",
    headers=headers,
    data=data,
    timeout=30,  # do not let a stalled connection hang the kernel
)
# Fail here with the HTTP error rather than with a KeyError on 'iamToken'.
response.raise_for_status()
token_info = response.json()
iam_token = token_info['iamToken']
# Never print the token itself: cell output is saved together with the
# notebook and would leak the credential.
print('IAM token received, expires at:', token_info.get('expiresAt'))

In [None]:
# Query the Resource Manager "clouds" endpoint with the IAM token and show
# the raw response body.
clouds_endpoint = "https://resource-manager.api.cloud.yandex.net/resource-manager/v1/clouds"
headers = {"Authorization": "Bearer " + iam_token}
response = requests.get(clouds_endpoint, headers=headers)
response_body = response.text
json_data = json.loads(response_body)
print(response_body)

In [None]:
def synthesize(text, iam_token, folder_id, voice, speed, path_save,
               sample_rate=48000, lang='ru-RU', timeout=60, verify=True):
    """Synthesize speech for `text` and save it as a WAV file.

    Sends a form-encoded POST to the `tts:synthesize` endpoint requesting
    the 'lpcm' format, interprets the response body as 16-bit integer
    samples and writes them to `path_save` with scipy's WAV writer.

    Args:
        text: Text to synthesize (the notebook passes UTF-8 encoded bytes).
        iam_token: IAM token sent as a Bearer token.
        folder_id: Value for the request's 'folderId' field.
        voice: Voice identifier (one of the values of `voices`).
        speed: Value for the request's 'speed' field.
        path_save: Destination path of the WAV file.
        sample_rate: Sample rate in Hz, used both in the request
            ('sampleRateHertz') and in the WAV header so the two can never
            disagree. Defaults to 48000.
        lang: Value for the request's 'lang' field. Defaults to 'ru-RU'.
        timeout: Seconds to wait for the server before giving up.
        verify: TLS certificate verification flag passed to requests.
            Keep it True; pass False only as a deliberate workaround,
            because the request carries the bearer token.

    Returns:
        numpy.ndarray of dtype int16 with the synthesized samples.

    Raises:
        RuntimeError: If the service answers with a status other than 200.
        requests.exceptions.RequestException: On network, TLS or timeout
            failures.
    """
    url = 'https://tts.api.cloud.yandex.net/speech/v1/tts:synthesize'
    headers = {
        'Authorization': 'Bearer ' + iam_token,
    }

    data = {
        'text': text,
        'lang': lang,
        'voice': voice,
        'folderId': folder_id,
        'format': 'lpcm',
        'speed': speed,
        'sampleRateHertz': sample_rate,
    }

    with warnings.catch_warnings():
        if not verify:
            # Only an unverified request emits the insecure-request warning;
            # silence it just in that explicitly requested case.
            warnings.simplefilter("ignore")
        response_sound = requests.post(
            url, headers=headers, data=data, timeout=timeout, verify=verify
        )

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    if response_sound.status_code != 200:
        raise RuntimeError(
            f'TTS request failed with HTTP {response_sound.status_code}: '
            f'{response_sound.text[:500]}'
        )

    audio = np.frombuffer(response_sound.content, dtype=np.int16)

    write(path_save, rate=sample_rate, data=audio)
    return audio

In [None]:
# Load the word table and keep only the rows whose three selection columns
# are all different from 0.
df = pd.read_excel('База_слов_021523.xls')
selection_columns = [
    '5 Фонем',
    'Подходил ли по смыслу',
    'voice = \'Филипп\'\nspeed = 0.8',
]
df_subset = df.copy()
for column_name in selection_columns:
    df_subset = df_subset[df_subset[column_name] != 0]

# Each remaining '5 Фонем' entry is split on the apostrophe.
accents = df_subset['5 Фонем'].values
words_splits = [entry.split("'") for entry in accents]

# Plain spelling: the pieces glued back together without the apostrophe.
words_txt = [''.join(pieces) for pieces in words_splits]

# TTS spelling: a '+' is inserted before the last character of the part
# preceding the apostrophe, followed by the part right after it.
accents_tts = [
    f'{pieces[0][:-1]}+{pieces[0][-1]}{pieces[1]}'
    for pieces in words_splits
]

In [None]:
# Dump the plain words to a text file, one word per line.
with open("words_021523.txt", "w", encoding="utf8") as file:
    file.writelines(word + "\n" for word in words_txt)

In [None]:
# Russian display name -> voice identifier sent in the TTS request.
voices = dict([
    ('Алена', 'alena'),
    ('Филипп', 'filipp'),
    ('Ермил', 'ermil'),
    ('Женя', 'jane'),
    ('Мадирус', 'madirus'),
    ('Оммаж', 'omazh'),
    ('Захар', 'zahar'),
])

In [None]:
# Voice names (keys of `voices`) and speed values. Both names are reassigned
# by the later synthesis cells before they are iterated over.
voices_names = ['Филипп', 'Женя']
speeds = [0.6, 0.8]
# [0.1, 0.3, 0.5, 0.7, 1]

In [None]:
# Sample sentence used as the input of the single test synthesis below.
line = 'Они пересказывали друг дружке, где они сегодня побывали и где какой хороший корм находили; а одна из них и говорит с досадою:'

In [None]:
# One-off test: synthesize the sample sentence with a single voice/speed
# and store it under sounds/yandex_test/.
voice, speed = 'Филипп', 0.7
line_encoded = line.encode('utf-8')

# The speed is encoded in the folder name as an integer (speed * 10).
dir_save = 'sounds/yandex_test/{}/speed{}/text/'.format(voice, int(speed * 10))
os.makedirs(dir_save, exist_ok=True)

file_save = 'test.wav'
path_save = f'{dir_save}{file_save}'
audio = synthesize(
    line_encoded,
    iam_token,
    folder_id,
    voice=voices[voice],
    speed=speed,
    path_save=path_save,
)

In [None]:
# Play the most recently synthesized clip in `audio`; 48000 Hz is the sample
# rate requested from the TTS service in synthesize().
sr = 48000
Audio(data=audio, rate=sr, autoplay=False)

In [None]:
# Spot-check one converted entry (needs at least 819 entries in accents_tts).
accents_tts[818]

In [None]:
# Synthesize every stressed word for each voice at a single speed and save
# the clips as sounds/yandex/<voice>/speed<N>/words/<index>.wav.
voices_names = ['Филипп', 'Женя']
# Must be a list: the loop below iterates over it, and a bare float
# (speeds = 0.8) raises "TypeError: 'float' object is not iterable".
speeds = [0.8]

for voice in voices_names:
    for speed in speeds:
        # The target folder depends only on voice and speed, so create it
        # once per combination rather than once per word.
        dir_save = f'sounds/yandex/{voice}/speed{int(speed*10)}/words/'
        os.makedirs(dir_save, exist_ok=True)

        # Wrap the list itself (not enumerate) so tqdm knows the total and
        # can show real progress.
        for index, line_accent in enumerate(tqdm(accents_tts)):
            line_accent_encoded = line_accent.encode('utf-8')

            # Files are numbered from 1.
            file_save = f'{index+1}.wav'
            path_save = dir_save + file_save
            audio = synthesize(line_accent_encoded, iam_token, folder_id, voice=voices[voice], speed=speed, path_save=path_save)

In [None]:
# Synthesize every stressed word for each voice/speed combination, save the
# clips as sounds/yandex/<voice>/speed<N>/words/<index>.wav and keep the
# samples in `audios`.
voices_names = ['Филипп', 'Женя']
speeds = [0.7, 0.8, 0.9, 1]

# The concatenation cell further down reads `audios`; with the collection
# commented out it failed with a NameError on a fresh kernel.
# Note: this keeps every clip of every voice/speed in memory.
audios = []

for voice in voices_names:
    for speed in speeds:
        # The target folder depends only on voice and speed, so create it
        # once per combination rather than once per word.
        dir_save = f'sounds/yandex/{voice}/speed{int(speed*10)}/words/'
        os.makedirs(dir_save, exist_ok=True)

        # Wrap the list itself (not enumerate) so tqdm knows the total.
        for index, line_accent in enumerate(tqdm(accents_tts)):
            line_accent_encoded = line_accent.encode('utf-8')

            # Files are numbered from 1.
            file_save = f'{index+1}.wav'
            path_save = dir_save + file_save
            audio = synthesize(line_accent_encoded, iam_token, folder_id, voice=voices[voice], speed=speed, path_save=path_save)
            audios.append(audio)

In [None]:
# Join all collected clips into one array. Requires `audios` to already
# exist in the kernel as a non-empty sequence of sample arrays.
audios_cat = np.concatenate(audios)

In [None]:
# Play the concatenated clips; 48000 Hz is the sample rate requested from
# the TTS service in synthesize().
sr = 48000
Audio(data=audios_cat, rate=sr, autoplay=False)

In [None]:
# Input text files texts/part4.txt ... texts/part9.txt (range end is exclusive).
text_paths = [f'texts/part{i}.txt' for i in range(4, 10)]

In [None]:
# Load the first line of every text file; each one becomes a TTS input.
# The printed length shows how many characters will be sent per request.
lines = []
for text_path in text_paths:
    with open(text_path, 'r', encoding="utf8") as f:
        # Only the first line is used, so read just that line instead of
        # loading the whole file into a list.
        first_line = f.readline()
    if not first_line:
        # readline() returns '' only at end of file, i.e. the file is empty.
        # Report which file is the problem instead of a bare IndexError.
        raise ValueError(f'{text_path} is empty: expected the text on its first line')
    line = first_line.rstrip()
    lines.append(line)
    print(len(line))

In [None]:
# Synthesize every loaded text for each voice/speed combination, save the
# clips as sounds/yandex/<voice>/speed<N>/text_full/<index>.wav and keep the
# samples in `audios`.
voices_names = ['Филипп', 'Женя']
speeds = [0.7, 0.8, 0.9, 1]

# The playback cell at the end of the notebook indexes `audios`; with the
# collection commented out it depended on leftover kernel state.
audios = []

for voice in voices_names:
    for speed in speeds:
        # The target folder depends only on voice and speed, so create it
        # once per combination rather than once per text.
        dir_save = f'sounds/yandex/{voice}/speed{int(speed*10)}/text_full/'
        os.makedirs(dir_save, exist_ok=True)

        # Wrap the list itself (not enumerate) so tqdm knows the total.
        for index, line in enumerate(tqdm(lines)):
            line_encoded = line.encode('utf-8')

            # Files are numbered from 1.
            file_save = f'{index+1}.wav'
            path_save = dir_save + file_save
            audio = synthesize(line_encoded, iam_token, folder_id, voice=voices[voice], speed=speed, path_save=path_save)
            audios.append(audio)

In [None]:
# Display the loaded texts (one string per input file).
lines

In [None]:
# Play the clip at index 3 of `audios`; requires `audios` to exist in the
# kernel with at least four entries. 48000 Hz is the sample rate requested
# from the TTS service in synthesize().
sr = 48000
Audio(data=audios[3], rate=sr, autoplay=False)