In [1]:
import os

# Select the GPU *before* importing the project code: CUDA only honours
# CUDA_VISIBLE_DEVICES if it is set before the CUDA runtime is initialised,
# and the import below may initialise it as a side effect (not visible here).
os.environ["CUDA_VISIBLE_DEVICES"] = "6"

from bin.myinfer import InferModel  # noqa: E402  (must follow the env setup)

# Build the inference model from the selected checkpoint file.
model = InferModel(checkpoint="exp/valle/best-valid-loss.pt")

In [7]:
import sys

# Forget any cached copy of bin.utils so that the star-import that follows
# re-executes the module instead of reusing the already-loaded one.
sys.modules.pop("bin.utils", None)

from bin.utils import *


class AIShell2DataWrapper:
    """In-memory index of an AISHELL-2 ``trans.txt`` file, keyed by utterance id.

    ``self.voice`` maps each utterance id to a dict holding the wav path
    relative to the dataset directory (``file_name``), the transcript
    (``text``) and the numeric speaker suffix (``npc_id``).
    """

    # Dataset split that is indexed when the constructor gets no argument.
    DEFAULT_DIR = "download/aishell2/AISHELL-2/iOS/test"
    # Speaker suffixes kept by ``get_ids(nice=True)``.
    NICE_NPC_IDS = frozenset({16, 17, 19, 22, 23, 25})

    def __init__(self, data_dir: PathLike = DEFAULT_DIR) -> None:
        """Read ``<data_dir>/trans.txt`` and build the utterance index.

        Args:
            data_dir: Directory containing ``trans.txt``; defaults to the
                iOS test split used throughout this notebook.
        """
        self.dir = Path(data_dir)
        # Explicit UTF-8 keeps the Chinese transcripts independent of the
        # locale's default encoding; ``with`` closes the handle promptly.
        with open(self.dir / "trans.txt", encoding="utf-8") as trans_file:
            # Blank lines carry no utterance id and would make line_parser
            # fail on int(""), so they are skipped.
            self.voice = dict(
                self.line_parser(line) for line in trans_file if line.strip()
            )

    def line_parser(self, line: str):
        """Split one transcript line into ``(utterance_id, info)``.

        The line is read positionally: the first 11 characters are the
        utterance id (e.g. ``IT0023W0267``), the 12th character is the
        separator and everything after it is the transcript.

        Returns:
            A tuple of the utterance id and a dict with keys ``file_name``
            (``wav/<speaker>/<utterance_id>.wav``), ``text`` and ``npc_id``.

        Raises:
            ValueError: If the last two characters of the speaker field are
                not digits.
        """
        utt_id = line[:11]
        speaker = line[1:6]  # e.g. "T0023": also the wav sub-directory name
        text = line[12:].strip()
        return utt_id, {
            "file_name": Path(f"wav/{speaker}/{utt_id}.wav"),
            "text": text,
            "npc_id": int(speaker[-2:]),
        }

    def get_ids(
        self,
        count: int = 5,
        min_len: int = 8,
        max_len: int = 18,
        nice: bool = False,
    ):
        """Sample utterance ids whose transcript length is within bounds.

        Args:
            count: Number of ids requested from ``random_samples``.
            min_len: Minimum transcript length in characters (inclusive).
            max_len: Maximum transcript length in characters (inclusive).
            nice: When true, keep only speakers listed in ``NICE_NPC_IDS``.

        Returns:
            Whatever ``random_samples(ids, count)`` returns for the
            filtered id list.
        """
        voice = self.voice
        ids = [
            key
            for key, info in voice.items()
            if min_len <= len(info["text"]) <= max_len
        ]
        if nice:
            ids = [key for key in ids if voice[key]["npc_id"] in self.NICE_NPC_IDS]
        return random_samples(ids, count)

    def create_file(
        self,
        file_name: PathLike,
        ids,
        texts,
        infer_dir="audios/aishell2",
        copy: bool = False,
    ):
        """Delegate to ``create_file_base`` with this dataset's lookup.

        For each id the lookup yields the prompt transcript and the full
        path of its wav file (dataset directory joined with ``file_name``).
        All other arguments are forwarded unchanged.

        Returns:
            The value returned by ``create_file_base``.
        """
        voice = self.voice

        def get_info(utt_id):
            entry = voice[utt_id]
            return entry["text"], self.dir / entry["file_name"]

        return create_file_base(
            file_name=file_name,
            ids=ids,
            get_info=get_info,
            texts=texts,
            infer_dir=infer_dir,
            copy=copy,
        )

In [13]:
# Index the transcripts, then draw five prompt utterances restricted to the
# wrapper's "nice" speaker subset.
wrapper = AIShell2DataWrapper()
ids = wrapper.get_ids(nice=True, count=5)

# Target sentences to synthesise, taken from the "short" set of get_tts_texts.
texts = get_tts_texts("short")

def process_char(c):
    """Return ``c`` unchanged when it begins with a character in the range
    U+4E00-U+9FFF or a full-width comma; otherwise return an empty string."""
    return c if re.match(r'[\u4e00-\u9fff，]', c) else ''
def process_str(s):
    """Pass every character of ``s`` through ``process_char`` and concatenate
    the results, dropping the characters it rejects."""
    return "".join(map(process_char, s))

# Clean every target sentence with process_str before synthesis.
texts = list(map(process_str, texts))

# Hand the prompt ids and cleaned texts to the wrapper; the returned value is
# kept as file_name for the inference cell.
file_name = wrapper.create_file("aishell2.txt", ids, texts)

人们会向你寻求意见吗	download/aishell2/AISHELL-2/iOS/test/wav/T0023/IT0023W0267.wav	快乐源于内心，而不是外界的条件	audios/aishell2/IT0023W0267_infer.wav
下次再这样我会把你钉在桩上	download/aishell2/AISHELL-2/iOS/test/wav/T0017/IT0017W0072.wav	积极乐观的态度能够战胜一切困难	audios/aishell2/IT0017W0072_infer.wav
我去找朋友来别急	download/aishell2/AISHELL-2/iOS/test/wav/T0019/IT0019W0027.wav	勇敢尝试，你将发现自己的潜力	audios/aishell2/IT0019W0027_infer.wav
我们在车的前座发现了一个钱包	download/aishell2/AISHELL-2/iOS/test/wav/T0023/IT0023W0292.wav	当生活变得艰难时，保持微笑，坚持向前	audios/aishell2/IT0023W0292_infer.wav
你只是担心如果威廉知道你以前的样子	download/aishell2/AISHELL-2/iOS/test/wav/T0019/IT0019W0161.wav	勇敢追逐梦想，你将成为自己的英雄	audios/aishell2/IT0019W0161_infer.wav



In [14]:
# Run inference driven by the file produced by wrapper.create_file
# (the cell output shows one "synthesize text" log line per entry).
model.infer_by_file(file_name)

# Display the entries of the same file; presumably renders audio players
# alongside the prompt and target texts -- TODO confirm in bin.utils.
show_audios(file_name)

synthesize text: 快乐源于内心，而不是外界的条件
VALL-E EOS [271 -> 626]
synthesize text: 积极乐观的态度能够战胜一切困难
VALL-E EOS [294 -> 603]
synthesize text: 勇敢尝试，你将发现自己的潜力
VALL-E EOS [167 -> 179]
synthesize text: 当生活变得艰难时，保持微笑，坚持向前
VALL-E EOS [337 -> 655]
synthesize text: 勇敢追逐梦想，你将成为自己的英雄
VALL-E EOS [275 -> 289]
-------0-----IT0023W0267---------------


'人们会向你寻求意见吗'

'快乐源于内心，而不是外界的条件'

-------1-----IT0017W0072---------------


'下次再这样我会把你钉在桩上'

'积极乐观的态度能够战胜一切困难'

-------2-----IT0019W0027---------------


'我去找朋友来别急'

'勇敢尝试，你将发现自己的潜力'

-------3-----IT0023W0292---------------


'我们在车的前座发现了一个钱包'

'当生活变得艰难时，保持微笑，坚持向前'

-------4-----IT0019W0161---------------


'你只是担心如果威廉知道你以前的样子'

'勇敢追逐梦想，你将成为自己的英雄'

# others

In [None]:
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np

filename = "audios/aishell2/IT0016W0441.wav"
# librosa.load returns the samples together with the sampling rate they were
# loaded at.
waveform, sr = librosa.load(filename)

# Convert sample indices to seconds so the x-axis really shows time, as its
# label claims (plotting the bare array would put sample numbers there).
times = np.arange(len(waveform)) / sr

plt.figure(figsize=(12, 4))
plt.plot(times, waveform, color="#007acc")
plt.title("Waveform")
plt.xlabel("Time (seconds)")
plt.ylabel("Amplitude")
plt.show()

## 数据统计

In [3]:
from collections import Counter


wrapper = AIShell2DataWrapper()

# Scan the iOS/data transcript file for three rare characters.
#   count   -- number of lines containing at least one of them
#   counter -- occurrences of each matched run of those characters
counter = Counter()
count = 0
# Explicit UTF-8 so the Chinese text decodes the same way on every platform.
with open(
    "download/aishell2/AISHELL-2/iOS/data/trans.txt", encoding="utf-8"
) as file:
    for line in file:
        # line[12:] skips the 11-character utterance id and its separator.
        ls = re.findall("[㶧䶮𫚉]+", line[12:])
        count += bool(ls)
        counter.update(ls)

# most_common() orders by descending count, keeping first-seen order on ties.
sorted_counter = counter.most_common()

print(f"count:{count}")
print(*sorted_counter, sep="\n")

count:4
('䶮', 2)
('𫚉', 1)
('㶧', 1)


In [16]:
from valle.data.collation import get_text_token_collater
from valle.data.tokenizer import TextTokenizer, tokenize_text


# Tokenizer using the "pypinyin_initials_finals" backend.
text_tokenizer = TextTokenizer(backend="pypinyin_initials_finals")
# Collater built from the project's symbol table; it is created here but not
# used by the rest of this cell.
text_collater = get_text_token_collater("data/tokenized/unique_text_tokens.k2symbols")
# Mixed Latin/Chinese sample to see how both scripts are tokenized.
text = "WE WILL ROCK YOU 好而"


# Wrapped in a list so the token sequence prints as a single nested list.
print([tokenize_text(text_tokenizer, text=text)])

[['W', 'E', '_', 'W', 'I', 'L', 'L', '_', 'R', 'O', 'C', 'K', '_', 'Y', 'O', 'U', '_', '-', 'h', 'ao3', '-', 'er2']]


In [None]:
from bin.utils import *
import re

# Print the runs of ASCII letters found in each supervision text, skipping
# entries that contain none.
for record in read_jsonl_gz('data/manifests/aishell2_supervisions_train.jsonl.gz'):
    latin_runs = re.findall(r'[a-zA-Z]+', record['text'])
    if not latin_runs:
        continue
    print(latin_runs)