[Bug] I got RuntimeError: numel: integer multiplication overflow #2341

Closed
AIFahim opened this issue Feb 12, 2023 · 2 comments

Labels
bug: Something isn't working
wontfix: This will not be worked on but feel free to help.

AIFahim commented Feb 12, 2023

Describe the bug

Screenshot attached (Screenshot 2023-02-12 153700): training a VITS model crashes with RuntimeError: numel: integer multiplication overflow. The full traceback is in the Logs section below.

To Reproduce

import os
from TTS.tts.configs.shared_configs import BaseDatasetConfig,BaseAudioConfig,CharactersConfig
from TTS.tts.configs.glow_tts_config import GlowTTSConfig

from TTS.tts.configs.vits_config import VitsConfig
from TTS.tts.models.vits import Vits, VitsAudioConfig

from TTS.utils.audio import AudioProcessor
from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.tts.models.glow_tts import GlowTTS
from TTS.tts.datasets import load_tts_samples
from trainer import Trainer, TrainerArgs

def main():
    # BaseDatasetConfig: defines name, formatter and path of the dataset.
    output_path = "checkpoints_bn_female"

    dataset_config = BaseDatasetConfig(
        formatter="ljspeech",
        meta_file_train="/home/asif/Datasets/Dataset_Bangla/MaleVoice/Meta_fr_all_bangla_context_n_noncontext/metadata.csv",
        path="/home/asif/Datasets/Dataset_Bangla/MaleVoice/Dataset_Bangla_ori_10152/",  # os.path.join(output_path, "LJSpeech-1.1/")
    )

    # GlowTTSConfig: all model related values for training, validating and testing.
    my_valid_lis = ['a', 'a_1', 'a_2', 'ã', 'ã_1', 'ã_2', 'b', 'b_1', 'b_2', 'bʰ', 'bʰ_1', 'bʰ_2', 'c', 'c_1', 'c_2', 'cʰ', 'cʰ_1', 'cʰ_2', 'd', 'd_1', 'd_2', 'dʰ', 'dʰ_1', 'dʰ_2', 'd̪', 'd̪_1', 'd̪_2', 'd̪ʰ', 'd̪ʰ_1', 'd̪ʰ_2', 'e', 'e_1', 'e_2', 'ẽ', 'ẽ_1', 'ẽ_2', 'g', 'g_1', 'g_2', 'gʰ', 'gʰ_1', 'gʰ_2', 'h', 'h_1', 'h_2', 'i', 'i_1', 'i_2', 'ĩ', 'ĩ_1', 'ĩ_2', 'i̯', 'i̯_2', 'k', 'k_1', 'k_2', 'kʰ', 'kʰ_1', 'kʰ_2', 'l', 'l_1', 'l_2', 'm', 'm_1', 'm_2', 'n', 'n_1', 'n_2', 'o', 'o_1', 'o_2', 'õ', 'õ_1', 'õ_2', 'o̯', 'o̯_1', 'o̯_2', 'p', 'p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p_1', 'p_2', 'pʰ', 'pʰ_1', 'pʰ_2', 'r', 'r_1', 'r_2', 's', 's_1', 's_2', 't', 't_1', 't_2', 'tʰ', 'tʰ_1', 'tʰ_2', 't̪', 't̪_1', 't̪_2', 't̪ʰ', 't̪ʰ_1', 't̪ʰ_2', 'u', 'u_1', 'u_2', 'ũ', 'ũ_1', 'ũ_2', 'u̯', 'u̯_2', 'æ', 'æ_1', 'æ_2', 'æ̃', 'æ̃_2', 'ŋ', 'ŋ_2', 'ɔ', 'ɔ_1', 'ɔ_2', 'ɔ̃', 'ɔ̃_2', 'ɟ', 'ɟ_1', 'ɟ_2', 'ɟʰ', 'ɟʰ_1', 'ɟʰ_2', 'ɽ', 'ɽ_2', 'ɽʰ', 'ʃ', 'ʃ_1', 'ʃ_2', 'ʲ', 'ʲ_2', 'ʰ', 'ʷ', 'ɔ̃_1', 'ʲ_1', 'ɽʰ_1', '-']

# print(str(my_valid_lis))

# assert False
# char_list = ['a', 'a_1', 'a_2', 'ã', 'ã_1', 'ã_2', 'b', 'b_1', 'b_2', 'bʰ', 'bʰ_1', 'bʰ_2', 'c', 'c_1', 'c_2', 'cʰ', 'cʰ_1', 'cʰ_2', 'd', 'd_1', 'd_2', 'dʰ', 'dʰ_1', 'dʰ_2', 'd̪', 'd̪_1', 'd̪_2', 'd̪ʰ', 'd̪ʰ_1', 'd̪ʰ_2', 'e', 'e_1', 'e_2', 'ẽ', 'ẽ_1', 'ẽ_2', 'g', 'g_1', 'g_2', 'gʰ', 'gʰ_1', 'gʰ_2', 'h', 'h_1', 'h_2', 'i', 'i_1', 'i_2', 'ĩ', 'ĩ_1', 'ĩ_2', 'i̯', 'i̯_2', 'k', 'k_1', 'k_2', 'kʰ', 'kʰ_1', 'kʰ_2', 'l', 'l_1', 'l_2', 'm', 'm_1', 'm_2', 'n', 'n_1', 'n_2', 'o', 'o_1', 'o_2', 'õ', 'õ_1', 'õ_2', 'o̯', 'o̯_1', 'o̯_2', 'p', 'p1', 'p2', 'p3', 'p4', 'p5', 'p6', 'p7', 'p_1', 'p_2', 'pʰ', 'pʰ_1', 'pʰ_2', 'r', 'r_1', 'r_2', 's', 's_1', 's_2', 't', 't_1', 't_2', 'tʰ', 'tʰ_1', 'tʰ_2', 't̪', 't̪_1', 't̪_2', 't̪ʰ', 't̪ʰ_1', 't̪ʰ_2', 'u', 'u_1', 'u_2', 'ũ', 'ũ_1', 'ũ_2', 'u̯', 'u̯_2', 'æ', 'æ_1', 'æ_2', 'æ̃', 'æ̃_2', 'ŋ', 'ŋ_2', 'ɔ', 'ɔ_1', 'ɔ_2', 'ɔ̃', 'ɔ̃_2', 'ɟ', 'ɟ_1', 'ɟ_2', 'ɟʰ', 'ɟʰ_1', 'ɟʰ_2', 'ɽ', 'ɽ_2', 'ɽʰ', 'ʃ', 'ʃ_1', 'ʃ_2', 'ʲ', 'ʲ_2', 'ʷ']

# traverse in the string
# char_sen = ""
# for x in my_valid_lis:
#     char_sen += x
# # print(char_sen)
    characters_config = CharactersConfig(
        pad='',  # '<PAD>'
        eos='',  # '\n', '<EOS>', '।'
        bos='',  # '<BOS>', None
        blank='',  # '<BLNK>'
        phonemes=None,
    # characters =  "তট৫ভিঐঋখঊড়ইজমএেঘঙসীঢ়হঞ‘ঈকণ৬ঁৗশঢঠ\u200c১্২৮দৃঔগও—ছউংবৈঝাযফ\u200dচরষঅৌৎথড়৪ধ০ুূ৩আঃপয়’নলো",


        characters=my_valid_lis,  # char_sen
        punctuations='',  # "-!,|.? "
    )


# audio_config = BaseAudioConfig(
#     sample_rate = 16000,
#     resample =True
# )
    audio_config = VitsAudioConfig(
        sample_rate=16000, win_length=1024, hop_length=256, num_mels=80, mel_fmin=0, mel_fmax=None
    )



    config = VitsConfig(
        batch_size=64,
        eval_batch_size=128,
        num_loader_workers=16,
        num_eval_loader_workers=16,
        batch_group_size=5,
        run_eval=True,
        test_delay_epochs=-1,
        epochs=5000,
        text_cleaner="collapse_whitespace",
        compute_input_seq_cache=True,
        use_phonemes=False,
        # phoneme_language="bn",
        phoneme_cache_path=os.path.join(output_path, "phoneme_cache"),
        print_step=25,
        print_eval=False,
        mixed_precision=False,
        output_path=output_path,
        datasets=[dataset_config],
        save_step=1000,
        audio=audio_config,
        characters=characters_config,
        cudnn_benchmark=False,
        min_audio_len=32 * 256 * 4,
        max_audio_len=160000,
        test_sentences=[
#         "পিপলস ইন্স্যুরেন্স অব চায়না ছেষট্টি বছর আগে ব্যবসা চালু করে।",
#         "সোনার বাংলা আমি তোমায় ভালবাসি।"
        # "হয়,হয়ে,ওয়া,হয়েছ,হয়েছে,দিয়ে,যায়,দায়,নিশ্চয়,আয়,ভয়,নয়,আয়াত,নিয়ে,হয়েছে,দিয়েছ,রয়ে,রয়েছ,রয়েছে।",
        # "দেয়,দেওয়া,বিষয়,হয়,হওয়া,সম্প্রদায়,সময়,হয়েছি,দিয়েছি,হয়,হয়েছিল,বিষয়ে,নয়,কিয়াম,ইয়া,দেয়া,দিয়েছে,আয়াতে,দয়া।",
        # "হওয়ার,হয়েছে,নিশ্চয়ই,রায়,কিয়ামত,উভয়,দিয়েছেন,দুনিয়া,ন্যায়,অবস্থায়,যায়,ফিরিয়ে,দিয়েছিল,ভয়ে,দ্বিতীয়,দায়ক,পায়।",
        # "গিয়ে,চেয়ে,হিদায়াত,দায়ে,নিয়েছ,রয়েছে,শয়তান,কিয়ামতে,সম্প্রদায়ে,সম্প্রদায়ের,নেয়,জয়,কিয়ামতের,স্থায়ী,যাওয়া,দয়ালু।",
        # "ইয়াহুদ,নয়,ব্যয়,ইয়াহুদী,নেওয়া,উভয়ে,যায়,হয়েছিল,প্রয়োজন।"
        "ʃ_1 ɔ n n o b o t̪ i_2 ɔ_1 r t̪ t̪ʰ o_2 cʰ_1 i ʲ a n ɔ b b o i̯_2 ʃ_1 o ŋ kʰ o k_2" #ষণ্নবতি অর্থ ছিয়ানব্বই সংখ্যক।|
        ],
    )



    ap = AudioProcessor.init_from_config(config)

    ap.resample  # note: bare attribute access only, not a call

    tokenizer, config = TTSTokenizer.init_from_config(config)


    def formatter(root_path, meta_file, **kwargs):  # pylint: disable=unused-argument
        """Normalizes the LJSpeech meta data file to TTS format
        https://keithito.com/LJ-Speech-Dataset/"""
        txt_file = meta_file
        items = []
        speaker_name = "ljspeech"
        with open(txt_file, "r", encoding="utf-8") as ttf:
            for line in ttf:
                cols = line.split("|")
                wav_file = os.path.join(root_path, "wav", cols[0] + ".wav")
                try:
                    text = cols[2]
                except IndexError:
                    print("not found")
                    continue  # skip malformed lines instead of reusing the previous text

                items.append({"text": text, "audio_file": wav_file, "speaker_name": speaker_name, "root_path": root_path})
        return items


    train_samples, eval_samples = load_tts_samples(
        dataset_config,
        eval_split=True,
        eval_split_max_size=config.eval_split_max_size,
        eval_split_size=config.eval_split_size,
        formatter=formatter,
    )



    model = Vits(config, ap, tokenizer, speaker_manager=None)

    trainer = Trainer(
        TrainerArgs(), config, output_path, model=model, train_samples=train_samples, eval_samples=eval_samples
    )

    trainer.fit()

if __name__ == "__main__":
    main()
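
Not part of the original script, but possibly useful when debugging crashes with a custom symbol list like my_valid_lis: a quick pre-flight check that every whitespace-separated token in the metadata transcripts is covered by the configured characters. A minimal sketch, assuming the same pipe-separated metadata layout that the formatter above reads (transcript in the third column); check_token_coverage is a hypothetical helper, not part of TTS:

def check_token_coverage(meta_path, valid_tokens):
    # Collect whitespace-separated tokens from the transcript column that are
    # not in the configured symbol list.
    valid = set(valid_tokens)
    unknown = set()
    with open(meta_path, "r", encoding="utf-8") as f:
        for line in f:
            cols = line.rstrip("\n").split("|")
            if len(cols) < 3:
                continue  # malformed row; the formatter above skips these as well
            unknown.update(tok for tok in cols[2].split() if tok not in valid)
    return sorted(unknown)

# e.g. inside main(), after my_valid_lis and dataset_config are defined:
# print(check_token_coverage(dataset_config.meta_file_train, my_valid_lis))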

Expected behavior

No response

Logs

Traceback (most recent call last):
  File "/home/asif/miniconda3/envs/denoise_py_3_9_12/lib/python3.9/site-packages/trainer/trainer.py", line 1593, in fit
    self._fit()
  File "/home/asif/miniconda3/envs/denoise_py_3_9_12/lib/python3.9/site-packages/trainer/trainer.py", line 1546, in _fit
    self.train_epoch()
  File "/home/asif/miniconda3/envs/denoise_py_3_9_12/lib/python3.9/site-packages/trainer/trainer.py", line 1310, in train_epoch
    _, _ = self.train_step(batch, batch_num_steps, cur_step, loader_start_time)
  File "/home/asif/miniconda3/envs/denoise_py_3_9_12/lib/python3.9/site-packages/trainer/trainer.py", line 1162, in train_step
    outputs, loss_dict_new, step_time = self._optimize(
  File "/home/asif/miniconda3/envs/denoise_py_3_9_12/lib/python3.9/site-packages/trainer/trainer.py", line 1023, in _optimize
    outputs, loss_dict = self._model_train_step(batch, model, criterion, optimizer_idx=optimizer_idx)
  File "/home/asif/miniconda3/envs/denoise_py_3_9_12/lib/python3.9/site-packages/trainer/trainer.py", line 970, in _model_train_step
    return model.train_step(*input_args)
  File "/home/asif/coqui_vits_2/TTS/TTS/tts/models/vits.py", line 1255, in train_step
    outputs = self.forward(
  File "/home/asif/coqui_vits_2/TTS/TTS/tts/models/vits.py", line 1018, in forward
    x, m_p, logs_p, x_mask = self.text_encoder(x, x_lengths, lang_emb=lang_emb)
  File "/home/asif/miniconda3/envs/denoise_py_3_9_12/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/asif/coqui_vits_2/TTS/TTS/tts/layers/vits/networks.py", line 104, in forward
    x = self.encoder(x , x_mask)
  File "/home/asif/miniconda3/envs/denoise_py_3_9_12/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/asif/coqui_vits_2/TTS/TTS/tts/layers/glow_tts/transformer.py", line 424, in forward
    y = self.attn_layers[i](x, x, attn_mask)
  File "/home/asif/miniconda3/envs/denoise_py_3_9_12/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1194, in _call_impl
    return forward_call(*input, **kwargs)
  File "/home/asif/coqui_vits_2/TTS/TTS/tts/layers/glow_tts/transformer.py", line 118, in forward
    print(" glow_tts/transformer.py ", x)
  File "/home/asif/miniconda3/envs/denoise_py_3_9_12/lib/python3.9/site-packages/torch/_tensor.py", line 427, in __repr__
    return torch._tensor_str._str(self, tensor_contents=tensor_contents)
  File "/home/asif/miniconda3/envs/denoise_py_3_9_12/lib/python3.9/site-packages/torch/_tensor_str.py", line 637, in _str
    return _str_intern(self, tensor_contents=tensor_contents)
  File "/home/asif/miniconda3/envs/denoise_py_3_9_12/lib/python3.9/site-packages/torch/_tensor_str.py", line 568, in _str_intern
    tensor_str = _tensor_str(self, indent)
  File "/home/asif/miniconda3/envs/denoise_py_3_9_12/lib/python3.9/site-packages/torch/_tensor_str.py", line 328, in _tensor_str
    formatter = _Formatter(get_summarized_data(self) if summarize else self)
  File "/home/asif/miniconda3/envs/denoise_py_3_9_12/lib/python3.9/site-packages/torch/_tensor_str.py", line 115, in __init__
    nonzero_finite_vals = torch.masked_select(
RuntimeError: numel: integer multiplication overflow
(denoise_py_3_9_12) asif@152-67-3-48:~/coqui_vits_2/TTS$
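
A side note on the traceback, not part of the original report: the exception is raised while PyTorch formats the tensor for the debug print at TTS/tts/layers/glow_tts/transformer.py line 118 (print(" glow_tts/transformer.py ", x)), i.e. inside the tensor repr path rather than inside the attention computation itself. A hedged sketch of a debug helper that reports only shape, dtype and device and so never enters that formatting path (describe_tensor is a hypothetical name, not a TTS or trainer API):

import torch

def describe_tensor(tag, t: torch.Tensor):
    # Printing only metadata avoids torch's value formatting, which is the code
    # path that raised "numel: integer multiplication overflow" in the log above.
    print(tag, "shape:", tuple(t.shape), "dtype:", t.dtype, "device:", t.device)

# e.g. in the local debug print:
# describe_tensor("glow_tts/transformer.py x", x)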

Environment

- TTS version: current

Additional context

No response

AIFahim added the bug label on Feb 12, 2023.

stale bot commented Mar 14, 2023

This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions. You might also look at our discussion channels.

The stale bot added the wontfix label on Mar 14, 2023.
The stale bot closed this issue as completed on Mar 22, 2023.
Xujingkk commented

Hello, I had a similar problem. Have you solved it yet? @AIFahim
