# Import the library

In [1]:
import os

import torch
from trainer import Trainer, TrainerArgs

# from TTS.bin.compute_embeddings import compute_embeddings
from compute_embeddings import compute_embeddings # use custom formatter without forking the lib
from TTS.bin.resample import resample_files
from TTS.config.shared_configs import BaseDatasetConfig
from TTS.tts.configs.vits_config import VitsConfig
from TTS.tts.datasets import load_tts_samples
from TTS.tts.models.vits import CharactersConfig, Vits, VitsArgs, VitsAudioConfig
# from TTS.utils.downloaders import download_vctk
# from TTS.config import load_config
# from TTS.config.shared_configs import BaseDatasetConfig
# from TTS.tts.datasets import load_tts_samples
# from TTS.tts.utils.managers import save_file
# from TTS.tts.utils.speakers import SpeakerManager
from TTS.tts.datasets.formatters import vctk
from functools import partial

from tqdm import tqdm

torch.set_num_threads(24)

# Setup constants

In [2]:
# Current path
CURRENT_PATH = os.getcwd()

# Name of the run for the Trainer
RUN_NAME = "KhongKhunTTS-TH-VCTK"

# Path where you want to save the models outputs (configs, checkpoints and tensorboard logs)
OUT_PATH = os.getcwd()

# If you want to do transfer learning and speedup your training you can set here the path to the model
RESTORE_PATH = None

# This paramter is useful to debug, it skips the training epochs and just do the evaluation  and produce the test sentences
SKIP_TRAIN_EPOCH = False

# Set here the batch size to be used in training and evaluation
BATCH_SIZE = 32

# Training Sampling rate and the target sampling rate for resampling the downloaded dataset (Note: If you change this you might need to redownload the dataset !!)
# Note: If you add new datasets, please make sure that the dataset sampling rate and this parameter are matching, otherwise resample your audios
SAMPLE_RATE = 32000

# Max audio length in seconds to be used in training (every audio bigger than it will be ignored)
MAX_AUDIO_LEN_IN_SECONDS = 10

### Download VCTK dataset
VCTK_DOWNLOAD_PATH = os.path.join(CURRENT_PATH, "commonvoice-to-vctk")
# Define the number of threads used during the audio resampling
NUM_RESAMPLE_THREADS = 10

# Dataset configuration

In [3]:
# Check if VCTK dataset is not already downloaded, if not download it
# if not os.path.exists(VCTK_DOWNLOAD_PATH):
#     print(">>> Downloading VCTK dataset:")
#     download_vctk(VCTK_DOWNLOAD_PATH, True)
    # resample_files(VCTK_DOWNLOAD_PATH, SAMPLE_RATE, file_ext="flac", n_jobs=NUM_RESAMPLE_THREADS)

# init configs
vctk_config = BaseDatasetConfig(
    formatter="vctk_32k",
    dataset_name="vctk",
    meta_file_train="",
    meta_file_val="",
    path=VCTK_DOWNLOAD_PATH,
    language="th",
    ignored_speakers=[
        "cv017", # Female Teenager
        "cv048", # Female Teenager
        "cv039", # Female Adult
        "cv052", # Female Adult
        "cv069", # Male Teenager
        "cv054", # Male Teenager
        "cv049", # Male Adult
        "cv026", # Male Adult
    ], # For testing set
)

# Add here all datasets configs, in our case we just want to train with the VCTK dataset then we need to add just VCTK. Note: If you want to add new datasets, just add them here and it will automatically compute the speaker embeddings (d-vectors) for this new dataset :)
DATASETS_CONFIG_LIST = [vctk_config]

# Extract speaker embeddings

In [4]:
# from tqdm import tqdm

# from TTS.config import load_config
# from TTS.config.shared_configs import BaseDatasetConfig
# from TTS.tts.datasets import load_tts_samples
# from TTS.tts.utils.managers import save_file
# from TTS.tts.utils.speakers import SpeakerManager
# from TTS.tts.datasets.formatters import vctk
# from functools import partial

# def compute_embeddings(
#     model_path,
#     config_path,
#     output_path,
#     formatter_name=None,
#     dataset_name=None,
#     dataset_path=None,
#     meta_file_train=None,
#     meta_file_val=None,
# ):
#     use_cuda = torch.cuda.is_available()

#     c_dataset = BaseDatasetConfig()
#     c_dataset.formatter = formatter_name
#     c_dataset.dataset_name = dataset_name
#     c_dataset.path = dataset_path
#     if meta_file_train is not None:
#         c_dataset.meta_file_train = meta_file_train
#     if meta_file_val is not None:
#         c_dataset.meta_file_val = meta_file_val
#     meta_data_train, meta_data_eval = load_tts_samples(c_dataset, eval_split=True, formatter=custom_vctk)
    
#     samples = meta_data_train + meta_data_eval

#     encoder_manager = SpeakerManager(
#         encoder_model_path=model_path,
#         encoder_config_path=config_path,
#         d_vectors_file_path=None,
#         use_cuda=use_cuda,
#     )

#     class_name_key = encoder_manager.encoder_config.class_name_key

#     # compute speaker embeddings
#     speaker_mapping = {}

#     for fields in tqdm(samples):
#         class_name = fields[class_name_key]
#         audio_file = fields["audio_file"]
#         embedding_key = fields["audio_unique_name"]

#         # Only update the speaker name when the embedding is already in the old file.
#         if embedding_key in speaker_mapping:
#             speaker_mapping[embedding_key]["name"] = class_name
#             continue

#         embedd = encoder_manager.compute_embedding_from_clip(audio_file)

#         # create speaker_mapping if target dataset is defined
#         speaker_mapping[embedding_key] = {}
#         speaker_mapping[embedding_key]["name"] = class_name
#         speaker_mapping[embedding_key]["embedding"] = embedd

#     if speaker_mapping:
#         # save speaker_mapping if target dataset is defined
#         if os.path.isdir(output_path):
#             mapping_file_path = os.path.join(output_path, "speakers.pth")
#         else:
#             mapping_file_path = output_path

#         if os.path.dirname(mapping_file_path) != "":
#             os.makedirs(os.path.dirname(mapping_file_path), exist_ok=True)

#         save_file(speaker_mapping, mapping_file_path)
#         print("Speaker embeddings saved at:", mapping_file_path)

In [5]:
SPEAKER_ENCODER_CHECKPOINT_PATH = (
    "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/model_se.pth.tar"
)
SPEAKER_ENCODER_CONFIG_PATH = "https://github.com/coqui-ai/TTS/releases/download/speaker_encoder_model/config_se.json"

D_VECTOR_FILES = []  # List of speaker embeddings/d-vectors to be used during the training

vctk_32k = partial(
    vctk,
    wavs_path="wav32_silence_trimmed",
)

# Iterates all the dataset configs checking if the speakers embeddings are already computated, if not compute it
for dataset_conf in DATASETS_CONFIG_LIST:
    # Check if the embeddings weren't already computed, if not compute it
    embeddings_file = os.path.join(vctk_config.path, "speakers.pth")
    if not os.path.isfile(embeddings_file):
        print(f">>> Computing the speaker embeddings for the {vctk_config.dataset_name} dataset")
        compute_embeddings(
            SPEAKER_ENCODER_CHECKPOINT_PATH,
            SPEAKER_ENCODER_CONFIG_PATH,
            embeddings_file,
            formatter_name=vctk_config.formatter,
            formatter=vctk_32k if vctk_config.formatter == "vctk_32k" else None,
            dataset_name=vctk_config.dataset_name,
            dataset_path=vctk_config.path,
            meta_file_train=vctk_config.meta_file_train,
            meta_file_val=vctk_config.meta_file_val,
        )

        # meta_data_train, meta_data_eval = load_tts_samples(vctk_config, eval_split=True, formatter=custom_vctk)

        # samples = meta_data_train + meta_data_eval

        # encoder_manager = SpeakerManager(
        #     encoder_model_path=SPEAKER_ENCODER_CHECKPOINT_PATH,
        #     encoder_config_path=SPEAKER_ENCODER_CONFIG_PATH,
        #     d_vectors_file_path=None,
        #     use_cuda=torch.cuda.is_available(),
        # )

        # class_name_key = encoder_manager.encoder_config.class_name_key

        # # compute speaker embeddings
        # speaker_mapping = {}

        # for fields in tqdm(samples):
        #     class_name = fields[class_name_key]
        #     audio_file = fields["audio_file"]
        #     embedding_key = fields["audio_unique_name"]

        #     # Only update the speaker name when the embedding is already in the old file.
        #     if embedding_key in speaker_mapping:
        #         speaker_mapping[embedding_key]["name"] = class_name
        #         continue

        #     embedd = encoder_manager.compute_embedding_from_clip(audio_file)

        #     # create speaker_mapping if target dataset is defined
        #     speaker_mapping[embedding_key] = {}
        #     speaker_mapping[embedding_key]["name"] = class_name
        #     speaker_mapping[embedding_key]["embedding"] = embedd
        
        # if speaker_mapping:
        #     # save speaker_mapping if target dataset is defined
        #     if os.path.isdir(embeddings_file):
        #         mapping_file_path = os.path.join(embeddings_file, "speakers.pth")
        #     else:
        #         mapping_file_path = embeddings_file

        #     if os.path.dirname(mapping_file_path) != "":
        #         os.makedirs(os.path.dirname(mapping_file_path), exist_ok=True)

        #     save_file(speaker_mapping, mapping_file_path)
        #     print("Speaker embeddings saved at:", mapping_file_path)

    D_VECTOR_FILES.append(embeddings_file)

# Audio config used in training.

In [6]:
audio_config = VitsAudioConfig(
    sample_rate=SAMPLE_RATE,
    hop_length=256,
    win_length=1024,
    fft_size=1024,
    mel_fmin=0.0,
    mel_fmax=None,
    num_mels=80,
)

# Model configuration

In [7]:
# Init VITSArgs setting the arguments that are needed for the KhongKhunTTS model
model_args = VitsArgs(
    d_vector_file=D_VECTOR_FILES,
    use_d_vector_file=True,
    d_vector_dim=512,
    num_layers_text_encoder=10,
    speaker_encoder_model_path=SPEAKER_ENCODER_CHECKPOINT_PATH,
    speaker_encoder_config_path=SPEAKER_ENCODER_CONFIG_PATH,
    resblock_type_decoder="2",  # In the YourTTS paper, trained using ResNet blocks type 2, if you like you can use the ResNet blocks type 1 like the VITS model
    # Useful parameters to enable the Speaker Consistency Loss (SCL) described in the paper
    # use_speaker_encoder_as_loss=True,
    # Useful parameters to enable multilingual training
    # use_language_embedding=True,
    # embedded_language_dim=4,
)

In [8]:
# General training config, here you can change the batch size and others useful parameters
config = VitsConfig(
    output_path=OUT_PATH,
    model_args=model_args,
    run_name=RUN_NAME,
    project_name="KhongKhunTTS",
    run_description="""
            - KhongKhunTTS trained using CommonVoiceTH (VCTK structure)
        """,
    dashboard_logger="tensorboard",
    logger_uri=None,
    audio=audio_config,
    batch_size=BATCH_SIZE,
    batch_group_size=48,
    eval_batch_size=BATCH_SIZE,
    num_loader_workers=8,
    eval_split_max_size=256,
    print_step=50,
    plot_step=100,
    log_model_step=1000,
    save_step=5000,
    save_n_checkpoints=2,
    save_checkpoints=True,
    target_loss="loss_1",
    print_eval=False,
    use_phonemes=False,
    phonemizer="espeak",
    phoneme_language="en",
    compute_input_seq_cache=True,
    add_blank=True,
    text_cleaner="multilingual_cleaners",
    characters=CharactersConfig(
        characters_class="TTS.tts.models.vits.VitsCharacters",
        pad="_",
        eos="&",
        bos="*",
        blank=None,
        characters="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\u00af\u00b7\u00df\u00e0\u00e1\u00e2\u00e3\u00e4\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f9\u00fa\u00fb\u00fc\u00ff\u0101\u0105\u0107\u0113\u0119\u011b\u012b\u0131\u0142\u0144\u014d\u0151\u0153\u015b\u016b\u0171\u017a\u017c\u01ce\u01d0\u01d2\u01d4\u0430\u0431\u0432\u0433\u0434\u0435\u0436\u0437\u0438\u0439\u043a\u043b\u043c\u043d\u043e\u043f\u0440\u0441\u0442\u0443\u0444\u0445\u0446\u0447\u0448\u0449\u044a\u044b\u044c\u044d\u044e\u044f\u0451\u0454\u0456\u0457\u0491\u2013!\"'(),-.:;?|~ \u0e01\u0e02\u0e04\u0e06\u0e07\u0e08\u0e09\u0e0a\u0e0b\u0e0c\u0e0d\u0e0e\u0e0f\u0e10\u0e11\u0e12\u0e13\u0e14\u0e15\u0e16\u0e17\u0e18\u0e19\u0e1a\u0e1b\u0e1c\u0e1d\u0e1e\u0e1f\u0e20\u0e21\u0e22\u0e23\u0e24\u0e25\u0e27\u0e28\u0e29\u0e2a\u0e2b\u0e2c\u0e2d\u0e2e\u0e2f\u0e30\u0e31\u0e32\u0e33\u0e34\u0e35\u0e36\u0e37\u0e38\u0e39\u0e40\u0e41\u0e42\u0e43\u0e44\u0e45\u0e46\u0e47\u0e48\u0e49\u0e4a\u0e4b\u0e4c\u0e4d\u2014\u2018\u2019\u201c\u201d",
        punctuations="!\"'(),-.:;?|~ ",
        phonemes="",
        is_unique=True,
        is_sorted=True,
    ),
    phoneme_cache_path=None,
    precompute_num_workers=12,
    start_by_longest=True,
    datasets=DATASETS_CONFIG_LIST,
    cudnn_benchmark=False,
    max_audio_len=SAMPLE_RATE * MAX_AUDIO_LEN_IN_SECONDS,
    mixed_precision=False,
    test_sentences=[
        [
            "ฉันใช้เวลานานมากในการพัฒนาเสียง และตอนนี้ฉันมีมันแล้ว ฉันจะไม่เงียบอีกต่อไป",
            "VCTK_cv005",
            None,
            "th",
        ],
        [
            "ยักษ์ใหญ่ไล่ยักษ์เล็ก ยักษ์เล็กไล่ยักษ์ใหญ่",
            "VCTK_cv068",
            None,
            "th",
        ],
        [
            "ยายกินลำไยน้ำลายยายไหลย้อย",
            "VCTK_cv057",
            None,
            "th",
        ],
        [
            "ชามเขียวคว่ำเช้า ชามขามคว่ำค่ำ",
            "VCTK_cv103",
            None,
            "th",
        ],
        [
            "หมอนลอยน้ำมาว่ายน้ำไปถอยหมอน",
            "VCTK_cv133",
            None,
            "th",
        ],
        [
            "เช้าฟาดผัดฟัก เย็นฟาดฟักผัด",
            "VCTK_cv128",
            None,
            "t",
        ],
    ],
    # # Enable the weighted sampler
    # use_weighted_sampler=True,
    # # Ensures that all speakers are seen in the training batch equally no matter how many samples each speaker has
    # weighted_sampler_attrs={"speaker_name": 1.0},
    # weighted_sampler_multipliers={},
    # weighted_sampler_multipliers={"Makeitnotblack": None},

    # It defines the Speaker Consistency Loss (SCL) α to 9 like the paper
    speaker_encoder_loss_alpha=9.0,
)

# config.weighted_sampler_multipliers = {}

# print(config.weighted_sampler_multipliers)

# Training

In [9]:
# Load all the datasets samples and split traning and evaluation sets
train_samples, eval_samples = load_tts_samples(
    config.datasets,
    eval_split=True,
    eval_split_max_size=config.eval_split_max_size,
    eval_split_size=config.eval_split_size,
    formatter=vctk_32k if vctk_config.formatter == "vctk_32k" else None,
)

 | > Found 91809 files in /home/titor/Capstone/dubbing-ai/KhongKhunTTS/commonvoice-to-vctk


In [10]:
# Init the model
model = Vits.init_from_config(config)

 > Setting up Audio Processor...
 | > sample_rate:32000
 | > resample:False
 | > num_mels:80
 | > log_func:np.log10
 | > min_level_db:0
 | > frame_shift_ms:None
 | > frame_length_ms:None
 | > ref_level_db:None
 | > fft_size:1024
 | > power:None
 | > preemphasis:0.0
 | > griffin_lim_iters:None
 | > signal_norm:None
 | > symmetric_norm:None
 | > mel_fmin:0
 | > mel_fmax:None
 | > pitch_fmin:None
 | > pitch_fmax:None
 | > spec_gain:20.0
 | > stft_pad_mode:reflect
 | > max_norm:1.0
 | > clip_norm:True
 | > do_trim_silence:False
 | > trim_db:60
 | > do_sound_norm:False
 | > do_amp_to_db_linear:True
 | > do_amp_to_db_mel:True
 | > do_rms_norm:False
 | > db_level:None
 | > stats_path:None
 | > base:10
 | > hop_length:256
 | > win_length:1024
 > Model fully restored. 
 > Setting up Audio Processor...
 | > sample_rate:16000
 | > resample:False
 | > num_mels:64
 | > log_func:np.log10
 | > min_level_db:-100
 | > frame_shift_ms:None
 | > frame_length_ms:None
 | > ref_level_db:20
 | > fft_size:512


In [11]:
# Init the trainer and 🚀
trainer = Trainer(
    TrainerArgs(restore_path=RESTORE_PATH, skip_train_epoch=SKIP_TRAIN_EPOCH),
    config,
    output_path=OUT_PATH,
    model=model,
    train_samples=train_samples,
    eval_samples=eval_samples,
)

 > Training Environment:
 | > Backend: Torch
 | > Mixed precision: False
 | > Precision: float32
 | > Current device: 0
 | > Num. of GPUs: 1
 | > Num. of CPUs: 6
 | > Num. of Torch Threads: 24
 | > Torch seed: 54321
 | > Torch CUDNN: True
 | > Torch CUDNN deterministic: False
 | > Torch CUDNN benchmark: False
 | > Torch TF32 MatMul: False
 > Start Tensorboard: tensorboard --logdir=/home/titor/Capstone/dubbing-ai/KhongKhunTTS/KhongKhunTTS-TH-VCTK-January-30-2025_11+17AM-31456ff

 > Model has 86580844 parameters


 > `speakers.pth` is saved to /home/titor/Capstone/dubbing-ai/KhongKhunTTS/KhongKhunTTS-TH-VCTK-January-30-2025_11+17AM-31456ff/speakers.pth.
 > `speakers_file` is updated in the config.json.


In [None]:
trainer.fit()


[4m[1m > EPOCH: 0/1000[0m
 --> /home/titor/Capstone/dubbing-ai/KhongKhunTTS/KhongKhunTTS-TH-VCTK-January-30-2025_11+17AM-31456ff




> DataLoader initialization
| > Tokenizer:
	| > add_blank: True
	| > use_eos_bos: False
	| > use_phonemes: False
| > Number of instances : 91553



[1m > TRAINING (2025-01-30 11:17:47) [0m


 | > Preprocessing samples
 | > Max text length: 165
 | > Min text length: 2
 | > Avg text length: 30.981311371555275
 | 
 | > Max audio length: 184127.5
 | > Min audio length: 6368.0
 | > Avg audio length: 39316.12723777484
 | > Num. instances discarded samples: 0
 | > Batch group size: 1536.


Note: you can still call torch.view_as_real on the complex output to recover the old return format. (Triggered internally at /pytorch/aten/src/ATen/native/SpectralOps.cpp:873.)
  return _VF.stft(  # type: ignore[attr-defined]
  with autocast(enabled=False):  # use float32 for the criterion
  with autocast(enabled=False):
  with autocast(enabled=False):  # use float32 for the criterion

[1m   --> TIME: 2025-01-30 11:17:52 -- STEP: 0/2862 -- GLOBAL_STEP: 0[0m
     | > loss_disc: 5.927588939666748  (5.927588939666748)
     | > loss_disc_real_0: 0.9971902966499329  (0.9971902966499329)
     | > loss_disc_real_1: 1.0175321102142334  (1.0175321102142334)
     | > loss_disc_real_2: 0.9739839434623718  (0.9739839434623718)
     | > loss_disc_real_3: 0.9607096910476685  (0.9607096910476685)
     | > loss_disc_real_4: 0.9741401672363281  (0.9741401672363281)
     | > loss_disc_real_5: 1.0032157897949219  (1.0032157897949219)
     | > loss_0: 5.927588939666748  (5.927588939666748)
     | > grad



> DataLoader initialization
| > Tokenizer:
	| > add_blank: True
	| > use_eos_bos: False
	| > use_phonemes: False
| > Number of instances : 256
 | > Preprocessing samples
 | > Max text length: 101
 | > Min text length: 5
 | > Avg text length: 29.74609375
 | 
 | > Max audio length: 100689.0
 | > Min audio length: 8546.5
 | > Avg audio length: 37965.189453125
 | > Num. instances discarded samples: 0
 | > Batch group size: 0.
 | > Synthesizing test sentences.


  test_figures["{}-alignment".format(idx)] = plot_alignment(alignment.T, output_fig=False)

  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.14063266345432826 [0m(+0)
     | > avg_loss_disc: 2.5203724248068675 [0m(+0)
     | > avg_loss_disc_real_0: 0.05398095026612282 [0m(+0)
     | > avg_loss_disc_real_1: 0.21231995522975922 [0m(+0)
     | > avg_loss_disc_real_2: 0.12579065241983958 [0m(+0)
     | > avg_loss_disc_real_3: 0.180086116705622 [0m(+0)
     | > avg_loss_disc_real_4: 0.1304599302155631 [0m(+0)
     | > avg_loss_disc_real_5: 0.12704946526459285 [0m(+0)
     | > avg_loss_0: 2.5203724248068675 [0m(+0)
     | > avg_loss_gen: 1.9085187230791365 [0m(+0)
     | > avg_loss_kl: 2.8735078402927945 [0m(+0)
     | > avg_loss_feat: 5.431185517992292 [0m(+0)
     | > avg_loss_mel: 32.36637142726353 [0m(+0)
     | > avg_loss_duration: 1.6557222264153617 [0m(+0)
     | > avg_loss_1: 44.23530469621931 [0m(+0)

 > BEST MODEL : /home/titor/Capstone/dubbing-ai/KhongKhun

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.13597641672406877 [0m(-0.0046562467302594945)
     | > avg_loss_disc:[92m 2.2806153297424316 [0m(-0.23975709506443588)
     | > avg_loss_disc_real_0:[91m 0.10020471896444048 [0m(+0.046223768698317666)
     | > avg_loss_disc_real_1:[92m 0.15775044475282943 [0m(-0.05456951047692979)
     | > avg_loss_disc_real_2:[91m 0.21550200028078897 [0m(+0.08971134786094939)
     | > avg_loss_disc_real_3:[92m 0.16400142652647837 [0m(-0.01608469017914363)
     | > avg_loss_disc_real_4:[91m 0.19272160742964065 [0m(+0.06226167721407755)
     | > avg_loss_disc_real_5:[91m 0.16686085292271205 [0m(+0.0398113876581192)
     | > avg_loss_0:[92m 2.2806153297424316 [0m(-0.23975709506443588)
     | > avg_loss_gen:[91m 2.4542465209960938 [0m(+0.5457277979169572)
     | > avg_loss_kl:[91m 2.953603914805821 [0m(+0.08009607451302658)
     | > avg_loss_feat:[91m 6.087389196668353 [0m(+0.6562036786760608)
     | > avg_loss_mel:[9

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.1269850049700056 [0m(-0.008991411754063178)
     | > avg_loss_disc:[92m 2.066134912627084 [0m(-0.2144804171153476)
     | > avg_loss_disc_real_0:[92m 0.07449062487908772 [0m(-0.02571409408535276)
     | > avg_loss_disc_real_1:[91m 0.21596457915646688 [0m(+0.05821413440363746)
     | > avg_loss_disc_real_2:[92m 0.19686319359711238 [0m(-0.018638806683676595)
     | > avg_loss_disc_real_3:[91m 0.17774218107972825 [0m(+0.013740754553249884)
     | > avg_loss_disc_real_4:[92m 0.12151243963411876 [0m(-0.07120916779552189)
     | > avg_loss_disc_real_5:[91m 0.18648438368524825 [0m(+0.0196235307625362)
     | > avg_loss_0:[92m 2.066134912627084 [0m(-0.2144804171153476)
     | > avg_loss_gen:[91m 2.930886915751866 [0m(+0.47664039475577225)
     | > avg_loss_kl:[92m 2.5376313073294505 [0m(-0.41597260747637055)
     | > avg_loss_feat:[91m 6.852057184491839 [0m(+0.7646679878234863)
     | > avg_loss_mel:[92m 2

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.12765462057931082 [0m(+0.0006696156093052297)
     | > avg_loss_disc:[91m 2.7575593335287913 [0m(+0.6914244209017073)
     | > avg_loss_disc_real_0:[91m 0.08497355771916253 [0m(+0.010482932840074802)
     | > avg_loss_disc_real_1:[91m 0.24488507211208344 [0m(+0.02892049295561655)
     | > avg_loss_disc_real_2:[91m 0.2377058948789324 [0m(+0.04084270128182002)
     | > avg_loss_disc_real_3:[91m 0.23634248120444162 [0m(+0.05860030012471337)
     | > avg_loss_disc_real_4:[91m 0.12907963458980834 [0m(+0.007567194955689582)
     | > avg_loss_disc_real_5:[92m 0.12729396032435553 [0m(-0.059190423360892724)
     | > avg_loss_0:[91m 2.7575593335287913 [0m(+0.6914244209017073)
     | > avg_loss_gen:[92m 1.925883378301348 [0m(-1.005003537450518)
     | > avg_loss_kl:[91m 3.0355652400425504 [0m(+0.4979339327130998)
     | > avg_loss_feat:[92m 4.992782388414655 [0m(-1.8592747960771838)
     | > avg_loss_mel:[92m

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.14211375372750418 [0m(+0.01445913314819336)
     | > avg_loss_disc:[92m 2.3237832614353726 [0m(-0.43377607209341873)
     | > avg_loss_disc_real_0:[91m 0.10228370555809566 [0m(+0.01731014783893313)
     | > avg_loss_disc_real_1:[92m 0.17972922325134277 [0m(-0.06515584886074066)
     | > avg_loss_disc_real_2:[91m 0.2968323401042393 [0m(+0.05912644522530691)
     | > avg_loss_disc_real_3:[91m 0.29374657784189495 [0m(+0.05740409663745333)
     | > avg_loss_disc_real_4:[91m 0.16748542019299098 [0m(+0.03840578560318264)
     | > avg_loss_disc_real_5:[91m 0.25107116784368244 [0m(+0.12377720751932692)
     | > avg_loss_0:[92m 2.3237832614353726 [0m(-0.43377607209341873)
     | > avg_loss_gen:[91m 2.78622944014413 [0m(+0.8603460618427821)
     | > avg_loss_kl:[92m 2.7980588504246304 [0m(-0.23750638961791992)
     | > avg_loss_feat:[91m 6.148424829755511 [0m(+1.1556424413408557)
     | > avg_loss_mel:[91m 2

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.13271611077444895 [0m(-0.00939764295305523)
     | > avg_loss_disc:[92m 2.3190130506243025 [0m(-0.004770210811070097)
     | > avg_loss_disc_real_0:[91m 0.10868050903081894 [0m(+0.006396803472723284)
     | > avg_loss_disc_real_1:[91m 0.20040626611028398 [0m(+0.020677042858941203)
     | > avg_loss_disc_real_2:[92m 0.20123173296451569 [0m(-0.09560060713972363)
     | > avg_loss_disc_real_3:[92m 0.2333084855760847 [0m(-0.06043809226581026)
     | > avg_loss_disc_real_4:[92m 0.13019210738795145 [0m(-0.03729331280503953)
     | > avg_loss_disc_real_5:[92m 0.24909429677895137 [0m(-0.0019768710647310728)
     | > avg_loss_0:[92m 2.3190130506243025 [0m(-0.004770210811070097)
     | > avg_loss_gen:[92m 2.519167866025652 [0m(-0.2670615741184781)
     | > avg_loss_kl:[91m 2.9203098160879954 [0m(+0.12225096566336502)
     | > avg_loss_feat:[91m 6.210210595812116 [0m(+0.06178576605660524)
     | > avg_loss_mel

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.13492001805986678 [0m(+0.002203907285417833)
     | > avg_loss_disc:[91m 2.487563439777919 [0m(+0.16855038915361664)
     | > avg_loss_disc_real_0:[91m 0.17330550934587205 [0m(+0.06462500031505311)
     | > avg_loss_disc_real_1:[91m 0.2526812106370926 [0m(+0.052274944526808614)
     | > avg_loss_disc_real_2:[91m 0.2416678283895765 [0m(+0.040436095425060825)
     | > avg_loss_disc_real_3:[91m 0.2367860291685377 [0m(+0.003477543592453003)
     | > avg_loss_disc_real_4:[91m 0.21582921275070735 [0m(+0.0856371053627559)
     | > avg_loss_disc_real_5:[91m 0.2830209221158709 [0m(+0.03392662533691951)
     | > avg_loss_0:[91m 2.487563439777919 [0m(+0.16855038915361664)
     | > avg_loss_gen:[91m 3.032541717801775 [0m(+0.513373851776123)
     | > avg_loss_kl:[92m 2.849803958620344 [0m(-0.07050585746765137)
     | > avg_loss_feat:[91m 6.461684090750558 [0m(+0.25147349493844207)
     | > avg_loss_mel:[91m 24.

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.1334101813180106 [0m(-0.0015098367418561742)
     | > avg_loss_disc:[91m 2.614149808883667 [0m(+0.12658636910574783)
     | > avg_loss_disc_real_0:[92m 0.12038307530539376 [0m(-0.05292243404047829)
     | > avg_loss_disc_real_1:[91m 0.3518568277359009 [0m(+0.09917561709880829)
     | > avg_loss_disc_real_2:[91m 0.2970128783157894 [0m(+0.055345049926212864)
     | > avg_loss_disc_real_3:[91m 0.2854977122374943 [0m(+0.048711683068956624)
     | > avg_loss_disc_real_4:[92m 0.18805456374372756 [0m(-0.02777464900697979)
     | > avg_loss_disc_real_5:[92m 0.23983200320175715 [0m(-0.04318891891411372)
     | > avg_loss_0:[91m 2.614149808883667 [0m(+0.12658636910574783)
     | > avg_loss_gen:[92m 2.7348886217389787 [0m(-0.29765309606279633)
     | > avg_loss_kl:[91m 3.0390968322753906 [0m(+0.18929287365504655)
     | > avg_loss_feat:[92m 6.0538284437997 [0m(-0.40785564695085785)
     | > avg_loss_mel:[92m 

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.13505244255065918 [0m(+0.001642261232648573)
     | > avg_loss_disc:[92m 2.5105133397238597 [0m(-0.10363646915980729)
     | > avg_loss_disc_real_0:[91m 0.18020441489560263 [0m(+0.05982133959020887)
     | > avg_loss_disc_real_1:[92m 0.18549705828939164 [0m(-0.16635976944650924)
     | > avg_loss_disc_real_2:[91m 0.29917639068194796 [0m(+0.002163512366158582)
     | > avg_loss_disc_real_3:[91m 0.3377122793878828 [0m(+0.05221456715038847)
     | > avg_loss_disc_real_4:[92m 0.18504830130508967 [0m(-0.0030062624386378856)
     | > avg_loss_disc_real_5:[91m 0.2936673249517168 [0m(+0.05383532174995967)
     | > avg_loss_0:[92m 2.5105133397238597 [0m(-0.10363646915980729)
     | > avg_loss_gen:[91m 3.1360113280160085 [0m(+0.40112270627702973)
     | > avg_loss_kl:[92m 2.8917325905391147 [0m(-0.14736424173627594)
     | > avg_loss_feat:[91m 6.517025062016079 [0m(+0.4631966182163785)
     | > avg_loss_mel:

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.13148484911237443 [0m(-0.0035675934382847496)
     | > avg_loss_disc:[91m 2.6243157046181813 [0m(+0.1138023648943216)
     | > avg_loss_disc_real_0:[92m 0.15898231949125016 [0m(-0.021222095404352465)
     | > avg_loss_disc_real_1:[91m 0.19625979023320334 [0m(+0.010762731943811693)
     | > avg_loss_disc_real_2:[92m 0.2558960701738085 [0m(-0.04328032050813946)
     | > avg_loss_disc_real_3:[92m 0.2643961161375046 [0m(-0.07331616325037821)
     | > avg_loss_disc_real_4:[91m 0.22431160509586334 [0m(+0.03926330379077367)
     | > avg_loss_disc_real_5:[92m 0.2822945841721126 [0m(-0.011372740779604207)
     | > avg_loss_0:[91m 2.6243157046181813 [0m(+0.1138023648943216)
     | > avg_loss_gen:[92m 2.373844793864659 [0m(-0.7621665341513495)
     | > avg_loss_kl:[91m 3.1999363899230957 [0m(+0.308203799383981)
     | > avg_loss_feat:[92m 5.91817079271589 [0m(-0.5988542693001886)
     | > avg_loss_mel:[91m 23

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.13073689596993582 [0m(-0.0007479531424386121)
     | > avg_loss_disc:[92m 2.4665751116616383 [0m(-0.15774059295654297)
     | > avg_loss_disc_real_0:[91m 0.1748517049210412 [0m(+0.01586938542979105)
     | > avg_loss_disc_real_1:[91m 0.2722202262708119 [0m(+0.07596043603760855)
     | > avg_loss_disc_real_2:[92m 0.23363545962742396 [0m(-0.022260610546384535)
     | > avg_loss_disc_real_3:[92m 0.22548708532537734 [0m(-0.03890903081212724)
     | > avg_loss_disc_real_4:[91m 0.23143720413957322 [0m(+0.007125599043709879)
     | > avg_loss_disc_real_5:[92m 0.24404323313917434 [0m(-0.03825135103293828)
     | > avg_loss_0:[92m 2.4665751116616383 [0m(-0.15774059295654297)
     | > avg_loss_gen:[91m 2.6874753066471646 [0m(+0.31363051278250564)
     | > avg_loss_kl:[91m 3.269850288118635 [0m(+0.06991389819553939)
     | > avg_loss_feat:[91m 6.668179375784738 [0m(+0.7500085830688477)
     | > avg_loss_mel:[

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.13398173877171107 [0m(+0.003244842801775255)
     | > avg_loss_disc:[91m 2.888767753328596 [0m(+0.4221926416669577)
     | > avg_loss_disc_real_0:[92m 0.16980585881641932 [0m(-0.005045846104621887)
     | > avg_loss_disc_real_1:[92m 0.2509253919124603 [0m(-0.021294834358351555)
     | > avg_loss_disc_real_2:[91m 0.31676887614386423 [0m(+0.08313341651644027)
     | > avg_loss_disc_real_3:[91m 0.31438539709363666 [0m(+0.08889831176825932)
     | > avg_loss_disc_real_4:[92m 0.2205690039055688 [0m(-0.010868200234004421)
     | > avg_loss_disc_real_5:[91m 0.293995018516268 [0m(+0.04995178537709369)
     | > avg_loss_0:[91m 2.888767753328596 [0m(+0.4221926416669577)
     | > avg_loss_gen:[92m 2.6288486889430454 [0m(-0.058626617704119255)
     | > avg_loss_kl:[92m 3.0715446131569997 [0m(-0.19830567496163543)
     | > avg_loss_feat:[92m 6.080275739942278 [0m(-0.5879036358424594)
     | > avg_loss_mel:[92m 

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.1271869114467076 [0m(-0.006794827325003472)
     | > avg_loss_disc:[92m 2.641646044594901 [0m(-0.2471217087336952)
     | > avg_loss_disc_real_0:[91m 0.24406007358006068 [0m(+0.07425421476364136)
     | > avg_loss_disc_real_1:[92m 0.18247543701103755 [0m(-0.06844995490142278)
     | > avg_loss_disc_real_2:[92m 0.31206064564841135 [0m(-0.004708230495452881)
     | > avg_loss_disc_real_3:[92m 0.26465937920979093 [0m(-0.04972601788384573)
     | > avg_loss_disc_real_4:[91m 0.2911596042769296 [0m(+0.0705906003713608)
     | > avg_loss_disc_real_5:[92m 0.24341799531664168 [0m(-0.05057702319962634)
     | > avg_loss_0:[92m 2.641646044594901 [0m(-0.2471217087336952)
     | > avg_loss_gen:[92m 2.617771795817784 [0m(-0.011076893125261389)
     | > avg_loss_kl:[92m 3.041128192629133 [0m(-0.030416420527866528)
     | > avg_loss_feat:[91m 6.346587112971714 [0m(+0.26631137302943575)
     | > avg_loss_mel:[92m 2

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.1302635669708252 [0m(+0.003076655524117594)
     | > avg_loss_disc:[91m 2.692521095275879 [0m(+0.050875050680978084)
     | > avg_loss_disc_real_0:[92m 0.15773083695343562 [0m(-0.08632923662662506)
     | > avg_loss_disc_real_1:[91m 0.2558035616363798 [0m(+0.07332812462534224)
     | > avg_loss_disc_real_2:[92m 0.30806434580257963 [0m(-0.003996299845831719)
     | > avg_loss_disc_real_3:[91m 0.29887473583221436 [0m(+0.03421535662242342)
     | > avg_loss_disc_real_4:[92m 0.2786478336368288 [0m(-0.012511770640100783)
     | > avg_loss_disc_real_5:[92m 0.2403206080198288 [0m(-0.003097387296812887)
     | > avg_loss_0:[91m 2.692521095275879 [0m(+0.050875050680978084)
     | > avg_loss_gen:[92m 2.535034213747297 [0m(-0.08273758207048676)
     | > avg_loss_kl:[91m 3.045203379222325 [0m(+0.004075186593191837)
     | > avg_loss_feat:[92m 6.124418531145368 [0m(-0.22216858182634613)
     | > avg_loss_mel:[9

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.14276323999677384 [0m(+0.012499673025948649)
     | > avg_loss_disc:[92m 2.5061142785208568 [0m(-0.18640681675502213)
     | > avg_loss_disc_real_0:[92m 0.1442760910306658 [0m(-0.013454745922769823)
     | > avg_loss_disc_real_1:[92m 0.23244055041245051 [0m(-0.02336301122392928)
     | > avg_loss_disc_real_2:[92m 0.23045780403273447 [0m(-0.07760654176984516)
     | > avg_loss_disc_real_3:[92m 0.2875706468309675 [0m(-0.011304089001246853)
     | > avg_loss_disc_real_4:[92m 0.15542784546102797 [0m(-0.12321998817580085)
     | > avg_loss_disc_real_5:[91m 0.2429791178022112 [0m(+0.002658509782382412)
     | > avg_loss_0:[92m 2.5061142785208568 [0m(-0.18640681675502213)
     | > avg_loss_gen:[92m 2.3259944575173512 [0m(-0.20903975622994597)
     | > avg_loss_kl:[91m 3.2179275240216936 [0m(+0.1727241447993686)
     | > avg_loss_feat:[91m 6.312818390982492 [0m(+0.1883998598371237)
     | > avg_loss_mel:[9

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.1358778817313058 [0m(-0.0068853582654680445)
     | > avg_loss_disc:[91m 2.652417080742972 [0m(+0.14630280222211534)
     | > avg_loss_disc_real_0:[91m 0.2853326222726277 [0m(+0.14105653124196188)
     | > avg_loss_disc_real_1:[91m 0.25365609782082693 [0m(+0.021215547408376417)
     | > avg_loss_disc_real_2:[91m 0.3265384691102164 [0m(+0.09608066507748192)
     | > avg_loss_disc_real_3:[91m 0.2962547881262643 [0m(+0.008684141295296821)
     | > avg_loss_disc_real_4:[91m 0.27490025971617016 [0m(+0.11947241425514218)
     | > avg_loss_disc_real_5:[92m 0.2381617192711149 [0m(-0.004817398531096306)
     | > avg_loss_0:[91m 2.652417080742972 [0m(+0.14630280222211534)
     | > avg_loss_gen:[91m 2.9727684429713657 [0m(+0.6467739854540144)
     | > avg_loss_kl:[92m 3.1909752573285783 [0m(-0.026952266693115234)
     | > avg_loss_feat:[91m 6.584563119070871 [0m(+0.2717447280883789)
     | > avg_loss_mel:[91m

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.14033034869602748 [0m(+0.00445246696472168)
     | > avg_loss_disc:[92m 2.6419238362993513 [0m(-0.010493244443620764)
     | > avg_loss_disc_real_0:[92m 0.10744277387857437 [0m(-0.1778898483940533)
     | > avg_loss_disc_real_1:[92m 0.2532898506947926 [0m(-0.00036624712603433585)
     | > avg_loss_disc_real_2:[92m 0.28850013869149344 [0m(-0.038038330418722954)
     | > avg_loss_disc_real_3:[92m 0.265205728156226 [0m(-0.031049059970038317)
     | > avg_loss_disc_real_4:[92m 0.2440787489925112 [0m(-0.030821510723658962)
     | > avg_loss_disc_real_5:[91m 0.2747383841446468 [0m(+0.036576664873531894)
     | > avg_loss_0:[92m 2.6419238362993513 [0m(-0.010493244443620764)
     | > avg_loss_gen:[92m 2.397883040564401 [0m(-0.5748854024069647)
     | > avg_loss_kl:[92m 3.0373901980263844 [0m(-0.1535850593021939)
     | > avg_loss_feat:[92m 6.357618944985526 [0m(-0.22694417408534484)
     | > avg_loss_mel:[

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.12812416894095285 [0m(-0.012206179755074625)
     | > avg_loss_disc:[91m 2.7181668281555176 [0m(+0.07624299185616623)
     | > avg_loss_disc_real_0:[91m 0.18584064074925014 [0m(+0.07839786687067576)
     | > avg_loss_disc_real_1:[92m 0.24170053218092238 [0m(-0.011589318513870212)
     | > avg_loss_disc_real_2:[91m 0.3281934176172529 [0m(+0.03969327892575947)
     | > avg_loss_disc_real_3:[91m 0.2835459198270525 [0m(+0.01834019167082651)
     | > avg_loss_disc_real_4:[91m 0.26049610333783285 [0m(+0.016417354345321655)
     | > avg_loss_disc_real_5:[92m 0.27366745684828075 [0m(-0.0010709272963660421)
     | > avg_loss_0:[91m 2.7181668281555176 [0m(+0.07624299185616623)
     | > avg_loss_gen:[91m 2.711683920451573 [0m(+0.3138008798871721)
     | > avg_loss_kl:[91m 3.243825742176601 [0m(+0.20643554415021637)
     | > avg_loss_feat:[91m 6.367231845855713 [0m(+0.009612900870187069)
     | > avg_loss_mel:

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.1764871052333287 [0m(+0.04836293629237584)
     | > avg_loss_disc:[92m 2.612050873892648 [0m(-0.10611595426286957)
     | > avg_loss_disc_real_0:[91m 0.22899827999728067 [0m(+0.04315763924803054)
     | > avg_loss_disc_real_1:[91m 0.34822078687804087 [0m(+0.10652025469711848)
     | > avg_loss_disc_real_2:[92m 0.26411922914641245 [0m(-0.06407418847084045)
     | > avg_loss_disc_real_3:[92m 0.261448472738266 [0m(-0.022097447088786526)
     | > avg_loss_disc_real_4:[91m 0.2855605866227831 [0m(+0.025064483284950256)
     | > avg_loss_disc_real_5:[92m 0.2173307899917875 [0m(-0.056336666856493245)
     | > avg_loss_0:[92m 2.612050873892648 [0m(-0.10611595426286957)
     | > avg_loss_gen:[91m 2.918531928743635 [0m(+0.20684800829206207)
     | > avg_loss_kl:[92m 3.0379106317247664 [0m(-0.20591511045183442)
     | > avg_loss_feat:[91m 6.871746471949986 [0m(+0.5045146260942728)
     | > avg_loss_mel:[92m 21

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.1596684796469552 [0m(-0.01681862558637348)
     | > avg_loss_disc:[91m 2.653525250298636 [0m(+0.04147437640598817)
     | > avg_loss_disc_real_0:[92m 0.11342521437576839 [0m(-0.11557306562151229)
     | > avg_loss_disc_real_1:[92m 0.3177406532423837 [0m(-0.03048013363565716)
     | > avg_loss_disc_real_2:[91m 0.31773024797439575 [0m(+0.0536110188279833)
     | > avg_loss_disc_real_3:[91m 0.2863197369234903 [0m(+0.024871264185224284)
     | > avg_loss_disc_real_4:[92m 0.2187829613685608 [0m(-0.06677762525422232)
     | > avg_loss_disc_real_5:[91m 0.26160457517419544 [0m(+0.04427378518240793)
     | > avg_loss_0:[91m 2.653525250298636 [0m(+0.04147437640598817)
     | > avg_loss_gen:[92m 2.5457182271139964 [0m(-0.37281370162963867)
     | > avg_loss_kl:[91m 3.097487654004778 [0m(+0.059577022280011604)
     | > avg_loss_feat:[92m 6.378389290400913 [0m(-0.49335718154907227)
     | > avg_loss_mel:[92m 20

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.12876370974949428 [0m(-0.030904769897460938)
     | > avg_loss_disc:[91m 2.8251101289476668 [0m(+0.1715848786490306)
     | > avg_loss_disc_real_0:[91m 0.2942438210759844 [0m(+0.18081860670021602)
     | > avg_loss_disc_real_1:[92m 0.2533615401812962 [0m(-0.06437911306108751)
     | > avg_loss_disc_real_2:[91m 0.33386428015572683 [0m(+0.01613403218133108)
     | > avg_loss_disc_real_3:[91m 0.30608077134404865 [0m(+0.019761034420558377)
     | > avg_loss_disc_real_4:[91m 0.28061490825244356 [0m(+0.06183194688388277)
     | > avg_loss_disc_real_5:[92m 0.23422473669052124 [0m(-0.0273798384836742)
     | > avg_loss_0:[91m 2.8251101289476668 [0m(+0.1715848786490306)
     | > avg_loss_gen:[91m 2.923511028289795 [0m(+0.3777928011757985)
     | > avg_loss_kl:[92m 3.096893787384033 [0m(-0.0005938666207447874)
     | > avg_loss_feat:[91m 6.557589530944824 [0m(+0.17920024054391082)
     | > avg_loss_mel:[91m 

In [15]:
# weighted_sampler_multipliers={}
# weighted_sampler_multipliers.get(0, None)

In [None]:
# from dataclasses import dataclass, field
# weighted_sampler_multipliers: dict = field(default_factory=lambda: {})

# print(weighted_sampler_multipliers)

# weighted_sampler_multipliers.get(0, None)

In [None]:
# @dataclass
# class MyClass:
#     weighted_sampler_multipliers: dict = field(default_factory=lambda: {})

# obj = MyClass()
# print(obj.weighted_sampler_multipliers)