In [None]:
# Mount Google Drive at /content/drive; the model checkpoint and dataset paths
# used later in this notebook live under /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
!pip install jiwer



In [None]:
import torch
from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline

MODEL_PATH = "/content/drive/MyDrive/train_model/small/Whisper"

# Prefer the first CUDA GPU with float16 weights; fall back to CPU with float32
# so the notebook still runs on a runtime without a GPU.
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
TORCH_DTYPE = torch.float16 if DEVICE != "cpu" else torch.float32

# Load the model and processor from the saved directory
processor = AutoProcessor.from_pretrained(MODEL_PATH)
# Move to the selected device and cast in one step (float16 on GPU to save memory)
model = AutoModelForSpeechSeq2Seq.from_pretrained(MODEL_PATH).to(device=DEVICE, dtype=TORCH_DTYPE)

# Build the speech-recognition pipeline around the already-loaded model
PIPE = pipeline(
    task="automatic-speech-recognition",
    model=model,
    tokenizer=processor.tokenizer,
    feature_extractor=processor.feature_extractor,
    device=DEVICE,
    torch_dtype=TORCH_DTYPE
)

# Generation options forwarded on every call: transcribe (not translate) Vietnamese audio
PIPE_KWARGS = {"language": "vi", "task": "transcribe"}

Device set to use cuda:0


In [None]:
import os
import torchaudio

# Directory scanned by load_local_dataset for .wav files and their matching .txt transcripts.
DATASET_PATH = "/content/drive/MyDrive/train_model/Data_Goc"

def load_local_dataset(dataset_path, target_sample_rate=16000):
    """Load paired ``<name>.wav`` / ``<name>.txt`` files from a flat directory.

    Every ``.wav`` file directly inside ``dataset_path`` that has a sibling
    ``.txt`` file with the same stem becomes one sample. ``.wav`` files without
    a transcript are skipped.

    Args:
        dataset_path: Directory containing the audio and transcript files.
        target_sample_rate: Sample rate (Hz) every waveform is resampled to.
            The default of 16000 is the rate Whisper's feature extractor is
            documented to expect. Pass ``None`` to keep each file's native rate.

    Returns:
        A list of dicts with keys:
            ``"audio"``: mono waveform tensor of shape ``[1, num_frames]``,
            ``"sentence"``: the stripped transcript text,
            ``"sampling_rate"``: sample rate (Hz) of ``"audio"``.
    """
    wav_suffix = ".wav"
    dataset = []
    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for file in sorted(os.listdir(dataset_path)):
        if not file.endswith(wav_suffix):
            continue

        audio_path = os.path.join(dataset_path, file)
        # Swap only the trailing extension. str.replace would also rewrite any
        # ".wav" occurring earlier in the path (e.g. in a directory name).
        text_path = audio_path[:-len(wav_suffix)] + ".txt"
        if not os.path.exists(text_path):
            continue

        waveform, sample_rate = torchaudio.load(audio_path)

        # Downmix multi-channel audio to mono, keeping the leading channel
        # dimension so callers that squeeze(0) still get a 1-D signal.
        if waveform.dim() == 2 and waveform.size(0) > 1:
            waveform = waveform.mean(dim=0, keepdim=True)

        # Bring every clip to a common rate so downstream code does not
        # silently misinterpret audio recorded at a different rate.
        if target_sample_rate is not None and sample_rate != target_sample_rate:
            waveform = torchaudio.functional.resample(waveform, sample_rate, target_sample_rate)
            sample_rate = target_sample_rate

        with open(text_path, "r", encoding="utf-8") as f:
            transcript = f.read().strip()

        dataset.append({"audio": waveform, "sentence": transcript, "sampling_rate": sample_rate})

    return dataset

# Read every (audio, transcript) pair from DATASET_PATH into memory and report how many were found.
my_data = load_local_dataset(DATASET_PATH)
print(f"Loaded {len(my_data)} samples.")

Loaded 2000 samples.


In [None]:
from tqdm import tqdm
import numpy as np
import jiwer

@torch.inference_mode()
def predict(data):
    """Transcribe every sample in ``data`` with the global ``PIPE``.

    Args:
        data: Iterable of dicts, each with an ``"audio"`` waveform tensor and a
            ``"sentence"`` reference transcript. An optional
            ``"sampling_rate"`` entry (Hz) is forwarded to the pipeline when
            present; without it the raw array is passed as-is.

    Returns:
        A tuple ``(y_true, y_pred)`` of equal-length lists: the reference
        transcripts and the pipeline's ``"text"`` output, in input order.
    """
    y_pred = []
    y_true = []

    for sample in tqdm(data, desc="Processing audio"):
        audio_tensor = sample["audio"]

        # The pipeline needs a 1-D signal: average the channels of
        # multi-channel audio, otherwise just drop the leading channel dim.
        if audio_tensor.dim() == 2 and audio_tensor.size(0) > 1:
            audio_tensor = audio_tensor.mean(dim=0)
        else:
            audio_tensor = audio_tensor.squeeze(0)
        audio_array = audio_tensor.cpu().numpy()

        # When the sample rate is known, hand it to the pipeline so it can
        # reconcile it with the feature extractor's rate. A fresh dict is built
        # per call so the sample itself is never modified.
        sampling_rate = sample.get("sampling_rate")
        if sampling_rate is None:
            pipe_input = audio_array
        else:
            pipe_input = {"raw": audio_array, "sampling_rate": sampling_rate}

        output = PIPE(pipe_input, generate_kwargs={**PIPE_KWARGS, "return_timestamps": True})

        y_pred.append(output["text"])
        y_true.append(sample["sentence"])

    return y_true, y_pred

# Transcribe the whole dataset, collecting references and hypotheses.
y_true, y_pred = predict(my_data)

# Compute WER

# Normalisation steps applied identically to references and hypotheses before
# scoring; the final step turns each sentence into a list of words.
_normalisation_steps = [
    jiwer.ToLowerCase(),
    jiwer.RemoveKaldiNonWords(),
    jiwer.RemoveMultipleSpaces(),
    jiwer.Strip(),
    jiwer.RemovePunctuation(),
    jiwer.ReduceToListOfListOfWords(),
]
JIWER_TRANS = jiwer.Compose(_normalisation_steps)

_wer_score = jiwer.wer(
    reference=y_true,
    hypothesis=y_pred,
    reference_transform=JIWER_TRANS,
    hypothesis_transform=JIWER_TRANS,
)
# Scale by 100 so the value can be reported as a percentage.
wer = 100 * _wer_score

print(f"WER on custom dataset = {wer:.1f}%")

Processing audio:   0%|          | 0/2000 [00:00<?, ?it/s]

torch.Size([1, 74400])


Processing audio:   0%|          | 1/2000 [00:00<17:02,  1.96it/s]

torch.Size([1, 309440])


Processing audio:   0%|          | 2/2000 [00:01<32:56,  1.01it/s]

torch.Size([1, 72160])


Processing audio:   0%|          | 3/2000 [00:02<23:53,  1.39it/s]

torch.Size([1, 275680])


Processing audio:   0%|          | 4/2000 [00:04<43:07,  1.30s/it]

torch.Size([1, 78880])


Processing audio:   0%|          | 5/2000 [00:04<33:35,  1.01s/it]

torch.Size([1, 357600])


Processing audio:   0%|          | 6/2000 [00:06<42:31,  1.28s/it]

torch.Size([1, 101280])


Processing audio:   0%|          | 7/2000 [00:07<34:36,  1.04s/it]

torch.Size([1, 30720])


Processing audio:   0%|          | 9/2000 [00:07<19:04,  1.74it/s]

torch.Size([1, 21440])
torch.Size([1, 211680])


Processing audio:   0%|          | 10/2000 [00:08<24:06,  1.38it/s]

torch.Size([1, 122240])


Processing audio:   1%|          | 11/2000 [00:09<23:19,  1.42it/s]

torch.Size([1, 51680])


Processing audio:   1%|          | 12/2000 [00:09<20:16,  1.63it/s]

torch.Size([1, 76160])


Processing audio:   1%|          | 13/2000 [00:09<16:14,  2.04it/s]

torch.Size([1, 64000])


Processing audio:   1%|          | 14/2000 [00:10<14:36,  2.27it/s]

torch.Size([1, 790240])


Processing audio:   1%|          | 15/2000 [00:14<50:12,  1.52s/it]

torch.Size([1, 96160])


Processing audio:   1%|          | 16/2000 [00:14<40:25,  1.22s/it]

torch.Size([1, 140480])


Processing audio:   1%|          | 17/2000 [00:15<34:51,  1.05s/it]

torch.Size([1, 74080])


Processing audio:   1%|          | 18/2000 [00:15<27:55,  1.18it/s]

torch.Size([1, 350560])


Processing audio:   1%|          | 19/2000 [00:18<41:58,  1.27s/it]

torch.Size([1, 207680])


Processing audio:   1%|          | 20/2000 [00:19<43:58,  1.33s/it]

torch.Size([1, 110080])


Processing audio:   1%|          | 21/2000 [00:20<36:17,  1.10s/it]

torch.Size([1, 204640])


Processing audio:   1%|          | 22/2000 [00:21<34:07,  1.03s/it]

torch.Size([1, 284320])


Processing audio:   1%|          | 23/2000 [00:22<35:41,  1.08s/it]

torch.Size([1, 72640])


Processing audio:   1%|          | 24/2000 [00:22<27:20,  1.20it/s]

torch.Size([1, 414400])


Processing audio:   1%|▏         | 25/2000 [00:24<42:10,  1.28s/it]

torch.Size([1, 114720])


Processing audio:   1%|▏         | 27/2000 [00:25<26:59,  1.22it/s]

torch.Size([1, 30080])
torch.Size([1, 140640])


Processing audio:   1%|▏         | 28/2000 [00:26<26:44,  1.23it/s]

torch.Size([1, 36480])


Processing audio:   1%|▏         | 29/2000 [00:26<20:56,  1.57it/s]

torch.Size([1, 195200])


Processing audio:   2%|▏         | 30/2000 [00:27<22:22,  1.47it/s]

torch.Size([1, 191360])


Processing audio:   2%|▏         | 31/2000 [00:28<22:19,  1.47it/s]

torch.Size([1, 66400])


Processing audio:   2%|▏         | 33/2000 [00:28<14:49,  2.21it/s]

torch.Size([1, 27200])
torch.Size([1, 153600])


Processing audio:   2%|▏         | 34/2000 [00:29<16:15,  2.01it/s]

torch.Size([1, 65920])


Processing audio:   2%|▏         | 35/2000 [00:29<16:32,  1.98it/s]

torch.Size([1, 125600])


Processing audio:   2%|▏         | 36/2000 [00:30<19:48,  1.65it/s]

torch.Size([1, 110880])


Processing audio:   2%|▏         | 37/2000 [00:31<21:00,  1.56it/s]

torch.Size([1, 62880])


Processing audio:   2%|▏         | 38/2000 [00:31<18:30,  1.77it/s]

torch.Size([1, 85280])


Processing audio:   2%|▏         | 39/2000 [00:32<18:55,  1.73it/s]

torch.Size([1, 68640])


Processing audio:   2%|▏         | 40/2000 [00:32<16:06,  2.03it/s]

torch.Size([1, 277440])


Processing audio:   2%|▏         | 41/2000 [00:33<23:57,  1.36it/s]

torch.Size([1, 102560])


Processing audio:   2%|▏         | 43/2000 [00:34<16:17,  2.00it/s]

torch.Size([1, 19840])
torch.Size([1, 102560])


Processing audio:   2%|▏         | 44/2000 [00:35<16:54,  1.93it/s]

torch.Size([1, 273760])


Processing audio:   2%|▏         | 45/2000 [00:36<24:55,  1.31it/s]

torch.Size([1, 116640])


Processing audio:   2%|▏         | 46/2000 [00:37<24:05,  1.35it/s]

torch.Size([1, 25120])


Processing audio:   2%|▏         | 47/2000 [00:37<19:06,  1.70it/s]

torch.Size([1, 52960])


Processing audio:   2%|▏         | 48/2000 [00:37<17:07,  1.90it/s]

torch.Size([1, 242720])


Processing audio:   2%|▏         | 49/2000 [00:38<23:51,  1.36it/s]

torch.Size([1, 210560])


Processing audio:   2%|▎         | 50/2000 [00:39<26:35,  1.22it/s]

torch.Size([1, 138880])


Processing audio:   3%|▎         | 51/2000 [00:40<25:27,  1.28it/s]

torch.Size([1, 113600])


Processing audio:   3%|▎         | 52/2000 [00:41<23:15,  1.40it/s]

torch.Size([1, 213280])


Processing audio:   3%|▎         | 53/2000 [00:42<25:28,  1.27it/s]

torch.Size([1, 497440])


Processing audio:   3%|▎         | 55/2000 [00:45<34:00,  1.05s/it]

torch.Size([1, 17280])
torch.Size([1, 110880])


Processing audio:   3%|▎         | 56/2000 [00:45<29:38,  1.09it/s]

torch.Size([1, 178400])


Processing audio:   3%|▎         | 57/2000 [00:46<30:16,  1.07it/s]

torch.Size([1, 397280])


Processing audio:   3%|▎         | 58/2000 [00:48<38:48,  1.20s/it]

torch.Size([1, 112800])


Processing audio:   3%|▎         | 59/2000 [00:49<32:18,  1.00it/s]

torch.Size([1, 102560])


Processing audio:   3%|▎         | 60/2000 [00:49<28:18,  1.14it/s]

torch.Size([1, 379840])


Processing audio:   3%|▎         | 61/2000 [00:51<37:43,  1.17s/it]

torch.Size([1, 156160])


Processing audio:   3%|▎         | 62/2000 [00:52<35:08,  1.09s/it]

torch.Size([1, 211520])


Processing audio:   3%|▎         | 63/2000 [00:53<32:17,  1.00s/it]

torch.Size([1, 109120])


Processing audio:   3%|▎         | 64/2000 [00:53<27:07,  1.19it/s]

torch.Size([1, 108960])


Processing audio:   3%|▎         | 65/2000 [00:54<24:35,  1.31it/s]

torch.Size([1, 108320])
torch.Size([1, 121440])


Processing audio:   3%|▎         | 67/2000 [00:55<18:48,  1.71it/s]

torch.Size([1, 377760])


Processing audio:   3%|▎         | 69/2000 [00:57<25:30,  1.26it/s]

torch.Size([1, 16320])
torch.Size([1, 111040])


Processing audio:   4%|▎         | 70/2000 [00:58<24:11,  1.33it/s]

torch.Size([1, 284480])


Processing audio:   4%|▎         | 71/2000 [00:59<28:53,  1.11it/s]

torch.Size([1, 122720])


Processing audio:   4%|▎         | 72/2000 [01:00<26:48,  1.20it/s]

torch.Size([1, 127360])


Processing audio:   4%|▎         | 73/2000 [01:00<23:46,  1.35it/s]

torch.Size([1, 104160])


Processing audio:   4%|▎         | 74/2000 [01:01<22:20,  1.44it/s]

torch.Size([1, 80480])


Processing audio:   4%|▍         | 75/2000 [01:01<20:29,  1.57it/s]

torch.Size([1, 240320])


Processing audio:   4%|▍         | 76/2000 [01:03<27:24,  1.17it/s]

torch.Size([1, 70720])


Processing audio:   4%|▍         | 77/2000 [01:03<22:52,  1.40it/s]

torch.Size([1, 60000])


Processing audio:   4%|▍         | 78/2000 [01:03<19:51,  1.61it/s]

torch.Size([1, 234240])


Processing audio:   4%|▍         | 79/2000 [01:05<24:27,  1.31it/s]

torch.Size([1, 100800])


Processing audio:   4%|▍         | 80/2000 [01:05<20:55,  1.53it/s]

torch.Size([1, 98240])


Processing audio:   4%|▍         | 81/2000 [01:05<19:37,  1.63it/s]

torch.Size([1, 164160])


Processing audio:   4%|▍         | 82/2000 [01:06<20:47,  1.54it/s]

torch.Size([1, 145440])


Processing audio:   4%|▍         | 83/2000 [01:07<22:17,  1.43it/s]

torch.Size([1, 10080])
torch.Size([1, 173120])


Processing audio:   4%|▍         | 85/2000 [01:08<19:41,  1.62it/s]

torch.Size([1, 125280])


Processing audio:   4%|▍         | 86/2000 [01:09<21:54,  1.46it/s]

torch.Size([1, 221120])


Processing audio:   4%|▍         | 87/2000 [01:10<27:35,  1.16it/s]

torch.Size([1, 81600])


Processing audio:   4%|▍         | 88/2000 [01:11<25:06,  1.27it/s]

torch.Size([1, 137920])


Processing audio:   4%|▍         | 89/2000 [01:12<25:01,  1.27it/s]

torch.Size([1, 54400])


Processing audio:   4%|▍         | 90/2000 [01:12<21:29,  1.48it/s]

torch.Size([1, 128320])


Processing audio:   5%|▍         | 91/2000 [01:13<19:35,  1.62it/s]

torch.Size([1, 257760])


Processing audio:   5%|▍         | 92/2000 [01:14<23:52,  1.33it/s]

torch.Size([1, 93760])


Processing audio:   5%|▍         | 94/2000 [01:14<16:27,  1.93it/s]

torch.Size([1, 28000])
torch.Size([1, 182240])


Processing audio:   5%|▍         | 96/2000 [01:15<15:12,  2.09it/s]

torch.Size([1, 37600])
torch.Size([1, 596320])


Processing audio:   5%|▍         | 97/2000 [01:19<41:29,  1.31s/it]

torch.Size([1, 113440])


Processing audio:   5%|▍         | 98/2000 [01:19<34:34,  1.09s/it]

torch.Size([1, 128480])


Processing audio:   5%|▍         | 99/2000 [01:20<30:44,  1.03it/s]

torch.Size([1, 74560])


Processing audio:   5%|▌         | 100/2000 [01:20<25:02,  1.26it/s]

torch.Size([1, 208640])


Processing audio:   5%|▌         | 101/2000 [01:21<25:27,  1.24it/s]

torch.Size([1, 66080])


Processing audio:   5%|▌         | 102/2000 [01:21<22:11,  1.43it/s]

torch.Size([1, 122560])


Processing audio:   5%|▌         | 103/2000 [01:23<25:31,  1.24it/s]

torch.Size([1, 101920])


Processing audio:   5%|▌         | 104/2000 [01:23<24:44,  1.28it/s]

torch.Size([1, 124480])


Processing audio:   5%|▌         | 106/2000 [01:24<18:38,  1.69it/s]

torch.Size([1, 41920])
torch.Size([1, 157120])


Processing audio:   5%|▌         | 107/2000 [01:25<17:06,  1.84it/s]

torch.Size([1, 415520])


Processing audio:   5%|▌         | 108/2000 [01:26<27:49,  1.13it/s]

torch.Size([1, 80000])


Processing audio:   5%|▌         | 109/2000 [01:27<22:29,  1.40it/s]

torch.Size([1, 113120])


Processing audio:   6%|▌         | 110/2000 [01:27<21:06,  1.49it/s]

torch.Size([1, 163040])


Processing audio:   6%|▌         | 111/2000 [01:28<21:38,  1.45it/s]

torch.Size([1, 335520])


Processing audio:   6%|▌         | 112/2000 [01:29<29:38,  1.06it/s]

torch.Size([1, 80000])


Processing audio:   6%|▌         | 113/2000 [01:30<24:55,  1.26it/s]

torch.Size([1, 608800])


Processing audio:   6%|▌         | 114/2000 [01:33<44:24,  1.41s/it]

torch.Size([1, 211840])


Processing audio:   6%|▌         | 115/2000 [01:34<42:59,  1.37s/it]

torch.Size([1, 173280])


Processing audio:   6%|▌         | 116/2000 [01:35<43:33,  1.39s/it]

torch.Size([1, 249600])


Processing audio:   6%|▌         | 117/2000 [01:37<44:29,  1.42s/it]

torch.Size([1, 312320])


Processing audio:   6%|▌         | 118/2000 [01:38<44:19,  1.41s/it]

torch.Size([1, 435200])


Processing audio:   6%|▌         | 119/2000 [01:40<47:56,  1.53s/it]

torch.Size([1, 76480])


Processing audio:   6%|▌         | 120/2000 [01:41<37:45,  1.21s/it]

torch.Size([1, 113280])


Processing audio:   6%|▌         | 121/2000 [01:41<30:30,  1.03it/s]

torch.Size([1, 61280])


Processing audio:   6%|▌         | 122/2000 [01:41<25:20,  1.24it/s]

torch.Size([1, 25760])


Processing audio:   6%|▌         | 123/2000 [01:42<19:50,  1.58it/s]

torch.Size([1, 97440])


Processing audio:   6%|▌         | 124/2000 [01:42<18:35,  1.68it/s]

torch.Size([1, 187360])


Processing audio:   6%|▋         | 125/2000 [01:43<21:11,  1.47it/s]

torch.Size([1, 163840])


Processing audio:   6%|▋         | 126/2000 [01:44<23:22,  1.34it/s]

torch.Size([1, 94080])


Processing audio:   6%|▋         | 127/2000 [01:44<21:26,  1.46it/s]

torch.Size([1, 137920])


Processing audio:   6%|▋         | 128/2000 [01:45<20:34,  1.52it/s]

torch.Size([1, 223360])


Processing audio:   6%|▋         | 130/2000 [01:46<18:35,  1.68it/s]

torch.Size([1, 28320])
torch.Size([1, 200160])


Processing audio:   7%|▋         | 131/2000 [01:48<24:49,  1.26it/s]

torch.Size([1, 96160])


Processing audio:   7%|▋         | 132/2000 [01:48<23:07,  1.35it/s]

torch.Size([1, 515680])


Processing audio:   7%|▋         | 133/2000 [01:52<52:32,  1.69s/it]

torch.Size([1, 79680])


Processing audio:   7%|▋         | 134/2000 [01:53<41:17,  1.33s/it]

torch.Size([1, 94400])


Processing audio:   7%|▋         | 135/2000 [01:53<32:45,  1.05s/it]

torch.Size([1, 65120])


Processing audio:   7%|▋         | 137/2000 [01:53<19:27,  1.60it/s]

torch.Size([1, 54720])
torch.Size([1, 138560])


Processing audio:   7%|▋         | 138/2000 [01:54<19:32,  1.59it/s]

torch.Size([1, 81600])


Processing audio:   7%|▋         | 139/2000 [01:54<16:54,  1.83it/s]

torch.Size([1, 94880])


Processing audio:   7%|▋         | 140/2000 [01:55<17:48,  1.74it/s]

torch.Size([1, 158080])


Processing audio:   7%|▋         | 141/2000 [01:56<19:15,  1.61it/s]

torch.Size([1, 201120])


Processing audio:   7%|▋         | 142/2000 [01:57<21:19,  1.45it/s]

torch.Size([1, 120160])


Processing audio:   7%|▋         | 143/2000 [01:57<20:49,  1.49it/s]

torch.Size([1, 188000])


Processing audio:   7%|▋         | 144/2000 [01:58<21:42,  1.42it/s]

torch.Size([1, 499040])


Processing audio:   7%|▋         | 145/2000 [02:01<41:06,  1.33s/it]

torch.Size([1, 472320])


Processing audio:   7%|▋         | 146/2000 [02:04<55:35,  1.80s/it]

torch.Size([1, 514240])


Processing audio:   7%|▋         | 147/2000 [02:06<1:04:10,  2.08s/it]

torch.Size([1, 44000])


Processing audio:   7%|▋         | 149/2000 [02:07<34:33,  1.12s/it]

torch.Size([1, 48000])
torch.Size([1, 141120])


Processing audio:   8%|▊         | 150/2000 [02:07<29:24,  1.05it/s]

torch.Size([1, 251360])


Processing audio:   8%|▊         | 151/2000 [02:08<29:46,  1.03it/s]

torch.Size([1, 116160])


Processing audio:   8%|▊         | 152/2000 [02:09<26:06,  1.18it/s]

torch.Size([1, 81280])


Processing audio:   8%|▊         | 153/2000 [02:09<22:44,  1.35it/s]

torch.Size([1, 103360])


Processing audio:   8%|▊         | 154/2000 [02:10<21:58,  1.40it/s]

torch.Size([1, 168480])


Processing audio:   8%|▊         | 155/2000 [02:11<22:35,  1.36it/s]

torch.Size([1, 84000])


Processing audio:   8%|▊         | 156/2000 [02:11<18:12,  1.69it/s]

torch.Size([1, 96960])


Processing audio:   8%|▊         | 157/2000 [02:12<17:54,  1.71it/s]

torch.Size([1, 314240])


Processing audio:   8%|▊         | 158/2000 [02:14<31:55,  1.04s/it]

torch.Size([1, 87680])


Processing audio:   8%|▊         | 159/2000 [02:15<29:51,  1.03it/s]

torch.Size([1, 287200])


Processing audio:   8%|▊         | 160/2000 [02:16<37:38,  1.23s/it]

torch.Size([1, 124000])


Processing audio:   8%|▊         | 161/2000 [02:17<31:58,  1.04s/it]

torch.Size([1, 325280])


Processing audio:   8%|▊         | 162/2000 [02:19<38:02,  1.24s/it]

torch.Size([1, 135040])


Processing audio:   8%|▊         | 163/2000 [02:20<33:18,  1.09s/it]

torch.Size([1, 156640])


Processing audio:   8%|▊         | 164/2000 [02:20<30:18,  1.01it/s]

torch.Size([1, 84640])


Processing audio:   8%|▊         | 165/2000 [02:21<24:57,  1.23it/s]

torch.Size([1, 306080])


Processing audio:   8%|▊         | 166/2000 [02:22<29:24,  1.04it/s]

torch.Size([1, 130880])


Processing audio:   8%|▊         | 167/2000 [02:23<26:43,  1.14it/s]

torch.Size([1, 439360])


Processing audio:   8%|▊         | 168/2000 [02:25<37:48,  1.24s/it]

torch.Size([1, 25120])


Processing audio:   8%|▊         | 169/2000 [02:25<28:21,  1.08it/s]

torch.Size([1, 97280])


Processing audio:   8%|▊         | 170/2000 [02:25<24:40,  1.24it/s]

torch.Size([1, 113760])


Processing audio:   9%|▊         | 171/2000 [02:26<23:29,  1.30it/s]

torch.Size([1, 138720])


Processing audio:   9%|▊         | 172/2000 [02:27<25:30,  1.19it/s]

torch.Size([1, 129280])


Processing audio:   9%|▊         | 173/2000 [02:28<26:43,  1.14it/s]

torch.Size([1, 168480])


Processing audio:   9%|▊         | 174/2000 [02:29<28:15,  1.08it/s]

torch.Size([1, 424640])


Processing audio:   9%|▉         | 176/2000 [02:31<28:00,  1.09it/s]

torch.Size([1, 19840])
torch.Size([1, 141440])


Processing audio:   9%|▉         | 177/2000 [02:32<24:35,  1.24it/s]

torch.Size([1, 322080])


Processing audio:   9%|▉         | 178/2000 [02:33<29:21,  1.03it/s]

torch.Size([1, 176320])


Processing audio:   9%|▉         | 179/2000 [02:34<28:18,  1.07it/s]

torch.Size([1, 145600])


Processing audio:   9%|▉         | 180/2000 [02:35<26:09,  1.16it/s]

torch.Size([1, 721120])


Processing audio:   9%|▉         | 181/2000 [02:42<1:25:23,  2.82s/it]

torch.Size([1, 39040])


Processing audio:   9%|▉         | 182/2000 [02:42<1:02:35,  2.07s/it]

torch.Size([1, 512000])


Processing audio:   9%|▉         | 183/2000 [02:45<1:07:05,  2.22s/it]

torch.Size([1, 506720])


Processing audio:   9%|▉         | 184/2000 [02:48<1:12:18,  2.39s/it]

torch.Size([1, 418880])


Processing audio:   9%|▉         | 185/2000 [02:50<1:10:11,  2.32s/it]

torch.Size([1, 182400])


Processing audio:   9%|▉         | 186/2000 [02:51<57:47,  1.91s/it]  

torch.Size([1, 156000])


Processing audio:   9%|▉         | 187/2000 [02:52<48:25,  1.60s/it]

torch.Size([1, 287680])


Processing audio:   9%|▉         | 188/2000 [02:53<46:33,  1.54s/it]

torch.Size([1, 264320])


Processing audio:   9%|▉         | 189/2000 [02:55<46:57,  1.56s/it]

torch.Size([1, 115520])


Processing audio:  10%|▉         | 190/2000 [02:55<38:07,  1.26s/it]

torch.Size([1, 200640])


Processing audio:  10%|▉         | 191/2000 [02:56<35:31,  1.18s/it]

torch.Size([1, 126400])


Processing audio:  10%|▉         | 192/2000 [02:57<31:00,  1.03s/it]

torch.Size([1, 117920])


Processing audio:  10%|▉         | 193/2000 [02:58<26:29,  1.14it/s]

torch.Size([1, 36000])


Processing audio:  10%|▉         | 194/2000 [02:58<21:01,  1.43it/s]

torch.Size([1, 56320])


Processing audio:  10%|▉         | 195/2000 [02:58<17:30,  1.72it/s]

torch.Size([1, 71840])


Processing audio:  10%|▉         | 196/2000 [02:59<16:11,  1.86it/s]

torch.Size([1, 125920])


Processing audio:  10%|▉         | 198/2000 [02:59<13:32,  2.22it/s]

torch.Size([1, 52800])
torch.Size([1, 64160])


Processing audio:  10%|▉         | 199/2000 [03:00<11:45,  2.55it/s]

torch.Size([1, 53760])


Processing audio:  10%|█         | 200/2000 [03:00<11:14,  2.67it/s]

torch.Size([1, 374880])


Processing audio:  10%|█         | 201/2000 [03:01<19:48,  1.51it/s]

torch.Size([1, 54400])


Processing audio:  10%|█         | 202/2000 [03:02<16:19,  1.84it/s]

torch.Size([1, 71040])


Processing audio:  10%|█         | 203/2000 [03:02<14:44,  2.03it/s]

torch.Size([1, 124480])


Processing audio:  10%|█         | 204/2000 [03:03<16:11,  1.85it/s]

torch.Size([1, 86400])


Processing audio:  10%|█         | 205/2000 [03:03<15:56,  1.88it/s]

torch.Size([1, 84000])


Processing audio:  10%|█         | 207/2000 [03:04<12:07,  2.47it/s]

torch.Size([1, 23200])
torch.Size([1, 104800])


Processing audio:  10%|█         | 208/2000 [03:04<11:18,  2.64it/s]

torch.Size([1, 152960])


Processing audio:  10%|█         | 209/2000 [03:05<16:11,  1.84it/s]

torch.Size([1, 572640])


Processing audio:  10%|█         | 210/2000 [03:09<43:21,  1.45s/it]

torch.Size([1, 446240])


Processing audio:  11%|█         | 211/2000 [03:11<51:06,  1.71s/it]

torch.Size([1, 85920])


Processing audio:  11%|█         | 212/2000 [03:11<40:35,  1.36s/it]

torch.Size([1, 122400])


Processing audio:  11%|█         | 213/2000 [03:12<33:20,  1.12s/it]

torch.Size([1, 106080])


Processing audio:  11%|█         | 214/2000 [03:12<26:49,  1.11it/s]

torch.Size([1, 91520])


Processing audio:  11%|█         | 215/2000 [03:13<22:47,  1.30it/s]

torch.Size([1, 279360])


Processing audio:  11%|█         | 216/2000 [03:14<25:37,  1.16it/s]

torch.Size([1, 141600])


Processing audio:  11%|█         | 217/2000 [03:14<20:52,  1.42it/s]

torch.Size([1, 88000])


Processing audio:  11%|█         | 218/2000 [03:15<18:29,  1.61it/s]

torch.Size([1, 112160])


Processing audio:  11%|█         | 219/2000 [03:15<18:22,  1.62it/s]

torch.Size([1, 130560])


Processing audio:  11%|█         | 220/2000 [03:16<19:12,  1.54it/s]

torch.Size([1, 55840])


Processing audio:  11%|█         | 221/2000 [03:16<16:24,  1.81it/s]

torch.Size([1, 180320])


Processing audio:  11%|█         | 222/2000 [03:17<18:14,  1.62it/s]

torch.Size([1, 98080])


Processing audio:  11%|█         | 223/2000 [03:18<18:01,  1.64it/s]

torch.Size([1, 97920])


Processing audio:  11%|█         | 224/2000 [03:18<19:00,  1.56it/s]

torch.Size([1, 380000])


Processing audio:  11%|█▏        | 225/2000 [03:21<34:46,  1.18s/it]

torch.Size([1, 365280])


Processing audio:  11%|█▏        | 226/2000 [03:22<38:25,  1.30s/it]

torch.Size([1, 184160])


Processing audio:  11%|█▏        | 227/2000 [03:23<33:56,  1.15s/it]

torch.Size([1, 124640])


Processing audio:  11%|█▏        | 228/2000 [03:24<30:33,  1.03s/it]

torch.Size([1, 159360])


Processing audio:  12%|█▏        | 230/2000 [03:25<21:48,  1.35it/s]

torch.Size([1, 40160])
torch.Size([1, 469600])


Processing audio:  12%|█▏        | 231/2000 [03:27<29:37,  1.00s/it]

torch.Size([1, 344800])


Processing audio:  12%|█▏        | 232/2000 [03:28<31:48,  1.08s/it]

torch.Size([1, 130880])


Processing audio:  12%|█▏        | 234/2000 [03:29<21:29,  1.37it/s]

torch.Size([1, 20320])
torch.Size([1, 102400])


Processing audio:  12%|█▏        | 235/2000 [03:29<19:13,  1.53it/s]

torch.Size([1, 73280])


Processing audio:  12%|█▏        | 236/2000 [03:30<16:07,  1.82it/s]

torch.Size([1, 82560])


Processing audio:  12%|█▏        | 237/2000 [03:30<15:02,  1.95it/s]

torch.Size([1, 48000])


Processing audio:  12%|█▏        | 238/2000 [03:30<12:58,  2.26it/s]

torch.Size([1, 70240])


Processing audio:  12%|█▏        | 239/2000 [03:31<13:25,  2.19it/s]

torch.Size([1, 89280])


Processing audio:  12%|█▏        | 240/2000 [03:31<14:59,  1.96it/s]

torch.Size([1, 104160])


Processing audio:  12%|█▏        | 241/2000 [03:32<17:27,  1.68it/s]

torch.Size([1, 249760])


Processing audio:  12%|█▏        | 242/2000 [03:34<26:47,  1.09it/s]

torch.Size([1, 147040])


Processing audio:  12%|█▏        | 243/2000 [03:35<24:50,  1.18it/s]

torch.Size([1, 125440])


Processing audio:  12%|█▏        | 244/2000 [03:35<20:45,  1.41it/s]

torch.Size([1, 146880])


Processing audio:  12%|█▏        | 245/2000 [03:36<21:08,  1.38it/s]

torch.Size([1, 154240])


Processing audio:  12%|█▏        | 246/2000 [03:36<20:01,  1.46it/s]

torch.Size([1, 114400])


Processing audio:  12%|█▏        | 247/2000 [03:37<19:24,  1.51it/s]

torch.Size([1, 134880])


Processing audio:  12%|█▏        | 248/2000 [03:37<18:25,  1.58it/s]

torch.Size([1, 187840])


Processing audio:  12%|█▏        | 249/2000 [03:38<19:36,  1.49it/s]

torch.Size([1, 45440])


Processing audio:  12%|█▎        | 250/2000 [03:39<16:05,  1.81it/s]

torch.Size([1, 146880])


Processing audio:  13%|█▎        | 251/2000 [03:39<17:34,  1.66it/s]

torch.Size([1, 302560])


Processing audio:  13%|█▎        | 252/2000 [03:41<23:38,  1.23it/s]

torch.Size([1, 339680])


Processing audio:  13%|█▎        | 253/2000 [03:42<31:26,  1.08s/it]

torch.Size([1, 55200])


Processing audio:  13%|█▎        | 254/2000 [03:43<25:15,  1.15it/s]

torch.Size([1, 221600])


Processing audio:  13%|█▎        | 255/2000 [03:44<27:23,  1.06it/s]

torch.Size([1, 35360])


Processing audio:  13%|█▎        | 256/2000 [03:44<21:24,  1.36it/s]

torch.Size([1, 156160])


Processing audio:  13%|█▎        | 257/2000 [03:45<23:41,  1.23it/s]

torch.Size([1, 119520])


Processing audio:  13%|█▎        | 258/2000 [03:46<23:58,  1.21it/s]

torch.Size([1, 207360])


Processing audio:  13%|█▎        | 260/2000 [03:47<20:36,  1.41it/s]

torch.Size([1, 35040])
torch.Size([1, 265120])


Processing audio:  13%|█▎        | 261/2000 [03:48<22:07,  1.31it/s]

torch.Size([1, 98240])


Processing audio:  13%|█▎        | 262/2000 [03:49<19:58,  1.45it/s]

torch.Size([1, 53600])


Processing audio:  13%|█▎        | 263/2000 [03:49<16:44,  1.73it/s]

torch.Size([1, 182240])


Processing audio:  13%|█▎        | 264/2000 [03:50<17:52,  1.62it/s]

torch.Size([1, 90720])


Processing audio:  13%|█▎        | 265/2000 [03:50<15:49,  1.83it/s]

torch.Size([1, 185440])


Processing audio:  13%|█▎        | 266/2000 [03:51<18:42,  1.54it/s]

torch.Size([1, 55680])


Processing audio:  13%|█▎        | 267/2000 [03:51<15:17,  1.89it/s]

torch.Size([1, 136320])


Processing audio:  13%|█▎        | 268/2000 [03:52<16:35,  1.74it/s]

torch.Size([1, 317760])


Processing audio:  13%|█▎        | 269/2000 [03:53<21:50,  1.32it/s]

torch.Size([1, 178400])


Processing audio:  14%|█▎        | 271/2000 [03:54<16:53,  1.71it/s]

torch.Size([1, 39040])
torch.Size([1, 68800])


Processing audio:  14%|█▎        | 273/2000 [03:54<11:16,  2.55it/s]

torch.Size([1, 25600])
torch.Size([1, 168320])


Processing audio:  14%|█▎        | 274/2000 [03:55<13:34,  2.12it/s]

torch.Size([1, 191520])


Processing audio:  14%|█▍        | 275/2000 [03:56<18:13,  1.58it/s]

torch.Size([1, 206880])


Processing audio:  14%|█▍        | 276/2000 [03:58<25:14,  1.14it/s]

torch.Size([1, 106240])


Processing audio:  14%|█▍        | 277/2000 [03:58<24:45,  1.16it/s]

torch.Size([1, 104000])


Processing audio:  14%|█▍        | 278/2000 [03:59<24:13,  1.19it/s]

torch.Size([1, 69760])


Processing audio:  14%|█▍        | 279/2000 [04:00<20:52,  1.37it/s]

torch.Size([1, 144160])


Processing audio:  14%|█▍        | 280/2000 [04:00<20:46,  1.38it/s]

torch.Size([1, 209120])


Processing audio:  14%|█▍        | 281/2000 [04:01<24:00,  1.19it/s]

torch.Size([1, 117920])


Processing audio:  14%|█▍        | 282/2000 [04:02<21:39,  1.32it/s]

torch.Size([1, 92800])


Processing audio:  14%|█▍        | 283/2000 [04:02<19:17,  1.48it/s]

torch.Size([1, 118400])


Processing audio:  14%|█▍        | 284/2000 [04:03<18:01,  1.59it/s]

torch.Size([1, 101440])


Processing audio:  14%|█▍        | 285/2000 [04:03<16:31,  1.73it/s]

torch.Size([1, 276640])


Processing audio:  14%|█▍        | 286/2000 [04:05<23:22,  1.22it/s]

torch.Size([1, 73440])


Processing audio:  14%|█▍        | 287/2000 [04:05<20:06,  1.42it/s]

torch.Size([1, 41600])


Processing audio:  14%|█▍        | 289/2000 [04:06<13:05,  2.18it/s]

torch.Size([1, 34080])
torch.Size([1, 101280])


Processing audio:  14%|█▍        | 290/2000 [04:06<13:30,  2.11it/s]

torch.Size([1, 181760])


Processing audio:  15%|█▍        | 291/2000 [04:07<13:49,  2.06it/s]

torch.Size([1, 253280])


Processing audio:  15%|█▍        | 292/2000 [04:07<15:41,  1.81it/s]

torch.Size([1, 236480])


Processing audio:  15%|█▍        | 293/2000 [04:08<17:35,  1.62it/s]

torch.Size([1, 205440])


Processing audio:  15%|█▍        | 294/2000 [04:09<19:01,  1.49it/s]

torch.Size([1, 112640])


Processing audio:  15%|█▍        | 295/2000 [04:10<19:49,  1.43it/s]

torch.Size([1, 217280])


Processing audio:  15%|█▍        | 296/2000 [04:11<27:20,  1.04it/s]

torch.Size([1, 73920])


Processing audio:  15%|█▍        | 297/2000 [04:12<24:45,  1.15it/s]

torch.Size([1, 47200])


Processing audio:  15%|█▍        | 298/2000 [04:12<20:02,  1.42it/s]

torch.Size([1, 110720])


Processing audio:  15%|█▍        | 299/2000 [04:13<18:46,  1.51it/s]

torch.Size([1, 155520])


Processing audio:  15%|█▌        | 300/2000 [04:14<19:02,  1.49it/s]

torch.Size([1, 194400])


Processing audio:  15%|█▌        | 301/2000 [04:14<20:28,  1.38it/s]

torch.Size([1, 240000])


Processing audio:  15%|█▌        | 302/2000 [04:16<24:55,  1.14it/s]

torch.Size([1, 74080])


Processing audio:  15%|█▌        | 303/2000 [04:16<20:03,  1.41it/s]

torch.Size([1, 91680])


Processing audio:  15%|█▌        | 304/2000 [04:16<17:25,  1.62it/s]

torch.Size([1, 175200])


Processing audio:  15%|█▌        | 305/2000 [04:17<19:10,  1.47it/s]

torch.Size([1, 189120])


Processing audio:  15%|█▌        | 306/2000 [04:18<20:38,  1.37it/s]

torch.Size([1, 365280])


Processing audio:  15%|█▌        | 307/2000 [04:20<29:21,  1.04s/it]

torch.Size([1, 99360])


Processing audio:  15%|█▌        | 309/2000 [04:21<19:01,  1.48it/s]

torch.Size([1, 26560])
torch.Size([1, 53600])


Processing audio:  16%|█▌        | 310/2000 [04:21<15:53,  1.77it/s]

torch.Size([1, 105440])


Processing audio:  16%|█▌        | 311/2000 [04:21<15:19,  1.84it/s]

torch.Size([1, 129120])


Processing audio:  16%|█▌        | 312/2000 [04:22<15:19,  1.84it/s]

torch.Size([1, 128160])


Processing audio:  16%|█▌        | 313/2000 [04:23<17:55,  1.57it/s]

torch.Size([1, 302400])


Processing audio:  16%|█▌        | 314/2000 [04:25<28:21,  1.01s/it]

torch.Size([1, 152480])


Processing audio:  16%|█▌        | 315/2000 [04:26<30:55,  1.10s/it]

torch.Size([1, 129440])


Processing audio:  16%|█▌        | 316/2000 [04:27<26:30,  1.06it/s]

torch.Size([1, 278880])


Processing audio:  16%|█▌        | 317/2000 [04:28<30:32,  1.09s/it]

torch.Size([1, 155680])


Processing audio:  16%|█▌        | 318/2000 [04:29<27:31,  1.02it/s]

torch.Size([1, 78880])


Processing audio:  16%|█▌        | 319/2000 [04:29<22:46,  1.23it/s]

torch.Size([1, 52960])


Processing audio:  16%|█▌        | 320/2000 [04:29<18:41,  1.50it/s]

torch.Size([1, 117120])


Processing audio:  16%|█▌        | 321/2000 [04:30<18:18,  1.53it/s]

torch.Size([1, 243840])


Processing audio:  16%|█▌        | 322/2000 [04:31<21:37,  1.29it/s]

torch.Size([1, 45440])


Processing audio:  16%|█▌        | 323/2000 [04:31<17:25,  1.60it/s]

torch.Size([1, 155040])


Processing audio:  16%|█▌        | 324/2000 [04:32<15:50,  1.76it/s]

torch.Size([1, 247200])


Processing audio:  16%|█▋        | 325/2000 [04:33<20:30,  1.36it/s]

torch.Size([1, 80160])


Processing audio:  16%|█▋        | 326/2000 [04:33<17:00,  1.64it/s]

torch.Size([1, 106880])


Processing audio:  16%|█▋        | 327/2000 [04:34<16:33,  1.68it/s]

torch.Size([1, 250400])


Processing audio:  16%|█▋        | 328/2000 [04:35<20:58,  1.33it/s]

torch.Size([1, 24000])
torch.Size([1, 135520])


Processing audio:  16%|█▋        | 330/2000 [04:36<16:11,  1.72it/s]

torch.Size([1, 76960])


Processing audio:  17%|█▋        | 331/2000 [04:36<16:11,  1.72it/s]

torch.Size([1, 79840])


Processing audio:  17%|█▋        | 332/2000 [04:37<17:04,  1.63it/s]

torch.Size([1, 107040])


Processing audio:  17%|█▋        | 333/2000 [04:38<17:33,  1.58it/s]

torch.Size([1, 357280])


Processing audio:  17%|█▋        | 335/2000 [04:40<21:44,  1.28it/s]

torch.Size([1, 32480])
torch.Size([1, 143360])


Processing audio:  17%|█▋        | 336/2000 [04:41<21:24,  1.30it/s]

torch.Size([1, 51520])


Processing audio:  17%|█▋        | 337/2000 [04:41<17:02,  1.63it/s]

torch.Size([1, 128160])


Processing audio:  17%|█▋        | 338/2000 [04:42<17:52,  1.55it/s]

torch.Size([1, 184640])


Processing audio:  17%|█▋        | 339/2000 [04:42<19:56,  1.39it/s]

torch.Size([1, 90880])


Processing audio:  17%|█▋        | 340/2000 [04:43<17:29,  1.58it/s]

torch.Size([1, 227200])


Processing audio:  17%|█▋        | 341/2000 [04:44<21:05,  1.31it/s]

torch.Size([1, 43040])


Processing audio:  17%|█▋        | 342/2000 [04:44<17:00,  1.62it/s]

torch.Size([1, 138400])


Processing audio:  17%|█▋        | 343/2000 [04:45<17:52,  1.54it/s]

torch.Size([1, 272000])


Processing audio:  17%|█▋        | 344/2000 [04:46<22:28,  1.23it/s]

torch.Size([1, 104000])


Processing audio:  17%|█▋        | 346/2000 [04:47<15:00,  1.84it/s]

torch.Size([1, 33760])
torch.Size([1, 307520])


Processing audio:  17%|█▋        | 347/2000 [04:48<20:35,  1.34it/s]

torch.Size([1, 121760])


Processing audio:  17%|█▋        | 348/2000 [04:49<21:09,  1.30it/s]

torch.Size([1, 477440])


Processing audio:  17%|█▋        | 349/2000 [04:52<41:26,  1.51s/it]

torch.Size([1, 137120])


Processing audio:  18%|█▊        | 350/2000 [04:53<34:01,  1.24s/it]

torch.Size([1, 293440])


Processing audio:  18%|█▊        | 351/2000 [04:54<37:08,  1.35s/it]

torch.Size([1, 290080])


Processing audio:  18%|█▊        | 352/2000 [04:56<36:32,  1.33s/it]

torch.Size([1, 346880])


Processing audio:  18%|█▊        | 353/2000 [04:57<38:57,  1.42s/it]

torch.Size([1, 151040])


Processing audio:  18%|█▊        | 354/2000 [04:58<33:17,  1.21s/it]

torch.Size([1, 113440])


Processing audio:  18%|█▊        | 355/2000 [04:58<27:49,  1.01s/it]

torch.Size([1, 75840])


Processing audio:  18%|█▊        | 356/2000 [04:59<22:42,  1.21it/s]

torch.Size([1, 209920])


Processing audio:  18%|█▊        | 357/2000 [05:00<23:54,  1.15it/s]

torch.Size([1, 93920])


Processing audio:  18%|█▊        | 358/2000 [05:00<20:38,  1.33it/s]

torch.Size([1, 70240])


Processing audio:  18%|█▊        | 359/2000 [05:01<18:02,  1.52it/s]

torch.Size([1, 187680])


Processing audio:  18%|█▊        | 360/2000 [05:02<20:52,  1.31it/s]

torch.Size([1, 106240])


Processing audio:  18%|█▊        | 361/2000 [05:02<20:41,  1.32it/s]

torch.Size([1, 21280])


Processing audio:  18%|█▊        | 362/2000 [05:03<16:21,  1.67it/s]

torch.Size([1, 33440])


Processing audio:  18%|█▊        | 364/2000 [05:03<10:47,  2.52it/s]

torch.Size([1, 25120])
torch.Size([1, 153280])


Processing audio:  18%|█▊        | 365/2000 [05:04<16:41,  1.63it/s]

torch.Size([1, 521920])


Processing audio:  18%|█▊        | 366/2000 [05:07<36:36,  1.34s/it]

torch.Size([1, 325440])


Processing audio:  18%|█▊        | 367/2000 [05:09<38:51,  1.43s/it]

torch.Size([1, 175040])


Processing audio:  18%|█▊        | 369/2000 [05:10<25:48,  1.05it/s]

torch.Size([1, 47520])
torch.Size([1, 111520])


Processing audio:  18%|█▊        | 370/2000 [05:11<22:23,  1.21it/s]

torch.Size([1, 156320])


Processing audio:  19%|█▊        | 371/2000 [05:11<18:08,  1.50it/s]

torch.Size([1, 92160])


Processing audio:  19%|█▊        | 372/2000 [05:11<16:59,  1.60it/s]

torch.Size([1, 188960])


Processing audio:  19%|█▊        | 373/2000 [05:12<19:30,  1.39it/s]

torch.Size([1, 142880])


Processing audio:  19%|█▊        | 374/2000 [05:13<16:45,  1.62it/s]

torch.Size([1, 413280])


Processing audio:  19%|█▉        | 375/2000 [05:15<30:56,  1.14s/it]

torch.Size([1, 198240])


Processing audio:  19%|█▉        | 376/2000 [05:17<33:47,  1.25s/it]

torch.Size([1, 162400])


Processing audio:  19%|█▉        | 377/2000 [05:17<29:55,  1.11s/it]

torch.Size([1, 158240])


Processing audio:  19%|█▉        | 378/2000 [05:18<27:05,  1.00s/it]

torch.Size([1, 408960])


Processing audio:  19%|█▉        | 379/2000 [05:20<34:05,  1.26s/it]

torch.Size([1, 206400])


Processing audio:  19%|█▉        | 380/2000 [05:21<31:36,  1.17s/it]

torch.Size([1, 502240])


Processing audio:  19%|█▉        | 381/2000 [05:23<41:22,  1.53s/it]

torch.Size([1, 511200])


Processing audio:  19%|█▉        | 382/2000 [05:25<46:24,  1.72s/it]

torch.Size([1, 177120])


Processing audio:  19%|█▉        | 383/2000 [05:26<39:39,  1.47s/it]

torch.Size([1, 63040])


Processing audio:  19%|█▉        | 384/2000 [05:27<30:14,  1.12s/it]

torch.Size([1, 148800])


Processing audio:  19%|█▉        | 385/2000 [05:28<28:38,  1.06s/it]

torch.Size([1, 213760])


Processing audio:  19%|█▉        | 386/2000 [05:29<32:48,  1.22s/it]

torch.Size([1, 144960])


Processing audio:  19%|█▉        | 387/2000 [05:30<30:41,  1.14s/it]

torch.Size([1, 157920])


Processing audio:  19%|█▉        | 388/2000 [05:31<26:13,  1.02it/s]

torch.Size([1, 268640])


Processing audio:  19%|█▉        | 389/2000 [05:32<28:11,  1.05s/it]

torch.Size([1, 162880])


Processing audio:  20%|█▉        | 390/2000 [05:33<26:32,  1.01it/s]

torch.Size([1, 30080])


Processing audio:  20%|█▉        | 391/2000 [05:33<20:44,  1.29it/s]

torch.Size([1, 376000])


Processing audio:  20%|█▉        | 392/2000 [05:35<28:49,  1.08s/it]

torch.Size([1, 93120])


Processing audio:  20%|█▉        | 393/2000 [05:35<25:01,  1.07it/s]

torch.Size([1, 96960])


Processing audio:  20%|█▉        | 394/2000 [05:36<20:36,  1.30it/s]

torch.Size([1, 172960])


Processing audio:  20%|█▉        | 395/2000 [05:37<20:28,  1.31it/s]

torch.Size([1, 284640])


Processing audio:  20%|█▉        | 396/2000 [05:38<23:42,  1.13it/s]

torch.Size([1, 66880])


Processing audio:  20%|█▉        | 398/2000 [05:38<14:40,  1.82it/s]

torch.Size([1, 24320])
torch.Size([1, 113440])


Processing audio:  20%|█▉        | 399/2000 [05:39<14:13,  1.87it/s]

torch.Size([1, 188000])


Processing audio:  20%|██        | 400/2000 [05:39<15:45,  1.69it/s]

torch.Size([1, 127520])


Processing audio:  20%|██        | 401/2000 [05:40<15:05,  1.77it/s]

torch.Size([1, 128320])


Processing audio:  20%|██        | 402/2000 [05:41<17:01,  1.56it/s]

torch.Size([1, 134720])


Processing audio:  20%|██        | 403/2000 [05:42<19:02,  1.40it/s]

torch.Size([1, 75040])


Processing audio:  20%|██        | 404/2000 [05:42<17:46,  1.50it/s]

torch.Size([1, 59840])


Processing audio:  20%|██        | 405/2000 [05:43<16:01,  1.66it/s]

torch.Size([1, 156160])


Processing audio:  20%|██        | 406/2000 [05:44<18:52,  1.41it/s]

torch.Size([1, 1290240])


Processing audio:  20%|██        | 407/2000 [05:52<1:19:18,  2.99s/it]

torch.Size([1, 290720])


Processing audio:  20%|██        | 408/2000 [05:54<1:08:25,  2.58s/it]

torch.Size([1, 164160])


Processing audio:  20%|██        | 409/2000 [05:55<57:28,  2.17s/it]  

torch.Size([1, 173280])


Processing audio:  20%|██        | 410/2000 [05:56<49:32,  1.87s/it]

torch.Size([1, 142400])


Processing audio:  21%|██        | 411/2000 [05:57<39:25,  1.49s/it]

torch.Size([1, 148480])


Processing audio:  21%|██        | 412/2000 [05:57<33:45,  1.28s/it]

torch.Size([1, 88000])


Processing audio:  21%|██        | 413/2000 [05:58<27:04,  1.02s/it]

torch.Size([1, 185920])


Processing audio:  21%|██        | 414/2000 [05:58<24:06,  1.10it/s]

torch.Size([1, 104800])


Processing audio:  21%|██        | 415/2000 [05:59<21:02,  1.26it/s]

torch.Size([1, 212480])


Processing audio:  21%|██        | 416/2000 [06:00<22:12,  1.19it/s]

torch.Size([1, 197120])


Processing audio:  21%|██        | 417/2000 [06:01<23:23,  1.13it/s]

torch.Size([1, 121280])


Processing audio:  21%|██        | 418/2000 [06:01<20:46,  1.27it/s]

torch.Size([1, 440960])


Processing audio:  21%|██        | 419/2000 [06:03<28:33,  1.08s/it]

torch.Size([1, 512160])


Processing audio:  21%|██        | 420/2000 [06:06<38:38,  1.47s/it]

torch.Size([1, 132160])


Processing audio:  21%|██        | 421/2000 [06:06<32:31,  1.24s/it]

torch.Size([1, 172960])


Processing audio:  21%|██        | 422/2000 [06:08<32:40,  1.24s/it]

torch.Size([1, 97280])


Processing audio:  21%|██        | 423/2000 [06:08<28:18,  1.08s/it]

torch.Size([1, 44800])


Processing audio:  21%|██        | 424/2000 [06:09<23:37,  1.11it/s]

torch.Size([1, 26720])


Processing audio:  21%|██▏       | 425/2000 [06:09<18:30,  1.42it/s]

torch.Size([1, 210240])


Processing audio:  21%|██▏       | 426/2000 [06:10<21:39,  1.21it/s]

torch.Size([1, 257440])


Processing audio:  21%|██▏       | 427/2000 [06:11<21:27,  1.22it/s]

torch.Size([1, 53440])


Processing audio:  21%|██▏       | 428/2000 [06:11<17:42,  1.48it/s]

torch.Size([1, 120960])


Processing audio:  21%|██▏       | 429/2000 [06:12<16:51,  1.55it/s]

torch.Size([1, 78080])


Processing audio:  22%|██▏       | 430/2000 [06:12<15:48,  1.66it/s]

torch.Size([1, 98240])


Processing audio:  22%|██▏       | 431/2000 [06:13<15:59,  1.64it/s]

torch.Size([1, 35520])


Processing audio:  22%|██▏       | 432/2000 [06:13<13:19,  1.96it/s]

torch.Size([1, 112640])


Processing audio:  22%|██▏       | 433/2000 [06:14<14:20,  1.82it/s]

torch.Size([1, 327840])


Processing audio:  22%|██▏       | 434/2000 [06:15<20:27,  1.28it/s]

torch.Size([1, 175840])


Processing audio:  22%|██▏       | 435/2000 [06:16<20:08,  1.29it/s]

torch.Size([1, 48480])


Processing audio:  22%|██▏       | 436/2000 [06:16<16:17,  1.60it/s]

torch.Size([1, 339840])


Processing audio:  22%|██▏       | 437/2000 [06:18<23:41,  1.10it/s]

torch.Size([1, 91200])


Processing audio:  22%|██▏       | 438/2000 [06:18<18:40,  1.39it/s]

torch.Size([1, 52800])


Processing audio:  22%|██▏       | 439/2000 [06:18<15:39,  1.66it/s]

torch.Size([1, 261920])


Processing audio:  22%|██▏       | 440/2000 [06:20<22:18,  1.17it/s]

torch.Size([1, 26240])


Processing audio:  22%|██▏       | 441/2000 [06:20<17:38,  1.47it/s]

torch.Size([1, 90560])


Processing audio:  22%|██▏       | 442/2000 [06:21<18:01,  1.44it/s]

torch.Size([1, 129280])


Processing audio:  22%|██▏       | 443/2000 [06:21<17:32,  1.48it/s]

torch.Size([1, 138880])


Processing audio:  22%|██▏       | 444/2000 [06:22<19:24,  1.34it/s]

torch.Size([1, 580640])


Processing audio:  22%|██▏       | 445/2000 [06:28<53:41,  2.07s/it]

torch.Size([1, 182720])


Processing audio:  22%|██▏       | 446/2000 [06:28<43:35,  1.68s/it]

torch.Size([1, 116960])


Processing audio:  22%|██▏       | 447/2000 [06:29<35:09,  1.36s/it]

torch.Size([1, 60640])


Processing audio:  22%|██▏       | 448/2000 [06:29<27:16,  1.05s/it]

torch.Size([1, 63520])


Processing audio:  22%|██▏       | 449/2000 [06:30<21:34,  1.20it/s]

torch.Size([1, 47520])


Processing audio:  22%|██▎       | 450/2000 [06:30<17:40,  1.46it/s]

torch.Size([1, 112800])


Processing audio:  23%|██▎       | 451/2000 [06:30<16:55,  1.53it/s]

torch.Size([1, 85440])


Processing audio:  23%|██▎       | 452/2000 [06:31<15:26,  1.67it/s]

torch.Size([1, 61760])


Processing audio:  23%|██▎       | 453/2000 [06:31<13:26,  1.92it/s]

torch.Size([1, 27040])


Processing audio:  23%|██▎       | 454/2000 [06:32<11:12,  2.30it/s]

torch.Size([1, 67840])


Processing audio:  23%|██▎       | 455/2000 [06:32<10:59,  2.34it/s]

torch.Size([1, 94240])


Processing audio:  23%|██▎       | 456/2000 [06:33<13:21,  1.93it/s]

torch.Size([1, 66560])


Processing audio:  23%|██▎       | 457/2000 [06:33<14:04,  1.83it/s]

torch.Size([1, 62720])


Processing audio:  23%|██▎       | 458/2000 [06:34<12:28,  2.06it/s]

torch.Size([1, 259840])


Processing audio:  23%|██▎       | 459/2000 [06:35<22:27,  1.14it/s]

torch.Size([1, 193440])


Processing audio:  23%|██▎       | 460/2000 [06:36<23:58,  1.07it/s]

torch.Size([1, 265600])


Processing audio:  23%|██▎       | 461/2000 [06:37<24:21,  1.05it/s]

torch.Size([1, 129440])


Processing audio:  23%|██▎       | 462/2000 [06:38<22:37,  1.13it/s]

torch.Size([1, 76800])


Processing audio:  23%|██▎       | 463/2000 [06:39<19:07,  1.34it/s]

torch.Size([1, 194560])


Processing audio:  23%|██▎       | 464/2000 [06:40<20:19,  1.26it/s]

torch.Size([1, 112800])


Processing audio:  23%|██▎       | 465/2000 [06:40<18:20,  1.39it/s]

torch.Size([1, 70080])


Processing audio:  23%|██▎       | 466/2000 [06:41<16:43,  1.53it/s]

torch.Size([1, 118880])


Processing audio:  23%|██▎       | 467/2000 [06:41<17:01,  1.50it/s]

torch.Size([1, 281760])


Processing audio:  23%|██▎       | 468/2000 [06:43<22:45,  1.12it/s]

torch.Size([1, 92320])


Processing audio:  23%|██▎       | 469/2000 [06:43<20:15,  1.26it/s]

torch.Size([1, 96160])


Processing audio:  24%|██▎       | 470/2000 [06:44<18:17,  1.39it/s]

torch.Size([1, 152320])


Processing audio:  24%|██▎       | 471/2000 [06:45<18:52,  1.35it/s]

torch.Size([1, 253120])


Processing audio:  24%|██▎       | 472/2000 [06:46<24:44,  1.03it/s]

torch.Size([1, 147520])


Processing audio:  24%|██▎       | 473/2000 [06:47<25:08,  1.01it/s]

torch.Size([1, 42400])


Processing audio:  24%|██▎       | 474/2000 [06:47<20:02,  1.27it/s]

torch.Size([1, 160000])


Processing audio:  24%|██▍       | 475/2000 [06:49<23:05,  1.10it/s]

torch.Size([1, 100320])


Processing audio:  24%|██▍       | 476/2000 [06:49<20:27,  1.24it/s]

torch.Size([1, 117920])


Processing audio:  24%|██▍       | 477/2000 [06:50<18:49,  1.35it/s]

torch.Size([1, 262720])


Processing audio:  24%|██▍       | 478/2000 [06:51<21:43,  1.17it/s]

torch.Size([1, 37920])


Processing audio:  24%|██▍       | 479/2000 [06:51<17:03,  1.49it/s]

torch.Size([1, 66880])


Processing audio:  24%|██▍       | 480/2000 [06:52<14:44,  1.72it/s]

torch.Size([1, 740320])


Processing audio:  24%|██▍       | 481/2000 [06:55<39:07,  1.55s/it]

torch.Size([1, 235040])


Processing audio:  24%|██▍       | 482/2000 [06:56<36:10,  1.43s/it]

torch.Size([1, 138240])


Processing audio:  24%|██▍       | 483/2000 [06:57<30:05,  1.19s/it]

torch.Size([1, 99520])


Processing audio:  24%|██▍       | 484/2000 [06:58<25:39,  1.02s/it]

torch.Size([1, 200960])


Processing audio:  24%|██▍       | 485/2000 [06:59<25:18,  1.00s/it]

torch.Size([1, 249280])


Processing audio:  24%|██▍       | 486/2000 [07:00<29:30,  1.17s/it]

torch.Size([1, 20800])


Processing audio:  24%|██▍       | 487/2000 [07:00<22:19,  1.13it/s]

torch.Size([1, 301440])


Processing audio:  24%|██▍       | 488/2000 [07:02<27:50,  1.10s/it]

torch.Size([1, 98880])


Processing audio:  24%|██▍       | 489/2000 [07:03<23:08,  1.09it/s]

torch.Size([1, 82400])


Processing audio:  24%|██▍       | 490/2000 [07:03<20:06,  1.25it/s]

torch.Size([1, 150080])


Processing audio:  25%|██▍       | 491/2000 [07:04<20:10,  1.25it/s]

torch.Size([1, 77440])


Processing audio:  25%|██▍       | 492/2000 [07:04<17:13,  1.46it/s]

torch.Size([1, 104160])


Processing audio:  25%|██▍       | 493/2000 [07:05<15:40,  1.60it/s]

torch.Size([1, 124000])


Processing audio:  25%|██▍       | 494/2000 [07:05<15:10,  1.65it/s]

torch.Size([1, 109280])


Processing audio:  25%|██▍       | 495/2000 [07:06<14:56,  1.68it/s]

torch.Size([1, 158880])


Processing audio:  25%|██▍       | 496/2000 [07:06<14:10,  1.77it/s]

torch.Size([1, 507680])


Processing audio:  25%|██▍       | 497/2000 [07:09<31:18,  1.25s/it]

torch.Size([1, 291680])


Processing audio:  25%|██▍       | 498/2000 [07:11<33:06,  1.32s/it]

torch.Size([1, 205920])


Processing audio:  25%|██▍       | 499/2000 [07:12<30:50,  1.23s/it]

torch.Size([1, 92320])


Processing audio:  25%|██▌       | 500/2000 [07:13<27:30,  1.10s/it]

torch.Size([1, 131680])


Processing audio:  25%|██▌       | 501/2000 [07:13<24:44,  1.01it/s]

torch.Size([1, 240000])


Processing audio:  25%|██▌       | 502/2000 [07:15<27:56,  1.12s/it]

torch.Size([1, 116640])


Processing audio:  25%|██▌       | 503/2000 [07:15<24:00,  1.04it/s]

torch.Size([1, 114880])


Processing audio:  25%|██▌       | 504/2000 [07:16<21:52,  1.14it/s]

torch.Size([1, 110400])


Processing audio:  25%|██▌       | 505/2000 [07:17<20:05,  1.24it/s]

torch.Size([1, 86720])


Processing audio:  25%|██▌       | 506/2000 [07:17<17:24,  1.43it/s]

torch.Size([1, 77440])


Processing audio:  25%|██▌       | 507/2000 [07:18<15:15,  1.63it/s]

torch.Size([1, 36000])


Processing audio:  25%|██▌       | 508/2000 [07:18<12:28,  1.99it/s]

torch.Size([1, 111360])


Processing audio:  25%|██▌       | 509/2000 [07:18<13:09,  1.89it/s]

torch.Size([1, 492160])


Processing audio:  26%|██▌       | 510/2000 [07:21<27:39,  1.11s/it]

torch.Size([1, 162240])


Processing audio:  26%|██▌       | 511/2000 [07:22<24:37,  1.01it/s]

torch.Size([1, 110880])


Processing audio:  26%|██▌       | 512/2000 [07:22<20:37,  1.20it/s]

torch.Size([1, 261760])


Processing audio:  26%|██▌       | 514/2000 [07:24<19:03,  1.30it/s]

torch.Size([1, 31200])
torch.Size([1, 318240])


Processing audio:  26%|██▌       | 515/2000 [07:25<23:39,  1.05it/s]

torch.Size([1, 141760])


Processing audio:  26%|██▌       | 516/2000 [07:26<24:05,  1.03it/s]

torch.Size([1, 126240])


Processing audio:  26%|██▌       | 517/2000 [07:27<22:17,  1.11it/s]

torch.Size([1, 80000])


Processing audio:  26%|██▌       | 518/2000 [07:27<20:04,  1.23it/s]

torch.Size([1, 229120])


Processing audio:  26%|██▌       | 519/2000 [07:28<22:12,  1.11it/s]

torch.Size([1, 134880])


Processing audio:  26%|██▌       | 520/2000 [07:29<19:46,  1.25it/s]

torch.Size([1, 183680])


Processing audio:  26%|██▌       | 522/2000 [07:30<14:58,  1.64it/s]

torch.Size([1, 25600])
torch.Size([1, 119840])


Processing audio:  26%|██▌       | 523/2000 [07:30<13:22,  1.84it/s]

torch.Size([1, 116320])


Processing audio:  26%|██▌       | 524/2000 [07:31<13:28,  1.83it/s]

torch.Size([1, 295840])


Processing audio:  26%|██▋       | 525/2000 [07:32<18:15,  1.35it/s]

torch.Size([1, 117120])


Processing audio:  26%|██▋       | 526/2000 [07:33<17:12,  1.43it/s]

torch.Size([1, 213280])


Processing audio:  26%|██▋       | 527/2000 [07:34<18:00,  1.36it/s]

torch.Size([1, 17120])
torch.Size([1, 160960])


Processing audio:  26%|██▋       | 529/2000 [07:34<13:42,  1.79it/s]

torch.Size([1, 36320])


Processing audio:  27%|██▋       | 531/2000 [07:35<09:42,  2.52it/s]

torch.Size([1, 27840])
torch.Size([1, 52320])


Processing audio:  27%|██▋       | 532/2000 [07:35<09:15,  2.64it/s]

torch.Size([1, 233920])


Processing audio:  27%|██▋       | 533/2000 [07:36<13:59,  1.75it/s]

torch.Size([1, 84320])


Processing audio:  27%|██▋       | 534/2000 [07:37<12:59,  1.88it/s]

torch.Size([1, 299680])


Processing audio:  27%|██▋       | 535/2000 [07:38<22:24,  1.09it/s]

torch.Size([1, 65440])


Processing audio:  27%|██▋       | 536/2000 [07:39<19:35,  1.25it/s]

torch.Size([1, 266560])


Processing audio:  27%|██▋       | 537/2000 [07:41<25:45,  1.06s/it]

torch.Size([1, 105120])


Processing audio:  27%|██▋       | 538/2000 [07:41<21:33,  1.13it/s]

torch.Size([1, 369440])


Processing audio:  27%|██▋       | 539/2000 [07:43<28:20,  1.16s/it]

torch.Size([1, 170400])


Processing audio:  27%|██▋       | 540/2000 [07:44<26:00,  1.07s/it]

torch.Size([1, 171200])


Processing audio:  27%|██▋       | 541/2000 [07:44<22:58,  1.06it/s]

torch.Size([1, 177760])


Processing audio:  27%|██▋       | 542/2000 [07:45<20:32,  1.18it/s]

torch.Size([1, 113600])


Processing audio:  27%|██▋       | 543/2000 [07:46<19:20,  1.26it/s]

torch.Size([1, 90720])


Processing audio:  27%|██▋       | 544/2000 [07:46<17:25,  1.39it/s]

torch.Size([1, 154400])


Processing audio:  27%|██▋       | 545/2000 [07:47<16:50,  1.44it/s]

torch.Size([1, 67840])


Processing audio:  27%|██▋       | 547/2000 [07:47<11:09,  2.17it/s]

torch.Size([1, 53440])
torch.Size([1, 97920])


Processing audio:  27%|██▋       | 548/2000 [07:48<11:55,  2.03it/s]

torch.Size([1, 120160])


Processing audio:  27%|██▋       | 549/2000 [07:49<13:53,  1.74it/s]

torch.Size([1, 104320])


Processing audio:  28%|██▊       | 550/2000 [07:49<14:16,  1.69it/s]

torch.Size([1, 91840])


Processing audio:  28%|██▊       | 552/2000 [07:50<10:57,  2.20it/s]

torch.Size([1, 18400])
torch.Size([1, 94240])


Processing audio:  28%|██▊       | 553/2000 [07:51<12:52,  1.87it/s]

torch.Size([1, 255840])


Processing audio:  28%|██▊       | 554/2000 [07:53<22:36,  1.07it/s]

torch.Size([1, 33120])


Processing audio:  28%|██▊       | 555/2000 [07:53<17:43,  1.36it/s]

torch.Size([1, 93920])


Processing audio:  28%|██▊       | 556/2000 [07:53<16:26,  1.46it/s]

torch.Size([1, 50720])


Processing audio:  28%|██▊       | 557/2000 [07:54<13:49,  1.74it/s]

torch.Size([1, 223520])


Processing audio:  28%|██▊       | 558/2000 [07:55<18:05,  1.33it/s]

torch.Size([1, 93920])


Processing audio:  28%|██▊       | 559/2000 [07:55<16:15,  1.48it/s]

torch.Size([1, 432480])


Processing audio:  28%|██▊       | 560/2000 [07:58<27:05,  1.13s/it]

torch.Size([1, 195680])


Processing audio:  28%|██▊       | 561/2000 [07:58<25:14,  1.05s/it]

torch.Size([1, 186080])


Processing audio:  28%|██▊       | 562/2000 [07:59<24:50,  1.04s/it]

torch.Size([1, 139200])


Processing audio:  28%|██▊       | 563/2000 [08:00<22:02,  1.09it/s]

torch.Size([1, 63360])


Processing audio:  28%|██▊       | 564/2000 [08:00<17:13,  1.39it/s]

torch.Size([1, 124320])


Processing audio:  28%|██▊       | 565/2000 [08:01<17:28,  1.37it/s]

torch.Size([1, 15520])
torch.Size([1, 42880])


Processing audio:  28%|██▊       | 567/2000 [08:01<11:17,  2.12it/s]

torch.Size([1, 308000])


Processing audio:  28%|██▊       | 568/2000 [08:03<18:04,  1.32it/s]

torch.Size([1, 115360])


Processing audio:  28%|██▊       | 569/2000 [08:04<18:58,  1.26it/s]

torch.Size([1, 36480])


Processing audio:  28%|██▊       | 570/2000 [08:04<16:47,  1.42it/s]

torch.Size([1, 152640])


Processing audio:  29%|██▊       | 571/2000 [08:06<20:23,  1.17it/s]

torch.Size([1, 177920])


Processing audio:  29%|██▊       | 572/2000 [08:07<20:24,  1.17it/s]

torch.Size([1, 37280])


Processing audio:  29%|██▊       | 573/2000 [08:07<16:13,  1.47it/s]

torch.Size([1, 171040])


Processing audio:  29%|██▊       | 574/2000 [08:08<17:33,  1.35it/s]

torch.Size([1, 123840])


Processing audio:  29%|██▉       | 575/2000 [08:08<16:07,  1.47it/s]

torch.Size([1, 59200])


Processing audio:  29%|██▉       | 576/2000 [08:09<13:18,  1.78it/s]

torch.Size([1, 83040])


Processing audio:  29%|██▉       | 577/2000 [08:09<12:38,  1.88it/s]

torch.Size([1, 72320])


Processing audio:  29%|██▉       | 578/2000 [08:09<11:37,  2.04it/s]

torch.Size([1, 171680])


Processing audio:  29%|██▉       | 579/2000 [08:10<14:24,  1.64it/s]

torch.Size([1, 369120])


Processing audio:  29%|██▉       | 580/2000 [08:12<23:33,  1.00it/s]

torch.Size([1, 83360])


Processing audio:  29%|██▉       | 581/2000 [08:13<19:14,  1.23it/s]

torch.Size([1, 93120])


Processing audio:  29%|██▉       | 582/2000 [08:13<17:11,  1.37it/s]

torch.Size([1, 208160])


Processing audio:  29%|██▉       | 583/2000 [08:14<19:28,  1.21it/s]

torch.Size([1, 111040])


Processing audio:  29%|██▉       | 584/2000 [08:15<17:25,  1.35it/s]

torch.Size([1, 39040])


Processing audio:  29%|██▉       | 585/2000 [08:15<14:12,  1.66it/s]

torch.Size([1, 129760])


Processing audio:  29%|██▉       | 586/2000 [08:16<14:09,  1.67it/s]

torch.Size([1, 44000])


Processing audio:  29%|██▉       | 587/2000 [08:16<11:46,  2.00it/s]

torch.Size([1, 61760])


Processing audio:  29%|██▉       | 588/2000 [08:16<11:23,  2.06it/s]

torch.Size([1, 128160])


Processing audio:  29%|██▉       | 589/2000 [08:17<12:15,  1.92it/s]

torch.Size([1, 124800])


Processing audio:  30%|██▉       | 590/2000 [08:18<16:29,  1.42it/s]

torch.Size([1, 277280])


Processing audio:  30%|██▉       | 591/2000 [08:20<22:24,  1.05it/s]

torch.Size([1, 59200])


Processing audio:  30%|██▉       | 592/2000 [08:20<17:40,  1.33it/s]

torch.Size([1, 230400])


Processing audio:  30%|██▉       | 593/2000 [08:21<19:34,  1.20it/s]

torch.Size([1, 192000])


Processing audio:  30%|██▉       | 594/2000 [08:22<20:32,  1.14it/s]

torch.Size([1, 130720])


Processing audio:  30%|██▉       | 595/2000 [08:23<19:24,  1.21it/s]

torch.Size([1, 160480])


Processing audio:  30%|██▉       | 596/2000 [08:23<18:27,  1.27it/s]

torch.Size([1, 117280])


Processing audio:  30%|██▉       | 598/2000 [08:24<13:07,  1.78it/s]

torch.Size([1, 43200])
torch.Size([1, 256640])


Processing audio:  30%|██▉       | 599/2000 [08:26<19:57,  1.17it/s]

torch.Size([1, 150400])


Processing audio:  30%|███       | 600/2000 [08:26<17:38,  1.32it/s]

torch.Size([1, 100800])


Processing audio:  30%|███       | 601/2000 [08:27<16:02,  1.45it/s]

torch.Size([1, 144320])


Processing audio:  30%|███       | 602/2000 [08:27<15:40,  1.49it/s]

torch.Size([1, 158560])


Processing audio:  30%|███       | 603/2000 [08:28<16:01,  1.45it/s]

torch.Size([1, 40000])


Processing audio:  30%|███       | 604/2000 [08:28<13:04,  1.78it/s]

torch.Size([1, 99840])


Processing audio:  30%|███       | 605/2000 [08:29<13:02,  1.78it/s]

torch.Size([1, 45600])


Processing audio:  30%|███       | 606/2000 [08:29<11:41,  1.99it/s]

torch.Size([1, 119040])


Processing audio:  30%|███       | 607/2000 [08:30<14:11,  1.64it/s]

torch.Size([1, 183200])


Processing audio:  30%|███       | 608/2000 [08:31<18:45,  1.24it/s]

torch.Size([1, 120800])


Processing audio:  30%|███       | 609/2000 [08:32<20:05,  1.15it/s]

torch.Size([1, 86880])


Processing audio:  30%|███       | 610/2000 [08:33<17:21,  1.33it/s]

torch.Size([1, 92960])


Processing audio:  31%|███       | 611/2000 [08:33<15:32,  1.49it/s]

torch.Size([1, 132960])


Processing audio:  31%|███       | 612/2000 [08:34<16:42,  1.38it/s]

torch.Size([1, 146240])


Processing audio:  31%|███       | 613/2000 [08:35<17:19,  1.33it/s]

torch.Size([1, 102880])


Processing audio:  31%|███       | 615/2000 [08:35<11:40,  1.98it/s]

torch.Size([1, 16640])
torch.Size([1, 153600])


Processing audio:  31%|███       | 616/2000 [08:36<13:48,  1.67it/s]

torch.Size([1, 105440])


Processing audio:  31%|███       | 617/2000 [08:37<12:35,  1.83it/s]

torch.Size([1, 33760])


Processing audio:  31%|███       | 618/2000 [08:37<10:13,  2.25it/s]

torch.Size([1, 977760])


Processing audio:  31%|███       | 619/2000 [08:42<39:30,  1.72s/it]

torch.Size([1, 84160])


Processing audio:  31%|███       | 620/2000 [08:42<31:46,  1.38s/it]

torch.Size([1, 157280])


Processing audio:  31%|███       | 621/2000 [08:43<30:16,  1.32s/it]

torch.Size([1, 161120])


Processing audio:  31%|███       | 622/2000 [08:44<28:07,  1.22s/it]

torch.Size([1, 217760])


Processing audio:  31%|███       | 624/2000 [08:46<21:43,  1.06it/s]

torch.Size([1, 32160])
torch.Size([1, 78720])


Processing audio:  31%|███▏      | 625/2000 [08:46<18:01,  1.27it/s]

torch.Size([1, 191520])


Processing audio:  31%|███▏      | 626/2000 [08:47<19:49,  1.16it/s]

torch.Size([1, 175520])


Processing audio:  31%|███▏      | 627/2000 [08:48<20:36,  1.11it/s]

torch.Size([1, 542080])


Processing audio:  31%|███▏      | 628/2000 [08:51<32:39,  1.43s/it]

torch.Size([1, 138560])


Processing audio:  31%|███▏      | 629/2000 [08:52<27:17,  1.19s/it]

torch.Size([1, 71040])


Processing audio:  32%|███▏      | 630/2000 [08:52<21:55,  1.04it/s]

torch.Size([1, 104960])


Processing audio:  32%|███▏      | 631/2000 [08:53<19:20,  1.18it/s]

torch.Size([1, 108640])


Processing audio:  32%|███▏      | 633/2000 [08:53<13:26,  1.70it/s]

torch.Size([1, 49920])
torch.Size([1, 146400])


Processing audio:  32%|███▏      | 634/2000 [08:54<13:42,  1.66it/s]

torch.Size([1, 93440])


Processing audio:  32%|███▏      | 635/2000 [08:55<13:07,  1.73it/s]

torch.Size([1, 80640])


Processing audio:  32%|███▏      | 636/2000 [08:55<11:18,  2.01it/s]

torch.Size([1, 168640])


Processing audio:  32%|███▏      | 637/2000 [08:56<14:59,  1.52it/s]

torch.Size([1, 157920])


Processing audio:  32%|███▏      | 638/2000 [08:57<17:58,  1.26it/s]

torch.Size([1, 201120])


Processing audio:  32%|███▏      | 639/2000 [08:58<21:51,  1.04it/s]

torch.Size([1, 514880])


Processing audio:  32%|███▏      | 640/2000 [09:00<28:34,  1.26s/it]

torch.Size([1, 45120])


Processing audio:  32%|███▏      | 641/2000 [09:01<21:50,  1.04it/s]

torch.Size([1, 109920])


Processing audio:  32%|███▏      | 642/2000 [09:01<19:01,  1.19it/s]

torch.Size([1, 114720])


Processing audio:  32%|███▏      | 643/2000 [09:02<16:53,  1.34it/s]

torch.Size([1, 128160])


Processing audio:  32%|███▏      | 644/2000 [09:02<14:35,  1.55it/s]

torch.Size([1, 170400])


Processing audio:  32%|███▏      | 645/2000 [09:03<15:37,  1.44it/s]

torch.Size([1, 217920])


Processing audio:  32%|███▏      | 646/2000 [09:04<18:38,  1.21it/s]

torch.Size([1, 192640])


Processing audio:  32%|███▏      | 647/2000 [09:05<19:32,  1.15it/s]

torch.Size([1, 74880])


Processing audio:  32%|███▏      | 648/2000 [09:05<16:01,  1.41it/s]

torch.Size([1, 170560])


Processing audio:  32%|███▏      | 649/2000 [09:06<17:17,  1.30it/s]

torch.Size([1, 204160])


Processing audio:  32%|███▎      | 650/2000 [09:07<18:57,  1.19it/s]

torch.Size([1, 147360])


Processing audio:  33%|███▎      | 651/2000 [09:08<18:32,  1.21it/s]

torch.Size([1, 27840])
torch.Size([1, 110560])


Processing audio:  33%|███▎      | 653/2000 [09:09<14:01,  1.60it/s]

torch.Size([1, 139520])


Processing audio:  33%|███▎      | 654/2000 [09:10<15:23,  1.46it/s]

torch.Size([1, 169120])


Processing audio:  33%|███▎      | 655/2000 [09:11<18:24,  1.22it/s]

torch.Size([1, 109600])


Processing audio:  33%|███▎      | 656/2000 [09:12<17:37,  1.27it/s]

torch.Size([1, 576640])


Processing audio:  33%|███▎      | 657/2000 [09:14<30:30,  1.36s/it]

torch.Size([1, 102400])


Processing audio:  33%|███▎      | 658/2000 [09:15<24:30,  1.10s/it]

torch.Size([1, 220480])


Processing audio:  33%|███▎      | 659/2000 [09:16<24:30,  1.10s/it]

torch.Size([1, 254720])


Processing audio:  33%|███▎      | 660/2000 [09:17<25:59,  1.16s/it]

torch.Size([1, 79520])


Processing audio:  33%|███▎      | 661/2000 [09:18<20:25,  1.09it/s]

torch.Size([1, 168640])


Processing audio:  33%|███▎      | 662/2000 [09:18<19:11,  1.16it/s]

torch.Size([1, 153920])


Processing audio:  33%|███▎      | 663/2000 [09:19<18:21,  1.21it/s]

torch.Size([1, 113120])


Processing audio:  33%|███▎      | 664/2000 [09:20<17:39,  1.26it/s]

torch.Size([1, 65600])


Processing audio:  33%|███▎      | 665/2000 [09:20<14:26,  1.54it/s]

torch.Size([1, 45440])


Processing audio:  33%|███▎      | 666/2000 [09:21<12:34,  1.77it/s]

torch.Size([1, 44480])


Processing audio:  33%|███▎      | 667/2000 [09:21<10:34,  2.10it/s]

torch.Size([1, 190080])


Processing audio:  33%|███▎      | 669/2000 [09:22<12:07,  1.83it/s]

torch.Size([1, 26720])
torch.Size([1, 147040])


Processing audio:  34%|███▎      | 670/2000 [09:23<16:25,  1.35it/s]

torch.Size([1, 112160])


Processing audio:  34%|███▎      | 671/2000 [09:24<18:23,  1.20it/s]

torch.Size([1, 212640])


Processing audio:  34%|███▎      | 672/2000 [09:25<19:38,  1.13it/s]

torch.Size([1, 280160])


Processing audio:  34%|███▎      | 673/2000 [09:27<21:57,  1.01it/s]

torch.Size([1, 41120])


Processing audio:  34%|███▎      | 674/2000 [09:27<17:05,  1.29it/s]

torch.Size([1, 151200])


Processing audio:  34%|███▍      | 675/2000 [09:28<17:07,  1.29it/s]

torch.Size([1, 88160])


Processing audio:  34%|███▍      | 676/2000 [09:28<15:22,  1.44it/s]

torch.Size([1, 90720])


Processing audio:  34%|███▍      | 677/2000 [09:29<13:59,  1.58it/s]

torch.Size([1, 120960])


Processing audio:  34%|███▍      | 678/2000 [09:29<14:10,  1.55it/s]

torch.Size([1, 111360])


Processing audio:  34%|███▍      | 679/2000 [09:30<14:09,  1.56it/s]

torch.Size([1, 85280])


Processing audio:  34%|███▍      | 680/2000 [09:31<12:57,  1.70it/s]

torch.Size([1, 158400])


Processing audio:  34%|███▍      | 681/2000 [09:31<13:21,  1.64it/s]

torch.Size([1, 146240])


Processing audio:  34%|███▍      | 682/2000 [09:32<14:01,  1.57it/s]

torch.Size([1, 191040])


Processing audio:  34%|███▍      | 683/2000 [09:33<14:26,  1.52it/s]

torch.Size([1, 79680])


Processing audio:  34%|███▍      | 684/2000 [09:33<13:35,  1.61it/s]

torch.Size([1, 240800])


Processing audio:  34%|███▍      | 685/2000 [09:34<16:43,  1.31it/s]

torch.Size([1, 209120])


Processing audio:  34%|███▍      | 686/2000 [09:36<22:00,  1.00s/it]

torch.Size([1, 157280])


Processing audio:  34%|███▍      | 687/2000 [09:37<22:51,  1.04s/it]

torch.Size([1, 389600])


Processing audio:  34%|███▍      | 688/2000 [09:39<31:00,  1.42s/it]

torch.Size([1, 322880])


Processing audio:  34%|███▍      | 689/2000 [09:41<32:55,  1.51s/it]

torch.Size([1, 114400])


Processing audio:  34%|███▍      | 690/2000 [09:42<27:35,  1.26s/it]

torch.Size([1, 345440])


Processing audio:  35%|███▍      | 691/2000 [09:43<31:04,  1.42s/it]

torch.Size([1, 167200])


Processing audio:  35%|███▍      | 692/2000 [09:44<28:35,  1.31s/it]

torch.Size([1, 98560])


Processing audio:  35%|███▍      | 693/2000 [09:45<22:43,  1.04s/it]

torch.Size([1, 184960])


Processing audio:  35%|███▍      | 694/2000 [09:46<21:04,  1.03it/s]

torch.Size([1, 306720])


Processing audio:  35%|███▍      | 695/2000 [09:47<25:14,  1.16s/it]

torch.Size([1, 132000])


Processing audio:  35%|███▍      | 696/2000 [09:48<22:24,  1.03s/it]

torch.Size([1, 54880])


Processing audio:  35%|███▍      | 697/2000 [09:48<18:43,  1.16it/s]

torch.Size([1, 150080])


Processing audio:  35%|███▍      | 698/2000 [09:50<20:31,  1.06it/s]

torch.Size([1, 85760])


Processing audio:  35%|███▍      | 699/2000 [09:50<18:43,  1.16it/s]

torch.Size([1, 118400])


Processing audio:  35%|███▌      | 700/2000 [09:51<17:00,  1.27it/s]

torch.Size([1, 88800])


Processing audio:  35%|███▌      | 702/2000 [09:52<11:17,  1.92it/s]

torch.Size([1, 29440])
torch.Size([1, 268800])


Processing audio:  35%|███▌      | 703/2000 [09:53<15:45,  1.37it/s]

torch.Size([1, 218720])


Processing audio:  35%|███▌      | 704/2000 [09:54<18:15,  1.18it/s]

torch.Size([1, 46400])


Processing audio:  35%|███▌      | 705/2000 [09:54<14:13,  1.52it/s]

torch.Size([1, 130720])


Processing audio:  35%|███▌      | 706/2000 [09:55<13:25,  1.61it/s]

torch.Size([1, 104640])


Processing audio:  35%|███▌      | 707/2000 [09:55<11:55,  1.81it/s]

torch.Size([1, 111040])


Processing audio:  35%|███▌      | 708/2000 [09:55<11:28,  1.88it/s]

torch.Size([1, 296320])


Processing audio:  35%|███▌      | 709/2000 [09:57<17:48,  1.21it/s]

torch.Size([1, 124960])


Processing audio:  36%|███▌      | 710/2000 [09:58<18:08,  1.19it/s]

torch.Size([1, 190240])


Processing audio:  36%|███▌      | 711/2000 [09:59<18:31,  1.16it/s]

torch.Size([1, 240000])


Processing audio:  36%|███▌      | 713/2000 [10:00<14:24,  1.49it/s]

torch.Size([1, 32160])
torch.Size([1, 319520])


Processing audio:  36%|███▌      | 714/2000 [10:02<21:24,  1.00it/s]

torch.Size([1, 124160])


Processing audio:  36%|███▌      | 715/2000 [10:02<18:32,  1.16it/s]

torch.Size([1, 311200])


Processing audio:  36%|███▌      | 716/2000 [10:04<24:15,  1.13s/it]

torch.Size([1, 324320])


Processing audio:  36%|███▌      | 717/2000 [10:06<27:29,  1.29s/it]

torch.Size([1, 56000])


Processing audio:  36%|███▌      | 718/2000 [10:06<21:31,  1.01s/it]

torch.Size([1, 87680])


Processing audio:  36%|███▌      | 719/2000 [10:06<18:20,  1.16it/s]

torch.Size([1, 83360])


Processing audio:  36%|███▌      | 720/2000 [10:07<16:04,  1.33it/s]

torch.Size([1, 93120])


Processing audio:  36%|███▌      | 721/2000 [10:07<13:49,  1.54it/s]

torch.Size([1, 121600])


Processing audio:  36%|███▌      | 722/2000 [10:08<12:57,  1.64it/s]

torch.Size([1, 56160])


Processing audio:  36%|███▌      | 723/2000 [10:08<10:49,  1.97it/s]

torch.Size([1, 119520])


Processing audio:  36%|███▌      | 724/2000 [10:09<11:49,  1.80it/s]

torch.Size([1, 116160])


Processing audio:  36%|███▋      | 725/2000 [10:09<11:47,  1.80it/s]

torch.Size([1, 159040])


Processing audio:  36%|███▋      | 726/2000 [10:10<13:19,  1.59it/s]

torch.Size([1, 70560])


Processing audio:  36%|███▋      | 727/2000 [10:11<11:51,  1.79it/s]

torch.Size([1, 42880])


Processing audio:  36%|███▋      | 728/2000 [10:11<09:57,  2.13it/s]

torch.Size([1, 183040])


Processing audio:  36%|███▋      | 729/2000 [10:12<12:21,  1.71it/s]

torch.Size([1, 105760])


Processing audio:  36%|███▋      | 730/2000 [10:12<12:59,  1.63it/s]

torch.Size([1, 259200])


Processing audio:  37%|███▋      | 731/2000 [10:13<16:07,  1.31it/s]

torch.Size([1, 660640])


Processing audio:  37%|███▋      | 732/2000 [10:20<51:53,  2.46s/it]

torch.Size([1, 97600])


Processing audio:  37%|███▋      | 733/2000 [10:20<39:15,  1.86s/it]

torch.Size([1, 1013760])


Processing audio:  37%|███▋      | 734/2000 [10:28<1:14:57,  3.55s/it]

torch.Size([1, 111680])


Processing audio:  37%|███▋      | 735/2000 [10:29<57:22,  2.72s/it]  

torch.Size([1, 173600])


Processing audio:  37%|███▋      | 736/2000 [10:30<47:18,  2.25s/it]

torch.Size([1, 60640])


Processing audio:  37%|███▋      | 738/2000 [10:30<25:36,  1.22s/it]

torch.Size([1, 19200])
torch.Size([1, 55040])


Processing audio:  37%|███▋      | 739/2000 [10:31<19:49,  1.06it/s]

torch.Size([1, 128960])


Processing audio:  37%|███▋      | 740/2000 [10:31<16:51,  1.25it/s]

torch.Size([1, 135360])


Processing audio:  37%|███▋      | 742/2000 [10:32<12:44,  1.65it/s]

torch.Size([1, 29760])
torch.Size([1, 207200])


Processing audio:  37%|███▋      | 743/2000 [10:33<15:36,  1.34it/s]

torch.Size([1, 150240])


Processing audio:  37%|███▋      | 744/2000 [10:34<15:41,  1.33it/s]

torch.Size([1, 118400])


Processing audio:  37%|███▋      | 745/2000 [10:35<16:27,  1.27it/s]

torch.Size([1, 141600])


Processing audio:  37%|███▋      | 746/2000 [10:35<15:43,  1.33it/s]

torch.Size([1, 256960])


Processing audio:  37%|███▋      | 747/2000 [10:37<19:15,  1.08it/s]

torch.Size([1, 150080])


Processing audio:  37%|███▋      | 748/2000 [10:38<18:53,  1.10it/s]

torch.Size([1, 76800])


Processing audio:  37%|███▋      | 749/2000 [10:38<16:01,  1.30it/s]

torch.Size([1, 61760])


Processing audio:  38%|███▊      | 750/2000 [10:38<13:05,  1.59it/s]

torch.Size([1, 185600])


Processing audio:  38%|███▊      | 751/2000 [10:39<13:48,  1.51it/s]

torch.Size([1, 164480])


Processing audio:  38%|███▊      | 752/2000 [10:40<17:09,  1.21it/s]

torch.Size([1, 162880])


Processing audio:  38%|███▊      | 753/2000 [10:41<18:17,  1.14it/s]

torch.Size([1, 121920])


Processing audio:  38%|███▊      | 754/2000 [10:42<19:13,  1.08it/s]

torch.Size([1, 215200])


Processing audio:  38%|███▊      | 755/2000 [10:44<21:23,  1.03s/it]

torch.Size([1, 138720])


Processing audio:  38%|███▊      | 756/2000 [10:44<19:45,  1.05it/s]

torch.Size([1, 61600])


Processing audio:  38%|███▊      | 757/2000 [10:45<15:28,  1.34it/s]

torch.Size([1, 257600])


Processing audio:  38%|███▊      | 758/2000 [10:46<18:22,  1.13it/s]

torch.Size([1, 166080])


Processing audio:  38%|███▊      | 759/2000 [10:47<19:05,  1.08it/s]

torch.Size([1, 64480])


Processing audio:  38%|███▊      | 760/2000 [10:47<15:47,  1.31it/s]

torch.Size([1, 471840])


Processing audio:  38%|███▊      | 761/2000 [10:50<26:14,  1.27s/it]

torch.Size([1, 137600])


Processing audio:  38%|███▊      | 762/2000 [10:50<22:42,  1.10s/it]

torch.Size([1, 120320])


Processing audio:  38%|███▊      | 763/2000 [10:51<19:53,  1.04it/s]

torch.Size([1, 258560])


Processing audio:  38%|███▊      | 764/2000 [10:53<23:19,  1.13s/it]

torch.Size([1, 145440])


Processing audio:  38%|███▊      | 765/2000 [10:54<23:28,  1.14s/it]

torch.Size([1, 338720])


Processing audio:  38%|███▊      | 766/2000 [10:56<30:25,  1.48s/it]

torch.Size([1, 87520])


Processing audio:  38%|███▊      | 767/2000 [10:56<23:29,  1.14s/it]

torch.Size([1, 85600])


Processing audio:  38%|███▊      | 769/2000 [10:57<14:32,  1.41it/s]

torch.Size([1, 115840])
torch.Size([1, 288960])


Processing audio:  38%|███▊      | 770/2000 [10:59<21:21,  1.04s/it]

torch.Size([1, 81280])


Processing audio:  39%|███▊      | 771/2000 [10:59<17:49,  1.15it/s]

torch.Size([1, 116480])


Processing audio:  39%|███▊      | 772/2000 [11:00<15:39,  1.31it/s]

torch.Size([1, 236320])


Processing audio:  39%|███▊      | 774/2000 [11:01<14:23,  1.42it/s]

torch.Size([1, 29440])
torch.Size([1, 147680])


Processing audio:  39%|███▉      | 775/2000 [11:02<14:23,  1.42it/s]

torch.Size([1, 216640])


Processing audio:  39%|███▉      | 776/2000 [11:03<16:15,  1.25it/s]

torch.Size([1, 49440])


Processing audio:  39%|███▉      | 777/2000 [11:03<13:17,  1.53it/s]

torch.Size([1, 108800])


Processing audio:  39%|███▉      | 778/2000 [11:04<12:47,  1.59it/s]

torch.Size([1, 160640])


Processing audio:  39%|███▉      | 779/2000 [11:05<13:25,  1.52it/s]

torch.Size([1, 87200])


Processing audio:  39%|███▉      | 780/2000 [11:05<13:10,  1.54it/s]

torch.Size([1, 322080])


Processing audio:  39%|███▉      | 781/2000 [11:07<22:10,  1.09s/it]

torch.Size([1, 142000])


Processing audio:  39%|███▉      | 782/2000 [11:08<21:39,  1.07s/it]

torch.Size([1, 121920])


Processing audio:  39%|███▉      | 783/2000 [11:09<20:18,  1.00s/it]

torch.Size([1, 15040])
torch.Size([1, 141920])


Processing audio:  39%|███▉      | 785/2000 [11:10<14:28,  1.40it/s]

torch.Size([1, 128320])


Processing audio:  39%|███▉      | 786/2000 [11:11<14:51,  1.36it/s]

torch.Size([1, 47840])


Processing audio:  39%|███▉      | 787/2000 [11:11<12:25,  1.63it/s]

torch.Size([1, 345280])


Processing audio:  39%|███▉      | 788/2000 [11:13<20:19,  1.01s/it]

torch.Size([1, 81280])


Processing audio:  39%|███▉      | 789/2000 [11:14<17:35,  1.15it/s]

torch.Size([1, 157120])


Processing audio:  40%|███▉      | 790/2000 [11:15<18:36,  1.08it/s]

torch.Size([1, 197440])


Processing audio:  40%|███▉      | 791/2000 [11:16<19:24,  1.04it/s]

torch.Size([1, 157120])


Processing audio:  40%|███▉      | 792/2000 [11:17<18:40,  1.08it/s]

torch.Size([1, 60960])


Processing audio:  40%|███▉      | 793/2000 [11:17<15:19,  1.31it/s]

torch.Size([1, 327040])


Processing audio:  40%|███▉      | 794/2000 [11:18<19:42,  1.02it/s]

torch.Size([1, 121760])


Processing audio:  40%|███▉      | 795/2000 [11:19<19:02,  1.05it/s]

torch.Size([1, 162240])


Processing audio:  40%|███▉      | 796/2000 [11:21<20:52,  1.04s/it]

torch.Size([1, 113920])


Processing audio:  40%|███▉      | 797/2000 [11:22<19:57,  1.00it/s]

torch.Size([1, 27200])


Processing audio:  40%|███▉      | 798/2000 [11:22<15:51,  1.26it/s]

torch.Size([1, 133120])


Processing audio:  40%|███▉      | 799/2000 [11:23<15:23,  1.30it/s]

torch.Size([1, 128000])


Processing audio:  40%|████      | 800/2000 [11:23<15:41,  1.27it/s]

torch.Size([1, 78400])


Processing audio:  40%|████      | 801/2000 [11:24<13:24,  1.49it/s]

torch.Size([1, 133120])


Processing audio:  40%|████      | 802/2000 [11:24<13:15,  1.51it/s]

torch.Size([1, 331840])


Processing audio:  40%|████      | 803/2000 [11:26<18:37,  1.07it/s]

torch.Size([1, 225280])


Processing audio:  40%|████      | 804/2000 [11:27<18:32,  1.07it/s]

torch.Size([1, 75680])


Processing audio:  40%|████      | 805/2000 [11:27<15:30,  1.28it/s]

torch.Size([1, 127200])


Processing audio:  40%|████      | 806/2000 [11:28<15:24,  1.29it/s]

torch.Size([1, 69440])


Processing audio:  40%|████      | 807/2000 [11:28<13:08,  1.51it/s]

torch.Size([1, 133600])


Processing audio:  40%|████      | 808/2000 [11:29<13:38,  1.46it/s]

torch.Size([1, 51040])


Processing audio:  40%|████      | 809/2000 [11:30<11:25,  1.74it/s]

torch.Size([1, 193440])


Processing audio:  40%|████      | 810/2000 [11:31<13:45,  1.44it/s]

torch.Size([1, 641920])


Processing audio:  41%|████      | 811/2000 [11:35<35:53,  1.81s/it]

torch.Size([1, 222240])


Processing audio:  41%|████      | 812/2000 [11:36<32:36,  1.65s/it]

torch.Size([1, 141760])


Processing audio:  41%|████      | 813/2000 [11:37<27:44,  1.40s/it]

torch.Size([1, 212320])


Processing audio:  41%|████      | 814/2000 [11:38<26:12,  1.33s/it]

torch.Size([1, 264320])


Processing audio:  41%|████      | 815/2000 [11:39<26:04,  1.32s/it]

torch.Size([1, 77760])


Processing audio:  41%|████      | 816/2000 [11:40<20:50,  1.06s/it]

torch.Size([1, 194400])


Processing audio:  41%|████      | 817/2000 [11:41<18:45,  1.05it/s]

torch.Size([1, 36000])


Processing audio:  41%|████      | 818/2000 [11:41<14:45,  1.33it/s]

torch.Size([1, 336480])


Processing audio:  41%|████      | 819/2000 [11:43<21:49,  1.11s/it]

torch.Size([1, 43040])


Processing audio:  41%|████      | 820/2000 [11:43<17:01,  1.16it/s]

torch.Size([1, 148000])


Processing audio:  41%|████      | 821/2000 [11:44<16:25,  1.20it/s]

torch.Size([1, 133920])


Processing audio:  41%|████      | 822/2000 [11:45<15:32,  1.26it/s]

torch.Size([1, 64320])


Processing audio:  41%|████      | 824/2000 [11:45<10:25,  1.88it/s]

torch.Size([1, 24800])
torch.Size([1, 348320])


Processing audio:  41%|████▏     | 825/2000 [11:48<23:03,  1.18s/it]

torch.Size([1, 182400])


Processing audio:  41%|████▏     | 826/2000 [11:49<21:51,  1.12s/it]

torch.Size([1, 614240])


Processing audio:  41%|████▏     | 827/2000 [11:54<48:11,  2.46s/it]

torch.Size([1, 319680])


Processing audio:  41%|████▏     | 828/2000 [11:56<44:38,  2.29s/it]

torch.Size([1, 64800])


Processing audio:  41%|████▏     | 829/2000 [11:57<33:23,  1.71s/it]

torch.Size([1, 363840])


Processing audio:  42%|████▏     | 830/2000 [11:59<34:53,  1.79s/it]

torch.Size([1, 28640])


Processing audio:  42%|████▏     | 831/2000 [11:59<25:54,  1.33s/it]

torch.Size([1, 314560])


Processing audio:  42%|████▏     | 832/2000 [12:01<30:43,  1.58s/it]

torch.Size([1, 118080])


Processing audio:  42%|████▏     | 833/2000 [12:02<25:09,  1.29s/it]

torch.Size([1, 152800])


Processing audio:  42%|████▏     | 834/2000 [12:03<22:10,  1.14s/it]

torch.Size([1, 59520])


Processing audio:  42%|████▏     | 835/2000 [12:03<17:39,  1.10it/s]

torch.Size([1, 283520])


Processing audio:  42%|████▏     | 836/2000 [12:04<19:52,  1.02s/it]

torch.Size([1, 106240])


Processing audio:  42%|████▏     | 837/2000 [12:05<17:32,  1.11it/s]

torch.Size([1, 322560])


Processing audio:  42%|████▏     | 838/2000 [12:06<20:52,  1.08s/it]

torch.Size([1, 466720])


Processing audio:  42%|████▏     | 839/2000 [12:09<29:36,  1.53s/it]

torch.Size([1, 59840])


Processing audio:  42%|████▏     | 840/2000 [12:09<23:14,  1.20s/it]

torch.Size([1, 182240])


Processing audio:  42%|████▏     | 841/2000 [12:10<21:45,  1.13s/it]

torch.Size([1, 47040])


Processing audio:  42%|████▏     | 842/2000 [12:11<17:14,  1.12it/s]

torch.Size([1, 121120])


Processing audio:  42%|████▏     | 843/2000 [12:12<17:26,  1.11it/s]

torch.Size([1, 303360])


Processing audio:  42%|████▏     | 844/2000 [12:14<25:21,  1.32s/it]

torch.Size([1, 134720])


Processing audio:  42%|████▏     | 845/2000 [12:15<21:52,  1.14s/it]

torch.Size([1, 140960])


Processing audio:  42%|████▏     | 847/2000 [12:15<14:36,  1.32it/s]

torch.Size([1, 26720])
torch.Size([1, 197920])


Processing audio:  42%|████▏     | 848/2000 [12:16<16:02,  1.20it/s]

torch.Size([1, 70720])


Processing audio:  42%|████▏     | 849/2000 [12:17<13:05,  1.47it/s]

torch.Size([1, 113120])


Processing audio:  42%|████▎     | 850/2000 [12:17<12:51,  1.49it/s]

torch.Size([1, 221920])


Processing audio:  43%|████▎     | 851/2000 [12:19<17:58,  1.07it/s]

torch.Size([1, 207680])


Processing audio:  43%|████▎     | 852/2000 [12:20<18:47,  1.02it/s]

torch.Size([1, 172000])


Processing audio:  43%|████▎     | 853/2000 [12:21<16:31,  1.16it/s]

torch.Size([1, 207040])


Processing audio:  43%|████▎     | 854/2000 [12:22<17:17,  1.10it/s]

torch.Size([1, 105440])


Processing audio:  43%|████▎     | 855/2000 [12:22<15:09,  1.26it/s]

torch.Size([1, 123040])


Processing audio:  43%|████▎     | 856/2000 [12:23<13:54,  1.37it/s]

torch.Size([1, 200480])


Processing audio:  43%|████▎     | 857/2000 [12:24<16:11,  1.18it/s]

torch.Size([1, 102880])


Processing audio:  43%|████▎     | 858/2000 [12:25<15:56,  1.19it/s]

torch.Size([1, 114080])


Processing audio:  43%|████▎     | 859/2000 [12:25<15:33,  1.22it/s]

torch.Size([1, 180320])


Processing audio:  43%|████▎     | 860/2000 [12:27<18:17,  1.04it/s]

torch.Size([1, 66560])


Processing audio:  43%|████▎     | 861/2000 [12:27<15:40,  1.21it/s]

torch.Size([1, 125120])


Processing audio:  43%|████▎     | 862/2000 [12:28<14:32,  1.30it/s]

torch.Size([1, 279840])


Processing audio:  43%|████▎     | 863/2000 [12:29<17:12,  1.10it/s]

torch.Size([1, 143520])


Processing audio:  43%|████▎     | 864/2000 [12:30<16:01,  1.18it/s]

torch.Size([1, 320320])


Processing audio:  43%|████▎     | 865/2000 [12:32<20:33,  1.09s/it]

torch.Size([1, 130400])


Processing audio:  43%|████▎     | 866/2000 [12:32<18:08,  1.04it/s]

torch.Size([1, 40160])


Processing audio:  43%|████▎     | 867/2000 [12:32<13:56,  1.36it/s]

torch.Size([1, 137920])


Processing audio:  43%|████▎     | 868/2000 [12:33<13:50,  1.36it/s]

torch.Size([1, 219520])


Processing audio:  43%|████▎     | 869/2000 [12:34<15:37,  1.21it/s]

torch.Size([1, 37280])


Processing audio:  44%|████▎     | 870/2000 [12:34<12:28,  1.51it/s]

torch.Size([1, 100800])


Processing audio:  44%|████▎     | 871/2000 [12:35<11:43,  1.60it/s]

torch.Size([1, 104800])


Processing audio:  44%|████▎     | 872/2000 [12:36<11:22,  1.65it/s]

torch.Size([1, 194560])


Processing audio:  44%|████▎     | 873/2000 [12:36<12:54,  1.46it/s]

torch.Size([1, 156160])


Processing audio:  44%|████▎     | 874/2000 [12:37<14:00,  1.34it/s]

torch.Size([1, 72800])


Processing audio:  44%|████▍     | 875/2000 [12:38<13:58,  1.34it/s]

torch.Size([1, 44320])


Processing audio:  44%|████▍     | 876/2000 [12:38<11:37,  1.61it/s]

torch.Size([1, 48960])


Processing audio:  44%|████▍     | 877/2000 [12:39<11:06,  1.68it/s]

torch.Size([1, 198240])


Processing audio:  44%|████▍     | 878/2000 [12:40<15:56,  1.17it/s]

torch.Size([1, 101280])


Processing audio:  44%|████▍     | 879/2000 [12:41<14:05,  1.33it/s]

torch.Size([1, 97920])


Processing audio:  44%|████▍     | 880/2000 [12:41<12:07,  1.54it/s]

torch.Size([1, 53280])


Processing audio:  44%|████▍     | 881/2000 [12:42<10:48,  1.73it/s]

torch.Size([1, 113600])


Processing audio:  44%|████▍     | 882/2000 [12:42<10:44,  1.74it/s]

torch.Size([1, 116160])


Processing audio:  44%|████▍     | 883/2000 [12:43<10:49,  1.72it/s]

torch.Size([1, 426080])


Processing audio:  44%|████▍     | 884/2000 [12:45<19:11,  1.03s/it]

torch.Size([1, 46400])


Processing audio:  44%|████▍     | 885/2000 [12:45<15:05,  1.23it/s]

torch.Size([1, 85440])


Processing audio:  44%|████▍     | 886/2000 [12:46<13:31,  1.37it/s]

torch.Size([1, 100640])


Processing audio:  44%|████▍     | 887/2000 [12:46<11:16,  1.64it/s]

torch.Size([1, 233120])


Processing audio:  44%|████▍     | 888/2000 [12:47<15:00,  1.23it/s]

torch.Size([1, 152480])


Processing audio:  44%|████▍     | 889/2000 [12:48<14:59,  1.23it/s]

torch.Size([1, 37440])


Processing audio:  44%|████▍     | 890/2000 [12:48<11:54,  1.55it/s]

torch.Size([1, 338560])


Processing audio:  45%|████▍     | 891/2000 [12:50<17:19,  1.07it/s]

torch.Size([1, 74080])


Processing audio:  45%|████▍     | 892/2000 [12:50<14:20,  1.29it/s]

torch.Size([1, 124000])


Processing audio:  45%|████▍     | 893/2000 [12:51<14:44,  1.25it/s]

torch.Size([1, 428160])


Processing audio:  45%|████▍     | 894/2000 [12:54<22:46,  1.24s/it]

torch.Size([1, 60640])


Processing audio:  45%|████▍     | 895/2000 [12:54<17:50,  1.03it/s]

torch.Size([1, 188960])


Processing audio:  45%|████▍     | 896/2000 [12:55<17:08,  1.07it/s]

torch.Size([1, 53440])


Processing audio:  45%|████▍     | 897/2000 [12:55<13:43,  1.34it/s]

torch.Size([1, 199040])


Processing audio:  45%|████▍     | 898/2000 [12:56<15:29,  1.19it/s]

torch.Size([1, 203520])


Processing audio:  45%|████▍     | 899/2000 [12:57<16:39,  1.10it/s]

torch.Size([1, 172000])


Processing audio:  45%|████▌     | 900/2000 [12:58<15:18,  1.20it/s]

torch.Size([1, 101120])


Processing audio:  45%|████▌     | 901/2000 [12:58<13:33,  1.35it/s]

torch.Size([1, 157760])


Processing audio:  45%|████▌     | 902/2000 [12:59<14:07,  1.30it/s]

torch.Size([1, 103520])


Processing audio:  45%|████▌     | 903/2000 [13:00<13:10,  1.39it/s]

torch.Size([1, 130560])


Processing audio:  45%|████▌     | 904/2000 [13:00<12:45,  1.43it/s]

torch.Size([1, 75200])


Processing audio:  45%|████▌     | 905/2000 [13:01<10:46,  1.69it/s]

torch.Size([1, 91200])


Processing audio:  45%|████▌     | 906/2000 [13:01<10:28,  1.74it/s]

torch.Size([1, 223680])


Processing audio:  45%|████▌     | 907/2000 [13:02<13:19,  1.37it/s]

torch.Size([1, 59680])


Processing audio:  45%|████▌     | 908/2000 [13:03<11:30,  1.58it/s]

torch.Size([1, 154720])


Processing audio:  45%|████▌     | 909/2000 [13:04<11:49,  1.54it/s]

torch.Size([1, 106080])


Processing audio:  46%|████▌     | 911/2000 [13:05<09:49,  1.85it/s]

torch.Size([1, 17120])
torch.Size([1, 112640])


Processing audio:  46%|████▌     | 912/2000 [13:05<11:27,  1.58it/s]

torch.Size([1, 100160])


Processing audio:  46%|████▌     | 913/2000 [13:06<11:52,  1.53it/s]

torch.Size([1, 60320])


Processing audio:  46%|████▌     | 914/2000 [13:07<11:07,  1.63it/s]

torch.Size([1, 147840])


Processing audio:  46%|████▌     | 915/2000 [13:07<11:38,  1.55it/s]

torch.Size([1, 38240])


Processing audio:  46%|████▌     | 916/2000 [13:08<09:22,  1.93it/s]

torch.Size([1, 278400])


Processing audio:  46%|████▌     | 917/2000 [13:09<13:04,  1.38it/s]

torch.Size([1, 310720])


Processing audio:  46%|████▌     | 919/2000 [13:11<13:22,  1.35it/s]

torch.Size([1, 29280])
torch.Size([1, 108480])


Processing audio:  46%|████▌     | 920/2000 [13:11<12:11,  1.48it/s]

torch.Size([1, 108960])


Processing audio:  46%|████▌     | 921/2000 [13:12<11:36,  1.55it/s]

torch.Size([1, 214560])


Processing audio:  46%|████▌     | 922/2000 [13:13<13:58,  1.29it/s]

torch.Size([1, 384160])


Processing audio:  46%|████▌     | 924/2000 [13:15<15:07,  1.19it/s]

torch.Size([1, 67040])
torch.Size([1, 142400])


Processing audio:  46%|████▋     | 925/2000 [13:15<13:22,  1.34it/s]

torch.Size([1, 169760])


Processing audio:  46%|████▋     | 926/2000 [13:16<14:06,  1.27it/s]

torch.Size([1, 117920])


Processing audio:  46%|████▋     | 927/2000 [13:17<13:54,  1.29it/s]

torch.Size([1, 161760])


Processing audio:  46%|████▋     | 928/2000 [13:18<12:58,  1.38it/s]

torch.Size([1, 181440])


Processing audio:  46%|████▋     | 929/2000 [13:19<15:53,  1.12it/s]

torch.Size([1, 89920])
torch.Size([1, 159680])


Processing audio:  47%|████▋     | 931/2000 [13:20<13:28,  1.32it/s]

torch.Size([1, 109600])


Processing audio:  47%|████▋     | 932/2000 [13:21<12:08,  1.47it/s]

torch.Size([1, 82080])


Processing audio:  47%|████▋     | 933/2000 [13:21<11:05,  1.60it/s]

torch.Size([1, 32320])


Processing audio:  47%|████▋     | 934/2000 [13:21<09:12,  1.93it/s]

torch.Size([1, 86400])


Processing audio:  47%|████▋     | 936/2000 [13:22<07:19,  2.42it/s]

torch.Size([1, 42240])
torch.Size([1, 75680])
torch.Size([1, 197280])


Processing audio:  47%|████▋     | 938/2000 [13:23<08:04,  2.19it/s]

torch.Size([1, 95520])


Processing audio:  47%|████▋     | 939/2000 [13:24<08:50,  2.00it/s]

torch.Size([1, 80480])


Processing audio:  47%|████▋     | 940/2000 [13:24<08:57,  1.97it/s]

torch.Size([1, 124000])


Processing audio:  47%|████▋     | 941/2000 [13:25<08:57,  1.97it/s]

torch.Size([1, 93920])


Processing audio:  47%|████▋     | 942/2000 [13:25<08:04,  2.18it/s]

torch.Size([1, 72800])


Processing audio:  47%|████▋     | 943/2000 [13:25<07:35,  2.32it/s]

torch.Size([1, 185760])


Processing audio:  47%|████▋     | 944/2000 [13:26<10:23,  1.69it/s]

torch.Size([1, 178080])


Processing audio:  47%|████▋     | 945/2000 [13:27<11:24,  1.54it/s]

torch.Size([1, 236800])


Processing audio:  47%|████▋     | 946/2000 [13:28<14:16,  1.23it/s]

torch.Size([1, 61600])


Processing audio:  47%|████▋     | 948/2000 [13:29<08:51,  1.98it/s]

torch.Size([1, 22720])
torch.Size([1, 166720])


Processing audio:  48%|████▊     | 950/2000 [13:30<08:17,  2.11it/s]

torch.Size([1, 23680])
torch.Size([1, 123520])


Processing audio:  48%|████▊     | 951/2000 [13:31<11:09,  1.57it/s]

torch.Size([1, 198720])


Processing audio:  48%|████▊     | 952/2000 [13:32<13:35,  1.28it/s]

torch.Size([1, 114080])


Processing audio:  48%|████▊     | 953/2000 [13:33<14:14,  1.22it/s]

torch.Size([1, 56320])


Processing audio:  48%|████▊     | 954/2000 [13:33<11:17,  1.54it/s]

torch.Size([1, 105600])


Processing audio:  48%|████▊     | 955/2000 [13:33<10:34,  1.65it/s]

torch.Size([1, 449440])


Processing audio:  48%|████▊     | 956/2000 [13:36<19:46,  1.14s/it]

torch.Size([1, 87200])


Processing audio:  48%|████▊     | 957/2000 [13:36<16:05,  1.08it/s]

torch.Size([1, 94560])


Processing audio:  48%|████▊     | 958/2000 [13:37<13:41,  1.27it/s]

torch.Size([1, 115520])


Processing audio:  48%|████▊     | 959/2000 [13:37<12:10,  1.42it/s]

torch.Size([1, 140640])


Processing audio:  48%|████▊     | 960/2000 [13:38<11:52,  1.46it/s]

torch.Size([1, 120640])


Processing audio:  48%|████▊     | 961/2000 [13:39<11:34,  1.50it/s]

torch.Size([1, 212960])


Processing audio:  48%|████▊     | 962/2000 [13:40<13:11,  1.31it/s]

torch.Size([1, 193120])


Processing audio:  48%|████▊     | 963/2000 [13:40<14:18,  1.21it/s]

torch.Size([1, 103040])


Processing audio:  48%|████▊     | 964/2000 [13:41<13:20,  1.29it/s]

torch.Size([1, 465440])


Processing audio:  48%|████▊     | 965/2000 [13:45<27:19,  1.58s/it]

torch.Size([1, 79360])


Processing audio:  48%|████▊     | 966/2000 [13:45<22:15,  1.29s/it]

torch.Size([1, 84480])


Processing audio:  48%|████▊     | 967/2000 [13:46<19:36,  1.14s/it]

torch.Size([1, 114400])


Processing audio:  48%|████▊     | 968/2000 [13:46<15:42,  1.10it/s]

torch.Size([1, 118880])


Processing audio:  48%|████▊     | 969/2000 [13:47<13:32,  1.27it/s]

torch.Size([1, 294080])


Processing audio:  48%|████▊     | 970/2000 [13:48<16:09,  1.06it/s]

torch.Size([1, 98720])


Processing audio:  49%|████▊     | 971/2000 [13:49<13:21,  1.28it/s]

torch.Size([1, 110080])


Processing audio:  49%|████▊     | 972/2000 [13:49<11:46,  1.45it/s]

torch.Size([1, 57760])


Processing audio:  49%|████▊     | 973/2000 [13:49<10:18,  1.66it/s]

torch.Size([1, 148480])


Processing audio:  49%|████▊     | 974/2000 [13:50<10:42,  1.60it/s]

torch.Size([1, 104640])


Processing audio:  49%|████▉     | 975/2000 [13:51<10:11,  1.68it/s]

torch.Size([1, 89440])


Processing audio:  49%|████▉     | 977/2000 [13:51<07:59,  2.14it/s]

torch.Size([1, 32480])
torch.Size([1, 72160])


Processing audio:  49%|████▉     | 978/2000 [13:52<07:30,  2.27it/s]

torch.Size([1, 17120])
torch.Size([1, 135520])


Processing audio:  49%|████▉     | 980/2000 [13:52<06:24,  2.65it/s]

torch.Size([1, 279200])


Processing audio:  49%|████▉     | 981/2000 [13:54<09:53,  1.72it/s]

torch.Size([1, 182400])


Processing audio:  49%|████▉     | 982/2000 [13:54<10:48,  1.57it/s]

torch.Size([1, 17120])
torch.Size([1, 120800])


Processing audio:  49%|████▉     | 984/2000 [13:55<08:54,  1.90it/s]

torch.Size([1, 222720])


Processing audio:  49%|████▉     | 985/2000 [13:57<14:44,  1.15it/s]

torch.Size([1, 110880])


Processing audio:  49%|████▉     | 986/2000 [13:58<14:20,  1.18it/s]

torch.Size([1, 106240])


Processing audio:  49%|████▉     | 987/2000 [13:59<14:35,  1.16it/s]

torch.Size([1, 74240])


Processing audio:  49%|████▉     | 988/2000 [13:59<12:28,  1.35it/s]

torch.Size([1, 168320])


Processing audio:  49%|████▉     | 989/2000 [14:00<12:27,  1.35it/s]

torch.Size([1, 344640])


Processing audio:  50%|████▉     | 990/2000 [14:02<16:08,  1.04it/s]

torch.Size([1, 150400])


Processing audio:  50%|████▉     | 992/2000 [14:02<11:18,  1.48it/s]

torch.Size([1, 37120])
torch.Size([1, 148960])


Processing audio:  50%|████▉     | 993/2000 [14:03<11:39,  1.44it/s]

torch.Size([1, 99520])


Processing audio:  50%|████▉     | 994/2000 [14:04<10:53,  1.54it/s]

torch.Size([1, 53920])


Processing audio:  50%|████▉     | 995/2000 [14:04<09:08,  1.83it/s]

torch.Size([1, 182560])


Processing audio:  50%|████▉     | 996/2000 [14:05<10:56,  1.53it/s]

torch.Size([1, 248640])


Processing audio:  50%|████▉     | 997/2000 [14:06<13:22,  1.25it/s]

torch.Size([1, 367680])


Processing audio:  50%|████▉     | 998/2000 [14:08<18:04,  1.08s/it]

torch.Size([1, 81920])


Processing audio:  50%|████▉     | 999/2000 [14:08<14:07,  1.18it/s]

torch.Size([1, 120160])


Processing audio:  50%|█████     | 1000/2000 [14:08<11:54,  1.40it/s]

torch.Size([1, 63040])


Processing audio:  50%|█████     | 1001/2000 [14:09<10:25,  1.60it/s]

torch.Size([1, 165760])


Processing audio:  50%|█████     | 1002/2000 [14:10<11:23,  1.46it/s]

torch.Size([1, 196800])


Processing audio:  50%|█████     | 1003/2000 [14:11<14:58,  1.11it/s]

torch.Size([1, 83360])


Processing audio:  50%|█████     | 1004/2000 [14:12<14:01,  1.18it/s]

torch.Size([1, 120320])


Processing audio:  50%|█████     | 1005/2000 [14:12<12:34,  1.32it/s]

torch.Size([1, 193280])


Processing audio:  50%|█████     | 1006/2000 [14:13<13:42,  1.21it/s]

torch.Size([1, 61120])


Processing audio:  50%|█████     | 1008/2000 [14:14<08:47,  1.88it/s]

torch.Size([1, 26720])
torch.Size([1, 315200])


Processing audio:  50%|█████     | 1009/2000 [14:15<13:39,  1.21it/s]

torch.Size([1, 185920])


Processing audio:  50%|█████     | 1010/2000 [14:16<14:58,  1.10it/s]

torch.Size([1, 357120])


Processing audio:  51%|█████     | 1011/2000 [14:18<18:52,  1.14s/it]

torch.Size([1, 114880])


Processing audio:  51%|█████     | 1012/2000 [14:19<15:41,  1.05it/s]

torch.Size([1, 381760])


Processing audio:  51%|█████     | 1013/2000 [14:20<18:52,  1.15s/it]

torch.Size([1, 199360])


Processing audio:  51%|█████     | 1014/2000 [14:21<18:18,  1.11s/it]

torch.Size([1, 111840])


Processing audio:  51%|█████     | 1015/2000 [14:22<15:40,  1.05it/s]

torch.Size([1, 135360])


Processing audio:  51%|█████     | 1016/2000 [14:23<14:44,  1.11it/s]

torch.Size([1, 293440])


Processing audio:  51%|█████     | 1017/2000 [14:25<20:18,  1.24s/it]

torch.Size([1, 193440])


Processing audio:  51%|█████     | 1018/2000 [14:26<18:54,  1.16s/it]

torch.Size([1, 195040])


Processing audio:  51%|█████     | 1019/2000 [14:27<17:39,  1.08s/it]

torch.Size([1, 227840])


Processing audio:  51%|█████     | 1020/2000 [14:28<18:05,  1.11s/it]

torch.Size([1, 166080])


Processing audio:  51%|█████     | 1021/2000 [14:29<16:26,  1.01s/it]

torch.Size([1, 89600])


Processing audio:  51%|█████     | 1022/2000 [14:29<13:50,  1.18it/s]

torch.Size([1, 106720])


Processing audio:  51%|█████     | 1023/2000 [14:30<12:35,  1.29it/s]

torch.Size([1, 82720])


Processing audio:  51%|█████     | 1024/2000 [14:30<11:16,  1.44it/s]

torch.Size([1, 88960])


Processing audio:  51%|█████▏    | 1025/2000 [14:31<10:19,  1.58it/s]

torch.Size([1, 209600])


Processing audio:  51%|█████▏    | 1026/2000 [14:32<11:51,  1.37it/s]

torch.Size([1, 101120])


Processing audio:  51%|█████▏    | 1027/2000 [14:32<10:38,  1.52it/s]

torch.Size([1, 136000])


Processing audio:  51%|█████▏    | 1028/2000 [14:33<11:14,  1.44it/s]

torch.Size([1, 106720])


Processing audio:  51%|█████▏    | 1029/2000 [14:33<10:24,  1.56it/s]

torch.Size([1, 88640])


Processing audio:  52%|█████▏    | 1030/2000 [14:34<09:26,  1.71it/s]

torch.Size([1, 183040])


Processing audio:  52%|█████▏    | 1031/2000 [14:35<10:46,  1.50it/s]

torch.Size([1, 156640])


Processing audio:  52%|█████▏    | 1032/2000 [14:36<12:54,  1.25it/s]

torch.Size([1, 94720])


Processing audio:  52%|█████▏    | 1033/2000 [14:36<12:26,  1.30it/s]

torch.Size([1, 56320])


Processing audio:  52%|█████▏    | 1034/2000 [14:37<11:27,  1.40it/s]

torch.Size([1, 218880])


Processing audio:  52%|█████▏    | 1036/2000 [14:39<10:50,  1.48it/s]

torch.Size([1, 42880])
torch.Size([1, 426240])


Processing audio:  52%|█████▏    | 1037/2000 [14:41<18:31,  1.15s/it]

torch.Size([1, 118400])


Processing audio:  52%|█████▏    | 1038/2000 [14:41<15:49,  1.01it/s]

torch.Size([1, 191840])


Processing audio:  52%|█████▏    | 1039/2000 [14:42<15:52,  1.01it/s]

torch.Size([1, 68320])


Processing audio:  52%|█████▏    | 1040/2000 [14:43<12:46,  1.25it/s]

torch.Size([1, 325120])


Processing audio:  52%|█████▏    | 1041/2000 [14:44<16:37,  1.04s/it]

torch.Size([1, 371680])


Processing audio:  52%|█████▏    | 1042/2000 [14:46<19:16,  1.21s/it]

torch.Size([1, 759520])


Processing audio:  52%|█████▏    | 1043/2000 [14:52<40:17,  2.53s/it]

torch.Size([1, 58720])


Processing audio:  52%|█████▏    | 1044/2000 [14:52<29:55,  1.88s/it]

torch.Size([1, 44640])


Processing audio:  52%|█████▏    | 1045/2000 [14:52<22:02,  1.38s/it]

torch.Size([1, 148160])


Processing audio:  52%|█████▏    | 1046/2000 [14:53<19:06,  1.20s/it]

torch.Size([1, 350080])


Processing audio:  52%|█████▏    | 1047/2000 [14:54<20:38,  1.30s/it]

torch.Size([1, 137120])


Processing audio:  52%|█████▏    | 1048/2000 [14:55<18:01,  1.14s/it]

torch.Size([1, 114880])


Processing audio:  52%|█████▏    | 1049/2000 [14:56<15:06,  1.05it/s]

torch.Size([1, 51360])


Processing audio:  52%|█████▎    | 1050/2000 [14:56<12:06,  1.31it/s]

torch.Size([1, 245760])


Processing audio:  53%|█████▎    | 1051/2000 [14:57<13:15,  1.19it/s]

torch.Size([1, 190720])


Processing audio:  53%|█████▎    | 1052/2000 [14:58<13:55,  1.13it/s]

torch.Size([1, 179040])


Processing audio:  53%|█████▎    | 1053/2000 [14:59<14:19,  1.10it/s]

torch.Size([1, 141600])


Processing audio:  53%|█████▎    | 1054/2000 [15:00<12:52,  1.22it/s]

torch.Size([1, 58880])


Processing audio:  53%|█████▎    | 1055/2000 [15:00<11:02,  1.43it/s]

torch.Size([1, 46240])


Processing audio:  53%|█████▎    | 1056/2000 [15:00<08:58,  1.75it/s]

torch.Size([1, 159520])


Processing audio:  53%|█████▎    | 1057/2000 [15:01<11:11,  1.40it/s]

torch.Size([1, 104320])


Processing audio:  53%|█████▎    | 1058/2000 [15:02<11:46,  1.33it/s]

torch.Size([1, 93760])


Processing audio:  53%|█████▎    | 1059/2000 [15:03<11:25,  1.37it/s]

torch.Size([1, 48960])


Processing audio:  53%|█████▎    | 1060/2000 [15:03<10:18,  1.52it/s]

torch.Size([1, 114560])


Processing audio:  53%|█████▎    | 1061/2000 [15:04<11:14,  1.39it/s]

torch.Size([1, 81280])


Processing audio:  53%|█████▎    | 1062/2000 [15:05<09:42,  1.61it/s]

torch.Size([1, 367680])


Processing audio:  53%|█████▎    | 1063/2000 [15:06<13:48,  1.13it/s]

torch.Size([1, 106080])


Processing audio:  53%|█████▎    | 1064/2000 [15:07<12:46,  1.22it/s]

torch.Size([1, 96000])


Processing audio:  53%|█████▎    | 1065/2000 [15:07<11:06,  1.40it/s]

torch.Size([1, 76800])


Processing audio:  53%|█████▎    | 1066/2000 [15:08<09:21,  1.66it/s]

torch.Size([1, 137600])


Processing audio:  53%|█████▎    | 1067/2000 [15:08<09:32,  1.63it/s]

torch.Size([1, 148960])


Processing audio:  53%|█████▎    | 1068/2000 [15:09<09:17,  1.67it/s]

torch.Size([1, 71040])


Processing audio:  53%|█████▎    | 1069/2000 [15:09<08:21,  1.86it/s]

torch.Size([1, 58560])


Processing audio:  54%|█████▎    | 1070/2000 [15:09<07:10,  2.16it/s]

torch.Size([1, 99520])


Processing audio:  54%|█████▎    | 1071/2000 [15:10<07:33,  2.05it/s]

torch.Size([1, 87840])


Processing audio:  54%|█████▎    | 1072/2000 [15:11<07:41,  2.01it/s]

torch.Size([1, 96000])


Processing audio:  54%|█████▎    | 1073/2000 [15:11<07:40,  2.01it/s]

torch.Size([1, 177600])


Processing audio:  54%|█████▎    | 1074/2000 [15:12<09:42,  1.59it/s]

torch.Size([1, 46880])


Processing audio:  54%|█████▍    | 1075/2000 [15:12<08:07,  1.90it/s]

torch.Size([1, 105600])


Processing audio:  54%|█████▍    | 1076/2000 [15:13<08:27,  1.82it/s]

torch.Size([1, 152800])


Processing audio:  54%|█████▍    | 1077/2000 [15:14<09:55,  1.55it/s]

torch.Size([1, 94560])


Processing audio:  54%|█████▍    | 1078/2000 [15:14<09:15,  1.66it/s]

torch.Size([1, 48480])


Processing audio:  54%|█████▍    | 1079/2000 [15:15<07:54,  1.94it/s]

torch.Size([1, 110080])


Processing audio:  54%|█████▍    | 1080/2000 [15:15<08:48,  1.74it/s]

torch.Size([1, 165120])


Processing audio:  54%|█████▍    | 1081/2000 [15:16<09:17,  1.65it/s]

torch.Size([1, 205440])


Processing audio:  54%|█████▍    | 1082/2000 [15:17<12:43,  1.20it/s]

torch.Size([1, 186240])


Processing audio:  54%|█████▍    | 1083/2000 [15:18<13:40,  1.12it/s]

torch.Size([1, 45600])


Processing audio:  54%|█████▍    | 1084/2000 [15:19<10:35,  1.44it/s]

torch.Size([1, 132000])


Processing audio:  54%|█████▍    | 1085/2000 [15:19<10:46,  1.42it/s]

torch.Size([1, 105440])


Processing audio:  54%|█████▍    | 1086/2000 [15:20<10:37,  1.43it/s]

torch.Size([1, 76000])


Processing audio:  54%|█████▍    | 1087/2000 [15:20<09:09,  1.66it/s]

torch.Size([1, 103520])


Processing audio:  54%|█████▍    | 1088/2000 [15:21<09:03,  1.68it/s]

torch.Size([1, 96800])


Processing audio:  54%|█████▍    | 1089/2000 [15:21<08:43,  1.74it/s]

torch.Size([1, 248000])


Processing audio:  55%|█████▍    | 1090/2000 [15:23<11:39,  1.30it/s]

torch.Size([1, 199840])


Processing audio:  55%|█████▍    | 1091/2000 [15:24<13:24,  1.13it/s]

torch.Size([1, 114880])


Processing audio:  55%|█████▍    | 1092/2000 [15:25<12:33,  1.21it/s]

torch.Size([1, 92160])


Processing audio:  55%|█████▍    | 1094/2000 [15:25<08:23,  1.80it/s]

torch.Size([1, 28480])
torch.Size([1, 246240])


Processing audio:  55%|█████▍    | 1095/2000 [15:26<10:18,  1.46it/s]

torch.Size([1, 74400])


Processing audio:  55%|█████▍    | 1096/2000 [15:27<09:25,  1.60it/s]

torch.Size([1, 30720])


Processing audio:  55%|█████▍    | 1097/2000 [15:27<07:39,  1.97it/s]

torch.Size([1, 67040])


Processing audio:  55%|█████▍    | 1098/2000 [15:27<07:48,  1.93it/s]

torch.Size([1, 62880])


Processing audio:  55%|█████▍    | 1099/2000 [15:28<07:13,  2.08it/s]

torch.Size([1, 126880])


Processing audio:  55%|█████▌    | 1100/2000 [15:29<08:46,  1.71it/s]

torch.Size([1, 167200])


Processing audio:  55%|█████▌    | 1101/2000 [15:30<11:26,  1.31it/s]

torch.Size([1, 72640])


Processing audio:  55%|█████▌    | 1102/2000 [15:30<10:04,  1.49it/s]

torch.Size([1, 123680])


Processing audio:  55%|█████▌    | 1104/2000 [15:31<08:21,  1.78it/s]

torch.Size([1, 23360])
torch.Size([1, 163040])


Processing audio:  55%|█████▌    | 1105/2000 [15:32<09:19,  1.60it/s]

torch.Size([1, 68320])


Processing audio:  55%|█████▌    | 1106/2000 [15:32<07:59,  1.86it/s]

torch.Size([1, 366720])


Processing audio:  55%|█████▌    | 1107/2000 [15:34<14:02,  1.06it/s]

torch.Size([1, 80640])


Processing audio:  55%|█████▌    | 1108/2000 [15:35<11:27,  1.30it/s]

torch.Size([1, 103680])


Processing audio:  55%|█████▌    | 1109/2000 [15:35<10:15,  1.45it/s]

torch.Size([1, 737600])


Processing audio:  56%|█████▌    | 1110/2000 [15:39<21:53,  1.48s/it]

torch.Size([1, 96320])


Processing audio:  56%|█████▌    | 1111/2000 [15:39<17:44,  1.20s/it]

torch.Size([1, 799200])


Processing audio:  56%|█████▌    | 1112/2000 [15:44<34:19,  2.32s/it]

torch.Size([1, 230240])


Processing audio:  56%|█████▌    | 1113/2000 [15:45<28:47,  1.95s/it]

torch.Size([1, 73760])


Processing audio:  56%|█████▌    | 1114/2000 [15:46<21:59,  1.49s/it]

torch.Size([1, 186080])


Processing audio:  56%|█████▌    | 1115/2000 [15:46<19:33,  1.33s/it]

torch.Size([1, 376960])


Processing audio:  56%|█████▌    | 1116/2000 [15:48<20:50,  1.42s/it]

torch.Size([1, 120640])


Processing audio:  56%|█████▌    | 1117/2000 [15:49<16:39,  1.13s/it]

torch.Size([1, 51840])


Processing audio:  56%|█████▌    | 1119/2000 [15:49<09:42,  1.51it/s]

torch.Size([1, 55360])
torch.Size([1, 181600])


Processing audio:  56%|█████▌    | 1120/2000 [15:50<10:47,  1.36it/s]

torch.Size([1, 178240])


Processing audio:  56%|█████▌    | 1121/2000 [15:51<11:18,  1.29it/s]

torch.Size([1, 286240])


Processing audio:  56%|█████▌    | 1122/2000 [15:52<12:27,  1.18it/s]

torch.Size([1, 117760])


Processing audio:  56%|█████▌    | 1123/2000 [15:52<11:21,  1.29it/s]

torch.Size([1, 86720])


Processing audio:  56%|█████▌    | 1124/2000 [15:53<09:57,  1.47it/s]

torch.Size([1, 100000])


Processing audio:  56%|█████▋    | 1125/2000 [15:54<10:44,  1.36it/s]

torch.Size([1, 125280])


Processing audio:  56%|█████▋    | 1126/2000 [15:55<11:55,  1.22it/s]

torch.Size([1, 181600])


Processing audio:  56%|█████▋    | 1127/2000 [15:56<14:05,  1.03it/s]

torch.Size([1, 52160])


Processing audio:  56%|█████▋    | 1128/2000 [15:56<10:52,  1.34it/s]

torch.Size([1, 64640])


Processing audio:  56%|█████▋    | 1129/2000 [15:57<09:23,  1.54it/s]

torch.Size([1, 77920])


Processing audio:  57%|█████▋    | 1131/2000 [15:57<06:37,  2.19it/s]

torch.Size([1, 22720])
torch.Size([1, 63840])


Processing audio:  57%|█████▋    | 1132/2000 [15:58<06:36,  2.19it/s]

torch.Size([1, 115040])


Processing audio:  57%|█████▋    | 1133/2000 [15:58<07:00,  2.06it/s]

torch.Size([1, 86080])


Processing audio:  57%|█████▋    | 1134/2000 [15:59<06:52,  2.10it/s]

torch.Size([1, 401600])


Processing audio:  57%|█████▋    | 1135/2000 [16:00<12:02,  1.20it/s]

torch.Size([1, 335680])


Processing audio:  57%|█████▋    | 1136/2000 [16:02<15:28,  1.07s/it]

torch.Size([1, 66560])


Processing audio:  57%|█████▋    | 1137/2000 [16:02<12:07,  1.19it/s]

torch.Size([1, 62880])


Processing audio:  57%|█████▋    | 1138/2000 [16:03<10:01,  1.43it/s]

torch.Size([1, 35680])


Processing audio:  57%|█████▋    | 1139/2000 [16:03<08:14,  1.74it/s]

torch.Size([1, 188160])


Processing audio:  57%|█████▋    | 1140/2000 [16:04<09:25,  1.52it/s]

torch.Size([1, 201120])


Processing audio:  57%|█████▋    | 1141/2000 [16:05<10:53,  1.31it/s]

torch.Size([1, 60480])


Processing audio:  57%|█████▋    | 1142/2000 [16:05<09:12,  1.55it/s]

torch.Size([1, 99520])


Processing audio:  57%|█████▋    | 1144/2000 [16:06<06:50,  2.09it/s]

torch.Size([1, 20640])
torch.Size([1, 227040])


Processing audio:  57%|█████▋    | 1145/2000 [16:07<11:07,  1.28it/s]

torch.Size([1, 296160])


Processing audio:  57%|█████▋    | 1146/2000 [16:09<14:44,  1.04s/it]

torch.Size([1, 115840])


Processing audio:  57%|█████▋    | 1147/2000 [16:10<13:32,  1.05it/s]

torch.Size([1, 55840])


Processing audio:  57%|█████▋    | 1148/2000 [16:10<10:23,  1.37it/s]

torch.Size([1, 148800])


Processing audio:  57%|█████▊    | 1150/2000 [16:11<08:04,  1.75it/s]

torch.Size([1, 64640])
torch.Size([1, 127680])


Processing audio:  58%|█████▊    | 1151/2000 [16:12<07:55,  1.78it/s]

torch.Size([1, 182400])


Processing audio:  58%|█████▊    | 1152/2000 [16:12<08:57,  1.58it/s]

torch.Size([1, 215200])


Processing audio:  58%|█████▊    | 1153/2000 [16:13<09:28,  1.49it/s]

torch.Size([1, 77440])


Processing audio:  58%|█████▊    | 1154/2000 [16:14<08:28,  1.66it/s]

torch.Size([1, 66240])


Processing audio:  58%|█████▊    | 1155/2000 [16:14<07:43,  1.82it/s]

torch.Size([1, 215200])


Processing audio:  58%|█████▊    | 1156/2000 [16:15<10:01,  1.40it/s]

torch.Size([1, 371680])


Processing audio:  58%|█████▊    | 1157/2000 [16:16<11:55,  1.18it/s]

torch.Size([1, 109440])


Processing audio:  58%|█████▊    | 1158/2000 [16:17<11:06,  1.26it/s]

torch.Size([1, 125920])


Processing audio:  58%|█████▊    | 1159/2000 [16:17<10:27,  1.34it/s]

torch.Size([1, 112640])


Processing audio:  58%|█████▊    | 1160/2000 [16:18<09:03,  1.54it/s]

torch.Size([1, 151680])


Processing audio:  58%|█████▊    | 1161/2000 [16:19<09:40,  1.44it/s]

torch.Size([1, 55360])


Processing audio:  58%|█████▊    | 1162/2000 [16:19<08:35,  1.63it/s]

torch.Size([1, 67680])


Processing audio:  58%|█████▊    | 1163/2000 [16:20<07:39,  1.82it/s]

torch.Size([1, 147840])


Processing audio:  58%|█████▊    | 1164/2000 [16:21<10:02,  1.39it/s]

torch.Size([1, 102880])


Processing audio:  58%|█████▊    | 1165/2000 [16:21<09:32,  1.46it/s]

torch.Size([1, 136960])


Processing audio:  58%|█████▊    | 1166/2000 [16:22<11:04,  1.26it/s]

torch.Size([1, 60800])


Processing audio:  58%|█████▊    | 1167/2000 [16:23<09:11,  1.51it/s]

torch.Size([1, 438720])


Processing audio:  58%|█████▊    | 1168/2000 [16:25<16:13,  1.17s/it]

torch.Size([1, 129760])


Processing audio:  58%|█████▊    | 1169/2000 [16:26<13:58,  1.01s/it]

torch.Size([1, 70400])


Processing audio:  59%|█████▊    | 1171/2000 [16:26<08:27,  1.63it/s]

torch.Size([1, 18080])
torch.Size([1, 75360])


Processing audio:  59%|█████▊    | 1172/2000 [16:27<07:20,  1.88it/s]

torch.Size([1, 60800])


Processing audio:  59%|█████▊    | 1173/2000 [16:27<06:24,  2.15it/s]

torch.Size([1, 131680])


Processing audio:  59%|█████▊    | 1174/2000 [16:27<06:37,  2.08it/s]

torch.Size([1, 245120])


Processing audio:  59%|█████▉    | 1175/2000 [16:28<09:22,  1.47it/s]

torch.Size([1, 77440])


Processing audio:  59%|█████▉    | 1176/2000 [16:29<08:07,  1.69it/s]

torch.Size([1, 152320])


Processing audio:  59%|█████▉    | 1177/2000 [16:29<08:04,  1.70it/s]

torch.Size([1, 573120])


Processing audio:  59%|█████▉    | 1178/2000 [16:32<16:50,  1.23s/it]

torch.Size([1, 148160])


Processing audio:  59%|█████▉    | 1179/2000 [16:33<14:57,  1.09s/it]

torch.Size([1, 72800])


Processing audio:  59%|█████▉    | 1180/2000 [16:34<13:02,  1.05it/s]

torch.Size([1, 72320])
torch.Size([1, 160320])


Processing audio:  59%|█████▉    | 1182/2000 [16:34<09:35,  1.42it/s]

torch.Size([1, 165600])


Processing audio:  59%|█████▉    | 1183/2000 [16:35<10:10,  1.34it/s]

torch.Size([1, 330720])


Processing audio:  59%|█████▉    | 1184/2000 [16:37<13:18,  1.02it/s]

torch.Size([1, 106880])


Processing audio:  59%|█████▉    | 1185/2000 [16:37<11:33,  1.18it/s]

torch.Size([1, 74880])


Processing audio:  59%|█████▉    | 1186/2000 [16:38<09:43,  1.40it/s]

torch.Size([1, 67680])


Processing audio:  59%|█████▉    | 1187/2000 [16:38<08:24,  1.61it/s]

torch.Size([1, 108000])


Processing audio:  59%|█████▉    | 1188/2000 [16:39<07:14,  1.87it/s]

torch.Size([1, 167680])


Processing audio:  59%|█████▉    | 1189/2000 [16:39<07:45,  1.74it/s]

torch.Size([1, 229600])


Processing audio:  60%|█████▉    | 1190/2000 [16:40<10:00,  1.35it/s]

torch.Size([1, 372000])


Processing audio:  60%|█████▉    | 1191/2000 [16:42<13:57,  1.04s/it]

torch.Size([1, 147200])


Processing audio:  60%|█████▉    | 1192/2000 [16:43<13:03,  1.03it/s]

torch.Size([1, 89920])


Processing audio:  60%|█████▉    | 1193/2000 [16:43<10:58,  1.23it/s]

torch.Size([1, 158400])


Processing audio:  60%|█████▉    | 1194/2000 [16:44<10:51,  1.24it/s]

torch.Size([1, 312160])


Processing audio:  60%|█████▉    | 1195/2000 [16:45<12:45,  1.05it/s]

torch.Size([1, 91680])


Processing audio:  60%|█████▉    | 1196/2000 [16:46<11:39,  1.15it/s]

torch.Size([1, 54400])


Processing audio:  60%|█████▉    | 1197/2000 [16:47<10:02,  1.33it/s]

torch.Size([1, 144960])


Processing audio:  60%|█████▉    | 1198/2000 [16:47<09:35,  1.39it/s]

torch.Size([1, 68800])


Processing audio:  60%|█████▉    | 1199/2000 [16:48<09:51,  1.36it/s]

torch.Size([1, 128640])


Processing audio:  60%|██████    | 1200/2000 [16:49<10:09,  1.31it/s]

torch.Size([1, 211360])


Processing audio:  60%|██████    | 1201/2000 [16:50<10:12,  1.30it/s]

torch.Size([1, 225440])


Processing audio:  60%|██████    | 1202/2000 [16:51<11:54,  1.12it/s]

torch.Size([1, 360800])


Processing audio:  60%|██████    | 1203/2000 [16:52<14:27,  1.09s/it]

torch.Size([1, 95200])


Processing audio:  60%|██████    | 1204/2000 [16:53<12:07,  1.09it/s]

torch.Size([1, 34880])


Processing audio:  60%|██████    | 1205/2000 [16:53<09:23,  1.41it/s]

torch.Size([1, 88640])


Processing audio:  60%|██████    | 1206/2000 [16:53<08:20,  1.59it/s]

torch.Size([1, 132320])


Processing audio:  60%|██████    | 1207/2000 [16:54<08:23,  1.57it/s]

torch.Size([1, 289760])


Processing audio:  60%|██████    | 1208/2000 [16:56<11:20,  1.16it/s]

torch.Size([1, 112480])


Processing audio:  60%|██████    | 1209/2000 [16:56<10:47,  1.22it/s]

torch.Size([1, 75680])


Processing audio:  61%|██████    | 1211/2000 [16:57<07:02,  1.87it/s]

torch.Size([1, 20960])
torch.Size([1, 119360])


Processing audio:  61%|██████    | 1212/2000 [16:57<07:18,  1.80it/s]

torch.Size([1, 143200])


Processing audio:  61%|██████    | 1213/2000 [16:58<08:32,  1.53it/s]

torch.Size([1, 204960])


Processing audio:  61%|██████    | 1214/2000 [17:00<12:00,  1.09it/s]

torch.Size([1, 518720])


Processing audio:  61%|██████    | 1215/2000 [17:05<30:34,  2.34s/it]

torch.Size([1, 168160])


Processing audio:  61%|██████    | 1216/2000 [17:06<24:39,  1.89s/it]

torch.Size([1, 145920])


Processing audio:  61%|██████    | 1217/2000 [17:07<20:09,  1.55s/it]

torch.Size([1, 115680])


Processing audio:  61%|██████    | 1218/2000 [17:08<16:16,  1.25s/it]

torch.Size([1, 43840])


Processing audio:  61%|██████    | 1219/2000 [17:08<12:41,  1.03it/s]

torch.Size([1, 172320])


Processing audio:  61%|██████    | 1220/2000 [17:09<12:20,  1.05it/s]

torch.Size([1, 100000])


Processing audio:  61%|██████    | 1221/2000 [17:09<10:53,  1.19it/s]

torch.Size([1, 350080])


Processing audio:  61%|██████    | 1222/2000 [17:11<14:35,  1.12s/it]

torch.Size([1, 158080])


Processing audio:  61%|██████    | 1223/2000 [17:12<15:07,  1.17s/it]

torch.Size([1, 145440])


Processing audio:  61%|██████    | 1224/2000 [17:14<14:49,  1.15s/it]

torch.Size([1, 146560])


Processing audio:  61%|██████▏   | 1225/2000 [17:15<14:05,  1.09s/it]

torch.Size([1, 57600])


Processing audio:  61%|██████▏   | 1226/2000 [17:15<10:57,  1.18it/s]

torch.Size([1, 123840])


Processing audio:  61%|██████▏   | 1227/2000 [17:16<10:25,  1.24it/s]

torch.Size([1, 179360])


Processing audio:  61%|██████▏   | 1228/2000 [17:17<11:08,  1.15it/s]

torch.Size([1, 241600])


Processing audio:  61%|██████▏   | 1229/2000 [17:18<12:00,  1.07it/s]

torch.Size([1, 9120])
torch.Size([1, 125600])


Processing audio:  62%|██████▏   | 1231/2000 [17:18<08:41,  1.47it/s]

torch.Size([1, 101920])


Processing audio:  62%|██████▏   | 1232/2000 [17:19<08:08,  1.57it/s]

torch.Size([1, 345440])


Processing audio:  62%|██████▏   | 1233/2000 [17:20<11:13,  1.14it/s]

torch.Size([1, 45760])


Processing audio:  62%|██████▏   | 1234/2000 [17:21<09:02,  1.41it/s]

torch.Size([1, 80160])


Processing audio:  62%|██████▏   | 1235/2000 [17:21<08:05,  1.57it/s]

torch.Size([1, 119680])


Processing audio:  62%|██████▏   | 1236/2000 [17:22<08:04,  1.58it/s]

torch.Size([1, 160160])


Processing audio:  62%|██████▏   | 1237/2000 [17:22<07:58,  1.59it/s]

torch.Size([1, 80640])


Processing audio:  62%|██████▏   | 1238/2000 [17:23<07:25,  1.71it/s]

torch.Size([1, 102560])


Processing audio:  62%|██████▏   | 1239/2000 [17:23<06:52,  1.85it/s]

torch.Size([1, 200160])


Processing audio:  62%|██████▏   | 1240/2000 [17:24<08:38,  1.47it/s]

torch.Size([1, 376480])


Processing audio:  62%|██████▏   | 1241/2000 [17:27<14:36,  1.15s/it]

torch.Size([1, 121760])


Processing audio:  62%|██████▏   | 1242/2000 [17:28<13:35,  1.08s/it]

torch.Size([1, 148960])


Processing audio:  62%|██████▏   | 1243/2000 [17:28<12:00,  1.05it/s]

torch.Size([1, 314400])


Processing audio:  62%|██████▏   | 1244/2000 [17:30<14:21,  1.14s/it]

torch.Size([1, 76160])


Processing audio:  62%|██████▏   | 1245/2000 [17:30<11:48,  1.07it/s]

torch.Size([1, 64480])


Processing audio:  62%|██████▏   | 1246/2000 [17:31<09:46,  1.29it/s]

torch.Size([1, 74880])


Processing audio:  62%|██████▏   | 1247/2000 [17:31<08:23,  1.50it/s]

torch.Size([1, 198400])


Processing audio:  62%|██████▏   | 1248/2000 [17:32<09:17,  1.35it/s]

torch.Size([1, 55040])


Processing audio:  62%|██████▏   | 1249/2000 [17:32<07:52,  1.59it/s]

torch.Size([1, 140160])


Processing audio:  62%|██████▎   | 1250/2000 [17:33<07:33,  1.65it/s]

torch.Size([1, 176160])


Processing audio:  63%|██████▎   | 1251/2000 [17:34<08:23,  1.49it/s]

torch.Size([1, 84960])


Processing audio:  63%|██████▎   | 1252/2000 [17:34<07:52,  1.58it/s]

torch.Size([1, 93760])


Processing audio:  63%|██████▎   | 1253/2000 [17:35<07:31,  1.65it/s]

torch.Size([1, 118720])


Processing audio:  63%|██████▎   | 1254/2000 [17:35<07:41,  1.61it/s]

torch.Size([1, 132800])


Processing audio:  63%|██████▎   | 1255/2000 [17:36<08:47,  1.41it/s]

torch.Size([1, 138560])


Processing audio:  63%|██████▎   | 1256/2000 [17:37<08:22,  1.48it/s]

torch.Size([1, 178720])


Processing audio:  63%|██████▎   | 1257/2000 [17:38<09:51,  1.26it/s]

torch.Size([1, 129120])


Processing audio:  63%|██████▎   | 1258/2000 [17:39<09:55,  1.25it/s]

torch.Size([1, 97440])


Processing audio:  63%|██████▎   | 1259/2000 [17:40<09:39,  1.28it/s]

torch.Size([1, 91520])


Processing audio:  63%|██████▎   | 1260/2000 [17:40<09:09,  1.35it/s]

torch.Size([1, 77600])


Processing audio:  63%|██████▎   | 1262/2000 [17:41<06:39,  1.85it/s]

torch.Size([1, 21760])
torch.Size([1, 19360])


Processing audio:  63%|██████▎   | 1263/2000 [17:41<05:07,  2.40it/s]

torch.Size([1, 379840])


Processing audio:  63%|██████▎   | 1264/2000 [17:43<11:02,  1.11it/s]

torch.Size([1, 97600])


Processing audio:  63%|██████▎   | 1266/2000 [17:44<07:18,  1.67it/s]

torch.Size([1, 31680])
torch.Size([1, 56480])


Processing audio:  63%|██████▎   | 1267/2000 [17:44<06:28,  1.88it/s]

torch.Size([1, 86560])


Processing audio:  63%|██████▎   | 1269/2000 [17:45<05:08,  2.37it/s]

torch.Size([1, 18080])
torch.Size([1, 331680])


Processing audio:  64%|██████▎   | 1270/2000 [17:47<10:11,  1.19it/s]

torch.Size([1, 100960])


Processing audio:  64%|██████▎   | 1271/2000 [17:47<09:34,  1.27it/s]

torch.Size([1, 111360])


Processing audio:  64%|██████▎   | 1272/2000 [17:48<09:09,  1.32it/s]

torch.Size([1, 251200])


Processing audio:  64%|██████▎   | 1273/2000 [17:49<10:58,  1.10it/s]

torch.Size([1, 168800])


Processing audio:  64%|██████▎   | 1274/2000 [17:50<10:35,  1.14it/s]

torch.Size([1, 40160])


Processing audio:  64%|██████▍   | 1275/2000 [17:50<08:11,  1.48it/s]

torch.Size([1, 148640])


Processing audio:  64%|██████▍   | 1276/2000 [17:51<09:36,  1.26it/s]

torch.Size([1, 66560])


Processing audio:  64%|██████▍   | 1277/2000 [17:52<08:41,  1.39it/s]

torch.Size([1, 58080])


Processing audio:  64%|██████▍   | 1278/2000 [17:52<07:49,  1.54it/s]

torch.Size([1, 71360])


Processing audio:  64%|██████▍   | 1279/2000 [17:53<07:22,  1.63it/s]

torch.Size([1, 107520])


Processing audio:  64%|██████▍   | 1280/2000 [17:54<08:14,  1.46it/s]

torch.Size([1, 89600])


Processing audio:  64%|██████▍   | 1281/2000 [17:54<07:38,  1.57it/s]

torch.Size([1, 55520])


Processing audio:  64%|██████▍   | 1282/2000 [17:55<06:26,  1.86it/s]

torch.Size([1, 45600])


Processing audio:  64%|██████▍   | 1283/2000 [17:55<05:42,  2.09it/s]

torch.Size([1, 494240])


Processing audio:  64%|██████▍   | 1284/2000 [17:58<13:12,  1.11s/it]

torch.Size([1, 59520])


Processing audio:  64%|██████▍   | 1285/2000 [17:58<10:41,  1.11it/s]

torch.Size([1, 223040])


Processing audio:  64%|██████▍   | 1286/2000 [17:59<10:40,  1.11it/s]

torch.Size([1, 201600])


Processing audio:  64%|██████▍   | 1287/2000 [18:00<10:23,  1.14it/s]

torch.Size([1, 168640])


Processing audio:  64%|██████▍   | 1288/2000 [18:01<10:43,  1.11it/s]

torch.Size([1, 79200])


Processing audio:  64%|██████▍   | 1289/2000 [18:01<08:38,  1.37it/s]

torch.Size([1, 163200])


Processing audio:  64%|██████▍   | 1290/2000 [18:02<08:35,  1.38it/s]

torch.Size([1, 168320])


Processing audio:  65%|██████▍   | 1291/2000 [18:03<08:56,  1.32it/s]

torch.Size([1, 332480])


Processing audio:  65%|██████▍   | 1292/2000 [18:04<12:11,  1.03s/it]

torch.Size([1, 122880])


Processing audio:  65%|██████▍   | 1293/2000 [18:05<11:32,  1.02it/s]

torch.Size([1, 77600])


Processing audio:  65%|██████▍   | 1294/2000 [18:06<10:33,  1.11it/s]

torch.Size([1, 155520])


Processing audio:  65%|██████▍   | 1295/2000 [18:07<11:28,  1.02it/s]

torch.Size([1, 181280])


Processing audio:  65%|██████▍   | 1296/2000 [18:08<11:50,  1.01s/it]

torch.Size([1, 229280])


Processing audio:  65%|██████▍   | 1297/2000 [18:09<12:27,  1.06s/it]

torch.Size([1, 113280])


Processing audio:  65%|██████▍   | 1298/2000 [18:10<10:41,  1.10it/s]

torch.Size([1, 80000])


Processing audio:  65%|██████▍   | 1299/2000 [18:10<08:56,  1.31it/s]

torch.Size([1, 31200])


Processing audio:  65%|██████▌   | 1300/2000 [18:10<07:05,  1.65it/s]

torch.Size([1, 82240])


Processing audio:  65%|██████▌   | 1301/2000 [18:11<06:27,  1.80it/s]

torch.Size([1, 29440])


Processing audio:  65%|██████▌   | 1302/2000 [18:11<05:18,  2.19it/s]

torch.Size([1, 137920])


Processing audio:  65%|██████▌   | 1303/2000 [18:12<06:10,  1.88it/s]

torch.Size([1, 133120])


Processing audio:  65%|██████▌   | 1304/2000 [18:12<06:37,  1.75it/s]

torch.Size([1, 372000])


Processing audio:  65%|██████▌   | 1305/2000 [18:14<10:58,  1.06it/s]

torch.Size([1, 109280])


Processing audio:  65%|██████▌   | 1306/2000 [18:15<09:51,  1.17it/s]

torch.Size([1, 439680])


Processing audio:  65%|██████▌   | 1307/2000 [18:17<14:47,  1.28s/it]

torch.Size([1, 60640])


Processing audio:  65%|██████▌   | 1308/2000 [18:18<12:13,  1.06s/it]

torch.Size([1, 41600])


Processing audio:  65%|██████▌   | 1309/2000 [18:18<10:15,  1.12it/s]

torch.Size([1, 192000])


Processing audio:  66%|██████▌   | 1311/2000 [18:20<09:09,  1.25it/s]

torch.Size([1, 20800])
torch.Size([1, 211520])


Processing audio:  66%|██████▌   | 1312/2000 [18:21<10:40,  1.07it/s]

torch.Size([1, 176320])


Processing audio:  66%|██████▌   | 1313/2000 [18:22<10:22,  1.10it/s]

torch.Size([1, 122400])


Processing audio:  66%|██████▌   | 1314/2000 [18:23<09:16,  1.23it/s]

torch.Size([1, 148960])


Processing audio:  66%|██████▌   | 1315/2000 [18:23<09:01,  1.26it/s]

torch.Size([1, 175200])


Processing audio:  66%|██████▌   | 1316/2000 [18:24<09:42,  1.17it/s]

torch.Size([1, 111360])


Processing audio:  66%|██████▌   | 1317/2000 [18:25<09:00,  1.26it/s]

torch.Size([1, 96800])


Processing audio:  66%|██████▌   | 1318/2000 [18:26<08:06,  1.40it/s]

torch.Size([1, 118240])


Processing audio:  66%|██████▌   | 1319/2000 [18:26<07:40,  1.48it/s]

torch.Size([1, 493280])


Processing audio:  66%|██████▌   | 1320/2000 [18:29<13:51,  1.22s/it]

torch.Size([1, 672960])


Processing audio:  66%|██████▌   | 1321/2000 [18:33<23:22,  2.07s/it]

torch.Size([1, 240640])


Processing audio:  66%|██████▌   | 1322/2000 [18:34<21:31,  1.91s/it]

torch.Size([1, 57920])


Processing audio:  66%|██████▌   | 1323/2000 [18:34<16:03,  1.42s/it]

torch.Size([1, 129120])


Processing audio:  66%|██████▌   | 1324/2000 [18:35<13:33,  1.20s/it]

torch.Size([1, 281120])


Processing audio:  66%|██████▋   | 1325/2000 [18:37<14:22,  1.28s/it]

torch.Size([1, 97920])


Processing audio:  66%|██████▋   | 1326/2000 [18:37<11:58,  1.07s/it]

torch.Size([1, 382080])


Processing audio:  66%|██████▋   | 1327/2000 [18:39<14:04,  1.26s/it]

torch.Size([1, 184960])


Processing audio:  66%|██████▋   | 1328/2000 [18:39<11:29,  1.03s/it]

torch.Size([1, 94400])


Processing audio:  66%|██████▋   | 1329/2000 [18:40<09:55,  1.13it/s]

torch.Size([1, 145760])


Processing audio:  66%|██████▋   | 1330/2000 [18:41<09:34,  1.17it/s]

torch.Size([1, 112000])


Processing audio:  67%|██████▋   | 1331/2000 [18:41<08:51,  1.26it/s]

torch.Size([1, 105440])


Processing audio:  67%|██████▋   | 1332/2000 [18:42<07:52,  1.41it/s]

torch.Size([1, 101600])


Processing audio:  67%|██████▋   | 1333/2000 [18:42<07:14,  1.53it/s]

torch.Size([1, 90400])


Processing audio:  67%|██████▋   | 1334/2000 [18:43<06:53,  1.61it/s]

torch.Size([1, 116800])


Processing audio:  67%|██████▋   | 1335/2000 [18:44<07:12,  1.54it/s]

torch.Size([1, 94400])


Processing audio:  67%|██████▋   | 1336/2000 [18:44<07:49,  1.41it/s]

torch.Size([1, 67680])


Processing audio:  67%|██████▋   | 1337/2000 [18:45<07:20,  1.51it/s]

torch.Size([1, 166240])


Processing audio:  67%|██████▋   | 1338/2000 [18:46<09:08,  1.21it/s]

torch.Size([1, 400160])


Processing audio:  67%|██████▋   | 1339/2000 [18:49<13:53,  1.26s/it]

torch.Size([1, 107200])


Processing audio:  67%|██████▋   | 1340/2000 [18:49<11:27,  1.04s/it]

torch.Size([1, 195360])


Processing audio:  67%|██████▋   | 1341/2000 [18:50<11:04,  1.01s/it]

torch.Size([1, 86080])


Processing audio:  67%|██████▋   | 1342/2000 [18:50<09:13,  1.19it/s]

torch.Size([1, 188960])


Processing audio:  67%|██████▋   | 1343/2000 [18:52<09:56,  1.10it/s]

torch.Size([1, 48160])


Processing audio:  67%|██████▋   | 1345/2000 [18:52<06:04,  1.80it/s]

torch.Size([1, 22560])
torch.Size([1, 567680])


Processing audio:  67%|██████▋   | 1346/2000 [18:54<11:13,  1.03s/it]

torch.Size([1, 41760])


Processing audio:  67%|██████▋   | 1347/2000 [18:54<08:43,  1.25it/s]

torch.Size([1, 121440])


Processing audio:  67%|██████▋   | 1348/2000 [18:55<07:52,  1.38it/s]

torch.Size([1, 47840])


Processing audio:  67%|██████▋   | 1349/2000 [18:55<06:26,  1.68it/s]

torch.Size([1, 134880])


Processing audio:  68%|██████▊   | 1350/2000 [18:56<06:33,  1.65it/s]

torch.Size([1, 45120])


Processing audio:  68%|██████▊   | 1351/2000 [18:56<05:30,  1.96it/s]

torch.Size([1, 53280])


Processing audio:  68%|██████▊   | 1352/2000 [18:57<05:04,  2.13it/s]

torch.Size([1, 155520])


Processing audio:  68%|██████▊   | 1353/2000 [18:57<06:12,  1.74it/s]

torch.Size([1, 40960])


Processing audio:  68%|██████▊   | 1354/2000 [18:58<05:41,  1.89it/s]

torch.Size([1, 143680])


Processing audio:  68%|██████▊   | 1355/2000 [18:58<06:11,  1.74it/s]

torch.Size([1, 85440])


Processing audio:  68%|██████▊   | 1356/2000 [18:59<06:45,  1.59it/s]

torch.Size([1, 183680])


Processing audio:  68%|██████▊   | 1357/2000 [19:01<08:58,  1.19it/s]

torch.Size([1, 44960])


Processing audio:  68%|██████▊   | 1358/2000 [19:01<07:11,  1.49it/s]

torch.Size([1, 152960])


Processing audio:  68%|██████▊   | 1360/2000 [19:02<05:51,  1.82it/s]

torch.Size([1, 36320])
torch.Size([1, 98240])


Processing audio:  68%|██████▊   | 1361/2000 [19:02<05:48,  1.83it/s]

torch.Size([1, 265760])


Processing audio:  68%|██████▊   | 1362/2000 [19:03<07:04,  1.50it/s]

torch.Size([1, 58720])


Processing audio:  68%|██████▊   | 1364/2000 [19:04<04:41,  2.26it/s]

torch.Size([1, 29760])
torch.Size([1, 142240])


Processing audio:  68%|██████▊   | 1365/2000 [19:04<05:40,  1.86it/s]

torch.Size([1, 42720])


Processing audio:  68%|██████▊   | 1366/2000 [19:05<04:50,  2.19it/s]

torch.Size([1, 78560])


Processing audio:  68%|██████▊   | 1367/2000 [19:05<04:23,  2.40it/s]

torch.Size([1, 90880])


Processing audio:  68%|██████▊   | 1368/2000 [19:06<04:26,  2.37it/s]

torch.Size([1, 202400])


Processing audio:  68%|██████▊   | 1369/2000 [19:07<06:24,  1.64it/s]

torch.Size([1, 21440])


Processing audio:  68%|██████▊   | 1370/2000 [19:07<05:07,  2.05it/s]

torch.Size([1, 50240])


Processing audio:  69%|██████▊   | 1371/2000 [19:07<04:53,  2.15it/s]

torch.Size([1, 48480])


Processing audio:  69%|██████▊   | 1372/2000 [19:07<04:09,  2.52it/s]

torch.Size([1, 156000])


Processing audio:  69%|██████▊   | 1374/2000 [19:08<03:46,  2.76it/s]

torch.Size([1, 26400])
torch.Size([1, 245600])


Processing audio:  69%|██████▉   | 1375/2000 [19:09<06:37,  1.57it/s]

torch.Size([1, 175360])


Processing audio:  69%|██████▉   | 1376/2000 [19:10<07:57,  1.31it/s]

torch.Size([1, 101760])


Processing audio:  69%|██████▉   | 1377/2000 [19:11<07:29,  1.39it/s]

torch.Size([1, 222880])


Processing audio:  69%|██████▉   | 1378/2000 [19:12<09:26,  1.10it/s]

torch.Size([1, 161120])


Processing audio:  69%|██████▉   | 1379/2000 [19:14<10:09,  1.02it/s]

torch.Size([1, 215520])


Processing audio:  69%|██████▉   | 1380/2000 [19:15<10:33,  1.02s/it]

torch.Size([1, 108800])


Processing audio:  69%|██████▉   | 1381/2000 [19:15<08:26,  1.22it/s]

torch.Size([1, 126080])


Processing audio:  69%|██████▉   | 1382/2000 [19:16<07:34,  1.36it/s]

torch.Size([1, 352960])


Processing audio:  69%|██████▉   | 1383/2000 [19:17<10:42,  1.04s/it]

torch.Size([1, 169600])


Processing audio:  69%|██████▉   | 1384/2000 [19:18<10:22,  1.01s/it]

torch.Size([1, 17120])
torch.Size([1, 28800])


Processing audio:  69%|██████▉   | 1386/2000 [19:19<06:25,  1.59it/s]

torch.Size([1, 76000])


Processing audio:  69%|██████▉   | 1387/2000 [19:19<05:54,  1.73it/s]

torch.Size([1, 373760])


Processing audio:  69%|██████▉   | 1388/2000 [19:21<08:40,  1.18it/s]

torch.Size([1, 108480])


Processing audio:  69%|██████▉   | 1389/2000 [19:21<07:35,  1.34it/s]

torch.Size([1, 113440])


Processing audio:  70%|██████▉   | 1390/2000 [19:21<06:16,  1.62it/s]

torch.Size([1, 73120])


Processing audio:  70%|██████▉   | 1391/2000 [19:22<05:48,  1.75it/s]

torch.Size([1, 113600])


Processing audio:  70%|██████▉   | 1392/2000 [19:22<05:43,  1.77it/s]

torch.Size([1, 240640])


Processing audio:  70%|██████▉   | 1393/2000 [19:24<08:30,  1.19it/s]

torch.Size([1, 129280])


Processing audio:  70%|██████▉   | 1394/2000 [19:25<08:27,  1.19it/s]

torch.Size([1, 51360])


Processing audio:  70%|██████▉   | 1395/2000 [19:25<07:11,  1.40it/s]

torch.Size([1, 23680])


Processing audio:  70%|██████▉   | 1396/2000 [19:25<05:49,  1.73it/s]

torch.Size([1, 103040])


Processing audio:  70%|██████▉   | 1397/2000 [19:26<06:11,  1.62it/s]

torch.Size([1, 822720])


Processing audio:  70%|██████▉   | 1398/2000 [19:32<23:10,  2.31s/it]

torch.Size([1, 235680])


Processing audio:  70%|██████▉   | 1399/2000 [19:33<19:13,  1.92s/it]

torch.Size([1, 97120])


Processing audio:  70%|███████   | 1400/2000 [19:34<14:59,  1.50s/it]

torch.Size([1, 312000])


Processing audio:  70%|███████   | 1401/2000 [19:35<13:21,  1.34s/it]

torch.Size([1, 324000])


Processing audio:  70%|███████   | 1403/2000 [19:36<09:47,  1.02it/s]

torch.Size([1, 22080])
torch.Size([1, 18560])


Processing audio:  70%|███████   | 1404/2000 [19:37<07:19,  1.36it/s]

torch.Size([1, 144320])


Processing audio:  70%|███████   | 1405/2000 [19:38<07:52,  1.26it/s]

torch.Size([1, 323840])


Processing audio:  70%|███████   | 1406/2000 [19:39<11:16,  1.14s/it]

torch.Size([1, 200640])


Processing audio:  70%|███████   | 1407/2000 [19:40<10:45,  1.09s/it]

torch.Size([1, 431040])


Processing audio:  70%|███████   | 1408/2000 [19:43<14:15,  1.45s/it]

torch.Size([1, 112960])


Processing audio:  70%|███████   | 1409/2000 [19:43<11:48,  1.20s/it]

torch.Size([1, 59840])


Processing audio:  71%|███████   | 1411/2000 [19:44<06:56,  1.41it/s]

torch.Size([1, 34560])
torch.Size([1, 107520])


Processing audio:  71%|███████   | 1412/2000 [19:44<06:36,  1.48it/s]

torch.Size([1, 193120])


Processing audio:  71%|███████   | 1413/2000 [19:45<07:43,  1.27it/s]

torch.Size([1, 146880])


Processing audio:  71%|███████   | 1414/2000 [19:46<07:41,  1.27it/s]

torch.Size([1, 123520])


Processing audio:  71%|███████   | 1415/2000 [19:47<07:09,  1.36it/s]

torch.Size([1, 124800])


Processing audio:  71%|███████   | 1416/2000 [19:47<06:38,  1.47it/s]

torch.Size([1, 125600])


Processing audio:  71%|███████   | 1417/2000 [19:48<05:47,  1.68it/s]

torch.Size([1, 54400])


Processing audio:  71%|███████   | 1419/2000 [19:48<03:38,  2.66it/s]

torch.Size([1, 20640])
torch.Size([1, 87520])


Processing audio:  71%|███████   | 1420/2000 [19:49<03:52,  2.49it/s]

torch.Size([1, 336640])


Processing audio:  71%|███████   | 1421/2000 [19:51<09:05,  1.06it/s]

torch.Size([1, 113760])


Processing audio:  71%|███████   | 1422/2000 [19:52<09:13,  1.05it/s]

torch.Size([1, 69440])


Processing audio:  71%|███████   | 1423/2000 [19:52<07:41,  1.25it/s]

torch.Size([1, 21920])
torch.Size([1, 97760])


Processing audio:  71%|███████▏  | 1425/2000 [19:53<05:27,  1.76it/s]

torch.Size([1, 63840])


Processing audio:  71%|███████▏  | 1426/2000 [19:53<04:59,  1.91it/s]

torch.Size([1, 272000])


Processing audio:  71%|███████▏  | 1427/2000 [19:54<06:42,  1.42it/s]

torch.Size([1, 214080])


Processing audio:  71%|███████▏  | 1429/2000 [19:56<06:08,  1.55it/s]

torch.Size([1, 17760])
torch.Size([1, 100000])


Processing audio:  72%|███████▏  | 1430/2000 [19:56<06:02,  1.57it/s]

torch.Size([1, 116160])


Processing audio:  72%|███████▏  | 1431/2000 [19:57<05:32,  1.71it/s]

torch.Size([1, 213760])


Processing audio:  72%|███████▏  | 1432/2000 [19:58<07:04,  1.34it/s]

torch.Size([1, 17120])
torch.Size([1, 148320])


Processing audio:  72%|███████▏  | 1434/2000 [19:59<05:53,  1.60it/s]

torch.Size([1, 159520])


Processing audio:  72%|███████▏  | 1435/2000 [20:00<06:14,  1.51it/s]

torch.Size([1, 67360])


Processing audio:  72%|███████▏  | 1436/2000 [20:00<05:31,  1.70it/s]

torch.Size([1, 96480])


Processing audio:  72%|███████▏  | 1437/2000 [20:01<05:22,  1.75it/s]

torch.Size([1, 134400])


Processing audio:  72%|███████▏  | 1438/2000 [20:01<05:44,  1.63it/s]

torch.Size([1, 153440])


Processing audio:  72%|███████▏  | 1439/2000 [20:02<06:03,  1.54it/s]

torch.Size([1, 108800])


Processing audio:  72%|███████▏  | 1440/2000 [20:03<07:00,  1.33it/s]

torch.Size([1, 72960])


Processing audio:  72%|███████▏  | 1441/2000 [20:04<06:56,  1.34it/s]

torch.Size([1, 37120])


Processing audio:  72%|███████▏  | 1442/2000 [20:04<05:32,  1.68it/s]

torch.Size([1, 114560])


Processing audio:  72%|███████▏  | 1443/2000 [20:05<05:43,  1.62it/s]

torch.Size([1, 65120])


Processing audio:  72%|███████▏  | 1444/2000 [20:05<05:26,  1.70it/s]

torch.Size([1, 90400])


Processing audio:  72%|███████▏  | 1445/2000 [20:06<05:04,  1.82it/s]

torch.Size([1, 103680])


Processing audio:  72%|███████▏  | 1446/2000 [20:06<05:08,  1.79it/s]

torch.Size([1, 123200])


Processing audio:  72%|███████▏  | 1447/2000 [20:07<05:03,  1.83it/s]

torch.Size([1, 127040])


Processing audio:  72%|███████▏  | 1448/2000 [20:07<05:12,  1.77it/s]

torch.Size([1, 98400])


Processing audio:  72%|███████▏  | 1449/2000 [20:08<05:18,  1.73it/s]

torch.Size([1, 114560])


Processing audio:  72%|███████▎  | 1450/2000 [20:09<05:27,  1.68it/s]

torch.Size([1, 141440])


Processing audio:  73%|███████▎  | 1451/2000 [20:09<05:50,  1.57it/s]

torch.Size([1, 100960])


Processing audio:  73%|███████▎  | 1452/2000 [20:10<05:29,  1.66it/s]

torch.Size([1, 134880])


Processing audio:  73%|███████▎  | 1454/2000 [20:11<04:38,  1.96it/s]

torch.Size([1, 26080])
torch.Size([1, 301120])


Processing audio:  73%|███████▎  | 1455/2000 [20:12<07:13,  1.26it/s]

torch.Size([1, 138720])


Processing audio:  73%|███████▎  | 1456/2000 [20:13<06:51,  1.32it/s]

torch.Size([1, 43040])


Processing audio:  73%|███████▎  | 1457/2000 [20:13<05:24,  1.67it/s]

torch.Size([1, 156800])


Processing audio:  73%|███████▎  | 1458/2000 [20:14<05:58,  1.51it/s]

torch.Size([1, 505920])


Processing audio:  73%|███████▎  | 1459/2000 [20:17<12:22,  1.37s/it]

torch.Size([1, 71680])


Processing audio:  73%|███████▎  | 1460/2000 [20:18<09:55,  1.10s/it]

torch.Size([1, 180960])


Processing audio:  73%|███████▎  | 1461/2000 [20:19<10:15,  1.14s/it]

torch.Size([1, 165920])


Processing audio:  73%|███████▎  | 1462/2000 [20:20<09:36,  1.07s/it]

torch.Size([1, 106400])


Processing audio:  73%|███████▎  | 1463/2000 [20:20<08:09,  1.10it/s]

torch.Size([1, 90880])


Processing audio:  73%|███████▎  | 1464/2000 [20:21<07:23,  1.21it/s]

torch.Size([1, 198080])


Processing audio:  73%|███████▎  | 1465/2000 [20:22<08:09,  1.09it/s]

torch.Size([1, 269920])


Processing audio:  73%|███████▎  | 1466/2000 [20:23<09:31,  1.07s/it]

torch.Size([1, 86560])


Processing audio:  73%|███████▎  | 1467/2000 [20:24<07:56,  1.12it/s]

torch.Size([1, 211360])


Processing audio:  73%|███████▎  | 1468/2000 [20:25<08:39,  1.02it/s]

torch.Size([1, 17120])


Processing audio:  73%|███████▎  | 1469/2000 [20:30<20:19,  2.30s/it]

torch.Size([1, 175360])


Processing audio:  74%|███████▎  | 1470/2000 [20:32<17:32,  1.99s/it]

torch.Size([1, 176960])


Processing audio:  74%|███████▎  | 1471/2000 [20:32<14:24,  1.63s/it]

torch.Size([1, 267680])


Processing audio:  74%|███████▎  | 1472/2000 [20:34<13:20,  1.52s/it]

torch.Size([1, 325120])


Processing audio:  74%|███████▎  | 1473/2000 [20:35<12:53,  1.47s/it]

torch.Size([1, 47520])


Processing audio:  74%|███████▎  | 1474/2000 [20:35<09:46,  1.11s/it]

torch.Size([1, 691680])


Processing audio:  74%|███████▍  | 1475/2000 [20:39<16:48,  1.92s/it]

torch.Size([1, 104320])


Processing audio:  74%|███████▍  | 1476/2000 [20:40<13:24,  1.53s/it]

torch.Size([1, 70080])


Processing audio:  74%|███████▍  | 1477/2000 [20:40<10:23,  1.19s/it]

torch.Size([1, 151040])


Processing audio:  74%|███████▍  | 1478/2000 [20:41<09:31,  1.10s/it]

torch.Size([1, 369600])


Processing audio:  74%|███████▍  | 1479/2000 [20:43<12:03,  1.39s/it]

torch.Size([1, 118240])


Processing audio:  74%|███████▍  | 1480/2000 [20:44<10:32,  1.22s/it]

torch.Size([1, 88800])


Processing audio:  74%|███████▍  | 1481/2000 [20:45<09:09,  1.06s/it]

torch.Size([1, 363840])


Processing audio:  74%|███████▍  | 1482/2000 [20:47<11:23,  1.32s/it]

torch.Size([1, 144160])


Processing audio:  74%|███████▍  | 1483/2000 [20:47<10:02,  1.16s/it]

torch.Size([1, 325440])


Processing audio:  74%|███████▍  | 1484/2000 [20:49<11:18,  1.32s/it]

torch.Size([1, 113920])


Processing audio:  74%|███████▍  | 1485/2000 [20:50<09:21,  1.09s/it]

torch.Size([1, 37280])


Processing audio:  74%|███████▍  | 1486/2000 [20:50<07:07,  1.20it/s]

torch.Size([1, 148960])


Processing audio:  74%|███████▍  | 1488/2000 [20:51<05:29,  1.55it/s]

torch.Size([1, 23360])
torch.Size([1, 145120])


Processing audio:  74%|███████▍  | 1489/2000 [20:52<05:42,  1.49it/s]

torch.Size([1, 62560])


Processing audio:  74%|███████▍  | 1490/2000 [20:52<05:14,  1.62it/s]

torch.Size([1, 175040])


Processing audio:  75%|███████▍  | 1491/2000 [20:53<05:50,  1.45it/s]

torch.Size([1, 166720])


Processing audio:  75%|███████▍  | 1492/2000 [20:54<06:06,  1.39it/s]

torch.Size([1, 90080])


Processing audio:  75%|███████▍  | 1493/2000 [20:54<05:28,  1.54it/s]

torch.Size([1, 132160])


Processing audio:  75%|███████▍  | 1494/2000 [20:55<05:47,  1.46it/s]

torch.Size([1, 131360])


Processing audio:  75%|███████▍  | 1495/2000 [20:56<06:00,  1.40it/s]

torch.Size([1, 22720])


Processing audio:  75%|███████▍  | 1496/2000 [20:56<04:45,  1.77it/s]

torch.Size([1, 183040])


Processing audio:  75%|███████▍  | 1497/2000 [20:57<06:20,  1.32it/s]

torch.Size([1, 51200])


Processing audio:  75%|███████▍  | 1498/2000 [20:58<05:46,  1.45it/s]

torch.Size([1, 118080])


Processing audio:  75%|███████▍  | 1499/2000 [20:58<05:40,  1.47it/s]

torch.Size([1, 169120])


Processing audio:  75%|███████▌  | 1500/2000 [20:59<06:11,  1.35it/s]

torch.Size([1, 162880])


Processing audio:  75%|███████▌  | 1501/2000 [21:00<06:16,  1.33it/s]

torch.Size([1, 97920])


Processing audio:  75%|███████▌  | 1502/2000 [21:01<05:49,  1.42it/s]

torch.Size([1, 37280])


Processing audio:  75%|███████▌  | 1503/2000 [21:01<04:43,  1.75it/s]

torch.Size([1, 175360])


Processing audio:  75%|███████▌  | 1505/2000 [21:02<04:27,  1.85it/s]

torch.Size([1, 36640])
torch.Size([1, 99040])


Processing audio:  75%|███████▌  | 1506/2000 [21:03<04:24,  1.87it/s]

torch.Size([1, 83040])


Processing audio:  75%|███████▌  | 1507/2000 [21:03<04:15,  1.93it/s]

torch.Size([1, 73120])


Processing audio:  75%|███████▌  | 1508/2000 [21:04<04:05,  2.00it/s]

torch.Size([1, 151360])


Processing audio:  75%|███████▌  | 1509/2000 [21:04<04:41,  1.74it/s]

torch.Size([1, 139200])


Processing audio:  76%|███████▌  | 1510/2000 [21:05<05:07,  1.59it/s]

torch.Size([1, 179040])


Processing audio:  76%|███████▌  | 1511/2000 [21:06<05:40,  1.44it/s]

torch.Size([1, 158400])


Processing audio:  76%|███████▌  | 1512/2000 [21:07<05:53,  1.38it/s]

torch.Size([1, 108960])


Processing audio:  76%|███████▌  | 1513/2000 [21:07<05:34,  1.45it/s]

torch.Size([1, 200640])


Processing audio:  76%|███████▌  | 1514/2000 [21:08<06:38,  1.22it/s]

torch.Size([1, 237280])


Processing audio:  76%|███████▌  | 1515/2000 [21:10<08:28,  1.05s/it]

torch.Size([1, 180320])


Processing audio:  76%|███████▌  | 1516/2000 [21:11<07:58,  1.01it/s]

torch.Size([1, 124480])


Processing audio:  76%|███████▌  | 1517/2000 [21:11<06:54,  1.17it/s]

torch.Size([1, 456160])


Processing audio:  76%|███████▌  | 1518/2000 [21:13<09:29,  1.18s/it]

torch.Size([1, 134240])


Processing audio:  76%|███████▌  | 1519/2000 [21:14<08:12,  1.02s/it]

torch.Size([1, 221120])


Processing audio:  76%|███████▌  | 1520/2000 [21:15<08:18,  1.04s/it]

torch.Size([1, 213920])


Processing audio:  76%|███████▌  | 1521/2000 [21:16<07:52,  1.01it/s]

torch.Size([1, 296800])


Processing audio:  76%|███████▌  | 1522/2000 [21:17<08:51,  1.11s/it]

torch.Size([1, 606080])


Processing audio:  76%|███████▌  | 1523/2000 [21:23<19:02,  2.40s/it]

torch.Size([1, 295200])


Processing audio:  76%|███████▌  | 1524/2000 [21:25<17:34,  2.21s/it]

torch.Size([1, 230240])


Processing audio:  76%|███████▋  | 1525/2000 [21:26<15:37,  1.97s/it]

torch.Size([1, 131360])


Processing audio:  76%|███████▋  | 1526/2000 [21:27<12:35,  1.59s/it]

torch.Size([1, 118880])


Processing audio:  76%|███████▋  | 1527/2000 [21:27<10:27,  1.33s/it]

torch.Size([1, 85600])


Processing audio:  76%|███████▋  | 1528/2000 [21:28<08:32,  1.09s/it]

torch.Size([1, 135200])


Processing audio:  76%|███████▋  | 1529/2000 [21:29<07:56,  1.01s/it]

torch.Size([1, 243840])


Processing audio:  76%|███████▋  | 1530/2000 [21:30<08:12,  1.05s/it]

torch.Size([1, 69440])


Processing audio:  77%|███████▋  | 1531/2000 [21:30<06:44,  1.16it/s]

torch.Size([1, 157440])


Processing audio:  77%|███████▋  | 1532/2000 [21:31<06:29,  1.20it/s]

torch.Size([1, 116160])


Processing audio:  77%|███████▋  | 1533/2000 [21:32<05:53,  1.32it/s]

torch.Size([1, 127040])


Processing audio:  77%|███████▋  | 1534/2000 [21:32<05:42,  1.36it/s]

torch.Size([1, 137600])


Processing audio:  77%|███████▋  | 1535/2000 [21:33<05:47,  1.34it/s]

torch.Size([1, 306400])


Processing audio:  77%|███████▋  | 1536/2000 [21:35<07:26,  1.04it/s]

torch.Size([1, 102560])


Processing audio:  77%|███████▋  | 1537/2000 [21:35<06:50,  1.13it/s]

torch.Size([1, 141760])


Processing audio:  77%|███████▋  | 1538/2000 [21:36<06:44,  1.14it/s]

torch.Size([1, 59200])


Processing audio:  77%|███████▋  | 1539/2000 [21:37<06:05,  1.26it/s]

torch.Size([1, 42240])


Processing audio:  77%|███████▋  | 1540/2000 [21:37<05:09,  1.49it/s]

torch.Size([1, 149600])


Processing audio:  77%|███████▋  | 1541/2000 [21:38<05:19,  1.43it/s]

torch.Size([1, 22080])


Processing audio:  77%|███████▋  | 1542/2000 [21:38<04:12,  1.82it/s]

torch.Size([1, 203520])


Processing audio:  77%|███████▋  | 1543/2000 [21:39<05:32,  1.37it/s]

torch.Size([1, 257120])


Processing audio:  77%|███████▋  | 1544/2000 [21:40<06:20,  1.20it/s]

torch.Size([1, 67520])


Processing audio:  77%|███████▋  | 1545/2000 [21:41<05:33,  1.37it/s]

torch.Size([1, 132480])


Processing audio:  77%|███████▋  | 1546/2000 [21:41<05:16,  1.43it/s]

torch.Size([1, 172480])


Processing audio:  77%|███████▋  | 1547/2000 [21:42<05:21,  1.41it/s]

torch.Size([1, 184800])


Processing audio:  77%|███████▋  | 1548/2000 [21:43<05:52,  1.28it/s]

torch.Size([1, 171840])


Processing audio:  77%|███████▋  | 1549/2000 [21:44<06:09,  1.22it/s]

torch.Size([1, 102080])


Processing audio:  78%|███████▊  | 1550/2000 [21:45<05:29,  1.37it/s]

torch.Size([1, 61600])


Processing audio:  78%|███████▊  | 1551/2000 [21:45<04:47,  1.56it/s]

torch.Size([1, 52160])


Processing audio:  78%|███████▊  | 1552/2000 [21:45<03:55,  1.90it/s]

torch.Size([1, 120960])


Processing audio:  78%|███████▊  | 1553/2000 [21:46<04:01,  1.85it/s]

torch.Size([1, 37120])


Processing audio:  78%|███████▊  | 1554/2000 [21:46<03:21,  2.21it/s]

torch.Size([1, 211200])


Processing audio:  78%|███████▊  | 1555/2000 [21:47<04:13,  1.75it/s]

torch.Size([1, 189920])


Processing audio:  78%|███████▊  | 1556/2000 [21:49<06:42,  1.10it/s]

torch.Size([1, 735680])


Processing audio:  78%|███████▊  | 1557/2000 [21:53<14:09,  1.92s/it]

torch.Size([1, 114560])


Processing audio:  78%|███████▊  | 1558/2000 [21:53<11:17,  1.53s/it]

torch.Size([1, 328160])


Processing audio:  78%|███████▊  | 1559/2000 [21:55<11:11,  1.52s/it]

torch.Size([1, 84800])


Processing audio:  78%|███████▊  | 1560/2000 [21:55<08:50,  1.20s/it]

torch.Size([1, 129760])


Processing audio:  78%|███████▊  | 1561/2000 [21:56<07:33,  1.03s/it]

torch.Size([1, 305920])


Processing audio:  78%|███████▊  | 1562/2000 [21:57<07:42,  1.06s/it]

torch.Size([1, 48320])


Processing audio:  78%|███████▊  | 1563/2000 [21:58<06:13,  1.17it/s]

torch.Size([1, 61920])


Processing audio:  78%|███████▊  | 1564/2000 [21:58<05:15,  1.38it/s]

torch.Size([1, 166240])


Processing audio:  78%|███████▊  | 1565/2000 [21:59<05:08,  1.41it/s]

torch.Size([1, 140160])


Processing audio:  78%|███████▊  | 1566/2000 [21:59<05:26,  1.33it/s]

torch.Size([1, 137440])


Processing audio:  78%|███████▊  | 1567/2000 [22:00<05:31,  1.31it/s]

torch.Size([1, 208800])


Processing audio:  78%|███████▊  | 1568/2000 [22:02<06:59,  1.03it/s]

torch.Size([1, 437600])


Processing audio:  78%|███████▊  | 1569/2000 [22:04<09:56,  1.38s/it]

torch.Size([1, 212960])


Processing audio:  78%|███████▊  | 1570/2000 [22:05<09:19,  1.30s/it]

torch.Size([1, 582080])


Processing audio:  79%|███████▊  | 1571/2000 [22:08<12:50,  1.79s/it]

torch.Size([1, 142400])


Processing audio:  79%|███████▊  | 1572/2000 [22:09<10:39,  1.49s/it]

torch.Size([1, 48000])


Processing audio:  79%|███████▊  | 1573/2000 [22:09<08:06,  1.14s/it]

torch.Size([1, 431360])


Processing audio:  79%|███████▊  | 1574/2000 [22:11<10:07,  1.43s/it]

torch.Size([1, 61600])


Processing audio:  79%|███████▉  | 1575/2000 [22:12<07:52,  1.11s/it]

torch.Size([1, 122560])


Processing audio:  79%|███████▉  | 1576/2000 [22:12<06:38,  1.07it/s]

torch.Size([1, 50080])


Processing audio:  79%|███████▉  | 1577/2000 [22:13<05:18,  1.33it/s]

torch.Size([1, 60160])


Processing audio:  79%|███████▉  | 1578/2000 [22:13<04:13,  1.67it/s]

torch.Size([1, 145600])


Processing audio:  79%|███████▉  | 1579/2000 [22:14<05:03,  1.39it/s]

torch.Size([1, 164960])


Processing audio:  79%|███████▉  | 1580/2000 [22:15<06:11,  1.13it/s]

torch.Size([1, 247360])


Processing audio:  79%|███████▉  | 1581/2000 [22:17<07:37,  1.09s/it]

torch.Size([1, 91040])


Processing audio:  79%|███████▉  | 1582/2000 [22:17<06:28,  1.08it/s]

torch.Size([1, 98240])


Processing audio:  79%|███████▉  | 1583/2000 [22:18<05:39,  1.23it/s]

torch.Size([1, 262080])


Processing audio:  79%|███████▉  | 1585/2000 [22:19<04:41,  1.47it/s]

torch.Size([1, 21600])
torch.Size([1, 148800])


Processing audio:  79%|███████▉  | 1586/2000 [22:20<04:53,  1.41it/s]

torch.Size([1, 80160])


Processing audio:  79%|███████▉  | 1587/2000 [22:20<04:24,  1.56it/s]

torch.Size([1, 74880])


Processing audio:  79%|███████▉  | 1588/2000 [22:21<03:53,  1.76it/s]

torch.Size([1, 172800])


Processing audio:  79%|███████▉  | 1589/2000 [22:22<04:26,  1.54it/s]

torch.Size([1, 145600])


Processing audio:  80%|███████▉  | 1590/2000 [22:22<04:36,  1.49it/s]

torch.Size([1, 89280])


Processing audio:  80%|███████▉  | 1591/2000 [22:23<04:08,  1.65it/s]

torch.Size([1, 115680])


Processing audio:  80%|███████▉  | 1592/2000 [22:23<04:05,  1.66it/s]

torch.Size([1, 91520])


Processing audio:  80%|███████▉  | 1593/2000 [22:24<03:44,  1.81it/s]

torch.Size([1, 209280])


Processing audio:  80%|███████▉  | 1594/2000 [22:25<04:48,  1.41it/s]

torch.Size([1, 151040])


Processing audio:  80%|███████▉  | 1595/2000 [22:26<04:58,  1.36it/s]

torch.Size([1, 87360])


Processing audio:  80%|███████▉  | 1596/2000 [22:26<04:29,  1.50it/s]

torch.Size([1, 194400])


Processing audio:  80%|███████▉  | 1597/2000 [22:28<05:58,  1.13it/s]

torch.Size([1, 57120])


Processing audio:  80%|███████▉  | 1598/2000 [22:28<05:08,  1.30it/s]

torch.Size([1, 86240])


Processing audio:  80%|███████▉  | 1599/2000 [22:29<04:53,  1.37it/s]

torch.Size([1, 270080])


Processing audio:  80%|████████  | 1600/2000 [22:30<06:28,  1.03it/s]

torch.Size([1, 181760])


Processing audio:  80%|████████  | 1601/2000 [22:31<06:45,  1.02s/it]

torch.Size([1, 110720])


Processing audio:  80%|████████  | 1602/2000 [22:32<05:59,  1.11it/s]

torch.Size([1, 21440])


Processing audio:  80%|████████  | 1603/2000 [22:32<04:35,  1.44it/s]

torch.Size([1, 141920])


Processing audio:  80%|████████  | 1604/2000 [22:33<04:36,  1.43it/s]

torch.Size([1, 321280])


Processing audio:  80%|████████  | 1605/2000 [22:34<06:11,  1.06it/s]

torch.Size([1, 160480])


Processing audio:  80%|████████  | 1606/2000 [22:35<06:10,  1.06it/s]

torch.Size([1, 181440])


Processing audio:  80%|████████  | 1607/2000 [22:36<05:57,  1.10it/s]

torch.Size([1, 50400])


Processing audio:  80%|████████  | 1608/2000 [22:37<04:58,  1.31it/s]

torch.Size([1, 69280])


Processing audio:  80%|████████  | 1609/2000 [22:37<04:14,  1.54it/s]

torch.Size([1, 160160])


Processing audio:  80%|████████  | 1610/2000 [22:38<04:51,  1.34it/s]

torch.Size([1, 85120])


Processing audio:  81%|████████  | 1611/2000 [22:38<04:24,  1.47it/s]

torch.Size([1, 100640])


Processing audio:  81%|████████  | 1612/2000 [22:39<04:04,  1.58it/s]

torch.Size([1, 35680])


Processing audio:  81%|████████  | 1613/2000 [22:39<03:18,  1.95it/s]

torch.Size([1, 200320])


Processing audio:  81%|████████  | 1614/2000 [22:41<05:21,  1.20it/s]

torch.Size([1, 139040])


Processing audio:  81%|████████  | 1615/2000 [22:42<05:24,  1.19it/s]

torch.Size([1, 67040])


Processing audio:  81%|████████  | 1616/2000 [22:42<05:00,  1.28it/s]

torch.Size([1, 245120])


Processing audio:  81%|████████  | 1617/2000 [22:44<05:58,  1.07it/s]

torch.Size([1, 44480])


Processing audio:  81%|████████  | 1618/2000 [22:44<04:44,  1.34it/s]

torch.Size([1, 131360])


Processing audio:  81%|████████  | 1619/2000 [22:45<04:40,  1.36it/s]

torch.Size([1, 207040])


Processing audio:  81%|████████  | 1620/2000 [22:45<04:45,  1.33it/s]

torch.Size([1, 223520])


Processing audio:  81%|████████  | 1621/2000 [22:47<05:39,  1.12it/s]

torch.Size([1, 108480])


Processing audio:  81%|████████  | 1622/2000 [22:47<05:01,  1.25it/s]

torch.Size([1, 84320])


Processing audio:  81%|████████  | 1623/2000 [22:48<04:26,  1.41it/s]

torch.Size([1, 123840])


Processing audio:  81%|████████  | 1624/2000 [22:48<04:32,  1.38it/s]

torch.Size([1, 179360])


Processing audio:  81%|████████▏ | 1625/2000 [22:49<04:53,  1.28it/s]

torch.Size([1, 104960])


Processing audio:  81%|████████▏ | 1626/2000 [22:50<04:34,  1.36it/s]

torch.Size([1, 89280])


Processing audio:  81%|████████▏ | 1627/2000 [22:51<04:19,  1.44it/s]

torch.Size([1, 215040])


Processing audio:  81%|████████▏ | 1628/2000 [22:51<04:40,  1.32it/s]

torch.Size([1, 140640])


Processing audio:  81%|████████▏ | 1629/2000 [22:52<04:30,  1.37it/s]

torch.Size([1, 80960])


Processing audio:  82%|████████▏ | 1630/2000 [22:53<03:51,  1.60it/s]

torch.Size([1, 349760])


Processing audio:  82%|████████▏ | 1631/2000 [22:55<07:24,  1.20s/it]

torch.Size([1, 144000])


Processing audio:  82%|████████▏ | 1632/2000 [22:56<07:04,  1.15s/it]

torch.Size([1, 98400])


Processing audio:  82%|████████▏ | 1633/2000 [22:56<05:35,  1.09it/s]

torch.Size([1, 155200])


Processing audio:  82%|████████▏ | 1634/2000 [22:57<05:26,  1.12it/s]

torch.Size([1, 113280])


Processing audio:  82%|████████▏ | 1635/2000 [22:58<04:43,  1.29it/s]

torch.Size([1, 109760])


Processing audio:  82%|████████▏ | 1636/2000 [22:58<04:23,  1.38it/s]

torch.Size([1, 110400])


Processing audio:  82%|████████▏ | 1637/2000 [22:59<04:08,  1.46it/s]

torch.Size([1, 269600])


Processing audio:  82%|████████▏ | 1638/2000 [23:00<05:13,  1.16it/s]

torch.Size([1, 236640])


Processing audio:  82%|████████▏ | 1639/2000 [23:01<05:34,  1.08it/s]

torch.Size([1, 105120])


Processing audio:  82%|████████▏ | 1640/2000 [23:02<04:52,  1.23it/s]

torch.Size([1, 143840])


Processing audio:  82%|████████▏ | 1641/2000 [23:03<04:34,  1.31it/s]

torch.Size([1, 180640])


Processing audio:  82%|████████▏ | 1642/2000 [23:04<04:50,  1.23it/s]

torch.Size([1, 239680])


Processing audio:  82%|████████▏ | 1644/2000 [23:05<04:03,  1.46it/s]

torch.Size([1, 27360])
torch.Size([1, 136960])


Processing audio:  82%|████████▏ | 1645/2000 [23:05<04:04,  1.45it/s]

torch.Size([1, 125760])


Processing audio:  82%|████████▏ | 1646/2000 [23:06<04:28,  1.32it/s]

torch.Size([1, 170400])


Processing audio:  82%|████████▏ | 1647/2000 [23:07<04:53,  1.20it/s]

torch.Size([1, 66080])


Processing audio:  82%|████████▏ | 1648/2000 [23:08<04:33,  1.29it/s]

torch.Size([1, 99520])


Processing audio:  82%|████████▏ | 1649/2000 [23:09<04:19,  1.35it/s]

torch.Size([1, 140800])


Processing audio:  82%|████████▎ | 1650/2000 [23:10<04:26,  1.31it/s]

torch.Size([1, 156640])


Processing audio:  83%|████████▎ | 1651/2000 [23:10<04:35,  1.27it/s]

torch.Size([1, 62560])


Processing audio:  83%|████████▎ | 1652/2000 [23:11<03:47,  1.53it/s]

torch.Size([1, 54560])


Processing audio:  83%|████████▎ | 1653/2000 [23:11<03:12,  1.81it/s]

torch.Size([1, 168640])


Processing audio:  83%|████████▎ | 1654/2000 [23:12<03:49,  1.51it/s]

torch.Size([1, 242880])


Processing audio:  83%|████████▎ | 1655/2000 [23:13<04:52,  1.18it/s]

torch.Size([1, 208320])


Processing audio:  83%|████████▎ | 1656/2000 [23:14<05:09,  1.11it/s]

torch.Size([1, 89120])


Processing audio:  83%|████████▎ | 1657/2000 [23:15<04:23,  1.30it/s]

torch.Size([1, 55520])


Processing audio:  83%|████████▎ | 1658/2000 [23:15<03:32,  1.61it/s]

torch.Size([1, 233120])


Processing audio:  83%|████████▎ | 1659/2000 [23:16<04:32,  1.25it/s]

torch.Size([1, 108320])


Processing audio:  83%|████████▎ | 1660/2000 [23:17<04:03,  1.39it/s]

torch.Size([1, 195520])


Processing audio:  83%|████████▎ | 1661/2000 [23:18<04:42,  1.20it/s]

torch.Size([1, 67520])


Processing audio:  83%|████████▎ | 1662/2000 [23:18<03:54,  1.44it/s]

torch.Size([1, 190720])


Processing audio:  83%|████████▎ | 1663/2000 [23:19<04:03,  1.38it/s]

torch.Size([1, 88960])


Processing audio:  83%|████████▎ | 1664/2000 [23:20<03:52,  1.44it/s]

torch.Size([1, 68640])


Processing audio:  83%|████████▎ | 1665/2000 [23:20<03:37,  1.54it/s]

torch.Size([1, 166080])


Processing audio:  83%|████████▎ | 1666/2000 [23:21<04:14,  1.31it/s]

torch.Size([1, 306240])


Processing audio:  83%|████████▎ | 1667/2000 [23:23<05:11,  1.07it/s]

torch.Size([1, 114880])


Processing audio:  83%|████████▎ | 1668/2000 [23:23<04:51,  1.14it/s]

torch.Size([1, 524480])


Processing audio:  83%|████████▎ | 1669/2000 [23:26<08:03,  1.46s/it]

torch.Size([1, 69920])


Processing audio:  84%|████████▎ | 1670/2000 [23:27<06:27,  1.17s/it]

torch.Size([1, 153440])


Processing audio:  84%|████████▎ | 1671/2000 [23:28<06:05,  1.11s/it]

torch.Size([1, 212640])


Processing audio:  84%|████████▎ | 1672/2000 [23:29<06:15,  1.15s/it]

torch.Size([1, 75680])


Processing audio:  84%|████████▎ | 1673/2000 [23:29<04:56,  1.10it/s]

torch.Size([1, 56160])


Processing audio:  84%|████████▎ | 1674/2000 [23:29<03:59,  1.36it/s]

torch.Size([1, 178560])


Processing audio:  84%|████████▍ | 1675/2000 [23:30<04:11,  1.29it/s]

torch.Size([1, 139040])


Processing audio:  84%|████████▍ | 1676/2000 [23:31<04:14,  1.27it/s]

torch.Size([1, 238240])


Processing audio:  84%|████████▍ | 1677/2000 [23:32<04:51,  1.11it/s]

torch.Size([1, 54400])


Processing audio:  84%|████████▍ | 1678/2000 [23:33<03:53,  1.38it/s]

torch.Size([1, 27200])


Processing audio:  84%|████████▍ | 1679/2000 [23:33<03:07,  1.72it/s]

torch.Size([1, 118240])


Processing audio:  84%|████████▍ | 1680/2000 [23:34<03:51,  1.38it/s]

torch.Size([1, 85440])


Processing audio:  84%|████████▍ | 1681/2000 [23:35<03:55,  1.36it/s]

torch.Size([1, 178560])


Processing audio:  84%|████████▍ | 1682/2000 [23:36<04:02,  1.31it/s]

torch.Size([1, 169760])


Processing audio:  84%|████████▍ | 1683/2000 [23:36<03:47,  1.39it/s]

torch.Size([1, 113280])


Processing audio:  84%|████████▍ | 1684/2000 [23:37<03:33,  1.48it/s]

torch.Size([1, 221280])


Processing audio:  84%|████████▍ | 1686/2000 [23:38<03:01,  1.73it/s]

torch.Size([1, 35200])
torch.Size([1, 72000])


Processing audio:  84%|████████▍ | 1688/2000 [23:38<02:16,  2.28it/s]

torch.Size([1, 57920])
torch.Size([1, 202720])


Processing audio:  84%|████████▍ | 1690/2000 [23:40<02:20,  2.20it/s]

torch.Size([1, 32320])
torch.Size([1, 22880])


Processing audio:  85%|████████▍ | 1691/2000 [23:40<01:58,  2.62it/s]

torch.Size([1, 215680])


Processing audio:  85%|████████▍ | 1692/2000 [23:41<02:58,  1.72it/s]

torch.Size([1, 310240])


Processing audio:  85%|████████▍ | 1693/2000 [23:42<04:16,  1.20it/s]

torch.Size([1, 305920])


Processing audio:  85%|████████▍ | 1694/2000 [23:44<05:11,  1.02s/it]

torch.Size([1, 81120])


Processing audio:  85%|████████▍ | 1695/2000 [23:44<04:16,  1.19it/s]

torch.Size([1, 83360])


Processing audio:  85%|████████▍ | 1696/2000 [23:45<03:42,  1.37it/s]

torch.Size([1, 128000])


Processing audio:  85%|████████▍ | 1697/2000 [23:45<03:39,  1.38it/s]

torch.Size([1, 82400])


Processing audio:  85%|████████▍ | 1698/2000 [23:46<03:20,  1.51it/s]

torch.Size([1, 404960])


Processing audio:  85%|████████▌ | 1700/2000 [23:48<04:22,  1.14it/s]

torch.Size([1, 30560])
torch.Size([1, 85280])


Processing audio:  85%|████████▌ | 1701/2000 [23:49<03:43,  1.34it/s]

torch.Size([1, 165920])


Processing audio:  85%|████████▌ | 1702/2000 [23:50<03:48,  1.31it/s]

torch.Size([1, 43680])


Processing audio:  85%|████████▌ | 1703/2000 [23:50<03:05,  1.60it/s]

torch.Size([1, 227200])


Processing audio:  85%|████████▌ | 1704/2000 [23:51<03:45,  1.31it/s]

torch.Size([1, 44160])


Processing audio:  85%|████████▌ | 1705/2000 [23:51<03:03,  1.61it/s]

torch.Size([1, 181120])


Processing audio:  85%|████████▌ | 1706/2000 [23:52<03:12,  1.53it/s]

torch.Size([1, 126080])


Processing audio:  85%|████████▌ | 1707/2000 [23:53<03:10,  1.54it/s]

torch.Size([1, 84800])


Processing audio:  85%|████████▌ | 1708/2000 [23:53<02:52,  1.70it/s]

torch.Size([1, 83520])


Processing audio:  85%|████████▌ | 1709/2000 [23:54<02:45,  1.76it/s]

torch.Size([1, 32320])
torch.Size([1, 132320])


Processing audio:  86%|████████▌ | 1712/2000 [23:54<01:52,  2.56it/s]

torch.Size([1, 11840])
torch.Size([1, 264000])


Processing audio:  86%|████████▌ | 1713/2000 [23:56<03:02,  1.58it/s]

torch.Size([1, 149440])


Processing audio:  86%|████████▌ | 1714/2000 [23:57<03:09,  1.51it/s]

torch.Size([1, 226880])


Processing audio:  86%|████████▌ | 1715/2000 [23:58<03:47,  1.25it/s]

torch.Size([1, 142240])


Processing audio:  86%|████████▌ | 1716/2000 [23:59<03:58,  1.19it/s]

torch.Size([1, 96480])


Processing audio:  86%|████████▌ | 1717/2000 [23:59<03:52,  1.22it/s]

torch.Size([1, 17120])


Processing audio:  86%|████████▌ | 1718/2000 [24:00<03:02,  1.54it/s]

torch.Size([1, 77600])


Processing audio:  86%|████████▌ | 1719/2000 [24:00<02:55,  1.60it/s]

torch.Size([1, 127040])


Processing audio:  86%|████████▌ | 1720/2000 [24:01<03:32,  1.32it/s]

torch.Size([1, 172000])


Processing audio:  86%|████████▌ | 1721/2000 [24:02<03:32,  1.31it/s]

torch.Size([1, 127360])


Processing audio:  86%|████████▌ | 1722/2000 [24:03<03:27,  1.34it/s]

torch.Size([1, 92000])


Processing audio:  86%|████████▌ | 1723/2000 [24:03<03:06,  1.48it/s]

torch.Size([1, 71200])


Processing audio:  86%|████████▋ | 1725/2000 [24:04<02:03,  2.23it/s]

torch.Size([1, 20640])
torch.Size([1, 604960])


Processing audio:  86%|████████▋ | 1726/2000 [24:09<08:42,  1.91s/it]

torch.Size([1, 67200])


Processing audio:  86%|████████▋ | 1727/2000 [24:09<06:35,  1.45s/it]

torch.Size([1, 117600])


Processing audio:  86%|████████▋ | 1728/2000 [24:10<05:21,  1.18s/it]

torch.Size([1, 123840])


Processing audio:  86%|████████▋ | 1729/2000 [24:11<04:43,  1.05s/it]

torch.Size([1, 87840])


Processing audio:  86%|████████▋ | 1730/2000 [24:11<04:05,  1.10it/s]

torch.Size([1, 109280])


Processing audio:  87%|████████▋ | 1731/2000 [24:12<03:58,  1.13it/s]

torch.Size([1, 170400])


Processing audio:  87%|████████▋ | 1732/2000 [24:13<04:22,  1.02it/s]

torch.Size([1, 222400])


Processing audio:  87%|████████▋ | 1733/2000 [24:15<05:11,  1.17s/it]

torch.Size([1, 211200])


Processing audio:  87%|████████▋ | 1734/2000 [24:16<04:54,  1.11s/it]

torch.Size([1, 51840])


Processing audio:  87%|████████▋ | 1736/2000 [24:16<02:49,  1.56it/s]

torch.Size([1, 43040])
torch.Size([1, 89920])


Processing audio:  87%|████████▋ | 1737/2000 [24:17<02:39,  1.65it/s]

torch.Size([1, 67360])


Processing audio:  87%|████████▋ | 1738/2000 [24:17<02:15,  1.93it/s]

torch.Size([1, 125760])


Processing audio:  87%|████████▋ | 1739/2000 [24:18<02:37,  1.66it/s]

torch.Size([1, 73920])


Processing audio:  87%|████████▋ | 1740/2000 [24:18<02:27,  1.76it/s]

torch.Size([1, 17920])
torch.Size([1, 57120])


Processing audio:  87%|████████▋ | 1742/2000 [24:19<01:44,  2.47it/s]

torch.Size([1, 213440])


Processing audio:  87%|████████▋ | 1743/2000 [24:20<02:21,  1.82it/s]

torch.Size([1, 124480])


Processing audio:  87%|████████▋ | 1744/2000 [24:21<02:34,  1.66it/s]

torch.Size([1, 179840])


Processing audio:  87%|████████▋ | 1745/2000 [24:22<02:51,  1.49it/s]

torch.Size([1, 18400])
torch.Size([1, 72480])


Processing audio:  87%|████████▋ | 1747/2000 [24:22<02:06,  2.00it/s]

torch.Size([1, 91040])


Processing audio:  87%|████████▋ | 1748/2000 [24:23<02:11,  1.92it/s]

torch.Size([1, 57600])


Processing audio:  87%|████████▋ | 1749/2000 [24:23<02:02,  2.05it/s]

torch.Size([1, 275520])


Processing audio:  88%|████████▊ | 1751/2000 [24:25<02:18,  1.80it/s]

torch.Size([1, 32480])
torch.Size([1, 97120])


Processing audio:  88%|████████▊ | 1752/2000 [24:25<02:36,  1.58it/s]

torch.Size([1, 23200])


Processing audio:  88%|████████▊ | 1753/2000 [24:26<02:12,  1.86it/s]

torch.Size([1, 127360])


Processing audio:  88%|████████▊ | 1754/2000 [24:26<02:30,  1.64it/s]

torch.Size([1, 126400])


Processing audio:  88%|████████▊ | 1755/2000 [24:27<02:56,  1.39it/s]

torch.Size([1, 460000])


Processing audio:  88%|████████▊ | 1756/2000 [24:30<04:35,  1.13s/it]

torch.Size([1, 65280])


Processing audio:  88%|████████▊ | 1757/2000 [24:30<03:34,  1.13it/s]

torch.Size([1, 68320])


Processing audio:  88%|████████▊ | 1758/2000 [24:30<02:54,  1.38it/s]

torch.Size([1, 142560])


Processing audio:  88%|████████▊ | 1759/2000 [24:31<02:50,  1.41it/s]

torch.Size([1, 61120])


Processing audio:  88%|████████▊ | 1760/2000 [24:31<02:24,  1.67it/s]

torch.Size([1, 216800])


Processing audio:  88%|████████▊ | 1761/2000 [24:32<02:55,  1.36it/s]

torch.Size([1, 62560])


Processing audio:  88%|████████▊ | 1762/2000 [24:33<02:30,  1.58it/s]

torch.Size([1, 106720])


Processing audio:  88%|████████▊ | 1763/2000 [24:33<02:19,  1.70it/s]

torch.Size([1, 155680])


Processing audio:  88%|████████▊ | 1764/2000 [24:34<02:28,  1.59it/s]

torch.Size([1, 99680])


Processing audio:  88%|████████▊ | 1765/2000 [24:34<02:24,  1.63it/s]

torch.Size([1, 259520])


Processing audio:  88%|████████▊ | 1766/2000 [24:36<03:18,  1.18it/s]

torch.Size([1, 52480])


Processing audio:  88%|████████▊ | 1767/2000 [24:36<02:44,  1.42it/s]

torch.Size([1, 151360])


Processing audio:  88%|████████▊ | 1768/2000 [24:37<02:47,  1.39it/s]

torch.Size([1, 137120])


Processing audio:  88%|████████▊ | 1769/2000 [24:37<02:29,  1.55it/s]

torch.Size([1, 43360])


Processing audio:  88%|████████▊ | 1770/2000 [24:38<02:07,  1.80it/s]

torch.Size([1, 83520])


Processing audio:  89%|████████▊ | 1771/2000 [24:39<02:22,  1.60it/s]

torch.Size([1, 180480])


Processing audio:  89%|████████▊ | 1772/2000 [24:40<02:46,  1.37it/s]

torch.Size([1, 124160])


Processing audio:  89%|████████▊ | 1773/2000 [24:40<03:02,  1.24it/s]

torch.Size([1, 143200])


Processing audio:  89%|████████▊ | 1774/2000 [24:41<02:53,  1.30it/s]

torch.Size([1, 193920])


Processing audio:  89%|████████▉ | 1775/2000 [24:42<03:10,  1.18it/s]

torch.Size([1, 207040])


Processing audio:  89%|████████▉ | 1776/2000 [24:43<03:25,  1.09it/s]

torch.Size([1, 72800])


Processing audio:  89%|████████▉ | 1777/2000 [24:44<02:45,  1.35it/s]

torch.Size([1, 109760])


Processing audio:  89%|████████▉ | 1778/2000 [24:44<02:33,  1.45it/s]

torch.Size([1, 97600])


Processing audio:  89%|████████▉ | 1779/2000 [24:45<02:27,  1.50it/s]

torch.Size([1, 49760])


Processing audio:  89%|████████▉ | 1780/2000 [24:45<01:58,  1.86it/s]

torch.Size([1, 199360])


Processing audio:  89%|████████▉ | 1781/2000 [24:46<02:13,  1.64it/s]

torch.Size([1, 135680])


Processing audio:  89%|████████▉ | 1782/2000 [24:47<02:27,  1.48it/s]

torch.Size([1, 154720])


Processing audio:  89%|████████▉ | 1783/2000 [24:47<02:27,  1.47it/s]

torch.Size([1, 194720])


Processing audio:  89%|████████▉ | 1784/2000 [24:48<02:49,  1.27it/s]

torch.Size([1, 153600])


Processing audio:  89%|████████▉ | 1785/2000 [24:49<02:45,  1.30it/s]

torch.Size([1, 67360])


Processing audio:  89%|████████▉ | 1786/2000 [24:50<02:23,  1.49it/s]

torch.Size([1, 120320])


Processing audio:  89%|████████▉ | 1787/2000 [24:50<02:23,  1.49it/s]

torch.Size([1, 66560])


Processing audio:  89%|████████▉ | 1788/2000 [24:51<02:02,  1.73it/s]

torch.Size([1, 139840])


Processing audio:  89%|████████▉ | 1789/2000 [24:52<02:27,  1.43it/s]

torch.Size([1, 236480])


Processing audio:  90%|████████▉ | 1791/2000 [24:53<02:24,  1.44it/s]

torch.Size([1, 32320])
torch.Size([1, 192640])


Processing audio:  90%|████████▉ | 1792/2000 [24:54<02:56,  1.18it/s]

torch.Size([1, 147840])


Processing audio:  90%|████████▉ | 1793/2000 [24:55<02:56,  1.17it/s]

torch.Size([1, 112640])


Processing audio:  90%|████████▉ | 1794/2000 [24:56<02:46,  1.24it/s]

torch.Size([1, 281600])


Processing audio:  90%|████████▉ | 1795/2000 [24:57<03:13,  1.06it/s]

torch.Size([1, 170400])


Processing audio:  90%|████████▉ | 1797/2000 [24:58<02:21,  1.43it/s]

torch.Size([1, 20160])
torch.Size([1, 285600])


Processing audio:  90%|████████▉ | 1798/2000 [25:00<03:05,  1.09it/s]

torch.Size([1, 267680])


Processing audio:  90%|████████▉ | 1799/2000 [25:01<03:25,  1.02s/it]

torch.Size([1, 184480])


Processing audio:  90%|█████████ | 1800/2000 [25:02<03:16,  1.02it/s]

torch.Size([1, 171360])


Processing audio:  90%|█████████ | 1801/2000 [25:03<03:03,  1.09it/s]

torch.Size([1, 215360])


Processing audio:  90%|█████████ | 1802/2000 [25:03<02:53,  1.14it/s]

torch.Size([1, 66240])


Processing audio:  90%|█████████ | 1803/2000 [25:04<02:30,  1.31it/s]

torch.Size([1, 131840])


Processing audio:  90%|█████████ | 1804/2000 [25:05<02:32,  1.29it/s]

torch.Size([1, 159680])


Processing audio:  90%|█████████ | 1805/2000 [25:06<02:51,  1.13it/s]

torch.Size([1, 308640])


Processing audio:  90%|█████████ | 1806/2000 [25:11<06:54,  2.14s/it]

torch.Size([1, 352960])


Processing audio:  90%|█████████ | 1807/2000 [25:12<06:06,  1.90s/it]

torch.Size([1, 83520])


Processing audio:  90%|█████████ | 1808/2000 [25:13<04:43,  1.48s/it]

torch.Size([1, 57760])


Processing audio:  90%|█████████ | 1809/2000 [25:13<03:42,  1.16s/it]

torch.Size([1, 207200])


Processing audio:  90%|█████████ | 1810/2000 [25:14<03:32,  1.12s/it]

torch.Size([1, 185760])


Processing audio:  91%|█████████ | 1811/2000 [25:15<03:27,  1.10s/it]

torch.Size([1, 137440])


Processing audio:  91%|█████████ | 1812/2000 [25:16<02:58,  1.05it/s]

torch.Size([1, 203200])


Processing audio:  91%|█████████ | 1813/2000 [25:17<03:05,  1.01it/s]

torch.Size([1, 475840])


Processing audio:  91%|█████████ | 1814/2000 [25:20<05:07,  1.66s/it]

torch.Size([1, 88960])


Processing audio:  91%|█████████ | 1816/2000 [25:21<03:02,  1.01it/s]

torch.Size([1, 25120])
torch.Size([1, 148320])


Processing audio:  91%|█████████ | 1817/2000 [25:21<02:24,  1.26it/s]

torch.Size([1, 97440])


Processing audio:  91%|█████████ | 1818/2000 [25:22<02:08,  1.42it/s]

torch.Size([1, 130400])


Processing audio:  91%|█████████ | 1819/2000 [25:23<02:11,  1.38it/s]

torch.Size([1, 58240])


Processing audio:  91%|█████████ | 1820/2000 [25:23<01:49,  1.64it/s]

torch.Size([1, 457280])


Processing audio:  91%|█████████ | 1821/2000 [25:25<03:02,  1.02s/it]

torch.Size([1, 734080])


Processing audio:  91%|█████████ | 1822/2000 [25:31<07:55,  2.67s/it]

torch.Size([1, 78080])


Processing audio:  91%|█████████ | 1823/2000 [25:32<06:06,  2.07s/it]

torch.Size([1, 146080])


Processing audio:  91%|█████████ | 1824/2000 [25:33<05:10,  1.76s/it]

torch.Size([1, 180480])


Processing audio:  91%|█████████▏| 1825/2000 [25:34<04:13,  1.45s/it]

torch.Size([1, 264480])


Processing audio:  91%|█████████▏| 1826/2000 [25:35<04:03,  1.40s/it]

torch.Size([1, 354720])


Processing audio:  91%|█████████▏| 1827/2000 [25:37<04:14,  1.47s/it]

torch.Size([1, 97280])


Processing audio:  91%|█████████▏| 1828/2000 [25:37<03:28,  1.21s/it]

torch.Size([1, 367680])


Processing audio:  91%|█████████▏| 1829/2000 [25:39<03:57,  1.39s/it]

torch.Size([1, 128480])


Processing audio:  92%|█████████▏| 1830/2000 [25:40<03:21,  1.18s/it]

torch.Size([1, 274880])


Processing audio:  92%|█████████▏| 1831/2000 [25:41<03:29,  1.24s/it]

torch.Size([1, 82240])


Processing audio:  92%|█████████▏| 1832/2000 [25:42<02:47,  1.00it/s]

torch.Size([1, 212960])


Processing audio:  92%|█████████▏| 1833/2000 [25:43<02:47,  1.00s/it]

torch.Size([1, 209120])


Processing audio:  92%|█████████▏| 1834/2000 [25:44<02:57,  1.07s/it]

torch.Size([1, 179200])


Processing audio:  92%|█████████▏| 1835/2000 [25:45<03:09,  1.15s/it]

torch.Size([1, 144160])


Processing audio:  92%|█████████▏| 1836/2000 [25:46<02:58,  1.09s/it]

torch.Size([1, 55680])


Processing audio:  92%|█████████▏| 1837/2000 [25:47<02:23,  1.13it/s]

torch.Size([1, 154560])


Processing audio:  92%|█████████▏| 1838/2000 [25:47<02:26,  1.11it/s]

torch.Size([1, 93600])


Processing audio:  92%|█████████▏| 1839/2000 [25:48<02:10,  1.23it/s]

torch.Size([1, 146880])


Processing audio:  92%|█████████▏| 1840/2000 [25:49<01:57,  1.36it/s]

torch.Size([1, 107520])


Processing audio:  92%|█████████▏| 1841/2000 [25:49<01:45,  1.51it/s]

torch.Size([1, 133920])


Processing audio:  92%|█████████▏| 1842/2000 [25:50<01:32,  1.72it/s]

torch.Size([1, 69440])


Processing audio:  92%|█████████▏| 1843/2000 [25:50<01:23,  1.89it/s]

torch.Size([1, 72000])


Processing audio:  92%|█████████▏| 1844/2000 [25:50<01:22,  1.88it/s]

torch.Size([1, 195680])


Processing audio:  92%|█████████▏| 1845/2000 [25:51<01:42,  1.52it/s]

torch.Size([1, 79840])


Processing audio:  92%|█████████▏| 1846/2000 [25:52<01:30,  1.71it/s]

torch.Size([1, 144480])


Processing audio:  92%|█████████▏| 1847/2000 [25:52<01:26,  1.76it/s]

torch.Size([1, 126240])


Processing audio:  92%|█████████▏| 1848/2000 [25:53<01:33,  1.63it/s]

torch.Size([1, 102080])


Processing audio:  92%|█████████▏| 1849/2000 [25:54<01:27,  1.73it/s]

torch.Size([1, 158080])


Processing audio:  92%|█████████▎| 1850/2000 [25:54<01:38,  1.52it/s]

torch.Size([1, 145920])


Processing audio:  93%|█████████▎| 1851/2000 [25:55<01:41,  1.47it/s]

torch.Size([1, 110080])


Processing audio:  93%|█████████▎| 1852/2000 [25:56<01:35,  1.55it/s]

torch.Size([1, 228160])


Processing audio:  93%|█████████▎| 1853/2000 [25:57<01:52,  1.30it/s]

torch.Size([1, 62880])


Processing audio:  93%|█████████▎| 1854/2000 [25:57<01:48,  1.34it/s]

torch.Size([1, 143360])


Processing audio:  93%|█████████▎| 1855/2000 [25:59<02:07,  1.14it/s]

torch.Size([1, 71520])


Processing audio:  93%|█████████▎| 1856/2000 [25:59<01:48,  1.33it/s]

torch.Size([1, 161120])


Processing audio:  93%|█████████▎| 1857/2000 [26:00<01:55,  1.24it/s]

torch.Size([1, 160480])


Processing audio:  93%|█████████▎| 1858/2000 [26:01<01:49,  1.29it/s]

torch.Size([1, 222880])


Processing audio:  93%|█████████▎| 1859/2000 [26:02<02:01,  1.16it/s]

torch.Size([1, 68000])


Processing audio:  93%|█████████▎| 1860/2000 [26:02<01:39,  1.41it/s]

torch.Size([1, 207200])


Processing audio:  93%|█████████▎| 1861/2000 [26:03<01:51,  1.25it/s]

torch.Size([1, 355520])


Processing audio:  93%|█████████▎| 1862/2000 [26:05<02:19,  1.01s/it]

torch.Size([1, 137440])


Processing audio:  93%|█████████▎| 1863/2000 [26:05<02:06,  1.09it/s]

torch.Size([1, 60160])


Processing audio:  93%|█████████▎| 1864/2000 [26:06<01:43,  1.31it/s]

torch.Size([1, 104480])


Processing audio:  93%|█████████▎| 1865/2000 [26:06<01:32,  1.46it/s]

torch.Size([1, 183840])


Processing audio:  93%|█████████▎| 1866/2000 [26:07<01:26,  1.55it/s]

torch.Size([1, 152480])


Processing audio:  93%|█████████▎| 1867/2000 [26:08<01:38,  1.34it/s]

torch.Size([1, 288960])


Processing audio:  93%|█████████▎| 1868/2000 [26:09<02:12,  1.01s/it]

torch.Size([1, 69280])


Processing audio:  93%|█████████▎| 1869/2000 [26:10<01:53,  1.16it/s]

torch.Size([1, 109280])


Processing audio:  94%|█████████▎| 1870/2000 [26:11<01:47,  1.20it/s]

torch.Size([1, 181600])


Processing audio:  94%|█████████▎| 1871/2000 [26:12<01:59,  1.08it/s]

torch.Size([1, 318560])


Processing audio:  94%|█████████▎| 1872/2000 [26:14<02:25,  1.14s/it]

torch.Size([1, 61120])


Processing audio:  94%|█████████▎| 1873/2000 [26:14<01:54,  1.11it/s]

torch.Size([1, 79040])


Processing audio:  94%|█████████▎| 1874/2000 [26:14<01:36,  1.30it/s]

torch.Size([1, 248480])


Processing audio:  94%|█████████▍| 1875/2000 [26:15<01:48,  1.15it/s]

torch.Size([1, 121920])


Processing audio:  94%|█████████▍| 1876/2000 [26:16<01:39,  1.24it/s]

torch.Size([1, 109280])


Processing audio:  94%|█████████▍| 1877/2000 [26:17<01:28,  1.40it/s]

torch.Size([1, 137760])


Processing audio:  94%|█████████▍| 1878/2000 [26:17<01:30,  1.35it/s]

torch.Size([1, 215040])


Processing audio:  94%|█████████▍| 1879/2000 [26:18<01:38,  1.23it/s]

torch.Size([1, 167680])


Processing audio:  94%|█████████▍| 1880/2000 [26:19<01:38,  1.22it/s]

torch.Size([1, 61280])


Processing audio:  94%|█████████▍| 1881/2000 [26:20<01:19,  1.49it/s]

torch.Size([1, 73280])


Processing audio:  94%|█████████▍| 1882/2000 [26:20<01:05,  1.81it/s]

torch.Size([1, 116640])


Processing audio:  94%|█████████▍| 1884/2000 [26:21<00:53,  2.16it/s]

torch.Size([1, 38880])
torch.Size([1, 119040])


Processing audio:  94%|█████████▍| 1885/2000 [26:21<00:55,  2.07it/s]

torch.Size([1, 147840])


Processing audio:  94%|█████████▍| 1886/2000 [26:22<01:05,  1.74it/s]

torch.Size([1, 92960])


Processing audio:  94%|█████████▍| 1887/2000 [26:23<01:06,  1.70it/s]

torch.Size([1, 72000])


Processing audio:  94%|█████████▍| 1888/2000 [26:23<00:59,  1.90it/s]

torch.Size([1, 212480])


Processing audio:  94%|█████████▍| 1889/2000 [26:25<01:32,  1.20it/s]

torch.Size([1, 982720])


Processing audio:  94%|█████████▍| 1890/2000 [26:32<05:20,  2.92s/it]

torch.Size([1, 566400])


Processing audio:  95%|█████████▍| 1891/2000 [26:38<07:01,  3.87s/it]

torch.Size([1, 61440])


Processing audio:  95%|█████████▍| 1892/2000 [26:39<05:02,  2.80s/it]

torch.Size([1, 40000])


Processing audio:  95%|█████████▍| 1893/2000 [26:39<03:37,  2.03s/it]

torch.Size([1, 199200])


Processing audio:  95%|█████████▍| 1894/2000 [26:40<03:02,  1.72s/it]

torch.Size([1, 108960])


Processing audio:  95%|█████████▍| 1895/2000 [26:41<02:27,  1.40s/it]

torch.Size([1, 439360])


Processing audio:  95%|█████████▍| 1896/2000 [26:43<02:47,  1.61s/it]

torch.Size([1, 185120])


Processing audio:  95%|█████████▍| 1897/2000 [26:43<02:20,  1.36s/it]

torch.Size([1, 147360])


Processing audio:  95%|█████████▍| 1898/2000 [26:44<02:00,  1.18s/it]

torch.Size([1, 76160])


Processing audio:  95%|█████████▍| 1899/2000 [26:45<01:40,  1.01it/s]

torch.Size([1, 287520])


Processing audio:  95%|█████████▌| 1900/2000 [26:46<01:45,  1.06s/it]

torch.Size([1, 212160])


Processing audio:  95%|█████████▌| 1901/2000 [26:47<01:47,  1.09s/it]

torch.Size([1, 259360])


Processing audio:  95%|█████████▌| 1902/2000 [26:48<01:43,  1.06s/it]

torch.Size([1, 100640])


Processing audio:  95%|█████████▌| 1903/2000 [26:49<01:29,  1.08it/s]

torch.Size([1, 86720])


Processing audio:  95%|█████████▌| 1904/2000 [26:50<01:24,  1.13it/s]

torch.Size([1, 119200])


Processing audio:  95%|█████████▌| 1905/2000 [26:50<01:22,  1.16it/s]

torch.Size([1, 143840])


Processing audio:  95%|█████████▌| 1906/2000 [26:51<01:29,  1.06it/s]

torch.Size([1, 191840])


Processing audio:  95%|█████████▌| 1907/2000 [26:52<01:27,  1.06it/s]

torch.Size([1, 171680])


Processing audio:  95%|█████████▌| 1908/2000 [26:53<01:26,  1.07it/s]

torch.Size([1, 39680])


Processing audio:  95%|█████████▌| 1909/2000 [26:54<01:07,  1.35it/s]

torch.Size([1, 41600])


Processing audio:  96%|█████████▌| 1910/2000 [26:54<00:52,  1.71it/s]

torch.Size([1, 131840])


Processing audio:  96%|█████████▌| 1911/2000 [26:54<00:51,  1.72it/s]

torch.Size([1, 150560])


Processing audio:  96%|█████████▌| 1912/2000 [26:55<00:53,  1.64it/s]

torch.Size([1, 119680])


Processing audio:  96%|█████████▌| 1913/2000 [26:56<00:53,  1.63it/s]

torch.Size([1, 307040])


Processing audio:  96%|█████████▌| 1914/2000 [26:57<01:14,  1.15it/s]

torch.Size([1, 335040])


Processing audio:  96%|█████████▌| 1915/2000 [26:59<01:25,  1.01s/it]

torch.Size([1, 317120])


Processing audio:  96%|█████████▌| 1916/2000 [27:00<01:35,  1.14s/it]

torch.Size([1, 74080])


Processing audio:  96%|█████████▌| 1917/2000 [27:00<01:13,  1.13it/s]

torch.Size([1, 167040])


Processing audio:  96%|█████████▌| 1918/2000 [27:01<00:58,  1.39it/s]

torch.Size([1, 105600])


Processing audio:  96%|█████████▌| 1919/2000 [27:01<00:54,  1.48it/s]

torch.Size([1, 300160])


Processing audio:  96%|█████████▌| 1920/2000 [27:03<01:21,  1.02s/it]

torch.Size([1, 80800])


Processing audio:  96%|█████████▌| 1921/2000 [27:03<01:08,  1.15it/s]

torch.Size([1, 168640])


Processing audio:  96%|█████████▌| 1922/2000 [27:05<01:12,  1.08it/s]

torch.Size([1, 137280])


Processing audio:  96%|█████████▌| 1923/2000 [27:05<01:07,  1.13it/s]

torch.Size([1, 254720])


Processing audio:  96%|█████████▌| 1924/2000 [27:07<01:14,  1.02it/s]

torch.Size([1, 154720])


Processing audio:  96%|█████████▋| 1925/2000 [27:11<02:36,  2.08s/it]

torch.Size([1, 242080])


Processing audio:  96%|█████████▋| 1926/2000 [27:12<02:12,  1.79s/it]

torch.Size([1, 41760])


Processing audio:  96%|█████████▋| 1927/2000 [27:13<01:36,  1.32s/it]

torch.Size([1, 75360])


Processing audio:  96%|█████████▋| 1928/2000 [27:13<01:15,  1.05s/it]

torch.Size([1, 186560])


Processing audio:  96%|█████████▋| 1929/2000 [27:14<01:12,  1.02s/it]

torch.Size([1, 112640])


Processing audio:  96%|█████████▋| 1930/2000 [27:15<01:06,  1.05it/s]

torch.Size([1, 106720])


Processing audio:  97%|█████████▋| 1931/2000 [27:16<01:04,  1.07it/s]

torch.Size([1, 144480])


Processing audio:  97%|█████████▋| 1932/2000 [27:17<01:05,  1.03it/s]

torch.Size([1, 46720])


Processing audio:  97%|█████████▋| 1933/2000 [27:17<00:55,  1.22it/s]

torch.Size([1, 217600])


Processing audio:  97%|█████████▋| 1934/2000 [27:18<01:02,  1.05it/s]

torch.Size([1, 71520])


Processing audio:  97%|█████████▋| 1935/2000 [27:19<00:48,  1.34it/s]

torch.Size([1, 67520])


Processing audio:  97%|█████████▋| 1936/2000 [27:19<00:41,  1.56it/s]

torch.Size([1, 103840])


Processing audio:  97%|█████████▋| 1937/2000 [27:20<00:40,  1.54it/s]

torch.Size([1, 287040])


Processing audio:  97%|█████████▋| 1938/2000 [27:21<00:51,  1.21it/s]

torch.Size([1, 292320])


Processing audio:  97%|█████████▋| 1939/2000 [27:22<00:57,  1.07it/s]

torch.Size([1, 171840])


Processing audio:  97%|█████████▋| 1940/2000 [27:23<00:49,  1.20it/s]

torch.Size([1, 513760])


Processing audio:  97%|█████████▋| 1941/2000 [27:25<01:15,  1.29s/it]

torch.Size([1, 145120])


Processing audio:  97%|█████████▋| 1942/2000 [27:26<01:06,  1.15s/it]

torch.Size([1, 62080])


Processing audio:  97%|█████████▋| 1943/2000 [27:26<00:53,  1.07it/s]

torch.Size([1, 80320])


Processing audio:  97%|█████████▋| 1944/2000 [27:27<00:44,  1.27it/s]

torch.Size([1, 44640])


Processing audio:  97%|█████████▋| 1945/2000 [27:27<00:35,  1.56it/s]

torch.Size([1, 768320])


Processing audio:  97%|█████████▋| 1946/2000 [27:31<01:34,  1.75s/it]

torch.Size([1, 78080])


Processing audio:  97%|█████████▋| 1947/2000 [27:32<01:11,  1.34s/it]

torch.Size([1, 115360])


Processing audio:  97%|█████████▋| 1948/2000 [27:32<00:54,  1.05s/it]

torch.Size([1, 252640])


Processing audio:  97%|█████████▋| 1949/2000 [27:33<00:55,  1.09s/it]

torch.Size([1, 242240])


Processing audio:  98%|█████████▊| 1950/2000 [27:35<00:55,  1.12s/it]

torch.Size([1, 122080])


Processing audio:  98%|█████████▊| 1951/2000 [27:35<00:47,  1.04it/s]

torch.Size([1, 77440])
torch.Size([1, 214240])


Processing audio:  98%|█████████▊| 1953/2000 [27:36<00:37,  1.26it/s]

torch.Size([1, 113600])


Processing audio:  98%|█████████▊| 1954/2000 [27:37<00:34,  1.32it/s]

torch.Size([1, 96160])


Processing audio:  98%|█████████▊| 1955/2000 [27:37<00:31,  1.44it/s]

torch.Size([1, 173440])


Processing audio:  98%|█████████▊| 1956/2000 [27:38<00:33,  1.33it/s]

torch.Size([1, 164960])


Processing audio:  98%|█████████▊| 1957/2000 [27:39<00:31,  1.38it/s]

torch.Size([1, 164800])


Processing audio:  98%|█████████▊| 1958/2000 [27:40<00:32,  1.28it/s]

torch.Size([1, 58560])


Processing audio:  98%|█████████▊| 1959/2000 [27:40<00:25,  1.62it/s]

torch.Size([1, 96000])


Processing audio:  98%|█████████▊| 1960/2000 [27:41<00:24,  1.63it/s]

torch.Size([1, 80160])


Processing audio:  98%|█████████▊| 1961/2000 [27:41<00:24,  1.57it/s]

torch.Size([1, 376480])


Processing audio:  98%|█████████▊| 1962/2000 [27:44<00:43,  1.15s/it]

torch.Size([1, 78080])


Processing audio:  98%|█████████▊| 1963/2000 [27:44<00:35,  1.03it/s]

torch.Size([1, 101600])


Processing audio:  98%|█████████▊| 1964/2000 [27:45<00:30,  1.19it/s]

torch.Size([1, 95040])


Processing audio:  98%|█████████▊| 1965/2000 [27:45<00:26,  1.34it/s]

torch.Size([1, 168480])


Processing audio:  98%|█████████▊| 1966/2000 [27:46<00:25,  1.35it/s]

torch.Size([1, 113920])


Processing audio:  98%|█████████▊| 1967/2000 [27:47<00:22,  1.47it/s]

torch.Size([1, 36480])


Processing audio:  98%|█████████▊| 1968/2000 [27:47<00:17,  1.84it/s]

torch.Size([1, 124480])


Processing audio:  98%|█████████▊| 1969/2000 [27:48<00:18,  1.64it/s]

torch.Size([1, 601440])


Processing audio:  98%|█████████▊| 1970/2000 [27:53<01:00,  2.03s/it]

torch.Size([1, 134720])


Processing audio:  99%|█████████▊| 1971/2000 [27:54<00:49,  1.70s/it]

torch.Size([1, 239040])


Processing audio:  99%|█████████▊| 1972/2000 [27:55<00:45,  1.63s/it]

torch.Size([1, 169120])


Processing audio:  99%|█████████▊| 1973/2000 [27:57<00:41,  1.52s/it]

torch.Size([1, 270720])


Processing audio:  99%|█████████▊| 1974/2000 [27:58<00:37,  1.44s/it]

torch.Size([1, 166240])


Processing audio:  99%|█████████▉| 1975/2000 [27:59<00:31,  1.26s/it]

torch.Size([1, 135040])


Processing audio:  99%|█████████▉| 1976/2000 [27:59<00:26,  1.08s/it]

torch.Size([1, 67040])


Processing audio:  99%|█████████▉| 1977/2000 [28:00<00:19,  1.15it/s]

torch.Size([1, 46080])


Processing audio:  99%|█████████▉| 1978/2000 [28:00<00:14,  1.47it/s]

torch.Size([1, 94880])


Processing audio:  99%|█████████▉| 1979/2000 [28:01<00:12,  1.65it/s]

torch.Size([1, 171360])


Processing audio:  99%|█████████▉| 1980/2000 [28:01<00:13,  1.51it/s]

torch.Size([1, 154240])


Processing audio:  99%|█████████▉| 1981/2000 [28:02<00:12,  1.53it/s]

torch.Size([1, 74080])


Processing audio:  99%|█████████▉| 1982/2000 [28:02<00:10,  1.73it/s]

torch.Size([1, 235840])


Processing audio:  99%|█████████▉| 1983/2000 [28:03<00:12,  1.37it/s]

torch.Size([1, 335360])


Processing audio:  99%|█████████▉| 1984/2000 [28:05<00:15,  1.06it/s]

torch.Size([1, 98240])


Processing audio:  99%|█████████▉| 1985/2000 [28:05<00:12,  1.24it/s]

torch.Size([1, 105600])


Processing audio:  99%|█████████▉| 1986/2000 [28:06<00:10,  1.34it/s]

torch.Size([1, 127200])


Processing audio:  99%|█████████▉| 1987/2000 [28:07<00:09,  1.35it/s]

torch.Size([1, 283840])


Processing audio:  99%|█████████▉| 1988/2000 [28:09<00:12,  1.07s/it]

torch.Size([1, 80800])


Processing audio:  99%|█████████▉| 1989/2000 [28:09<00:10,  1.06it/s]

torch.Size([1, 91680])


Processing audio: 100%|█████████▉| 1990/2000 [28:10<00:08,  1.15it/s]

torch.Size([1, 62080])


Processing audio: 100%|█████████▉| 1991/2000 [28:10<00:06,  1.37it/s]

torch.Size([1, 155040])


Processing audio: 100%|█████████▉| 1992/2000 [28:11<00:05,  1.34it/s]

torch.Size([1, 165120])


Processing audio: 100%|█████████▉| 1993/2000 [28:12<00:05,  1.26it/s]

torch.Size([1, 228320])


Processing audio: 100%|█████████▉| 1994/2000 [28:13<00:05,  1.13it/s]

torch.Size([1, 147200])


Processing audio: 100%|█████████▉| 1995/2000 [28:14<00:04,  1.19it/s]

torch.Size([1, 133440])


Processing audio: 100%|█████████▉| 1996/2000 [28:15<00:03,  1.26it/s]

torch.Size([1, 70080])


Processing audio: 100%|█████████▉| 1997/2000 [28:15<00:01,  1.52it/s]

torch.Size([1, 211040])


Processing audio: 100%|█████████▉| 1998/2000 [28:16<00:01,  1.25it/s]

torch.Size([1, 196640])


Processing audio: 100%|█████████▉| 1999/2000 [28:17<00:00,  1.19it/s]

torch.Size([1, 162400])


Processing audio: 100%|██████████| 2000/2000 [28:18<00:00,  1.18it/s]


WER on custom dataset = 7.3%


WER on custom dataset = 8.9%


Collecting jiwer
  Downloading jiwer-3.1.0-py3-none-any.whl.metadata (2.6 kB)
Collecting rapidfuzz>=3.9.7 (from jiwer)
  Downloading rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)
Downloading jiwer-3.1.0-py3-none-any.whl (22 kB)
Downloading rapidfuzz-3.12.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (3.1 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m24.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: rapidfuzz, jiwer
Successfully installed jiwer-3.1.0 rapidfuzz-3.12.2
