In [None]:
"""
You can run this notebook either locally (if you have all the dependencies and a GPU) or on Google Colab.

Instructions for setting up Colab are as follows:
1. Open a new Python 3 notebook.
2. Import this notebook from GitHub (File -> Upload Notebook -> "GITHUB" tab -> copy/paste GitHub URL)
3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select "GPU" for hardware accelerator)
4. Run this cell to set up dependencies.
5. Restart the runtime (Runtime -> Restart Runtime) for any upgraded packages to take effect
"""
# If you're using Google Colab and not running locally, run this cell.

## Install dependencies
!pip install wget
!apt-get install sox libsndfile1 ffmpeg
!pip install unidecode
!pip install matplotlib>=3.3.2

## Install NeMo
BRANCH = 'r1.7.2'
!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]

Reading package lists... Done
Building dependency tree       
Reading state information... Done
libsndfile1 is already the newest version (1.0.28-4ubuntu0.18.04.2).
ffmpeg is already the newest version (7:3.4.8-0ubuntu0.2).
sox is already the newest version (14.4.2-3ubuntu0.18.04.1).
0 upgraded, 0 newly installed, 0 to remove and 39 not upgraded.
Collecting nemo_toolkit[all]
  Cloning https://github.com/NVIDIA/NeMo.git (to revision r1.7.2) to /tmp/pip-install-4fychxl8/nemo-toolkit_e6a2cac1f8d4475d8ad1fcfc8c7b7cfe
  Running command git clone -q https://github.com/NVIDIA/NeMo.git /tmp/pip-install-4fychxl8/nemo-toolkit_e6a2cac1f8d4475d8ad1fcfc8c7b7cfe
  Running command git checkout -b r1.7.2 --track origin/r1.7.2
  Switched to a new branch 'r1.7.2'
  Branch 'r1.7.2' set up to track remote branch 'r1.7.2' from 'origin'.
Collecting matplotlib>=3.3.2
  Using cached matplotlib-3.5.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl (11.2 MB)
Collecting nltk>=3.6.5
  Using cached nltk-3.7-

# Preparação do Dataset

In [None]:
# Install (or upgrade) gdown, the tool used below to fetch the dataset archive.
!pip install -U gdown
import gdown

# Shareable link of the archive:
# https://drive.google.com/file/d/132ylX-eH1qsyuNuHPwnF6jRCDsXQeEck/view?usp=sharing
# Download the archive by its Google Drive file ID.
!gdown 132ylX-eH1qsyuNuHPwnF6jRCDsXQeEck

Downloading...
From: https://drive.google.com/uc?id=132ylX-eH1qsyuNuHPwnF6jRCDsXQeEck
To: /content/CORAA_DATASET_SAMPLE.tar.bz
100% 166M/166M [00:02<00:00, 66.6MB/s]


In [None]:
import tarfile

# NOTE: extractall() trusts the member paths stored inside the archive, so only
# run this on an archive obtained from a source you trust.
# The context manager closes the archive even when extraction raises.
with tarfile.open('CORAA_DATASET_SAMPLE.tar.bz') as my_tar:
    my_tar.extractall('./')  # extract into the current directory

In [None]:
# --- Building Manifest Files --- #
import json
import os
import librosa

# Base directory that the transcript and manifest paths below are built from.
data_dir = './'

# Function to build a manifest
def build_manifest(transcripts_path, manifest_path,
                   source_prefix='/raid/fred/Wav2Vec-Wrapper/datasets/CORAA_DATASET/',
                   target_prefix='/content/CORAA_DATASET_SAMPLE/sample_wavs/CORAA_DATASET/',
                   min_duration=1.0):
    """Convert an ``audio_path,transcript`` CSV file into a JSON-lines manifest.

    The first line of the CSV is treated as a header and skipped. For every
    remaining non-empty line one JSON object with the keys ``audio_filepath``,
    ``duration`` and ``text`` is written on its own line of ``manifest_path``.

    Args:
        transcripts_path: Path of the CSV file to read.
        manifest_path: Path of the manifest file to (over)write.
        source_prefix: Substring of the audio paths stored in the CSV that is
            replaced by ``target_prefix``.
        target_prefix: Replacement pointing at the local copy of the audio.
        min_duration: Clips shorter than this many seconds are left out.

    Raises:
        ValueError: If a non-empty data line contains no comma.
    """
    with open(transcripts_path, 'r', encoding='utf-8') as fin, \
            open(manifest_path, 'w', encoding='utf-8') as fout:
        for line_number, line in enumerate(fin):
            if line_number == 0:
                continue  # header row
            # Drop the line terminator so it does not end up inside "text".
            line = line.rstrip('\r\n')
            if not line:
                continue  # tolerate blank lines (e.g. at the end of the file)
            # Split on the first comma only: the transcript may contain commas.
            audio_path, separator, transcript = line.partition(',')
            if not separator:
                raise ValueError(
                    f"{transcripts_path}, line {line_number + 1}: "
                    "expected 'audio_path,transcript'"
                )
            audio_path = audio_path.replace(source_prefix, target_prefix)
            duration = librosa.core.get_duration(filename=audio_path)
            if duration < min_duration:
                continue
            # Write the metadata to the manifest, one JSON object per line.
            metadata = {
                "audio_filepath": audio_path,
                "duration": duration,
                "text": transcript
            }
            json.dump(metadata, fout)
            fout.write('\n')
                
# Building Manifests
print("******")
# os.path.join keeps the paths valid whether or not data_dir ends with a path
# separator (plain concatenation produced "././..." and breaks without one).
sample_dir = os.path.join(data_dir, 'CORAA_DATASET_SAMPLE')

# NOTE: build_manifest writes one JSON object per line; the ".csv" manifest
# names are kept as they were so existing paths keep working.
train_transcripts = os.path.join(sample_dir, 'sample_train.csv')
train_manifest = os.path.join(sample_dir, 'train_manifest.csv')
build_manifest(train_transcripts, train_manifest)
print("Training manifest created.")

test_transcripts = os.path.join(sample_dir, 'sample_test.csv')
test_manifest = os.path.join(sample_dir, 'test_manifest.csv')
build_manifest(test_transcripts, test_manifest)
print("Test manifest created.")
print("***Done***")

******
Training manifest created.
Test manifest created.
***Done***


# Treinamento do Zero

In [None]:
## Install NeMo
BRANCH = 'r1.7.2'
## Grab the config we'll use in this example
!mkdir configs
!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/examples/asr/conf/config.yaml

--2022-04-07 17:18:11--  https://raw.githubusercontent.com/NVIDIA/NeMo/r1.7.2/examples/asr/conf/config.yaml
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4143 (4.0K) [text/plain]
Saving to: ‘configs/config.yaml’


2022-04-07 17:18:11 (60.2 MB/s) - ‘configs/config.yaml’ saved [4143/4143]



In [None]:
# --- Load the YAML configuration --- #
# Fall back to the `ruamel_yaml` module name when `ruamel.yaml` cannot be imported.
try:
    from ruamel.yaml import YAML
except ModuleNotFoundError:
    from ruamel_yaml import YAML

yaml = YAML(typ='safe')
config_path = './configs/config.yaml'

# Parse the file into plain Python containers and show the result.
with open(config_path) as config_file:
    params = yaml.load(config_file)

print(params)

{'name': 'QuartzNet15x5', 'sample_rate': 16000, 'repeat': 1, 'dropout': 0.0, 'separable': True, 'labels': [' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', "'"], 'model': {'train_ds': {'manifest_filepath': '???', 'sample_rate': 16000, 'labels': [' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', "'"], 'batch_size': 32, 'trim_silence': True, 'max_duration': 16.7, 'shuffle': True, 'is_tarred': False, 'tarred_audio_filepaths': None, 'shuffle_n': 2048, 'bucketing_strategy': 'synced_randomized', 'bucketing_batch_size': None}, 'validation_ds': {'manifest_filepath': '???', 'sample_rate': 16000, 'labels': [' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', "'"], 'batch_size': 32, 'shuffle': False}, 'preprocessor': {'_target_': 'nemo.collections

In [None]:
from omegaconf import DictConfig

# Point the training and validation dataset sections at their manifest files.
for _section, _manifest in (('train_ds', train_manifest),
                            ('validation_ds', test_manifest)):
    params['model'][_section]['manifest_filepath'] = _manifest

In [None]:
# NeMo's "core" package
import nemo
# NeMo's ASR collection - this collections contains complete ASR models and
# building blocks (modules) for ASR
import nemo.collections.asr as nemo_asr

[NeMo W 2022-04-07 17:18:30 optimizers:55] Apex was not found. Using the lamb or fused_adam optimizer will error out.
################################################################################
###          (please add 'export KALDI_ROOT=<your_path>' in your $HOME/.profile)
###          (or run as: KALDI_ROOT=<your_path> python <your_script>.py)
################################################################################



In [None]:
import pytorch_lightning as pl
# Lightning trainer requesting one GPU and running for at most 50 epochs.
trainer = pl.Trainer(gpus=1, max_epochs=50)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [None]:
# Instantiate a CTC model from the 'model' section of the loaded config and hand it the trainer.
first_asr_model = nemo_asr.models.EncDecCTCModel(cfg=DictConfig(params['model']), trainer=trainer)

[NeMo I 2022-04-07 17:18:31 audio_to_text_dataset:42] Model level config does not container `sample_rate`, please explicitly provide `sample_rate` to the dataloaders.
[NeMo I 2022-04-07 17:18:31 audio_to_text_dataset:42] Model level config does not container `labels`, please explicitly provide `labels` to the dataloaders.
[NeMo I 2022-04-07 17:18:31 collections:173] Dataset loaded with 261 files totalling 0.57 hours
[NeMo I 2022-04-07 17:18:31 collections:174] 23 files were filtered totalling 0.13 hours
[NeMo I 2022-04-07 17:18:31 audio_to_text_dataset:42] Model level config does not container `sample_rate`, please explicitly provide `sample_rate` to the dataloaders.
[NeMo I 2022-04-07 17:18:31 audio_to_text_dataset:42] Model level config does not container `labels`, please explicitly provide `labels` to the dataloaders.
[NeMo I 2022-04-07 17:18:31 collections:173] Dataset loaded with 20 files totalling 0.04 hours
[NeMo I 2022-04-07 17:18:31 collections:174] 0 files were filtered total

In [None]:
# Show the vocabulary/alphabet the model's decoder currently holds.
print(first_asr_model.decoder.vocabulary)

NameError: ignored

In [None]:
# Replace the decoder vocabulary with space, the lowercase letters a-z and a
# set of accented characters.
# NOTE(review): 'â' and 'õ' are not in this list — confirm the transcripts never contain them.
first_asr_model.change_vocabulary(
    new_vocabulary=[
        ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
        'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', "á", "à", "ã", "é", "ê", "í", "ó", "ô", "ú", "ç"
    ]
)

[NeMo I 2022-04-07 13:42:00 ctc_models:365] Changed decoder to output to [' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'á', 'à', 'ã', 'é', 'ê', 'í', 'ó', 'ô', 'ú', 'ç'] vocabulary.


In [None]:
# Train the model built from the config (no pre-trained weights) with the Lightning trainer.
trainer.fit(first_asr_model)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[NeMo W 2022-04-07 13:42:05 modelPT:497] The lightning trainer received accelerator: <pytorch_lightning.accelerators.gpu.GPUAccelerator object at 0x7f37c8916e90>. We recommend to use 'ddp' instead.


[NeMo I 2022-04-07 13:42:05 modelPT:587] Optimizer config = Novograd (
    Parameter Group 0
        amsgrad: False
        betas: [0.8, 0.5]
        eps: 1e-08
        grad_averaging: False
        initial_lr: 0.01
        lr: 0.009956671383094069
        weight_decay: 0.001
    )
[NeMo I 2022-04-07 13:42:05 lr_scheduler:837] Scheduler "<nemo.core.optim.lr_scheduler.CosineAnnealing object at 0x7f37c0046850>" 
    will be used during training (effective maximum steps = 1550) - 
    Parameters : 
    (warmup_steps: null
    warmup_ratio: null
    min_lr: 0.0
    last_epoch: -1
    max_steps: 1550
    )



  | Name              | Type                              | Params
------------------------------------------------------------------------
0 | preprocessor      | AudioToMelSpectrogramPreprocessor | 0     
1 | encoder           | ConvASREncoder                    | 1.2 M 
2 | spec_augmentation | SpectrogramAugmentation           | 0     
3 | _wer              | WER                               | 0     
4 | decoder           | ConvASRDecoder                    | 39.0 K
5 | loss              | CTCLoss                           | 0     
------------------------------------------------------------------------
1.2 M     Trainable params
0         Non-trainable params
1.2 M     Total params
4.873     Total estimated model params size (MB)
      rank_zero_warn(f"Checkpoint directory {dirpath} exists and is not empty.")
    


Validation sanity check: 0it [00:00, ?it/s]

      f"The number of training samples ({self.num_training_batches}) is smaller than the logging interval"
    


Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

      rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
    


# Transfer Learning

In [None]:
## Install NeMo
BRANCH = 'r1.7.2'
## Grab the config we'll use in this example
!mkdir configs
!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/examples/asr/conf/config.yaml

mkdir: cannot create directory ‘configs’: File exists
--2022-04-07 17:23:33--  https://raw.githubusercontent.com/NVIDIA/NeMo/r1.7.2/examples/asr/conf/config.yaml
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.110.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 4143 (4.0K) [text/plain]
Saving to: ‘configs/config.yaml.1’


2022-04-07 17:23:34 (52.9 MB/s) - ‘configs/config.yaml.1’ saved [4143/4143]



In [None]:
# --- Load the YAML configuration --- #
# Fall back to the `ruamel_yaml` module name when `ruamel.yaml` cannot be imported.
try:
    from ruamel.yaml import YAML
except ModuleNotFoundError:
    from ruamel_yaml import YAML

yaml = YAML(typ='safe')
config_path = './configs/config.yaml'

# Parse the file into plain Python containers and show the result.
with open(config_path) as config_file:
    params = yaml.load(config_file)

print(params)

{'name': 'QuartzNet15x5', 'sample_rate': 16000, 'repeat': 1, 'dropout': 0.0, 'separable': True, 'labels': [' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', "'"], 'model': {'train_ds': {'manifest_filepath': '???', 'sample_rate': 16000, 'labels': [' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', "'"], 'batch_size': 8, 'trim_silence': True, 'max_duration': 16.7, 'shuffle': True, 'is_tarred': False, 'tarred_audio_filepaths': None, 'shuffle_n': 2048, 'bucketing_strategy': 'synced_randomized', 'bucketing_batch_size': None}, 'validation_ds': {'manifest_filepath': '???', 'sample_rate': 16000, 'labels': [' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', "'"], 'batch_size': 2, 'shuffle': False}, 'preprocessor': {'_target_': 'nemo.collections.a

In [None]:
from omegaconf import DictConfig

# Point the training and validation dataset sections at their manifest files.
for _section, _manifest in (('train_ds', train_manifest),
                            ('validation_ds', test_manifest)):
    params['model'][_section]['manifest_filepath'] = _manifest

In [None]:
# NeMo's "core" package
import nemo
# NeMo's ASR collection - this collections contains complete ASR models and
# building blocks (modules) for ASR
import nemo.collections.asr as nemo_asr

[NeMo W 2022-04-07 17:31:31 optimizers:55] Apex was not found. Using the lamb or fused_adam optimizer will error out.
################################################################################
###          (please add 'export KALDI_ROOT=<your_path>' in your $HOME/.profile)
###          (or run as: KALDI_ROOT=<your_path> python <your_script>.py)
################################################################################



In [None]:
# Fetch the pre-trained QuartzNet15x5 model "QuartzNet15x5Base-En" from NVIDIA's NGC cloud
# and instantiate it (the recorded log shows a cached copy being reused).
quartznet = nemo_asr.models.EncDecCTCModel.from_pretrained(model_name="QuartzNet15x5Base-En")

[NeMo I 2022-04-07 17:31:31 cloud:56] Found existing object /root/.cache/torch/NeMo/NeMo_1.7.2/QuartzNet15x5Base-En/2b066be39e9294d7100fb176ec817722/QuartzNet15x5Base-En.nemo.
[NeMo I 2022-04-07 17:31:31 cloud:62] Re-using file from: /root/.cache/torch/NeMo/NeMo_1.7.2/QuartzNet15x5Base-En/2b066be39e9294d7100fb176ec817722/QuartzNet15x5Base-En.nemo
[NeMo I 2022-04-07 17:31:31 common:704] Instantiating model from pre-trained checkpoint
[NeMo I 2022-04-07 17:31:32 features:255] PADDING: 16
[NeMo I 2022-04-07 17:31:32 features:272] STFT using torch
[NeMo I 2022-04-07 17:31:38 save_restore_connector:157] Model EncDecCTCModel was successfully restored from /root/.cache/torch/NeMo/NeMo_1.7.2/QuartzNet15x5Base-En/2b066be39e9294d7100fb176ec817722/QuartzNet15x5Base-En.nemo.


In [None]:
# Show the vocabulary/alphabet the pre-trained model's decoder currently holds.
print(quartznet.decoder.vocabulary)


[' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', "'"]


In [None]:
# Replace the decoder vocabulary with space, the lowercase letters a-z and a
# set of accented characters. Note that you can (and should!) change the
# vocabulary entirely when fine-tuning using a different language.
# NOTE(review): 'â' and 'õ' are not in this list — confirm the transcripts never contain them.
quartznet.change_vocabulary(
    new_vocabulary=[
        ' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n',
        'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', "á", "à", "ã", "é", "ê", "í", "ó", "ô", "ú", "ç"
    ]
)

[NeMo I 2022-04-07 17:31:38 ctc_models:365] Changed decoder to output to [' ', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 'á', 'à', 'ã', 'é', 'ê', 'í', 'ó', 'ô', 'ú', 'ç'] vocabulary.


In [None]:
import copy
# Deep-copy the optimizer section so the loaded config itself stays unchanged,
# then set the learning rate used for fine-tuning.
new_opt = copy.deepcopy(params['model']['optim'])
new_opt['lr'] = 0.001

# new_opt is meant to be passed to quartznet.setup_optimization(...).

In [None]:
# Configure the model's optimizer from the copied config with the fine-tuning learning rate.
quartznet.setup_optimization(optim_config=DictConfig(new_opt))

[NeMo W 2022-04-07 17:31:38 modelPT:475] Trainer wasn't specified in model constructor. Make sure that you really wanted it.


[NeMo I 2022-04-07 17:31:38 modelPT:587] Optimizer config = Novograd (
    Parameter Group 0
        amsgrad: False
        betas: [0.8, 0.5]
        eps: 1e-08
        grad_averaging: False
        lr: 0.001
        weight_decay: 0.001
    )


[NeMo W 2022-04-07 17:31:38 lr_scheduler:817] Neither `max_steps` nor `iters_per_batch` were provided to `optim.sched`, cannot compute effective `max_steps` !
    Scheduler will not be instantiated !


(Novograd (
 Parameter Group 0
     amsgrad: False
     betas: [0.8, 0.5]
     eps: 1e-08
     grad_averaging: False
     lr: 0.001
     weight_decay: 0.001
 ), None)

In [None]:
# Print the per-module parameter summary of the model.
# NOTE(review): the recorded output carries a Lightning deprecation warning for
# `LightningModule.summarize` — confirm against the installed version.
quartznet.summarize()

      """Entry point for launching an IPython kernel.
    
      "Argument `mode` in `LightningModule.summarize` is deprecated in v1.4"
    


  | Name              | Type                              | Params
------------------------------------------------------------------------
0 | preprocessor      | AudioToMelSpectrogramPreprocessor | 0     
1 | encoder           | ConvASREncoder                    | 18.9 M
2 | spec_augmentation | SpectrogramAugmentation           | 0     
3 | _wer              | WER                               | 0     
4 | decoder           | ConvASRDecoder                    | 39.0 K
5 | loss              | CTCLoss                           | 0     
------------------------------------------------------------------------
18.9 M    Trainable params
0         Non-trainable params
18.9 M    Total params
75.734    Total estimated model params size (MB)

In [None]:
  # NOTE(review): the statements in this cell carry a two-space indent. IPython
  # accepted it (see the recorded output), but plain Python would reject it.
  # Optimizer setup with the smaller learning rate is left disabled here:
  #quartznet.setup_optimization(optim_config=DictConfig(new_opt))

  # Point to the data we'll use for fine-tuning as the training set
  quartznet.setup_training_data(train_data_config=params['model']['train_ds'])

  # Point to the new validation data for fine-tuning
  quartznet.setup_validation_data(val_data_config=params['model']['validation_ds'])

# Creating the PyTorch Lightning trainer and calling `fit` is left disabled here:
#trainer = pl.Trainer(gpus=[0], max_epochs=2)
#trainer.fit(quartznet)

[NeMo I 2022-04-07 17:31:38 audio_to_text_dataset:42] Model level config does not container `sample_rate`, please explicitly provide `sample_rate` to the dataloaders.
[NeMo I 2022-04-07 17:31:38 audio_to_text_dataset:42] Model level config does not container `labels`, please explicitly provide `labels` to the dataloaders.
[NeMo I 2022-04-07 17:31:38 collections:173] Dataset loaded with 893 files totalling 1.05 hours
[NeMo I 2022-04-07 17:31:38 collections:174] 23 files were filtered totalling 0.13 hours
[NeMo I 2022-04-07 17:31:38 audio_to_text_dataset:42] Model level config does not container `sample_rate`, please explicitly provide `sample_rate` to the dataloaders.
[NeMo I 2022-04-07 17:31:38 audio_to_text_dataset:42] Model level config does not container `labels`, please explicitly provide `labels` to the dataloaders.
[NeMo I 2022-04-07 17:31:38 collections:173] Dataset loaded with 84 files totalling 0.08 hours
[NeMo I 2022-04-07 17:31:38 collections:174] 0 files were filtered total

In [None]:
# Show the 'spec_augment' section stored in the model's config.
print(quartznet._cfg['spec_augment'])

{'_target_': 'nemo.collections.asr.modules.SpectrogramAugmentation', 'rect_freq': 50, 'rect_masks': 5, 'rect_time': 120}


In [None]:
import pytorch_lightning as pl
# Lightning trainer requesting one GPU and running for at most 50 epochs.
trainer = pl.Trainer(gpus=1, max_epochs=50)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs


In [None]:
# Fine-tune the pre-trained model with the Lightning trainer.
trainer.fit(quartznet)

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[NeMo W 2022-04-07 17:31:38 modelPT:475] Trainer wasn't specified in model constructor. Make sure that you really wanted it.


[NeMo I 2022-04-07 17:31:38 modelPT:587] Optimizer config = Novograd (
    Parameter Group 0
        amsgrad: False
        betas: [0.8, 0.5]
        eps: 1e-08
        grad_averaging: False
        lr: 0.001
        weight_decay: 0.001
    )


[NeMo W 2022-04-07 17:31:38 lr_scheduler:817] Neither `max_steps` nor `iters_per_batch` were provided to `optim.sched`, cannot compute effective `max_steps` !
    Scheduler will not be instantiated !

  | Name              | Type                              | Params
------------------------------------------------------------------------
0 | preprocessor      | AudioToMelSpectrogramPreprocessor | 0     
1 | encoder           | ConvASREncoder                    | 18.9 M
2 | spec_augmentation | SpectrogramAugmentation           | 0     
3 | _wer              | WER                               | 0     
4 | decoder           | ConvASRDecoder                    | 39.0 K
5 | loss              | CTCLoss                           | 0     
------------------------------------------------------------------------
18.9 M    Trainable params
0         Non-trainable params
18.9 M    Total params
75.734    Total estimated model params size (MB)


Validation sanity check: 0it [00:00, ?it/s]

Training: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

Validating: 0it [00:00, ?it/s]

# Inference

In [None]:

# Maps each test audio path (rewritten to its local location) to its reference transcript.
filelist = {}

with open('/content/CORAA_DATASET_SAMPLE/sample_test.csv', 'r', encoding='utf-8') as csv_file:
  for line_number, line in enumerate(csv_file):
    if line_number == 0:
      continue  # header row
    # Drop the line terminator so it does not end up inside the transcript.
    line = line.rstrip('\r\n')
    if not line:
      continue  # tolerate blank lines (e.g. at the end of the file)
    # Split on the first comma only: the transcript may contain commas.
    filepath, transcript = line.split(',', 1)
    filepath = filepath.replace('/raid/fred/Wav2Vec-Wrapper/datasets/CORAA_DATASET/', '/content/CORAA_DATASET_SAMPLE/sample_wavs/CORAA_DATASET/')
    filelist[filepath] = transcript


In [None]:
# Transcribe the first 12 entries of filelist one file at a time, printing the
# model's hypothesis followed by the reference transcript.
for audio_file, reference in list(filelist.items())[:12]:
  hypothesis = quartznet.transcribe(paths2audio_files=[audio_file])[0]
  print(hypothesis)
  print(reference)

Transcribing:   0%|          | 0/1 [00:00<?, ?it/s]

seu asc que  ezs qu fereecen escoros
eu acho que existe diferenças entre esportes



Transcribing:   0%|          | 0/1 [00:00<?, ?it/s]

de contses que vesaanos
depois que resolvemos



Transcribing:   0%|          | 0/1 [00:00<?, ?it/s]

esala tu que etuque  li berdas nenin e de s a falava qui o chamadado m
aí eu falei mas porque você quer ir no banheiro das meninas e aí ele só falava que eu que tinha mandado né



Transcribing:   0%|          | 0/1 [00:00<?, ?it/s]

s comporta e ta forrma defrente dasmes a miga e no tinhesa reao ma no consegui aver
se comportar de outra forma diferente das minhas amigas eu não tinha essa relação eu não conseguia ver



Transcribing:   0%|          | 0/1 [00:00<?, ?it/s]

con que e
qual que cês querem



Transcribing:   0%|          | 0/1 [00:00<?, ?it/s]

na ca queto deguosita   na vosciau pude cea oravano nomaci da de mascranto edasade
não é que eu questão de gostar o ou não gostar se eu pudesse eu morava nu numa cidade mais tranquila sabe



Transcribing:   0%|          | 0/1 [00:00<?, ?it/s]

e monte  acompanebi a manno dorstava qaandese te daro
e não tinha companhia e minha mãe num gostava que eu andasse pela rua



Transcribing:   0%|          | 0/1 [00:00<?, ?it/s]

lequando agora e
olha quando eu agora eu



Transcribing:   0%|          | 0/1 [00:00<?, ?it/s]

 a mqui dem diaias
e as máquinas ainda há



Transcribing:   0%|          | 0/1 [00:00<?, ?it/s]

a margo
amargo



Transcribing:   0%|          | 0/1 [00:00<?, ?it/s]

a gente qu j de baa se da sociao no dosesila
a identificação de ação social e norma social



Transcribing:   0%|          | 0/1 [00:00<?, ?it/s]

 mais fases permo no
mas fala tipo

