In [1]:
!pip install torch torchaudio transformers datasets vector-quantize-pytorch torchcodec jiwer wandb

Collecting vector-quantize-pytorch
  Downloading vector_quantize_pytorch-1.23.2-py3-none-any.whl.metadata (30 kB)
Collecting torchcodec
  Downloading torchcodec-0.7.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (9.4 kB)
Collecting jiwer
  Downloading jiwer-4.0.0-py3-none-any.whl.metadata (3.3 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvi

In [2]:
!wandb login bcf87665e6100865897ebf8c1c0bdcc8a9b72283

[34m[1mwandb[0m: No netrc file found, creating one.
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc
[34m[1mwandb[0m: W&B API key is configured. Use [1m`wandb login --relogin`[0m to force relogin


In [3]:
import torch
import torch.nn as nn
from transformers import Wav2Vec2Processor, Wav2Vec2Model, Wav2Vec2ForCTC
from datasets import load_from_disk, load_dataset
from tqdm import tqdm
from jiwer import wer
from torch.utils.data import DataLoader
import wandb
from torch.optim.lr_scheduler import LambdaLR
from torch.nn.utils.rnn import pad_sequence
import random
import numpy as np

2025-09-29 05:42:46.344085: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1759124566.526527      19 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1759124566.581677      19 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [4]:
#1️⃣ Set a fixed seed number
SEED = 42

# 2️⃣ Python random seed
random.seed(SEED)

# 3️⃣ NumPy random seed
np.random.seed(SEED)

# 4️⃣ PyTorch random seed
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.cuda.manual_seed_all(SEED)  # if using multi-GPU

# 5️⃣ For cudnn backend reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [5]:
# from google.colab import drive
# drive.mount("/content/drive")

In [6]:
wandb.init(
    project = "Wav2Vec2 finetuning on low resourse language",
    name = "Without RVQ - Exp001 - basic",    
)

[34m[1mwandb[0m: Currently logged in as: [33mrasarathathsarana63[0m ([33mrasarathathsarana63-university-of-moratuwa[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
[34m[1mwandb[0m: Tracking run with wandb version 0.20.1
[34m[1mwandb[0m: Run data is saved locally in [35m[1m/kaggle/working/wandb/run-20250929_054303-n7vp2udy[0m
[34m[1mwandb[0m: Run [1m`wandb offline`[0m to turn off syncing.
[34m[1mwandb[0m: Syncing run [33mWithout RVQ - Exp001 - basic[0m
[34m[1mwandb[0m: ⭐️ View project at [34m[4mhttps://wandb.ai/rasarathathsarana63-university-of-moratuwa/Wav2Vec2%20finetuning%20on%20low%20resourse%20language[0m
[34m[1mwandb[0m: 🚀 View run at [34m[4mhttps://wandb.ai/rasarathathsarana63-university-of-moratuwa/Wav2Vec2%20finetuning%20on%20low%20resourse%20language/runs/n7vp2udy[0m


In [7]:
MODEL_NAME = "facebook/wav2vec2-base"
LEARNING_RATE = 1e-4
WARMUP_RATIO = 0.2
HOLD_RATIO = 0.2
DECAY_RATIO = 0.6
BATCH_SIZE = 8
EPOCHS = 30
GRADIENT_ACCUMULATION_STEPS = 8
LOGGING_STEPS = 10

In [8]:
def tri_stage_lr_lambda(current_step: int, 
                        total_steps: int,
                        warmup_ratio: float = 0.1, 
                        hold_ratio: float = 0.1, 
                        decay_ratio: float = 0.8):
    """
    Tri-stage learning rate schedule using ratios instead of raw steps.
    
    Args:
        current_step (int): Current step.
        total_steps (int): Total number of training steps.
        warmup_ratio (float): Fraction of steps for warmup.
        hold_ratio (float): Fraction of steps for hold.
        decay_ratio (float): Fraction of steps for decay.
    """
    # Convert ratios -> absolute steps
    warmup_steps = int(total_steps * warmup_ratio)
    hold_steps = int(total_steps * hold_ratio)
    decay_steps = int(total_steps * decay_ratio)

    if current_step < warmup_steps:
        # Linear warmup
        return float(current_step) / float(max(1, warmup_steps))
    elif current_step < warmup_steps + hold_steps:
        # Hold base LR
        return 1.0
    elif current_step < warmup_steps + hold_steps + decay_steps:
        # Linear decay
        decay_progress = (current_step - warmup_steps - hold_steps) / float(decay_steps)
        return max(0.0, 1.0 - decay_progress)
    else:
        # After decay, keep LR at 0
        return 0.0


In [9]:
# ------------------------
# 1️⃣ Load pre-trained Wav2Vec2
# ------------------------
processor = Wav2Vec2Processor.from_pretrained(MODEL_NAME)
wav2vec_model = Wav2Vec2Model.from_pretrained(MODEL_NAME)

# Freeze original model to save compute
for param in wav2vec_model.parameters():
    param.requires_grad = False


preprocessor_config.json:   0%|          | 0.00/159 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/163 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]



vocab.json:   0%|          | 0.00/291 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/85.0 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/380M [00:00<?, ?B/s]

In [10]:
# ------------------------
# 3️⃣ Define downstream CTC model
# ------------------------
class Wav2Vec2WithRVQ(nn.Module):
    def __init__(self, wav2vec_model, vocab_size):
        super().__init__()
        self.wav2vec = wav2vec_model
        self.classifier = nn.Linear(wav2vec_model.config.hidden_size, vocab_size)

    def forward(self, input_values):
        outputs = self.wav2vec(input_values).last_hidden_state  # [B, T, H]
        logits = self.classifier(outputs)                      # [B, T, vocab]
        return logits

# Vocabulary (use processor vocab size)
vocab_size = len(processor.tokenizer)
model = Wav2Vec2WithRVQ(wav2vec_model, vocab_size)


In [11]:
# ------------------------
# 4️⃣ Prepare dataset
# ------------------------
train_dataset = load_from_disk("/kaggle/input/librispeech-10h/dataset_10h")
valid_dataset = load_from_disk("/kaggle/input/librispeech-val-clean/dataset_val_clean")

def preprocess(batch):
    audio = batch["audio"]["array"]
    batch["input_values"] = processor(audio, sampling_rate=16000, return_tensors="pt").input_values[0]
    batch["labels"] = processor.tokenizer(batch["text"]).input_ids
    return batch

train_dataset = train_dataset.map(preprocess, load_from_cache_file=False, cache_file_name="/kaggle/working/temp.arrow")
valid_dataset = valid_dataset.map(preprocess, load_from_cache_file=False, cache_file_name="/kaggle/working/temp.arrow")

Map:   0%|          | 0/2850 [00:00<?, ? examples/s]

model.safetensors:   0%|          | 0.00/380M [00:00<?, ?B/s]

Map:   0%|          | 0/2703 [00:00<?, ? examples/s]

In [12]:
train_dataset.set_format(type="torch", columns=["input_values", "labels"])
valid_dataset.set_format(type="torch", columns=["input_values", "labels"])

In [13]:
def collate_fn(batch):
    # Pad input_values
    input_values = [item["input_values"] for item in batch]
    input_values = pad_sequence(input_values, batch_first=True)

    # Pad labels
    labels = [torch.tensor(item["labels"]) for item in batch]
    labels = pad_sequence(labels, batch_first=True, padding_value=-100)  # -100 ignored in loss

    return {"input_values": input_values, "labels": labels}

In [14]:
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=collate_fn
)

valid_loader = DataLoader(
    valid_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_fn
)

In [15]:
optimizer = torch.optim.AdamW(model.parameters(), lr=LEARNING_RATE)

# Scheduler
scheduler = LambdaLR(optimizer, lr_lambda=lambda step: tri_stage_lr_lambda(
    step,
    total_steps=EPOCHS * len(train_loader),
    warmup_ratio=WARMUP_RATIO,
    hold_ratio=HOLD_RATIO,
    decay_ratio=DECAY_RATIO
))

In [16]:
# ------------------------
# 5️⃣ Training loop (simplified)
# ------------------------
ctc_loss = nn.CTCLoss(blank=processor.tokenizer.pad_token_id, zero_infinity=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

Wav2Vec2WithRVQ(
  (wav2vec): Wav2Vec2Model(
    (feature_extractor): Wav2Vec2FeatureEncoder(
      (conv_layers): ModuleList(
        (0): Wav2Vec2GroupNormConvLayer(
          (conv): Conv1d(1, 512, kernel_size=(10,), stride=(5,), bias=False)
          (activation): GELUActivation()
          (layer_norm): GroupNorm(512, 512, eps=1e-05, affine=True)
        )
        (1-4): 4 x Wav2Vec2NoLayerNormConvLayer(
          (conv): Conv1d(512, 512, kernel_size=(3,), stride=(2,), bias=False)
          (activation): GELUActivation()
        )
        (5-6): 2 x Wav2Vec2NoLayerNormConvLayer(
          (conv): Conv1d(512, 512, kernel_size=(2,), stride=(2,), bias=False)
          (activation): GELUActivation()
        )
      )
    )
    (feature_projection): Wav2Vec2FeatureProjection(
      (layer_norm): LayerNorm((512,), eps=1e-05, elementwise_affine=True)
      (projection): Linear(in_features=512, out_features=768, bias=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder)

In [17]:
model.wav2vec.gradient_checkpointing_enable()

In [18]:
accumulation_steps = GRADIENT_ACCUMULATION_STEPS  # e.g., 8
logging_steps = LOGGING_STEPS  # how often to log
optimizer.zero_grad()

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0

    # -----------------------
    # TRAINING LOOP
    # -----------------------
    for step, batch in enumerate(tqdm(train_loader, desc=f"Training Epoch {epoch}")):
        input_values = batch["input_values"].to(device)   # [B, T]
        labels = batch["labels"].to(device)               # [B, L]

        # Forward pass
        logits = model(input_values)                      # [B, T, vocab]
        log_probs = torch.nn.functional.log_softmax(logits, dim=-1)

        input_lengths = torch.full(
            size=(logits.size(0),),
            fill_value=logits.size(1),
            dtype=torch.long
        )
        target_lengths = torch.sum(labels != -100, dim=1)

        loss_fn = torch.nn.CTCLoss(
            blank=processor.tokenizer.pad_token_id,
            zero_infinity=True
        )

        loss = loss_fn(
            log_probs.transpose(0, 1),  # [T, B, vocab]
            labels,
            input_lengths,
            target_lengths
        )

        running_loss += loss.item()

        # Normalize for gradient accumulation
        loss = loss / accumulation_steps
        loss.backward()

        # Update optimizer & scheduler every accumulation_steps
        if (step + 1) % accumulation_steps == 0:
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()

        # Logging every accumulation_steps * logging_steps
        if (step + 1) % (accumulation_steps * logging_steps) == 0:
            avg_loss = running_loss / (accumulation_steps * logging_steps)
            wandb.log({
                "train loss": avg_loss,
                "learning rate": optimizer.param_groups[0]["lr"],
            })
            print(f"Epoch {epoch} Step {step+1} — Avg Train Loss: {avg_loss:.4f}")
            running_loss = 0.0

    # Catch leftover gradients if dataset not divisible by accumulation_steps
    if (step + 1) % accumulation_steps != 0:
        optimizer.step()
        scheduler.step()
        optimizer.zero_grad()

    # -----------------------
    # VALIDATION LOOP
    # -----------------------
    model.eval()
    val_loss = 0.0
    val_predictions, val_references = [], []

    with torch.no_grad():
        for batch in tqdm(valid_loader, desc=f"Validation Epoch {epoch}"):
            input_values = batch["input_values"].to(device)
            labels = batch["labels"]

            logits = model(input_values)
            log_probs = torch.nn.functional.log_softmax(logits, dim=-1)

            # Decode predictions
            pred_ids = torch.argmax(logits, dim=-1)
            pred_str = processor.batch_decode(pred_ids, skip_special_tokens=True)
            ref_str = processor.batch_decode(labels, group_tokens=False)

            val_predictions.extend([p.lower() for p in pred_str])
            val_references.extend([r.lower() for r in ref_str])

            # Compute validation loss
            input_lengths = torch.full(
                size=(logits.size(0),),
                fill_value=logits.size(1),
                dtype=torch.long
            )
            target_lengths = torch.sum(labels != -100, dim=1)

            loss = torch.nn.CTCLoss(
                blank=processor.tokenizer.pad_token_id,
                zero_infinity=True
            )(log_probs.transpose(0, 1), labels, input_lengths, target_lengths)

            val_loss += loss.item()

    val_loss /= len(valid_loader)
    val_wer = wer(hypothesis=val_predictions, reference=val_references)
    print(f"Epoch {epoch} — Validation Loss: {val_loss:.4f}, Validation WER: {val_wer:.4f}")

    # -----------------------
    # TRAIN WER CALCULATION
    # -----------------------
    train_predictions, train_references = [], []
    model.eval()
    with torch.no_grad():
        for batch in tqdm(train_loader, desc=f"Train WER Epoch {epoch}"):
            input_values = batch["input_values"].to(device)
            labels = batch["labels"]

            logits = model(input_values)
            pred_ids = torch.argmax(logits, dim=-1)

            pred_str = processor.batch_decode(pred_ids, skip_special_tokens=True)
            ref_str = processor.batch_decode(labels, group_tokens=False)

            train_predictions.extend([p.lower() for p in pred_str])
            train_references.extend([r.lower() for r in ref_str])

    train_wer = wer(hypothesis=train_predictions, reference=train_references)
    print(f"Epoch {epoch} — Train WER: {train_wer:.4f}")

    # -----------------------
    # LOGGING
    # -----------------------
    wandb.log({
        "epoch": epoch,
        "train wer": train_wer,
        "validation loss": val_loss,
        "validation wer": val_wer,
    })

print("✅ Fine-tuning with RVQ done!")


  labels = [torch.tensor(item["labels"]) for item in batch]
Training Epoch 0:  22%|██▏       | 80/357 [03:18<11:44,  2.54s/it]

Epoch 0 Step 80 — Avg Train Loss: 14.7627


Training Epoch 0:  45%|████▍     | 160/357 [06:36<08:14,  2.51s/it]

Epoch 0 Step 160 — Avg Train Loss: 15.1650


Training Epoch 0:  67%|██████▋   | 240/357 [09:57<04:57,  2.55s/it]

Epoch 0 Step 240 — Avg Train Loss: 15.3410


Training Epoch 0:  90%|████████▉ | 320/357 [13:17<01:32,  2.51s/it]

Epoch 0 Step 320 — Avg Train Loss: 14.9336


Training Epoch 0: 100%|██████████| 357/357 [14:48<00:00,  2.49s/it]
Validation Epoch 0: 100%|██████████| 338/338 [02:07<00:00,  2.65it/s]


Epoch 0 — Validation Loss: 27.7640, Validation WER: 1.5257


Train WER Epoch 0: 100%|██████████| 357/357 [02:36<00:00,  2.28it/s]


Epoch 0 — Train WER: 1.3938


Training Epoch 1:  22%|██▏       | 80/357 [03:18<11:39,  2.52s/it]

Epoch 1 Step 80 — Avg Train Loss: 15.3877


Training Epoch 1:  45%|████▍     | 160/357 [06:36<08:18,  2.53s/it]

Epoch 1 Step 160 — Avg Train Loss: 14.6938


Training Epoch 1:  67%|██████▋   | 240/357 [09:54<04:42,  2.41s/it]

Epoch 1 Step 240 — Avg Train Loss: 14.6557


Training Epoch 1:  90%|████████▉ | 320/357 [13:14<01:29,  2.42s/it]

Epoch 1 Step 320 — Avg Train Loss: 14.9432


Training Epoch 1: 100%|██████████| 357/357 [14:46<00:00,  2.48s/it]
Validation Epoch 1: 100%|██████████| 338/338 [02:06<00:00,  2.67it/s]


Epoch 1 — Validation Loss: 27.5797, Validation WER: 1.7009


Train WER Epoch 1: 100%|██████████| 357/357 [02:36<00:00,  2.28it/s]


Epoch 1 — Train WER: 1.5516


Training Epoch 2:  22%|██▏       | 80/357 [03:18<11:10,  2.42s/it]

Epoch 2 Step 80 — Avg Train Loss: 14.7750


Training Epoch 2:  45%|████▍     | 160/357 [06:37<08:07,  2.48s/it]

Epoch 2 Step 160 — Avg Train Loss: 14.2130


Training Epoch 2:  67%|██████▋   | 240/357 [09:56<04:56,  2.54s/it]

Epoch 2 Step 240 — Avg Train Loss: 15.0434


Training Epoch 2:  90%|████████▉ | 320/357 [13:14<01:29,  2.42s/it]

Epoch 2 Step 320 — Avg Train Loss: 14.7242


Training Epoch 2: 100%|██████████| 357/357 [14:46<00:00,  2.48s/it]
Validation Epoch 2: 100%|██████████| 338/338 [02:06<00:00,  2.67it/s]


Epoch 2 — Validation Loss: 27.2616, Validation WER: 1.8914


Train WER Epoch 2: 100%|██████████| 357/357 [02:36<00:00,  2.29it/s]


Epoch 2 — Train WER: 1.7576


Training Epoch 3:  22%|██▏       | 80/357 [03:19<11:36,  2.51s/it]

Epoch 3 Step 80 — Avg Train Loss: 14.8420


Training Epoch 3:  45%|████▍     | 160/357 [06:39<08:14,  2.51s/it]

Epoch 3 Step 160 — Avg Train Loss: 14.3946


Training Epoch 3:  67%|██████▋   | 240/357 [09:56<04:50,  2.49s/it]

Epoch 3 Step 240 — Avg Train Loss: 14.6313


Training Epoch 3:  90%|████████▉ | 320/357 [13:15<01:30,  2.45s/it]

Epoch 3 Step 320 — Avg Train Loss: 14.2409


Training Epoch 3: 100%|██████████| 357/357 [14:46<00:00,  2.48s/it]
Validation Epoch 3: 100%|██████████| 338/338 [02:06<00:00,  2.67it/s]


Epoch 3 — Validation Loss: 26.7896, Validation WER: 2.0458


Train WER Epoch 3: 100%|██████████| 357/357 [02:36<00:00,  2.28it/s]


Epoch 3 — Train WER: 1.9735


Training Epoch 4:  22%|██▏       | 80/357 [03:16<11:16,  2.44s/it]

Epoch 4 Step 80 — Avg Train Loss: 14.2702


Training Epoch 4:  45%|████▍     | 160/357 [06:35<08:16,  2.52s/it]

Epoch 4 Step 160 — Avg Train Loss: 15.1302


Training Epoch 4:  67%|██████▋   | 240/357 [09:55<05:02,  2.59s/it]

Epoch 4 Step 240 — Avg Train Loss: 14.5381


Training Epoch 4:  90%|████████▉ | 320/357 [13:14<01:32,  2.49s/it]

Epoch 4 Step 320 — Avg Train Loss: 13.7299


Training Epoch 4: 100%|██████████| 357/357 [14:46<00:00,  2.48s/it]
Validation Epoch 4: 100%|██████████| 338/338 [02:06<00:00,  2.67it/s]


Epoch 4 — Validation Loss: 26.1478, Validation WER: 2.1489


Train WER Epoch 4: 100%|██████████| 357/357 [02:36<00:00,  2.28it/s]


Epoch 4 — Train WER: 2.1423


Training Epoch 5:  22%|██▏       | 80/357 [03:19<11:14,  2.43s/it]

Epoch 5 Step 80 — Avg Train Loss: 14.8956


Training Epoch 5:  45%|████▍     | 160/357 [06:38<08:03,  2.45s/it]

Epoch 5 Step 160 — Avg Train Loss: 13.8047


Training Epoch 5:  67%|██████▋   | 240/357 [09:57<04:52,  2.50s/it]

Epoch 5 Step 240 — Avg Train Loss: 13.6459


Training Epoch 5:  90%|████████▉ | 320/357 [13:17<01:35,  2.57s/it]

Epoch 5 Step 320 — Avg Train Loss: 13.5102


Training Epoch 5: 100%|██████████| 357/357 [14:48<00:00,  2.49s/it]
Validation Epoch 5: 100%|██████████| 338/338 [02:06<00:00,  2.67it/s]


Epoch 5 — Validation Loss: 25.3423, Validation WER: 2.1716


Train WER Epoch 5: 100%|██████████| 357/357 [02:35<00:00,  2.29it/s]


Epoch 5 — Train WER: 2.2251


Training Epoch 6:  22%|██▏       | 80/357 [03:17<11:24,  2.47s/it]

Epoch 6 Step 80 — Avg Train Loss: 13.8126


Training Epoch 6:  45%|████▍     | 160/357 [06:35<08:11,  2.50s/it]

Epoch 6 Step 160 — Avg Train Loss: 13.9701


Training Epoch 6:  67%|██████▋   | 240/357 [09:54<04:48,  2.46s/it]

Epoch 6 Step 240 — Avg Train Loss: 13.5869


Training Epoch 6:  90%|████████▉ | 320/357 [13:14<01:30,  2.44s/it]

Epoch 6 Step 320 — Avg Train Loss: 13.2869


Training Epoch 6: 100%|██████████| 357/357 [14:46<00:00,  2.48s/it]
Validation Epoch 6: 100%|██████████| 338/338 [02:06<00:00,  2.68it/s]


Epoch 6 — Validation Loss: 24.3606, Validation WER: 2.0582


Train WER Epoch 6: 100%|██████████| 357/357 [02:35<00:00,  2.29it/s]


Epoch 6 — Train WER: 2.1557


Training Epoch 7:  22%|██▏       | 80/357 [03:19<11:24,  2.47s/it]

Epoch 7 Step 80 — Avg Train Loss: 13.2799


Training Epoch 7:  45%|████▍     | 160/357 [06:39<08:11,  2.50s/it]

Epoch 7 Step 160 — Avg Train Loss: 13.2160


Training Epoch 7:  67%|██████▋   | 240/357 [09:57<04:52,  2.50s/it]

Epoch 7 Step 240 — Avg Train Loss: 13.3789


Training Epoch 7:  90%|████████▉ | 320/357 [13:16<01:32,  2.49s/it]

Epoch 7 Step 320 — Avg Train Loss: 12.7099


Training Epoch 7: 100%|██████████| 357/357 [14:47<00:00,  2.48s/it]
Validation Epoch 7: 100%|██████████| 338/338 [02:06<00:00,  2.68it/s]


Epoch 7 — Validation Loss: 23.2246, Validation WER: 1.6553


Train WER Epoch 7: 100%|██████████| 357/357 [02:35<00:00,  2.30it/s]


Epoch 7 — Train WER: 1.7213


Training Epoch 8:  22%|██▏       | 80/357 [03:17<11:15,  2.44s/it]

Epoch 8 Step 80 — Avg Train Loss: 13.0587


Training Epoch 8:  45%|████▍     | 160/357 [06:36<08:01,  2.44s/it]

Epoch 8 Step 160 — Avg Train Loss: 12.4742


Training Epoch 8:  67%|██████▋   | 240/357 [09:56<04:52,  2.50s/it]

Epoch 8 Step 240 — Avg Train Loss: 12.8661


Training Epoch 8:  90%|████████▉ | 320/357 [13:15<01:30,  2.45s/it]

Epoch 8 Step 320 — Avg Train Loss: 12.5937


Training Epoch 8: 100%|██████████| 357/357 [14:46<00:00,  2.48s/it]
Validation Epoch 8: 100%|██████████| 338/338 [02:05<00:00,  2.68it/s]


Epoch 8 — Validation Loss: 21.8965, Validation WER: 1.2674


Train WER Epoch 8: 100%|██████████| 357/357 [02:35<00:00,  2.30it/s]


Epoch 8 — Train WER: 1.2268


Training Epoch 9:  22%|██▏       | 80/357 [03:17<11:20,  2.46s/it]

Epoch 9 Step 80 — Avg Train Loss: 12.0311


Training Epoch 9:  45%|████▍     | 160/357 [06:36<08:07,  2.47s/it]

Epoch 9 Step 160 — Avg Train Loss: 13.1365


Training Epoch 9:  67%|██████▋   | 240/357 [09:54<05:00,  2.57s/it]

Epoch 9 Step 240 — Avg Train Loss: 11.7360


Training Epoch 9:  90%|████████▉ | 320/357 [13:14<01:35,  2.57s/it]

Epoch 9 Step 320 — Avg Train Loss: 11.8303


Training Epoch 9: 100%|██████████| 357/357 [14:46<00:00,  2.48s/it]
Validation Epoch 9: 100%|██████████| 338/338 [02:06<00:00,  2.68it/s]


Epoch 9 — Validation Loss: 20.4120, Validation WER: 1.0960


Train WER Epoch 9: 100%|██████████| 357/357 [02:34<00:00,  2.30it/s]


Epoch 9 — Train WER: 1.0498


Training Epoch 10:  22%|██▏       | 80/357 [03:17<11:35,  2.51s/it]

Epoch 10 Step 80 — Avg Train Loss: 12.2452


Training Epoch 10:  45%|████▍     | 160/357 [06:36<08:20,  2.54s/it]

Epoch 10 Step 160 — Avg Train Loss: 11.1371


Training Epoch 10:  67%|██████▋   | 240/357 [09:57<04:51,  2.49s/it]

Epoch 10 Step 240 — Avg Train Loss: 11.5236


Training Epoch 10:  90%|████████▉ | 320/357 [13:17<01:32,  2.50s/it]

Epoch 10 Step 320 — Avg Train Loss: 10.9838


Training Epoch 10: 100%|██████████| 357/357 [14:47<00:00,  2.49s/it]
Validation Epoch 10: 100%|██████████| 338/338 [02:05<00:00,  2.69it/s]


Epoch 10 — Validation Loss: 18.8146, Validation WER: 1.0366


Train WER Epoch 10: 100%|██████████| 357/357 [02:34<00:00,  2.31it/s]


Epoch 10 — Train WER: 1.0082


Training Epoch 11:  22%|██▏       | 80/357 [03:17<11:39,  2.52s/it]

Epoch 11 Step 80 — Avg Train Loss: 10.7872


Training Epoch 11:  45%|████▍     | 160/357 [06:36<08:07,  2.47s/it]

Epoch 11 Step 160 — Avg Train Loss: 10.7483


Training Epoch 11:  67%|██████▋   | 240/357 [09:56<04:50,  2.48s/it]

Epoch 11 Step 240 — Avg Train Loss: 10.5533


Training Epoch 11:  90%|████████▉ | 320/357 [13:16<01:29,  2.43s/it]

Epoch 11 Step 320 — Avg Train Loss: 10.6242


Training Epoch 11: 100%|██████████| 357/357 [14:46<00:00,  2.48s/it]
Validation Epoch 11: 100%|██████████| 338/338 [02:05<00:00,  2.69it/s]


Epoch 11 — Validation Loss: 17.1493, Validation WER: 1.0177


Train WER Epoch 11: 100%|██████████| 357/357 [02:35<00:00,  2.30it/s]


Epoch 11 — Train WER: 1.0034


Training Epoch 12:  22%|██▏       | 80/357 [03:17<11:33,  2.50s/it]

Epoch 12 Step 80 — Avg Train Loss: 10.4395


Training Epoch 12:  45%|████▍     | 160/357 [06:38<08:14,  2.51s/it]

Epoch 12 Step 160 — Avg Train Loss: 10.0001


Training Epoch 12:  67%|██████▋   | 240/357 [09:57<04:49,  2.47s/it]

Epoch 12 Step 240 — Avg Train Loss: 10.2964


Training Epoch 12:  90%|████████▉ | 320/357 [13:16<01:32,  2.49s/it]

Epoch 12 Step 320 — Avg Train Loss: 9.2444


Training Epoch 12: 100%|██████████| 357/357 [14:48<00:00,  2.49s/it]
Validation Epoch 12: 100%|██████████| 338/338 [02:05<00:00,  2.69it/s]


Epoch 12 — Validation Loss: 15.4404, Validation WER: 1.0087


Train WER Epoch 12: 100%|██████████| 357/357 [02:34<00:00,  2.31it/s]


Epoch 12 — Train WER: 1.0031


Training Epoch 13:  22%|██▏       | 80/357 [03:18<11:11,  2.42s/it]

Epoch 13 Step 80 — Avg Train Loss: 9.4400


Training Epoch 13:  45%|████▍     | 160/357 [06:37<08:20,  2.54s/it]

Epoch 13 Step 160 — Avg Train Loss: 9.1153


Training Epoch 13:  67%|██████▋   | 240/357 [09:57<04:54,  2.52s/it]

Epoch 13 Step 240 — Avg Train Loss: 9.1314


Training Epoch 13:  90%|████████▉ | 320/357 [13:15<01:31,  2.47s/it]

Epoch 13 Step 320 — Avg Train Loss: 9.4863


Training Epoch 13: 100%|██████████| 357/357 [14:47<00:00,  2.49s/it]
Validation Epoch 13: 100%|██████████| 338/338 [02:06<00:00,  2.68it/s]


Epoch 13 — Validation Loss: 13.8410, Validation WER: 1.0047


Train WER Epoch 13: 100%|██████████| 357/357 [02:35<00:00,  2.30it/s]


Epoch 13 — Train WER: 1.0021


Training Epoch 14:  22%|██▏       | 80/357 [03:17<11:21,  2.46s/it]

Epoch 14 Step 80 — Avg Train Loss: 9.0048


Training Epoch 14:  45%|████▍     | 160/357 [06:37<08:15,  2.51s/it]

Epoch 14 Step 160 — Avg Train Loss: 8.7225


Training Epoch 14:  67%|██████▋   | 240/357 [09:56<04:50,  2.48s/it]

Epoch 14 Step 240 — Avg Train Loss: 8.5487


Training Epoch 14:  90%|████████▉ | 320/357 [13:15<01:32,  2.49s/it]

Epoch 14 Step 320 — Avg Train Loss: 7.8025


Training Epoch 14: 100%|██████████| 357/357 [14:47<00:00,  2.49s/it]
Validation Epoch 14: 100%|██████████| 338/338 [02:06<00:00,  2.68it/s]


Epoch 14 — Validation Loss: 12.3990, Validation WER: 1.0028


Train WER Epoch 14: 100%|██████████| 357/357 [02:34<00:00,  2.31it/s]


Epoch 14 — Train WER: 1.0013


Training Epoch 15:  22%|██▏       | 80/357 [03:17<11:25,  2.47s/it]

Epoch 15 Step 80 — Avg Train Loss: 8.3941


Training Epoch 15:  45%|████▍     | 160/357 [06:37<08:01,  2.45s/it]

Epoch 15 Step 160 — Avg Train Loss: 8.2616


Training Epoch 15:  67%|██████▋   | 240/357 [09:55<04:45,  2.44s/it]

Epoch 15 Step 240 — Avg Train Loss: 7.7624


Training Epoch 15:  90%|████████▉ | 320/357 [13:15<01:34,  2.55s/it]

Epoch 15 Step 320 — Avg Train Loss: 7.2436


Training Epoch 15: 100%|██████████| 357/357 [14:47<00:00,  2.49s/it]
Validation Epoch 15: 100%|██████████| 338/338 [02:06<00:00,  2.67it/s]


Epoch 15 — Validation Loss: 11.1754, Validation WER: 1.0017


Train WER Epoch 15: 100%|██████████| 357/357 [02:35<00:00,  2.29it/s]


Epoch 15 — Train WER: 1.0008


Training Epoch 16:  22%|██▏       | 80/357 [03:18<11:39,  2.53s/it]

Epoch 16 Step 80 — Avg Train Loss: 7.5078


Training Epoch 16:  45%|████▍     | 160/357 [06:37<08:13,  2.50s/it]

Epoch 16 Step 160 — Avg Train Loss: 7.4432


Training Epoch 16:  67%|██████▋   | 240/357 [09:57<04:53,  2.51s/it]

Epoch 16 Step 240 — Avg Train Loss: 7.2631


Training Epoch 16:  90%|████████▉ | 320/357 [13:17<01:32,  2.51s/it]

Epoch 16 Step 320 — Avg Train Loss: 6.9389


Training Epoch 16: 100%|██████████| 357/357 [14:49<00:00,  2.49s/it]
Validation Epoch 16: 100%|██████████| 338/338 [02:06<00:00,  2.68it/s]


Epoch 16 — Validation Loss: 10.1799, Validation WER: 1.0009


Train WER Epoch 16: 100%|██████████| 357/357 [02:34<00:00,  2.31it/s]


Epoch 16 — Train WER: 1.0005


Training Epoch 17:  22%|██▏       | 80/357 [03:17<11:24,  2.47s/it]

Epoch 17 Step 80 — Avg Train Loss: 6.9628


Training Epoch 17:  45%|████▍     | 160/357 [06:38<08:14,  2.51s/it]

Epoch 17 Step 160 — Avg Train Loss: 6.5206


Training Epoch 17:  67%|██████▋   | 240/357 [09:58<04:56,  2.54s/it]

Epoch 17 Step 240 — Avg Train Loss: 6.7502


Training Epoch 17:  90%|████████▉ | 320/357 [13:17<01:30,  2.45s/it]

Epoch 17 Step 320 — Avg Train Loss: 6.8682


Training Epoch 17: 100%|██████████| 357/357 [14:48<00:00,  2.49s/it]
Validation Epoch 17: 100%|██████████| 338/338 [02:06<00:00,  2.68it/s]


Epoch 17 — Validation Loss: 9.3787, Validation WER: 1.0008


Train WER Epoch 17: 100%|██████████| 357/357 [02:35<00:00,  2.30it/s]


Epoch 17 — Train WER: 1.0004


Training Epoch 18:  22%|██▏       | 80/357 [03:19<11:39,  2.52s/it]

Epoch 18 Step 80 — Avg Train Loss: 6.5036


Training Epoch 18:  45%|████▍     | 160/357 [06:38<08:21,  2.55s/it]

Epoch 18 Step 160 — Avg Train Loss: 6.1177


Training Epoch 18:  67%|██████▋   | 240/357 [09:57<04:48,  2.47s/it]

Epoch 18 Step 240 — Avg Train Loss: 6.5801


Training Epoch 18:  90%|████████▉ | 320/357 [13:15<01:35,  2.57s/it]

Epoch 18 Step 320 — Avg Train Loss: 6.1846


Training Epoch 18: 100%|██████████| 357/357 [14:46<00:00,  2.48s/it]
Validation Epoch 18: 100%|██████████| 338/338 [02:06<00:00,  2.68it/s]


Epoch 18 — Validation Loss: 8.7441, Validation WER: 1.0006


Train WER Epoch 18: 100%|██████████| 357/357 [02:35<00:00,  2.30it/s]


Epoch 18 — Train WER: 1.0002


Training Epoch 19:  22%|██▏       | 80/357 [03:16<11:26,  2.48s/it]

Epoch 19 Step 80 — Avg Train Loss: 6.5250


Training Epoch 19:  45%|████▍     | 160/357 [06:36<08:11,  2.49s/it]

Epoch 19 Step 160 — Avg Train Loss: 5.9164


Training Epoch 19:  67%|██████▋   | 240/357 [09:55<04:44,  2.43s/it]

Epoch 19 Step 240 — Avg Train Loss: 5.7850


Training Epoch 19:  90%|████████▉ | 320/357 [13:14<01:34,  2.55s/it]

Epoch 19 Step 320 — Avg Train Loss: 5.7603


Training Epoch 19: 100%|██████████| 357/357 [14:46<00:00,  2.48s/it]
Validation Epoch 19: 100%|██████████| 338/338 [02:06<00:00,  2.68it/s]


Epoch 19 — Validation Loss: 8.2456, Validation WER: 1.0004


Train WER Epoch 19: 100%|██████████| 357/357 [02:35<00:00,  2.30it/s]


Epoch 19 — Train WER: 1.0000


Training Epoch 20:  22%|██▏       | 80/357 [03:16<11:09,  2.42s/it]

Epoch 20 Step 80 — Avg Train Loss: 5.9994


Training Epoch 20:  45%|████▍     | 160/357 [06:37<08:08,  2.48s/it]

Epoch 20 Step 160 — Avg Train Loss: 5.8604


Training Epoch 20:  67%|██████▋   | 240/357 [09:56<04:43,  2.42s/it]

Epoch 20 Step 240 — Avg Train Loss: 5.7194


Training Epoch 20:  90%|████████▉ | 320/357 [13:15<01:34,  2.55s/it]

Epoch 20 Step 320 — Avg Train Loss: 5.5785


Training Epoch 20: 100%|██████████| 357/357 [14:46<00:00,  2.48s/it]
Validation Epoch 20: 100%|██████████| 338/338 [02:06<00:00,  2.68it/s]


Epoch 20 — Validation Loss: 7.8616, Validation WER: 1.0005


Train WER Epoch 20: 100%|██████████| 357/357 [02:34<00:00,  2.30it/s]


Epoch 20 — Train WER: 1.0001


Training Epoch 21:  22%|██▏       | 80/357 [03:16<11:13,  2.43s/it]

Epoch 21 Step 80 — Avg Train Loss: 5.8653


Training Epoch 21:  45%|████▍     | 160/357 [06:34<08:13,  2.51s/it]

Epoch 21 Step 160 — Avg Train Loss: 5.5699


Training Epoch 21:  67%|██████▋   | 240/357 [09:55<04:52,  2.50s/it]

Epoch 21 Step 240 — Avg Train Loss: 5.5942


Training Epoch 21:  90%|████████▉ | 320/357 [13:14<01:31,  2.48s/it]

Epoch 21 Step 320 — Avg Train Loss: 5.3166


Training Epoch 21: 100%|██████████| 357/357 [14:46<00:00,  2.48s/it]
Validation Epoch 21: 100%|██████████| 338/338 [02:05<00:00,  2.69it/s]


Epoch 21 — Validation Loss: 7.5716, Validation WER: 1.0006


Train WER Epoch 21: 100%|██████████| 357/357 [02:34<00:00,  2.30it/s]


Epoch 21 — Train WER: 1.0000


Training Epoch 22:  22%|██▏       | 80/357 [03:18<11:39,  2.53s/it]

Epoch 22 Step 80 — Avg Train Loss: 5.5223


Training Epoch 22:  45%|████▍     | 160/357 [06:38<08:10,  2.49s/it]

Epoch 22 Step 160 — Avg Train Loss: 5.0463


Training Epoch 22:  67%|██████▋   | 240/357 [09:58<04:49,  2.48s/it]

Epoch 22 Step 240 — Avg Train Loss: 5.3052


Training Epoch 22:  90%|████████▉ | 320/357 [13:16<01:30,  2.44s/it]

Epoch 22 Step 320 — Avg Train Loss: 5.4973


Training Epoch 22: 100%|██████████| 357/357 [14:46<00:00,  2.48s/it]
Validation Epoch 22: 100%|██████████| 338/338 [02:06<00:00,  2.67it/s]


Epoch 22 — Validation Loss: 7.3454, Validation WER: 1.0001


Train WER Epoch 22: 100%|██████████| 357/357 [02:35<00:00,  2.30it/s]


Epoch 22 — Train WER: 1.0000


Training Epoch 23:  22%|██▏       | 80/357 [03:17<11:24,  2.47s/it]

Epoch 23 Step 80 — Avg Train Loss: 5.2462


Training Epoch 23:  45%|████▍     | 160/357 [06:36<08:07,  2.47s/it]

Epoch 23 Step 160 — Avg Train Loss: 5.2429


Training Epoch 23:  67%|██████▋   | 240/357 [09:54<04:54,  2.52s/it]

Epoch 23 Step 240 — Avg Train Loss: 5.2355


Training Epoch 23:  90%|████████▉ | 320/357 [13:16<01:30,  2.44s/it]

Epoch 23 Step 320 — Avg Train Loss: 5.1909


Training Epoch 23: 100%|██████████| 357/357 [14:47<00:00,  2.49s/it]
Validation Epoch 23: 100%|██████████| 338/338 [02:06<00:00,  2.67it/s]


Epoch 23 — Validation Loss: 7.1521, Validation WER: 1.0003


Train WER Epoch 23: 100%|██████████| 357/357 [02:35<00:00,  2.29it/s]


Epoch 23 — Train WER: 1.0000


Training Epoch 24:  22%|██▏       | 80/357 [03:19<11:32,  2.50s/it]

Epoch 24 Step 80 — Avg Train Loss: 5.1532


Training Epoch 24:  45%|████▍     | 160/357 [06:38<08:10,  2.49s/it]

Epoch 24 Step 160 — Avg Train Loss: 5.2172


Training Epoch 24:  67%|██████▋   | 240/357 [09:59<04:48,  2.47s/it]

Epoch 24 Step 240 — Avg Train Loss: 4.9757


Training Epoch 24:  90%|████████▉ | 320/357 [13:18<01:32,  2.49s/it]

Epoch 24 Step 320 — Avg Train Loss: 4.8859


Training Epoch 24: 100%|██████████| 357/357 [14:49<00:00,  2.49s/it]
Validation Epoch 24: 100%|██████████| 338/338 [02:06<00:00,  2.67it/s]


Epoch 24 — Validation Loss: 6.9523, Validation WER: 1.0002


Train WER Epoch 24: 100%|██████████| 357/357 [02:35<00:00,  2.30it/s]


Epoch 24 — Train WER: 1.0000


Training Epoch 25:  22%|██▏       | 80/357 [03:15<11:14,  2.44s/it]

Epoch 25 Step 80 — Avg Train Loss: 4.9456


Training Epoch 25:  45%|████▍     | 160/357 [06:35<08:08,  2.48s/it]

Epoch 25 Step 160 — Avg Train Loss: 4.9539


Training Epoch 25:  67%|██████▋   | 240/357 [09:53<04:52,  2.50s/it]

Epoch 25 Step 240 — Avg Train Loss: 4.8168


Training Epoch 25:  90%|████████▉ | 320/357 [13:13<01:32,  2.50s/it]

Epoch 25 Step 320 — Avg Train Loss: 4.5173


Training Epoch 25: 100%|██████████| 357/357 [14:45<00:00,  2.48s/it]
Validation Epoch 25: 100%|██████████| 338/338 [02:06<00:00,  2.67it/s]


Epoch 25 — Validation Loss: 6.5994, Validation WER: 1.0000


Train WER Epoch 25: 100%|██████████| 357/357 [02:35<00:00,  2.29it/s]


Epoch 25 — Train WER: 1.0000


Training Epoch 26:  22%|██▏       | 80/357 [03:17<11:20,  2.46s/it]

Epoch 26 Step 80 — Avg Train Loss: 4.5831


Training Epoch 26:  45%|████▍     | 160/357 [06:37<08:14,  2.51s/it]

Epoch 26 Step 160 — Avg Train Loss: 4.5030


Training Epoch 26:  67%|██████▋   | 240/357 [09:54<04:39,  2.39s/it]

Epoch 26 Step 240 — Avg Train Loss: 4.5056


Training Epoch 26:  90%|████████▉ | 320/357 [13:15<01:33,  2.53s/it]

Epoch 26 Step 320 — Avg Train Loss: 4.5284


Training Epoch 26: 100%|██████████| 357/357 [14:47<00:00,  2.49s/it]
Validation Epoch 26: 100%|██████████| 338/338 [02:06<00:00,  2.67it/s]


Epoch 26 — Validation Loss: 6.1016, Validation WER: 1.0000


Train WER Epoch 26: 100%|██████████| 357/357 [02:35<00:00,  2.30it/s]


Epoch 26 — Train WER: 1.0000


Training Epoch 27:  22%|██▏       | 80/357 [03:17<11:33,  2.50s/it]

Epoch 27 Step 80 — Avg Train Loss: 4.3681


Training Epoch 27:  45%|████▍     | 160/357 [06:36<08:13,  2.50s/it]

Epoch 27 Step 160 — Avg Train Loss: 4.2579


Training Epoch 27:  67%|██████▋   | 240/357 [09:56<04:44,  2.43s/it]

Epoch 27 Step 240 — Avg Train Loss: 4.2955


Training Epoch 27:  90%|████████▉ | 320/357 [13:16<01:32,  2.49s/it]

Epoch 27 Step 320 — Avg Train Loss: 4.2759


Training Epoch 27: 100%|██████████| 357/357 [14:47<00:00,  2.49s/it]
Validation Epoch 27: 100%|██████████| 338/338 [02:06<00:00,  2.67it/s]


Epoch 27 — Validation Loss: 5.7550, Validation WER: 1.0000


Train WER Epoch 27: 100%|██████████| 357/357 [02:35<00:00,  2.29it/s]


Epoch 27 — Train WER: 1.0000


Training Epoch 28:  22%|██▏       | 80/357 [03:18<11:26,  2.48s/it]

Epoch 28 Step 80 — Avg Train Loss: 4.2035


Training Epoch 28:  45%|████▍     | 160/357 [06:37<08:10,  2.49s/it]

Epoch 28 Step 160 — Avg Train Loss: 4.1618


Training Epoch 28:  67%|██████▋   | 240/357 [09:56<04:52,  2.50s/it]

Epoch 28 Step 240 — Avg Train Loss: 4.2559


Training Epoch 28:  90%|████████▉ | 320/357 [13:14<01:29,  2.42s/it]

Epoch 28 Step 320 — Avg Train Loss: 4.1205


Training Epoch 28: 100%|██████████| 357/357 [14:46<00:00,  2.48s/it]
Validation Epoch 28: 100%|██████████| 338/338 [02:05<00:00,  2.68it/s]


Epoch 28 — Validation Loss: 5.5284, Validation WER: 1.0000


Train WER Epoch 28: 100%|██████████| 357/357 [02:35<00:00,  2.30it/s]


Epoch 28 — Train WER: 1.0000


Training Epoch 29:  22%|██▏       | 80/357 [03:17<11:41,  2.53s/it]

Epoch 29 Step 80 — Avg Train Loss: 4.2054


Training Epoch 29:  45%|████▍     | 160/357 [06:35<08:12,  2.50s/it]

Epoch 29 Step 160 — Avg Train Loss: 4.0225


Training Epoch 29:  67%|██████▋   | 240/357 [09:54<04:47,  2.45s/it]

Epoch 29 Step 240 — Avg Train Loss: 4.0529


Training Epoch 29:  90%|████████▉ | 320/357 [13:12<01:33,  2.53s/it]

Epoch 29 Step 320 — Avg Train Loss: 4.1153


Training Epoch 29: 100%|██████████| 357/357 [14:44<00:00,  2.48s/it]
Validation Epoch 29: 100%|██████████| 338/338 [02:06<00:00,  2.68it/s]


Epoch 29 — Validation Loss: 5.3880, Validation WER: 1.0000


Train WER Epoch 29: 100%|██████████| 357/357 [02:34<00:00,  2.31it/s]

Epoch 29 — Train WER: 1.0000
✅ Fine-tuning with RVQ done!





In [19]:
torch.save(model.state_dict(), "/kaggle/working/model_with_RVQ.pth")

In [20]:
val_predictions

['',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 'e',
 '',
 '',
 '',
 'e',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 'e',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 'e',
 '',
 '',
 '',
 

In [21]:
val_references

["he was in a fevered state of mind owing to the blight his wife's action threatened to cast upon his entire future<unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk>",
 'he would have to pay her the money which she would now regularly demand or there would be trouble it did not matter what he did',
 'hurstwood walked the floor mentally arranging the chief points of his situation<unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk>',
 'he also thought of his managerial position<unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><unk><u