In [1]:
import os
import gc
import torch
import evaluate
import numpy as np
from tqdm import tqdm
from dataclasses import dataclass
from typing import Any, Dict, List, Union
from torch.utils.data import DataLoader

from huggingface_hub import login
from datasets import load_from_disk, concatenate_datasets
from transformers import (
    WhisperFeatureExtractor, WhisperTokenizer, WhisperProcessor,
    WhisperForConditionalGeneration, Seq2SeqTrainer, Seq2SeqTrainingArguments
)
from peft import (
    prepare_model_for_kbit_training, LoraConfig,
    get_peft_model, PeftModel, PeftConfig
)
from transformers.models.whisper.english_normalizer import BasicTextNormalizer






In [2]:
# ------------------------------------------------------------
# 1. ENVIRONMENT & SETUP
# ------------------------------------------------------------
os.environ["HF_DATASETS_CACHE"] = "None"
login(token="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", add_to_git_credential=True)

model_name_or_path = "openai/whisper-large-v3"
task = "transcribe"
#dataset_path = "zh_health_dataset"
#sentence_counts = [1288, 416, 1843, 33, 3739, 3602, 1583, 2227, 58, 116, 177, 2574, 187, 13, 919, 276, 637, 130, 679, 666,556, 69, 516]

#data = load_from_disk(dataset_path)

In [3]:
# ------------------------------------------------------------
# 2. SPLIT DATASET BY LANGUAGE COUNTS
# ------------------------------------------------------------
def split_multilang_dataset(dataset, counts):
    start_index = 0
    train, test, eval = [], [], []
    for count in counts:
        end_index = start_index + count
        if count != 1:
            train.append(dataset.select(range(start_index, start_index + int(count * 0.8))))
            test.append(dataset.select(range(start_index + int(count * 0.8), start_index + int(count * 0.9))))
            eval.append(dataset.select(range(start_index + int(count * 0.9), start_index + count)))
        else:
            train.append(dataset.select(range(start_index, start_index + 1)))
        start_index = end_index
    return concatenate_datasets(train), concatenate_datasets(test), concatenate_datasets(eval)

#train_data, test_data, eval_data = split_multilang_dataset(data, sentence_counts)



In [4]:
# ------------------------------------------------------------
# 3. PROCESSOR & COLLATOR
# ------------------------------------------------------------
feature_extractor = WhisperFeatureExtractor.from_pretrained(model_name_or_path)
tokenizer = WhisperTokenizer.from_pretrained(model_name_or_path, task=task)
processor = WhisperProcessor.from_pretrained(model_name_or_path, task=task)

@dataclass
class DataCollatorSpeechSeq2SeqWithPadding:
    processor: Any
    def __call__(self, features):
        input_feats = [{"input_features": f["input_features"]} for f in features]
        batch = self.processor.feature_extractor.pad(input_feats, return_tensors="pt")
        label_feats = [{"input_ids": f["labels"]} for f in features]
        labels_batch = self.processor.tokenizer.pad(label_feats, return_tensors="pt")
        labels = labels_batch["input_ids"].masked_fill(labels_batch.attention_mask.ne(1), -100)
        if (labels[:, 0] == self.processor.tokenizer.bos_token_id).all().cpu().item():
            labels = labels[:, 1:]
        batch["labels"] = labels
        return batch

data_collator = DataCollatorSpeechSeq2SeqWithPadding(processor)




Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [5]:
def fine_tune_model(train_data, val_data, output_dir, lang_code):
    model = WhisperForConditionalGeneration.from_pretrained(model_name_or_path, device_map="auto")
    model.config.forced_decoder_ids = None
    model.config.suppress_tokens = []

    model = prepare_model_for_kbit_training(model)
    model.model.encoder.conv1.register_forward_hook(lambda m, i, o: o.requires_grad_(True))

    config = LoraConfig(
        r=32, lora_alpha=64, target_modules=["q_proj", "v_proj"],
        lora_dropout=0.05, bias="none"
    )
    model = get_peft_model(model, config)

    training_args = Seq2SeqTrainingArguments(
        output_dir=output_dir,
        per_device_train_batch_size=8,
        gradient_accumulation_steps=1,
        learning_rate=1e-6,
        warmup_steps=50,
        eval_steps=100,
        gradient_checkpointing=True,
        num_train_epochs=3,
        evaluation_strategy="steps",
        fp16=True,
        report_to="none",
        per_device_eval_batch_size=8,
        generation_max_length=128,
        logging_steps=1,
        lr_scheduler_type="linear",
        remove_unused_columns=False,
        label_names=["labels"],
        load_best_model_at_end=True,
        greater_is_better=False,
    )

    trainer = Seq2SeqTrainer(
        args=training_args,
        model=model,
        train_dataset=train_data,
        eval_dataset=val_data,
        data_collator=data_collator,
        tokenizer=processor.feature_extractor,
    )

    trainer.train()
    trainer.model.save_pretrained(output_dir)
    processor.save_pretrained(output_dir)
    del trainer, model
    gc.collect()
    torch.cuda.empty_cache()


In [None]:
def evaluate_from_encoded(model_path, dataset):
    from transformers import WhisperProcessor, WhisperForConditionalGeneration
    from datasets import load_from_disk
    import torch
    import jiwer
    from jiwer import wer, Compose, ToLowerCase, RemovePunctuation, RemoveMultipleSpaces, Strip
    from tqdm import tqdm

    # Normalizer
    nwer_norm = Compose([ToLowerCase(), RemovePunctuation(), RemoveMultipleSpaces(), Strip()])

    print(f"🔹 Veriset yükleniyor: {dataset_path}")
    dataset = dataset

    print(f"🔹 Model yükleniyor: {model_path}")
    processor = WhisperProcessor.from_pretrained("openai/whisper-large-v3")
    model = WhisperForConditionalGeneration.from_pretrained(model_path, device_map="auto", )
    model.eval()
    device = model.device

    all_predictions, all_references = [], []

    print("🔍 Değerlendirme başlatıldı...")
    for sample in tqdm(dataset):
        input_feats = torch.tensor(sample["input_features"], dtype=torch.float32).unsqueeze(0).to(device)

        try:
            label_ids = [token for token in sample["labels"] if token != -100]
            reference = processor.tokenizer.decode(label_ids, skip_special_tokens=True)
        except Exception as e:
            print(f"[!] Referans decode hatası: {e}")
            continue

        with torch.no_grad():
            pred_ids = model.generate(input_feats)
        prediction = processor.batch_decode(pred_ids, skip_special_tokens=True)[0]

        all_predictions.append(prediction)
        all_references.append(reference)

    overall_wer = wer(all_references, all_predictions)
    normalized_references = [nwer_norm(r) for r in all_references]
    normalized_predictions = [nwer_norm(p) for p in all_predictions]
    overall_nwer = wer(normalized_references, normalized_predictions)

    print(f"\n✅ WER: {overall_wer:.4f}")
    print(f"✅ Normalized WER: {overall_nwer:.4f}")

    return {
        "WER": overall_wer * 100,
        "Normalized_WER": overall_nwer * 100
    }


In [19]:
domains = ["law", "health"]
languages = ["ar", "cs", "de", "el", "en", "es", "fa", "fr", "he", "hi", "id", "it",
             "ja", "ko", "nl", "pl", "pt", "ro", "ru", "tr", "uk", "vi", "zh"]

results = []

for domain in domains:
    for lang in languages:
        print(f"\n=== ⏳ Evaluating: {lang.upper()} - {domain.upper()} ===")

        dataset_path = f"/home/enulu/Workspace/Domain_based/Dataset/{domain}/{lang}"
        try:
            dataset = load_from_disk(dataset_path)
        except Exception as e:
            print(f"{lang}-{domain} verisi yüklenemedi: {e}")
            continue

        # Split dataset
        total = len(dataset)
        train_data = dataset.select(range(0, int(total * 0.8)))
        eval_data = dataset.select(range(int(total * 0.8), int(total * 0.9)))
        test_data = dataset.select(range(int(total * 0.9), total))



        model_path = f"/home/enulu/Workspace/Domain_based/fine-tuned_modeller/{domain}/{lang}"  # veya "openai/whisper-large-v3"

        try:
            metrics = evaluate_from_encoded(model_path, test_data)
        except Exception as e:
            print(f"[ERROR] {lang}-{domain} değerlendirme başarısız: {e}")
            continue

        results.append({
            "domain": domain,
            "lang": lang,
            **metrics
        })



=== ⏳ Evaluating: AR - LAW ===
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/ar
🔹 Model yükleniyor: /home/enulu/Workspace/Domain_based/fine-tuned_modeller/law/ar


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


KeyboardInterrupt: 

In [20]:
original_results = []

for domain in domains:
    for lang in languages:
        print(f"\n🌍 Evaluating ORIGINAL Whisper: {lang.upper()} - {domain.upper()}")

        dataset_path = f"/home/enulu/Workspace/Domain_based/Dataset/{domain}/{lang}"
        model_path = "openai/whisper-large-v3"  # Orijinal Hugging Face modeli

        try:
            dataset = load_from_disk(dataset_path)
            dataset = dataset.shuffle(seed=42)
            total = len(dataset)
            test_data = dataset.select(range(int(0.9 * total), total))



            metrics = evaluate_from_encoded(model_path, test_data)

        except Exception as e:
            print(f"[❌ ERROR] {lang}-{domain} (original): {e}")
            continue

        original_results.append({
            "domain": domain,
            "lang": lang,
            **metrics
        })

# CSV olarak orijinal sonuçları kaydet
df_orig = pd.DataFrame(original_results)
df_orig.to_csv("original_whisper_eval_results.csv", index=False)



🌍 Evaluating ORIGINAL Whisper: AR - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/ar
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 47/47 [00:24<00:00,  1.88it/s]



✅ WER: 0.5282
✅ Normalized WER: 0.4488

🌍 Evaluating ORIGINAL Whisper: CS - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/cs
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 37/37 [00:24<00:00,  1.53it/s]



✅ WER: 0.1149
✅ Normalized WER: 0.0743

🌍 Evaluating ORIGINAL Whisper: DE - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/de
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 255/255 [02:22<00:00,  1.80it/s]



✅ WER: 0.0401
✅ Normalized WER: 0.0215

🌍 Evaluating ORIGINAL Whisper: EL - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/el
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 2/2 [00:01<00:00,  1.64it/s]



✅ WER: 0.1333
✅ Normalized WER: 0.0000

🌍 Evaluating ORIGINAL Whisper: EN - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/en
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 387/387 [03:06<00:00,  2.08it/s]



✅ WER: 0.1041
✅ Normalized WER: 0.0614

🌍 Evaluating ORIGINAL Whisper: ES - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/es
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 273/273 [02:27<00:00,  1.84it/s]



✅ WER: 0.0741
✅ Normalized WER: 0.0288

🌍 Evaluating ORIGINAL Whisper: FA - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/fa
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 89/89 [00:54<00:00,  1.62it/s]



✅ WER: 0.3393
✅ Normalized WER: 0.3016

🌍 Evaluating ORIGINAL Whisper: FR - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/fr
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 298/298 [02:42<00:00,  1.83it/s]



✅ WER: 0.1349
✅ Normalized WER: 0.0761

🌍 Evaluating ORIGINAL Whisper: HE - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/he
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 4/4 [00:02<00:00,  1.52it/s]



✅ WER: 0.4062
✅ Normalized WER: 0.2812

🌍 Evaluating ORIGINAL Whisper: HI - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/hi
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 5/5 [00:03<00:00,  1.37it/s]



✅ WER: 0.6286
✅ Normalized WER: 0.6286

🌍 Evaluating ORIGINAL Whisper: ID - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/id
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 9/9 [00:05<00:00,  1.57it/s]



✅ WER: 0.0900
✅ Normalized WER: 0.0700

🌍 Evaluating ORIGINAL Whisper: IT - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/it
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 198/198 [01:53<00:00,  1.74it/s]



✅ WER: 0.0883
✅ Normalized WER: 0.0431

🌍 Evaluating ORIGINAL Whisper: JA - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/ja
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 11/11 [00:06<00:00,  1.62it/s]



✅ WER: 0.8182
✅ Normalized WER: 0.2727

🌍 Evaluating ORIGINAL Whisper: KO - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/ko
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 1/1 [00:00<00:00,  2.03it/s]



✅ WER: 0.3333
✅ Normalized WER: 0.0000

🌍 Evaluating ORIGINAL Whisper: NL - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/nl
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 195/195 [01:49<00:00,  1.78it/s]



✅ WER: 0.0698
✅ Normalized WER: 0.0557

🌍 Evaluating ORIGINAL Whisper: PL - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/pl
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 36/36 [00:22<00:00,  1.57it/s]



✅ WER: 0.1616
✅ Normalized WER: 0.0279

🌍 Evaluating ORIGINAL Whisper: PT - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/pt
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 56/56 [00:29<00:00,  1.91it/s]



✅ WER: 0.2500
✅ Normalized WER: 0.1148

🌍 Evaluating ORIGINAL Whisper: RO - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/ro
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 34/34 [00:16<00:00,  2.01it/s]



✅ WER: 0.0909
✅ Normalized WER: 0.0537

🌍 Evaluating ORIGINAL Whisper: RU - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/ru
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 87/87 [00:50<00:00,  1.72it/s]



✅ WER: 0.0560
✅ Normalized WER: 0.0274

🌍 Evaluating ORIGINAL Whisper: TR - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/tr
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 26/26 [00:11<00:00,  2.18it/s]



✅ WER: 0.1333
✅ Normalized WER: 0.0963

🌍 Evaluating ORIGINAL Whisper: UK - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/uk
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 37/37 [00:20<00:00,  1.77it/s]



✅ WER: 0.2103
✅ Normalized WER: 0.1536

🌍 Evaluating ORIGINAL Whisper: VI - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/vi
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 1/1 [00:00<00:00,  2.07it/s]



✅ WER: 0.1429
✅ Normalized WER: 0.0000

🌍 Evaluating ORIGINAL Whisper: ZH - LAW
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/law/zh
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 28/28 [00:16<00:00,  1.72it/s]



✅ WER: 1.0357
✅ Normalized WER: 0.7143

🌍 Evaluating ORIGINAL Whisper: AR - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/ar
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 129/129 [01:00<00:00,  2.13it/s]



✅ WER: 0.4150
✅ Normalized WER: 0.2912

🌍 Evaluating ORIGINAL Whisper: CS - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/cs
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 42/42 [00:23<00:00,  1.76it/s]



✅ WER: 0.1049
✅ Normalized WER: 0.0839

🌍 Evaluating ORIGINAL Whisper: DE - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/de
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 185/185 [01:36<00:00,  1.91it/s]



✅ WER: 0.0561
✅ Normalized WER: 0.0376

🌍 Evaluating ORIGINAL Whisper: EL - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/el
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 4/4 [00:02<00:00,  1.54it/s]



✅ WER: 0.1471
✅ Normalized WER: 0.0294

🌍 Evaluating ORIGINAL Whisper: EN - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/en
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 374/374 [03:03<00:00,  2.03it/s]



✅ WER: 0.1944
✅ Normalized WER: 0.1721

🌍 Evaluating ORIGINAL Whisper: ES - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/es
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 361/361 [03:02<00:00,  1.98it/s]



✅ WER: 0.0520
✅ Normalized WER: 0.0236

🌍 Evaluating ORIGINAL Whisper: FA - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/fa
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 159/159 [01:27<00:00,  1.81it/s]



✅ WER: 0.3266
✅ Normalized WER: 0.2911

🌍 Evaluating ORIGINAL Whisper: FR - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/fr
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 223/223 [01:55<00:00,  1.93it/s]



✅ WER: 0.0996
✅ Normalized WER: 0.0608

🌍 Evaluating ORIGINAL Whisper: HE - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/he
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 6/6 [00:03<00:00,  1.63it/s]



✅ WER: 0.2174
✅ Normalized WER: 0.0870

🌍 Evaluating ORIGINAL Whisper: HI - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/hi
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 12/12 [00:10<00:00,  1.15it/s]



✅ WER: 0.2336
✅ Normalized WER: 0.2150

🌍 Evaluating ORIGINAL Whisper: ID - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/id
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 18/18 [00:10<00:00,  1.71it/s]



✅ WER: 0.1062
✅ Normalized WER: 0.0750

🌍 Evaluating ORIGINAL Whisper: IT - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/it
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 258/258 [02:24<00:00,  1.79it/s]



✅ WER: 0.0612
✅ Normalized WER: 0.0357

🌍 Evaluating ORIGINAL Whisper: JA - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/ja
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 19/19 [00:12<00:00,  1.51it/s]



✅ WER: 1.0000
✅ Normalized WER: 0.6316

🌍 Evaluating ORIGINAL Whisper: KO - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/ko
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 2/2 [00:01<00:00,  1.83it/s]



✅ WER: 0.2000
✅ Normalized WER: 0.2000

🌍 Evaluating ORIGINAL Whisper: NL - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/nl
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 92/92 [00:51<00:00,  1.77it/s]



✅ WER: 0.0680
✅ Normalized WER: 0.0592

🌍 Evaluating ORIGINAL Whisper: PL - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/pl
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 28/28 [00:16<00:00,  1.66it/s]



✅ WER: 0.1783
✅ Normalized WER: 0.0306

🌍 Evaluating ORIGINAL Whisper: PT - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/pt
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 64/64 [00:31<00:00,  2.00it/s]



✅ WER: 0.1937
✅ Normalized WER: 0.0631

🌍 Evaluating ORIGINAL Whisper: RO - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/ro
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 13/13 [00:07<00:00,  1.69it/s]



✅ WER: 0.2143
✅ Normalized WER: 0.1531

🌍 Evaluating ORIGINAL Whisper: RU - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/ru
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 68/68 [00:39<00:00,  1.73it/s]



✅ WER: 0.0794
✅ Normalized WER: 0.0468

🌍 Evaluating ORIGINAL Whisper: TR - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/tr
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 67/67 [00:33<00:00,  1.99it/s]



✅ WER: 0.1716
✅ Normalized WER: 0.1099

🌍 Evaluating ORIGINAL Whisper: UK - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/uk
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 56/56 [00:33<00:00,  1.67it/s]



✅ WER: 0.2129
✅ Normalized WER: 0.1639

🌍 Evaluating ORIGINAL Whisper: VI - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/vi
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 7/7 [00:03<00:00,  1.82it/s]



✅ WER: 0.1343
✅ Normalized WER: 0.0149

🌍 Evaluating ORIGINAL Whisper: ZH - HEALTH
🔹 Veriset yükleniyor: /home/enulu/Workspace/Domain_based/Dataset/health/zh
🔹 Model yükleniyor: openai/whisper-large-v3


Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


🔍 Değerlendirme başlatıldı...


100%|██████████| 52/52 [00:31<00:00,  1.67it/s]


✅ WER: 0.9245
✅ Normalized WER: 0.7170





In [9]:
import pandas as pd
df = pd.DataFrame(results)
df.to_csv("eval_result_latest.csv", index=False)


In [None]:
# ------------------------------------------------------------
# 6. EVALUATION
# ------------------------------------------------------------
def evaluate_model(model_id, dataset, processor, lang_code, task="transcribe"):
    """
    Evaluate a fine-tuned Whisper model with WER and normalized WER.

    Args:
        model_id (str): Path to the fine-tuned model.
        dataset (Dataset): Hugging Face dataset to evaluate.
        processor: WhisperProcessor instance.
        lang_code (str): ISO language code (e.g., "tr", "en", "zh").
        task (str): "transcribe" or "translate".

    Returns:
        dict: Dictionary with WER and normalized WER.
    """
    peft_config = PeftConfig.from_pretrained(model_id)
    model = WhisperForConditionalGeneration.from_pretrained(
        peft_config.base_model_name_or_path, device_map="auto"
    )
    model = PeftModel.from_pretrained(model, model_id)
    model.eval()

    normalizer = BasicTextNormalizer()
    metric = evaluate.load("wer")
    dataloader = DataLoader(dataset, batch_size=5, collate_fn=data_collator)

    # Try assigning forced_decoder_ids based on language
    try:
        forced_decoder_ids = processor.get_decoder_prompt_ids(language=lang_code, task=task)
    except Exception as e:
        print(f"[WARNING] {lang_code} için forced_decoder_ids kullanılamadı: {e}")
        forced_decoder_ids = None

    preds, refs, norm_preds, norm_refs = [], [], [], []

    for batch in tqdm(dataloader, desc=f"Evaluating {lang_code}"):
        with torch.no_grad(), torch.cuda.amp.autocast():
            input_features = batch["input_features"].to("cuda")

            generated = model.generate(
                input_features=input_features,
                forced_decoder_ids=forced_decoder_ids,
                max_new_tokens=255
            )

            labels = batch["labels"].to("cuda")

            decoded_preds = processor.tokenizer.batch_decode(
                generated.cpu().numpy(), skip_special_tokens=True)
            decoded_labels = processor.tokenizer.batch_decode(
                np.where(labels.cpu().numpy() != -100, labels.cpu().numpy(), processor.tokenizer.pad_token_id),
                skip_special_tokens=True
            )

            preds.extend(decoded_preds)
            refs.extend(decoded_labels)
            norm_preds.extend([normalizer(p).strip() for p in decoded_preds])
            norm_refs.extend([normalizer(r).strip() for r in decoded_labels])

        gc.collect()

    wer = 100 * metric.compute(predictions=preds, references=refs)
    norm_wer = 100 * metric.compute(predictions=norm_preds, references=norm_refs)

    print(f"[RESULT] {lang_code} WER: {wer:.2f}, Normalized WER: {norm_wer:.2f}")
    return {"WER": wer, "Normalized_WER": norm_wer}



In [None]:
"""domains = ["law", "health"]
languages = ["ar", "cs", "de", "el", "en", "es", "fa", "fr", "he", "hi", "id", "it",
             "ja", "ko", "nl", "pl", "pt", "ro", "ru", "tr", "uk", "vi", "zh"]

results = []

for domain in domains:
    for lang in languages:
        # Load dataset
        dataset_path = f"/home/enulu/Workspace/Domain_based/Dataset/{domain}/{lang}"
        try:
            dataset = load_from_disk(dataset_path)
        except Exception as e:
            print(f"{lang}-{domain} verisi yüklenemedi: {e}")
            continue

        # Split dataset
        total = len(dataset)
        train_data = dataset.select(range(0, int(total * 0.8)))
        eval_data = dataset.select(range(int(total * 0.8), int(total * 0.9)))
        test_data = dataset.select(range(int(total * 0.9), total))

        # Fine-tune model (veya önceden eğitilmiş modeli yükle)
        model_path = f"/home/enulu/Workspace/Domain_based/fine-tuned_modeller/fine-tuned-{domain}"
        if not os.path.exists(model_path):
            fine_tune_model(train_data, eval_data, model_path, lang)

        # Evaluate
        metrics = evaluate_model(model_path, test_data, processor, lang)
        print(f"{lang}-{domain} WER:", metrics)
        results.append({"lang": lang, "domain": domain, **metrics})
"""

In [None]:
domains = ["law", "health" ]

languages = [ "ar", "cs", "de", "el", "en","es", "fa", "fr", "he", "hi", "id", "it",
             "ja" "nl", "pl", "pt", "ro", "ru", "tr", "uk", "vi", "zh"]

results = []
log_file_path = "domain_language_lora_results.txt"

# Eğer varsa eski log dosyasını temizle
if os.path.exists(log_file_path):
    os.remove(log_file_path)

for domain in domains:
    for lang in languages:

        print(f"\n=== {domain.upper()} - {lang.upper()} başlatılıyor ===")

        dataset_path = f"/home/enulu/Workspace/Domain_based/Dataset/{domain}/{lang}"
        try:
            dataset = load_from_disk(dataset_path)
        except Exception as e:
            print(f"{lang}-{domain} verisi yüklenemedi: {e}")
            continue

        total = len(dataset)
        train_data = dataset.select(range(0, int(total * 0.8)))
        val_data   = dataset.select(range(int(total * 0.8), int(total * 0.9)))
        test_data  = dataset.select(range(int(total * 0.9), total))

        output_dir = f"/home/enulu/Workspace/Domain_based/fine-tuned_modeller/{domain}/{lang}"

# Modelin eğitilip eğitilmediğini kontrol et (örneğin: adapter_model.bin varsa eğitilmiştir)
        model_file = os.path.join(output_dir, "vocab.json")  # veya "pytorch_model.bin"

        if not os.path.exists(model_file):
            print(f"Model bulunamadı, eğitim başlatılıyor: {output_dir}")
            with open(log_file_path, "a") as f:
                f.write(f"{lang}-{domain}: Eğitim başlatılıyor\n")
            fine_tune_model(train_data, val_data, output_dir, lang)
        else:
            print(f"Model zaten mevcut, eğitim atlanıyor: {output_dir}")
            with open(log_file_path, "a") as f:
                f.write(f"{lang}-{domain}: Eğitim atlandı, model mevcut\n")


        # Değerlendirme
        metrics = evaluate_model(output_dir, test_data, processor, lang_code=lang)
        results.append({
            "domain": domain,
            "lang": lang,
            **metrics
        })
                # Anlık log yaz
        with open(log_file_path, "a") as f:
            metric_line = f"{domain}-{lang}: " + ", ".join([f"{k}={v:.4f}" for k, v in metrics.items()])
            f.write(metric_line + "\n")

# Sonuçları CSV'ye kaydet
import pandas as pd
df = pd.DataFrame(results)
df.to_csv("domain_language_lora_results.csv", index=False)


In [None]:
domains = ["law", "health" ]

languages = [ "ar", "cs", "de", "el", "en","es", "fa", "fr", "he", "hi", "id", "it",
             "ja" "nl", "pl", "pt", "ro", "ru", "tr", "uk", "vi", "zh"]

results = []
log_file_path = "domain_language_lora_full-model.txt"

# Eğer varsa eski log dosyasını temizle
if os.path.exists(log_file_path):
    os.remove(log_file_path)

for domain in domains:
    for lang in languages:

        print(f"\n=== {domain.upper()} - {lang.upper()} başlatılıyor ===")

        dataset_path = f"/home/enulu/Workspace/Domain_based/Dataset/{domain}/{lang}"
        try:
            dataset = load_from_disk(dataset_path)
        except Exception as e:
            print(f"{lang}-{domain} verisi yüklenemedi: {e}")
            continue

        total = len(dataset)
        train_data = dataset.select(range(0, int(total * 0.8)))
        val_data   = dataset.select(range(int(total * 0.8), int(total * 0.9)))
        test_data  = dataset.select(range(int(total * 0.9), total))

        output_dir = f"/home/enulu/Workspace/Domain_based/fine-tuned_modeller/fine-tuned-{domain}"

# Modelin eğitilip eğitilmediğini kontrol et (örneğin: adapter_model.bin varsa eğitilmiştir)
        model_file = os.path.join(output_dir, "adapter_config.json")  # veya "pytorch_model.bin"

        if not os.path.exists(model_file):
            print(f"Model bulunamadı, eğitim başlatılıyor: {output_dir}")
            with open(log_file_path, "a") as f:
                f.write(f"{lang}-{domain}: Eğitim başlatılıyor\n")
            fine_tune_model(train_data, val_data, output_dir, lang)
        else:
            print(f"Model zaten mevcut, eğitim atlanıyor: {output_dir}")
            with open(log_file_path, "a") as f:
                f.write(f"{lang}-{domain}: Eğitim atlandı, model mevcut\n")


        # Değerlendirme
        metrics = evaluate_model(output_dir, test_data, processor, lang_code=lang)
        results.append({
            "domain": domain,
            "lang": lang,
            **metrics
        })
                # Anlık log yaz
        with open(log_file_path, "a") as f:
            metric_line = f"{domain}-{lang}: " + ", ".join([f"{k}={v:.4f}" for k, v in metrics.items()])
            f.write(metric_line + "\n")

# Sonuçları CSV'ye kaydet
import pandas as pd
df = pd.DataFrame(results)
df.to_csv("domain_language_lora_results.csv", index=False)
