In [1]:
import io
import json
import matplotlib as mpl
import matplotlib.pyplot as plt
import mmap
import numpy
import soundfile
import torchaudio
import torch
import os
import re
import pandas as pd
from datasets import load_dataset
import ffmpeg

from collections import defaultdict
from IPython.display import Audio, display
from pathlib import Path
from pydub import AudioSegment

from seamless_communication.inference import Translator
from seamless_communication.streaming.dataloaders.s2tt import SileroVADSilenceRemover
import warnings
warnings.filterwarnings("ignore")

In [2]:
from datasets import load_dataset
from seamless_communication.inference import Translator
from jiwer import wer
from sacrebleu import corpus_bleu

In [3]:
# Initialize a Translator object with a multitask model, vocoder on the GPU.

model_name = "seamlessM4T_v2_large"
vocoder_name = "vocoder_v2" if model_name == "seamlessM4T_v2_large" else "vocoder_36langs"

translator = Translator(
    model_name,
    vocoder_name,
    device=torch.device("cuda"), # Changed from "cuda:0" to "cpu"
    dtype=torch.float16,
)

Using the cached checkpoint of seamlessM4T_v2_large. Set `force` to `True` to download again.
Using the cached tokenizer of seamlessM4T_v2_large. Set `force` to `True` to download again.
Using the cached tokenizer of seamlessM4T_v2_large. Set `force` to `True` to download again.
Using the cached tokenizer of seamlessM4T_v2_large. Set `force` to `True` to download again.
Using the cached checkpoint of vocoder_v2. Set `force` to `True` to download again.


In [4]:
from datasets import load_dataset
import torch
import torchaudio
def resample_to_16k(audio, orig_sr):
    # implement resampling logic here, e.g. torchaudio.transforms.Resample
    return torchaudio.transforms.Resample(orig_sr, 16000)(torch.tensor(audio)).numpy()

In [5]:
import whisper

# Load Whisper-Large once (outside function, so it’s not reloaded every call)
whisper_model = whisper.load_model("large-v3", device="cuda")

whisper.audio.FFMPEG_PATH = "/home/aj/Bhavna/ffmpeg_bin/ffmpeg"

In [6]:
#%pip install seamless_communication

from seamless_communication.inference import SequenceGeneratorOptions
text_opts = SequenceGeneratorOptions(
    beam_size=5
    #temperature=0.7,   # 0 for deterministic, >0 adds diversity
)

# Beam search for unit hypotheses
unit_opts = SequenceGeneratorOptions(
    beam_size=5
)

In [7]:
import string

def normalize(text):
    return text.lower().translate(str.maketrans("", "", string.punctuation)).strip()

In [8]:
def save_dataframe(df, lang, out_dir="/scratch/aj/Bhavna/bhav_venv_311/f-X-Eng-Direct/f-X-Eng-Direct-CSVs"):
    os.makedirs(out_dir, exist_ok=True)
    out_path = os.path.join(out_dir, f"{lang}_results.csv")
    df.to_csv(out_path, index=False)
    print(f"Saved results to {out_path}")

In [9]:
from indicnlp.tokenize import indic_tokenize

In [11]:
def apply_tokenizer(texts, lang_code):
    """Apply IndicNLP/Urdu tokenization for En→Indic evaluation"""
    if not texts:
        return texts
    elif lang_code in ["hi", "bn", "te", "ta", "ml", "kn", "gu", "mr", "pa", "or"]:
        return [" ".join(indic_tokenize.trivial_tokenize(t, lang=lang_code)) for t in texts]
    else:  # fallback (English etc.)
        return texts

In [20]:
def run_translation_for_language(sm4t_src_lang,fleurs_src_lang,sm4t_tgt_lang, fleurs_tgt_lang, full_tasks=True):
    """
    Run translation pipeline for one language.
    full_tasks=True → Run all 4 tasks
    full_tasks=False → Run only S2TT, T2TT
    """
    print("\n" + "="*60)
    print(f"🔹 Processing Target Language: {sm4t_tgt_lang.upper()} ({fleurs_tgt_lang})")
    print("="*60)

    # Load datasets
    src_dataset = load_dataset("google/fleurs", fleurs_src_lang, split="test")
    tgt_dataset = load_dataset("google/fleurs", fleurs_tgt_lang, split="test")

    src_by_id = {item["id"]: item for item in src_dataset}
    tgt_by_id = {item["id"]: item for item in tgt_dataset}
    common_ids = sorted(set(src_by_id.keys()) & set(tgt_by_id.keys()))

    print(f"Found {len(common_ids)} parallel sentences")

    references, hypotheses_s2tt, hypotheses_t2tt = [], [], []
    predicted_s2s, predicted_t2s = [], []
    source_texts = []

    for sentence_id in common_ids:
        src = src_by_id[sentence_id]
        tgt = tgt_by_id[sentence_id]

        src_audio = src["audio"]["array"]
        src_sr = src["audio"]["sampling_rate"]
        src_text = src["transcription"]
        tgt_text = tgt["transcription"]

        references.append([tgt_text])
        source_texts.append(src_text)

        if src_sr != 16000:
            src_audio = resample_to_16k(src_audio, src_sr)

        base_dir = "/scratch/aj/Bhavna/bhav_venv_311/f-X-Eng-Direct/input_audios_of_eng"
        lang_dir = os.path.join(base_dir, sm4t_src_lang)
        os.makedirs(lang_dir, exist_ok=True)
        audio_path = os.path.join(lang_dir, f"input_{sm4t_tgt_lang}_{sentence_id}.wav")

        torchaudio.save(audio_path, torch.tensor(src_audio).unsqueeze(0), 16000)

        # --- S2TT ---
        s2tt_out, _ = translator.predict(
            input=audio_path, task_str="s2tt", tgt_lang=sm4t_tgt_lang,text_generation_opts=text_opts
        )
        hypotheses_s2tt.append(normalize(str(s2tt_out[0])))

        # --- T2TT ---
        t2tt_out, _ = translator.predict(
            input=src_text, task_str="t2tt", src_lang=sm4t_src_lang, tgt_lang=sm4t_tgt_lang,text_generation_opts=text_opts
        )
        hypotheses_t2tt.append(normalize(str(t2tt_out[0])))

        if full_tasks:
            # --- S2ST + Whisper ASR ---
            _, s2s_audio_out = translator.predict(
                input=audio_path, task_str="s2st", tgt_lang=sm4t_tgt_lang,text_generation_opts=text_opts,unit_generation_opts=unit_opts
            )
            base_dir = "/scratch/aj/Bhavna/bhav_venv_311/f-X-Eng-Direct/s2s_outputs"
            lang_dir = os.path.join(base_dir, sm4t_src_lang)
            os.makedirs(lang_dir, exist_ok=True)
            s2s_path= os.path.join(lang_dir, f"s2s_{sm4t_tgt_lang}_{sentence_id}.wav")

            torchaudio.save(
                s2s_path,
                s2s_audio_out.audio_wavs[0][0].to(torch.float32).cpu(),
                s2s_audio_out.sample_rate,
            )

            # Use Whisper-Large for ASR
            s2s_result = whisper_model.transcribe(
                audio=s2s_path,
                task="transcribe",
                temperature=0.0,   # greedy, deterministic
                beam_size=None
            )
            predicted_s2s.append(normalize(s2s_result["text"]))

            # --- T2ST + Whisper ASR ---
            _, t2s_audio_out = translator.predict(
                input=src_text, task_str="t2st", src_lang=sm4t_src_lang, tgt_lang=sm4t_tgt_lang,text_generation_opts=text_opts,unit_generation_opts=unit_opts
            )
            base_dir = "/scratch/aj/Bhavna/bhav_venv_311/f-X-Eng-Direct/t2s_outputs"
            lang_dir = os.path.join(base_dir, sm4t_src_lang)
            os.makedirs(lang_dir, exist_ok=True)
            t2s_path = os.path.join(lang_dir, f"t2s_{sm4t_tgt_lang}_{sentence_id}.wav")

            torchaudio.save(
                t2s_path,
                t2s_audio_out.audio_wavs[0][0].to(torch.float32).cpu(),
                t2s_audio_out.sample_rate,
            )

            # Use Whisper-Large for ASR
            t2s_result = whisper_model.transcribe(
                audio=t2s_path,
                task="transcribe",
                temperature=0.0,
                beam_size=None
            )
            predicted_t2s.append(normalize(t2s_result["text"]))

    # Build dataframe
    data = {
        "source_text": source_texts,
        "reference_text": [r[0] for r in references],
        "S2TT_prediction": hypotheses_s2tt,
        "T2TT_prediction": hypotheses_t2tt
    }
    if full_tasks:
        data["S2ST_ASR"] = predicted_s2s
        data["T2ST_ASR"] = predicted_t2s

    df = pd.DataFrame(data)

    save_dataframe(df, sm4t_tgt_lang)

    return references, hypotheses_s2tt, hypotheses_t2tt, predicted_s2s, predicted_t2s

In [21]:
from sacrebleu import corpus_chrf

def compute_metrics(src_lang, tgt_lang, references, hypotheses_s2tt, hypotheses_t2tt, predicted_s2s, predicted_t2s):
    print(f"\nComputing metrics for {src_lang.upper()} → {tgt_lang.upper()}")

    # Normalize hyps
    hypotheses_s2tt = [str(h) for h in hypotheses_s2tt]
    hypotheses_t2tt = [str(h) for h in hypotheses_t2tt]
    predicted_s2s   = [str(h) for h in predicted_s2s]
    predicted_t2s   = [str(h) for h in predicted_t2s]

    # Normalize refs
    references_norm = [[str(r) for r in refset] for refset in references]
    multi_references = list(zip(*references_norm))

    # ---- Tokenization switch ----
    if src_lang == "en":  
        # Eng → Indic → tokenize target
        hypotheses_s2tt = apply_tokenizer(hypotheses_s2tt, tgt_lang)
        hypotheses_t2tt = apply_tokenizer(hypotheses_t2tt, tgt_lang)
        predicted_s2s   = apply_tokenizer(predicted_s2s, tgt_lang)
        predicted_t2s   = apply_tokenizer(predicted_t2s, tgt_lang)
        tokenized_refs = [apply_tokenizer(refs, tgt_lang) for refs in multi_references]
    else:
        # Indic → En → use sacreBLEU default tokenizer
        tokenized_refs = multi_references

    tokenized_refs = list(zip(*tokenized_refs))

    metrics = {}

    # --------------------
    # S2TT
    # --------------------
    metrics["S2TT_BLEU"]      = corpus_bleu(hypotheses_s2tt, tokenized_refs).score
    metrics["S2TT_chrF++"]    = corpus_chrf(hypotheses_s2tt, tokenized_refs).score
    metrics["S2TT_chrF2++"]   = corpus_chrf(hypotheses_s2tt, tokenized_refs, beta=2).score
    metrics["S2TT_WER"]       = sum(wer(ref[0], hyp) for ref, hyp in zip(references_norm, hypotheses_s2tt)) / len(references_norm)

    # --------------------
    # T2TT
    # --------------------
    metrics["T2TT_BLEU"]      = corpus_bleu(hypotheses_t2tt, tokenized_refs).score
    metrics["T2TT_chrF++"]    = corpus_chrf(hypotheses_t2tt, tokenized_refs).score
    metrics["T2TT_chrF2++"]   = corpus_chrf(hypotheses_t2tt, tokenized_refs, beta=2).score
    metrics["T2TT_WER"]       = sum(wer(ref[0], hyp) for ref, hyp in zip(references_norm, hypotheses_t2tt)) / len(references_norm)

    # --------------------
    # S2ST
    # --------------------
    if predicted_s2s:
        metrics["S2ST_BLEU"]    = corpus_bleu(predicted_s2s, tokenized_refs).score
        metrics["S2ST_chrF++"]  = corpus_chrf(predicted_s2s, tokenized_refs).score
        metrics["S2ST_chrF2++"] = corpus_chrf(predicted_s2s, tokenized_refs, beta=2).score
        metrics["S2ST_WER"]     = sum(wer(ref[0], hyp) for ref, hyp in zip(references_norm, predicted_s2s)) / len(references_norm)

    # --------------------
    # T2ST
    # --------------------
    if predicted_t2s:
        metrics["T2ST_BLEU"]    = corpus_bleu(predicted_t2s, tokenized_refs).score
        metrics["T2ST_chrF++"]  = corpus_chrf(predicted_t2s, tokenized_refs).score
        metrics["T2ST_chrF2++"] = corpus_chrf(predicted_t2s, tokenized_refs, beta=2).score
        metrics["T2ST_WER"]     = sum(wer(ref[0], hyp) for ref, hyp in zip(references_norm, predicted_t2s)) / len(references_norm)

    return metrics


In [24]:
refs1, hyps_s2tt1, hyps_t2tt1, preds_s2s1, preds_t2s1 = run_translation_for_language("hin", "hi_in","eng", "en_us", full_tasks=True)



🔹 Processing Target Language: ENG (en_us)
Found 265 parallel sentences
Saved results to /scratch/aj/Bhavna/bhav_venv_311/f-X-Eng-Direct/f-X-Eng-Direct-CSVs/eng_results.csv


In [25]:
refs2, hyps_s2tt2, hyps_t2tt2, preds_s2s2, preds_t2s2 = run_translation_for_language("ben", "bn_in","eng", "en_us", full_tasks=True)


🔹 Processing Target Language: ENG (en_us)
Found 349 parallel sentences
Saved results to /scratch/aj/Bhavna/bhav_venv_311/f-X-Eng-Direct/f-X-Eng-Direct-CSVs/eng_results.csv


In [26]:
refs3, hyps_s2tt3, hyps_t2tt3, preds_s2s3, preds_t2s3 = run_translation_for_language("tel", "te_in","eng", "en_us", full_tasks=True)


🔹 Processing Target Language: ENG (en_us)
Found 302 parallel sentences
Saved results to /scratch/aj/Bhavna/bhav_venv_311/f-X-Eng-Direct/f-X-Eng-Direct-CSVs/eng_results.csv


In [27]:
refs4, hyps_s2tt4, hyps_t2tt4, preds_s2s4, preds_t2s4 = run_translation_for_language("tam", "ta_in","eng", "en_us", full_tasks=True)


🔹 Processing Target Language: ENG (en_us)
Found 336 parallel sentences
Saved results to /scratch/aj/Bhavna/bhav_venv_311/f-X-Eng-Direct/f-X-Eng-Direct-CSVs/eng_results.csv


In [28]:
refs5,hyps_s2tt5, hyps_t2tt5, preds_s2s5, preds_t2s5 = run_translation_for_language("mal", "ml_in","eng", "en_us", full_tasks=True)


🔹 Processing Target Language: ENG (en_us)
Found 344 parallel sentences
Saved results to /scratch/aj/Bhavna/bhav_venv_311/f-X-Eng-Direct/f-X-Eng-Direct-CSVs/eng_results.csv


In [29]:
refs6,hyps_s2tt6, hyps_t2tt6, preds_s2s6, preds_t2s6 = run_translation_for_language("guj", "gu_in","eng", "en_us", full_tasks=True)


🔹 Processing Target Language: ENG (en_us)
Found 349 parallel sentences
Saved results to /scratch/aj/Bhavna/bhav_venv_311/f-X-Eng-Direct/f-X-Eng-Direct-CSVs/eng_results.csv


In [30]:
refs7,hyps_s2tt7, hyps_t2tt7, preds_s2s7, preds_t2s7 = run_translation_for_language("mar", "mr_in","eng", "en_us", full_tasks=True)


🔹 Processing Target Language: ENG (en_us)
Found 349 parallel sentences
Saved results to /scratch/aj/Bhavna/bhav_venv_311/f-X-Eng-Direct/f-X-Eng-Direct-CSVs/eng_results.csv


In [31]:
refs8,hyps_s2tt8, hyps_t2tt8, preds_s2s8, preds_t2s8 = run_translation_for_language("pan", "pa_in","eng", "en_us", full_tasks=True)


🔹 Processing Target Language: ENG (en_us)
Found 279 parallel sentences
Saved results to /scratch/aj/Bhavna/bhav_venv_311/f-X-Eng-Direct/f-X-Eng-Direct-CSVs/eng_results.csv


In [32]:
refs9,hyps_s2tt9, hyps_t2tt9, preds_s2s9, preds_t2s9 = run_translation_for_language("kan", "kn_in","eng", "en_us", full_tasks=True)


🔹 Processing Target Language: ENG (en_us)
Found 344 parallel sentences
Saved results to /scratch/aj/Bhavna/bhav_venv_311/f-X-Eng-Direct/f-X-Eng-Direct-CSVs/eng_results.csv


In [33]:
refs10,hyps_s2tt10, hyps_t2tt10, preds_s2s10, preds_t2s10 = run_translation_for_language("ory", "or_in","eng", "en_us", full_tasks=True)


🔹 Processing Target Language: ENG (en_us)
Found 334 parallel sentences
Saved results to /scratch/aj/Bhavna/bhav_venv_311/f-X-Eng-Direct/f-X-Eng-Direct-CSVs/eng_results.csv


In [34]:
refs11,hyps_s2tt11, hyps_t2tt11, preds_s2s11, preds_t2s11 = run_translation_for_language("urd", "ur_pk","eng", "en_us", full_tasks=True)


🔹 Processing Target Language: ENG (en_us)
Found 230 parallel sentences
Saved results to /scratch/aj/Bhavna/bhav_venv_311/f-X-Eng-Direct/f-X-Eng-Direct-CSVs/eng_results.csv


In [35]:
refs12,hyps_s2tt12, hyps_t2tt12, preds_s2s12, preds_t2s12 = run_translation_for_language("asm","as_in","eng", "en_us", full_tasks=True)


🔹 Processing Target Language: ENG (en_us)
Found 349 parallel sentences
Saved results to /scratch/aj/Bhavna/bhav_venv_311/f-X-Eng-Direct/f-X-Eng-Direct-CSVs/eng_results.csv


In [36]:
refs13,hyps_s2tt13, hyps_t2tt13, preds_s2s13, preds_t2s13 = run_translation_for_language("snd", "sd_in","eng", "en_us", full_tasks=True)


🔹 Processing Target Language: ENG (en_us)
Found 350 parallel sentences
Saved results to /scratch/aj/Bhavna/bhav_venv_311/f-X-Eng-Direct/f-X-Eng-Direct-CSVs/eng_results.csv


In [37]:
refs14,hyps_s2tt14, hyps_t2tt14, preds_s2s14, preds_t2s14 = run_translation_for_language("npi","ne_np","eng", "en_us", full_tasks=True)


🔹 Processing Target Language: ENG (en_us)
Found 343 parallel sentences
Saved results to /scratch/aj/Bhavna/bhav_venv_311/f-X-Eng-Direct/f-X-Eng-Direct-CSVs/eng_results.csv


In [38]:
compute_metrics("hin","en_us", refs1, hyps_s2tt1, hyps_t2tt1, preds_s2s1, preds_t2s1)


Computing metrics for HIN → EN_US


{'S2TT_BLEU': 19.259887228376275,
 'S2TT_chrF++': 47.064516706148865,
 'S2TT_chrF2++': 47.064516706148865,
 'S2TT_WER': 0.5769781283054644,
 'T2TT_BLEU': 100.00000000000004,
 'T2TT_chrF++': 100.0,
 'T2TT_chrF2++': 100.0,
 'T2TT_WER': 0.4829589650612814,
 'S2ST_BLEU': 18.951629567590746,
 'S2ST_chrF++': 44.4801824800045,
 'S2ST_chrF2++': 44.4801824800045,
 'S2ST_WER': 0.5744392525630683,
 'T2ST_BLEU': 100.00000000000004,
 'T2ST_chrF++': 100.0,
 'T2ST_chrF2++': 100.0,
 'T2ST_WER': 0.5062129707006541}

In [39]:
compute_metrics("ben","en_us", refs2, hyps_s2tt2, hyps_t2tt2, preds_s2s2, preds_t2s2)


Computing metrics for BEN → EN_US


{'S2TT_BLEU': 11.986062961075742,
 'S2TT_chrF++': 51.61641017074737,
 'S2TT_chrF2++': 51.61641017074737,
 'S2TT_WER': 0.6057547991640091,
 'T2TT_BLEU': 12.470873588504128,
 'T2TT_chrF++': 44.03763089981711,
 'T2TT_chrF2++': 44.03763089981711,
 'T2TT_WER': 0.5433760961388682,
 'S2ST_BLEU': 12.169109229511132,
 'S2ST_chrF++': 53.931480319342725,
 'S2ST_chrF2++': 53.931480319342725,
 'S2ST_WER': 0.6105122039137028,
 'T2ST_BLEU': 13.592883763682499,
 'T2ST_chrF++': 42.00545781690605,
 'T2ST_chrF2++': 42.00545781690605,
 'T2ST_WER': 0.5551223983302469}

In [40]:
compute_metrics("tel","en_us", refs3, hyps_s2tt3, hyps_t2tt3, preds_s2s3, preds_t2s3)


Computing metrics for TEL → EN_US


{'S2TT_BLEU': 9.578464408619825,
 'S2TT_chrF++': 44.29120051573812,
 'S2TT_chrF2++': 44.29120051573812,
 'S2TT_WER': 0.6132852423109971,
 'T2TT_BLEU': 60.427507947135354,
 'T2TT_chrF++': 84.21637053128809,
 'T2TT_chrF2++': 84.21637053128809,
 'T2TT_WER': 0.5262495941621558,
 'S2ST_BLEU': 8.73716785171588,
 'S2ST_chrF++': 39.62467006078068,
 'S2ST_chrF2++': 39.62467006078068,
 'S2ST_WER': 0.6039635892802974,
 'T2ST_BLEU': 60.427507947135354,
 'T2ST_chrF++': 84.21637053128809,
 'T2ST_chrF2++': 84.21637053128809,
 'T2ST_WER': 0.5351444780751428}

In [41]:
compute_metrics("tam","en_us", refs4, hyps_s2tt4, hyps_t2tt4, preds_s2s4, preds_t2s4)


Computing metrics for TAM → EN_US


{'S2TT_BLEU': 51.60202040000685,
 'S2TT_chrF++': 62.48674189397521,
 'S2TT_chrF2++': 62.48674189397521,
 'S2TT_WER': 0.6711805367864453,
 'T2TT_BLEU': 11.678449443205002,
 'T2TT_chrF++': 49.92483236542206,
 'T2TT_chrF2++': 49.92483236542206,
 'T2TT_WER': 0.5663497144591616,
 'S2ST_BLEU': 51.60202040000685,
 'S2ST_chrF++': 62.48674189397521,
 'S2ST_chrF2++': 62.48674189397521,
 'S2ST_WER': 0.658504086797589,
 'T2ST_BLEU': 11.678449443205002,
 'T2ST_chrF++': 49.92483236542206,
 'T2ST_chrF2++': 49.92483236542206,
 'T2ST_WER': 0.5782981519860814}

In [42]:
compute_metrics("mal","en_us", refs5, hyps_s2tt5, hyps_t2tt5, preds_s2s5, preds_t2s5)


Computing metrics for MAL → EN_US


{'S2TT_BLEU': 32.4069446727242,
 'S2TT_chrF++': 51.58750220445118,
 'S2TT_chrF2++': 51.58750220445118,
 'S2TT_WER': 0.6229671568681744,
 'T2TT_BLEU': 63.71804857892112,
 'T2TT_chrF++': 70.35589629053496,
 'T2TT_chrF2++': 70.35589629053496,
 'T2TT_WER': 0.539874157229546,
 'S2ST_BLEU': 32.4069446727242,
 'S2ST_chrF++': 51.58750220445118,
 'S2ST_chrF2++': 51.58750220445118,
 'S2ST_WER': 0.6148951631734149,
 'T2ST_BLEU': 63.71804857892112,
 'T2ST_chrF++': 70.35589629053496,
 'T2ST_chrF2++': 70.35589629053496,
 'T2ST_WER': 0.54284509294128}

In [43]:
compute_metrics("guj","en_us", refs6, hyps_s2tt6, hyps_t2tt6, preds_s2s6, preds_t2s6)


Computing metrics for GUJ → EN_US


{'S2TT_BLEU': 19.835441454182888,
 'S2TT_chrF++': 49.43201474537644,
 'S2TT_chrF2++': 49.43201474537644,
 'S2TT_WER': 0.5428466688130611,
 'T2TT_BLEU': 34.329452398451956,
 'T2TT_chrF++': 60.08415368642262,
 'T2TT_chrF2++': 60.08415368642262,
 'T2TT_WER': 0.47140989436656927,
 'S2ST_BLEU': 19.835441454182888,
 'S2ST_chrF++': 49.08333931577539,
 'S2ST_chrF2++': 49.08333931577539,
 'S2ST_WER': 0.5527190922758902,
 'T2ST_BLEU': 34.329452398451956,
 'T2ST_chrF++': 60.08415368642262,
 'T2ST_chrF2++': 60.08415368642262,
 'T2ST_WER': 0.4810689032393332}

In [45]:
compute_metrics("mar","en_us", refs7, hyps_s2tt7, hyps_t2tt7, preds_s2s7, preds_t2s7)


Computing metrics for MAR → EN_US


{'S2TT_BLEU': 45.274718709528955,
 'S2TT_chrF++': 60.303239136880016,
 'S2TT_chrF2++': 60.303239136880016,
 'S2TT_WER': 0.6141061966883621,
 'T2TT_BLEU': 21.651956746181064,
 'T2TT_chrF++': 53.01880673972587,
 'T2TT_chrF2++': 53.01880673972587,
 'T2TT_WER': 0.5098966102925325,
 'S2ST_BLEU': 45.274718709528955,
 'S2ST_chrF++': 60.303239136880016,
 'S2ST_chrF2++': 60.303239136880016,
 'S2ST_WER': 0.6104087010091004,
 'T2ST_BLEU': 21.186050864016675,
 'T2ST_chrF++': 52.95548816688813,
 'T2ST_chrF2++': 52.95548816688813,
 'T2ST_WER': 0.5190659386774854}

In [44]:
compute_metrics("pan","en_us", refs8, hyps_s2tt8, hyps_t2tt8, preds_s2s8, preds_t2s8)


Computing metrics for PAN → EN_US


{'S2TT_BLEU': 14.980800232509305,
 'S2TT_chrF++': 58.62758024251515,
 'S2TT_chrF2++': 58.62758024251515,
 'S2TT_WER': 0.5965989711491985,
 'T2TT_BLEU': 29.48993986902436,
 'T2TT_chrF++': 69.2584161771794,
 'T2TT_chrF2++': 69.2584161771794,
 'T2TT_WER': 0.4757341206345312,
 'S2ST_BLEU': 9.625807217196785,
 'S2ST_chrF++': 54.53016839979288,
 'S2ST_chrF2++': 54.53016839979288,
 'S2ST_WER': 0.5960282977583556,
 'T2ST_BLEU': 48.34389064001791,
 'T2ST_chrF++': 72.99348554655523,
 'T2ST_chrF2++': 72.99348554655523,
 'T2ST_WER': 0.48319325921869083}

In [46]:
compute_metrics("kan","en_us", refs9, hyps_s2tt9, hyps_t2tt9, preds_s2s9, preds_t2s9)


Computing metrics for KAN → EN_US


{'S2TT_BLEU': 47.587330964125215,
 'S2TT_chrF++': 67.22912468216506,
 'S2TT_chrF2++': 67.22912468216506,
 'S2TT_WER': 0.6320570666442101,
 'T2TT_BLEU': 76.24658586234858,
 'T2TT_chrF++': 82.57206226180148,
 'T2TT_chrF2++': 82.57206226180148,
 'T2TT_WER': 0.575623358094392,
 'S2ST_BLEU': 47.587330964125215,
 'S2ST_chrF++': 67.22912468216506,
 'S2ST_chrF2++': 67.22912468216506,
 'S2ST_WER': 0.627693012033382,
 'T2ST_BLEU': 76.24658586234858,
 'T2ST_chrF++': 82.57206226180148,
 'T2ST_chrF2++': 82.57206226180148,
 'T2ST_WER': 0.5870343522113848}

In [47]:
compute_metrics("ory","en_us", refs10, hyps_s2tt10, hyps_t2tt10, preds_s2s10, preds_t2s10)


Computing metrics for ORY → EN_US


{'S2TT_BLEU': 48.740622698799406,
 'S2TT_chrF++': 72.95153347169273,
 'S2TT_chrF2++': 72.95153347169273,
 'S2TT_WER': 0.6275503817546304,
 'T2TT_BLEU': 82.82477531331043,
 'T2TT_chrF++': 91.60457336453659,
 'T2TT_chrF2++': 91.60457336453659,
 'T2TT_WER': 0.5113733669275848,
 'S2ST_BLEU': 48.740622698799406,
 'S2ST_chrF++': 72.95153347169273,
 'S2ST_chrF2++': 72.95153347169273,
 'S2ST_WER': 0.6268162642308952,
 'T2ST_BLEU': 82.82477531331043,
 'T2ST_chrF++': 91.60457336453659,
 'T2ST_chrF2++': 91.60457336453659,
 'T2ST_WER': 0.5130236769776545}

In [48]:
compute_metrics("urd","en_us", refs11, hyps_s2tt11, hyps_t2tt11, preds_s2s11, preds_t2s11)


Computing metrics for URD → EN_US


{'S2TT_BLEU': 35.55304670431184,
 'S2TT_chrF++': 64.03077002758981,
 'S2TT_chrF2++': 64.03077002758981,
 'S2TT_WER': 0.6189562220362608,
 'T2TT_BLEU': 43.78826865860791,
 'T2TT_chrF++': 75.07593076816529,
 'T2TT_chrF2++': 75.07593076816529,
 'T2TT_WER': 0.5121603399715183,
 'S2ST_BLEU': 35.55304670431184,
 'S2ST_chrF++': 64.03077002758981,
 'S2ST_chrF2++': 64.03077002758981,
 'S2ST_WER': 0.6145540594050083,
 'T2ST_BLEU': 43.78826865860791,
 'T2ST_chrF++': 75.07593076816529,
 'T2ST_chrF2++': 75.07593076816529,
 'T2ST_WER': 0.516878890245686}

In [49]:
compute_metrics("asm","en_us", refs12, hyps_s2tt12, hyps_t2tt12, preds_s2s12, preds_t2s12)


Computing metrics for ASM → EN_US


{'S2TT_BLEU': 24.669426816409512,
 'S2TT_chrF++': 57.311094385046566,
 'S2TT_chrF2++': 57.311094385046566,
 'S2TT_WER': 0.6704331654481143,
 'T2TT_BLEU': 27.301208627090666,
 'T2TT_chrF++': 56.09181867259812,
 'T2TT_chrF2++': 56.09181867259812,
 'T2TT_WER': 0.5840625564985533,
 'S2ST_BLEU': 24.669426816409512,
 'S2ST_chrF++': 57.311094385046566,
 'S2ST_chrF2++': 57.311094385046566,
 'S2ST_WER': 0.6639281743212848,
 'T2ST_BLEU': 27.301208627090666,
 'T2ST_chrF++': 56.09181867259812,
 'T2ST_chrF2++': 56.09181867259812,
 'T2ST_WER': 0.5911567851255853}

In [50]:
compute_metrics("snd","en_us", refs13, hyps_s2tt13, hyps_t2tt13, preds_s2s13, preds_t2s13)


Computing metrics for SND → EN_US


{'S2TT_BLEU': 14.266268462755136,
 'S2TT_chrF++': 27.438009935295526,
 'S2TT_chrF2++': 27.438009935295526,
 'S2TT_WER': 0.8930249785976819,
 'T2TT_BLEU': 28.64284647416118,
 'T2TT_chrF++': 59.64667401553315,
 'T2TT_chrF2++': 59.64667401553315,
 'T2TT_WER': 0.5061408291347962,
 'S2ST_BLEU': 14.266268462755136,
 'S2ST_chrF++': 27.438009935295526,
 'S2ST_chrF2++': 27.438009935295526,
 'S2ST_WER': 0.881408672334438,
 'T2ST_BLEU': 28.64284647416118,
 'T2ST_chrF++': 59.64667401553315,
 'T2ST_chrF2++': 59.64667401553315,
 'T2ST_WER': 0.5345305138838374}

In [None]:
compute_metrics("npi","en_us", refs14, hyps_s2tt14, hyps_t2tt14, preds_s2s14, preds_t2s14)


Computing metrics for NPI → EN_US


{'S2TT_BLEU': 31.85277558379779,
 'S2TT_chrF++': 62.713740948011974,
 'S2TT_chrF2++': 62.713740948011974,
 'S2TT_WER': 0.5897884618031969,
 'T2TT_BLEU': 47.75205461960747,
 'T2TT_chrF++': 81.80625838235312,
 'T2TT_chrF2++': 81.80625838235312,
 'T2TT_WER': 0.48998421964692784,
 'S2ST_BLEU': 31.85277558379779,
 'S2ST_chrF++': 62.713740948011974,
 'S2ST_chrF2++': 62.713740948011974,
 'S2ST_WER': 0.5927473642050798,
 'T2ST_BLEU': 66.5912587079685,
 'T2ST_chrF++': 84.97440206659982,
 'T2ST_chrF2++': 84.97440206659982,
 'T2ST_WER': 0.4983339247508748}

: 