In [2]:
import torch
from torcheval.metrics.functional import word_error_rate

In [5]:
# NOTE(review): `input` shadows the Python builtin of the same name.
input = ["hello world", "welcome to the facebook"]
target = ["hello metaverse", "welcome to meta"]
word_error_rate(input, target)
# With list inputs the errors are pooled over both pairs before dividing:
# (1 substitution + 1 insertion + 1 substitution) / 5 target words = 3/5

tensor(0.6000, dtype=torch.float64)

In [6]:
word_error_rate("hello world", "hello metaverse")

tensor(0.5000, dtype=torch.float64)

In [7]:
word_error_rate("welcome to the facebook", "welcome to meta")

tensor(0.6667, dtype=torch.float64)

In [10]:
# Plain sum of the two single-pair WERs shown above (0.5 + 0.6667). This is an
# intermediate quantity, not a WER value itself.
word_error_rate("hello world", "hello metaverse") + word_error_rate(
    "welcome to the facebook", "welcome to meta"
)

tensor(1.1667, dtype=torch.float64)

In [12]:
 # Unweighted mean of the two single-pair WERs. This differs from the 0.6
 # returned for the list inputs, where errors are pooled over all target words.
 (word_error_rate("hello world", "hello metaverse")
    + word_error_rate("welcome to the facebook", "welcome to meta")) / 2

tensor(0.5833, dtype=torch.float64)

In [7]:
from jiwer import wer

In [8]:
# Same sentence pair as the torcheval call above, with the argument order
# reversed: jiwer's `wer` takes the reference ("target") text first. The result
# matches the torcheval value of 0.6667.
wer("welcome to meta", "welcome to the facebook")

0.6666666666666666

In [6]:
list(zip(input, target))

[('hello world', 'hello metaverse'),
 ('welcome to the facebook', 'welcome to meta')]

In [9]:
def calculate_wer(tpl):
    """Return jiwer's WER for one ``(input, target)`` tuple.

    ``tpl[1]`` is passed to ``wer`` as its first argument and ``tpl[0]`` as
    its second, i.e. the tuple order is reversed for the jiwer call.
    """
    reference, hypothesis = tpl[1], tpl[0]
    return wer(reference, hypothesis)

In [12]:
list(map(calculate_wer, zip(input, target)))

[0.5, 0.6666666666666666]

In [13]:
# One WER per (input, target) pair, followed by their unweighted mean.
wer_list = [calculate_wer(pair) for pair in zip(input, target)]
average_wer = sum(wer_list) / len(wer_list)

In [14]:
average_wer

0.5833333333333333

In [19]:
sum(map(calculate_wer, zip(input, target)))

1.1666666666666665

In [20]:
sum(wer_list)

1.1666666666666665

In [22]:
len(list(zip(input, target)))

2

In [1]:
from open_whisper import utils

In [2]:
input = ["hello world", "welcome to the facebook"]
target = ["hello metaverse", "welcome to meta"]

In [5]:
# Pairs are ordered (input, target). NOTE(review): utils.average_wer returns
# 50.0 for this order but 58.33 for (target, input); the latter matches the
# mean of the per-pair jiwer values (0.5833) scaled by 100, so the helper
# presumably expects (reference, hypothesis) tuples and reports a percentage
# -- confirm against open_whisper.utils.
pair_list = list(zip(input, target))
pair_list

[('hello world', 'hello metaverse'),
 ('welcome to the facebook', 'welcome to meta')]

In [6]:
utils.average_wer(pair_list)

50.0

In [7]:
# Rebuild the pairs as (target, input). NOTE(review): this rebinds `pair_list`,
# so cells that use it afterwards see the swapped order until it is reassigned.
pair_list = list(zip(target, input))
pair_list

[('hello metaverse', 'hello world'),
 ('welcome to meta', 'welcome to the facebook')]

In [8]:
utils.average_wer(pair_list)

58.33333333333333

In [10]:
import numpy as np

In [11]:
np.round(utils.average_wer(pair_list), 2)

58.33

In [1]:
from open_whisper import normalizers

In [3]:
normalizers.BasicTextNormalizer(remove_diacritics=True, split_letters=True).clean(
    "Steve: And now that kickstarter has raised $51,000 dollars, I REPEAT! $51,000!!! Probably"
)

'Steve  And now that kickstarter has raised  51 000 dollars  I REPEAT   51 000    Probably'

In [4]:
normalizers.BasicTextNormalizer(remove_diacritics=True, split_letters=True).clean(
    "Steve: now look, I�m willing to do anything for a good joke, but this is insane!! I mean"
)

'Steve  now look  I m willing to do anything for a good joke  but this is insane   I mean'

In [5]:
pair_list = [
    (
        "Steve: And now that kickstarter has raised $51,000 dollars, I REPEAT! $51,000!!! Probably",
        "Steve: now look, I�m willing to do anything for a good joke, but this is insane!! I mean",
    ),
    ("more by now!!", "it has over 4,000 backers�"),
]
pair_list

[('Steve: And now that kickstarter has raised $51,000 dollars, I REPEAT! $51,000!!! Probably',
  'Steve: now look, I�m willing to do anything for a good joke, but this is insane!! I mean'),
 ('more by now!!', 'it has over 4,000 backers�')]

In [9]:
from open_whisper.normalizers import BasicTextNormalizer, EnglishTextNormalizer


def clean_text(pair_list, normalizer, remove_diacrtics, split_letters):
    """Normalize and lowercase both texts of every pair in ``pair_list``.

    Args:
        pair_list: Iterable of indexable pairs; items ``[0]`` and ``[1]`` of
            each pair are passed to the normalizer's ``clean`` method.
        normalizer: Name of the normalizer to use, either ``"basic"``
            (``BasicTextNormalizer``) or ``"english"``
            (``EnglishTextNormalizer``).
        remove_diacrtics: Forwarded to the normalizer constructor as its
            ``remove_diacritics`` keyword (the parameter name here is
            misspelt but kept for compatibility with existing callers).
        split_letters: Forwarded to the normalizer constructor as its
            ``split_letters`` keyword.

    Returns:
        A list of ``(cleaned_first, cleaned_second)`` tuples, each element
        being the ``clean`` result converted to lower case.

    Raises:
        ValueError: If ``normalizer`` is neither ``"basic"`` nor ``"english"``.
    """
    # Resolve the class first so the constructor call is written only once.
    if normalizer == "basic":
        normalizer_cls = BasicTextNormalizer
    elif normalizer == "english":
        normalizer_cls = EnglishTextNormalizer
    else:
        raise ValueError("Unsupported normalizer")

    text_normalizer = normalizer_cls(
        remove_diacritics=remove_diacrtics, split_letters=split_letters
    )

    def _clean(text):
        # Lowercasing happens after cleaning, on the normalizer's output.
        return text_normalizer.clean(text).lower()

    return [(_clean(pair[0]), _clean(pair[1])) for pair in pair_list]

In [10]:
clean_text(pair_list, "basic", True, True)

[('steve  and now that kickstarter has raised  51 000 dollars  i repeat   51 000    probably',
  'steve  now look  i m willing to do anything for a good joke  but this is insane   i mean'),
 ('more by now  ', 'it has over 4 000 backers ')]