In [407]:
import pandas as pd
import numpy as np
import string
import csv
from jiwer import wer 


In [408]:
def calculate(results, output_file):
    """Score each transcription in a results CSV with its word error rate.

    Reads ``results``, drops the row labelled 0 (the separator row), and
    compares the ``Actual`` column (reference sentences) against the
    ``Predicted`` column (whisper-produced sentences). Both are lower-cased
    and stripped of ASCII punctuation before scoring. The scores are added
    as a ``WER`` column; the resulting frame is printed and written to
    ``output_file`` without the index.

    Parameters
    ----------
    results : str or path-like
        CSV file containing at least the ``Actual`` and ``Predicted`` columns.
    output_file : str or path-like
        Destination CSV for the input rows plus the new ``WER`` column.

    Returns
    -------
    None

    Notes
    -----
    Empty cells are parsed by pandas as NaN; they are treated as empty
    strings here so that a missing transcription is scored instead of
    raising ``AttributeError``. Rows whose reference is empty after
    normalisation get a NaN score because WER is undefined when the
    reference contains no words.
    """
    df = pd.read_csv(results)
    df = df.drop(0)  # drop the separator row

    # Translation table that deletes every ASCII punctuation character.
    punctuation_table = str.maketrans('', '', string.punctuation)

    def _normalize(column):
        # fillna('') + str() keep NaN / non-string cells from breaking .lower().
        return [
            str(text).lower().translate(punctuation_table)
            for text in df[column].fillna('')
        ]

    expected = _normalize('Actual')      # reference sentences
    produced = _normalize('Predicted')   # whisper-produced sentences

    # Per-row WER; an empty reference has no defined WER, so record NaN
    # rather than aborting the whole batch.
    df['WER'] = [
        wer(reference, hypothesis) if reference.strip() else float('nan')
        for reference, hypothesis in zip(expected, produced)
    ]
    print(df)

    # Persist the scored rows.
    df.to_csv(output_file, index=False)



In [409]:
# Score the transcriptions in test.csv and write them, with a WER column, to test_output.csv.
calculate("test.csv", "test_output.csv")

                          File Actual    Predicted  WER
1    APDYM4participant_gum.wav    gum   Thank you.  2.0
2    APEDI2participant_gum.wav    gum        Come.  1.0
3    APCHO6participant_gum.wav    gum        Calm.  1.0
4    APDMN7participant_gum.wav    gum        Come.  1.0
5    APFAN8participant_gum.wav    gum        Come.  1.0
..                         ...    ...          ...  ...
77   APDYO5participant_gum.wav    gum   Thank you.  2.0
78  APssss1participant_gum.wav   _gum         gum.  0.0
79   APEDJ3participant_gum.wav    gum        Come.  1.0
80   APDIO3participant_gum.wav    gum         gum.  0.0
81   APIDM8participant_gum.wav    gum  Don't know.  2.0

[81 rows x 4 columns]
