Word error rate (WER) is computed with the jiwer package:  
https://pypi.org/project/jiwer/


In [2]:
from jiwer import wer
import string
import os
import re
import pandas as pd

# Characters stripped from every transcript before scoring
punctuation = '.,?![]>♪"'

# Transcript sources. The file names found in the first directory drive the loop,
# so every one of them is expected to exist in the other two directories as well.
directories = ['./yt_asr/', './gold/', './yt_manual/']


def normalize_transcript(text, join_lines=False):
    """Return `text` normalised for word-error-rate scoring.

    Steps, in order:
      1. drop hyphens that have no word character on either side;
      2. drop every character listed in `punctuation` and lowercase the rest;
      3. drop colons that have no digit on either side (so "10:30" survives);
      4. drop parenthesised spans such as "(laughs)";
      5. if `join_lines` is true, turn each newline into a space;
      6. collapse runs of spaces into a single space.

    Parameters:
        text: raw transcript contents.
        join_lines: set for transcripts that are spread over several lines.

    Returns:
        The normalised transcript as a single string.
    """
    text = re.sub(r'(?<!\w)-(?!\w)', '', text)
    # Square brackets are in `punctuation`, so only the bracket characters go;
    # the words that were inside them stay in the text.
    text = ''.join(ch for ch in text if ch not in punctuation).lower()
    text = re.sub(r'(?<!\d):(?!\d)', '', text)
    # NOTE(review): `.` does not match a newline and this runs before the lines
    # are joined, so a parenthesised span that crosses a line break is kept.
    text = re.sub(r'\(.*?\)', '', text)
    if join_lines:
        text = text.replace('\n', ' ')
    return re.sub(' +', ' ', text)


video_id = []
asr_error = []
manual_error = []

for filename in os.listdir(directories[0]):  # Assuming all directories have the same files
    video_id.append(filename)

    # Rebuilt for every file so a transcript from a previous iteration can never be scored by accident
    transcripts = {}
    for directory in directories:
        file_path = os.path.join(directory, filename)

        # Decode explicitly as UTF-8: the platform default encoding varies (e.g. cp1252 on
        # Windows) and would garble non-ASCII characters such as '♪' before they can be stripped.
        with open(file_path, 'r', encoding='utf-8') as file:
            raw_text = file.read()

        # Only the manual files, taken directly from YouTube, are split over multiple lines
        transcripts[directory] = normalize_transcript(
            raw_text, join_lines=(directory == './yt_manual/'))

    # Score both YouTube transcripts against the gold transcript (reference first, hypothesis second)
    gold = transcripts['./gold/']
    asr_error.append(wer(gold, transcripts['./yt_asr/']))
    manual_error.append(wer(gold, transcripts['./yt_manual/']))

df = pd.DataFrame({'video_id': video_id, 'asr_error': asr_error, 'manual_error': manual_error})
df.round(3)

Unnamed: 0,video_id,asr_error,manual_error
0,-24pGXi756k.txt,0.002,0.43
1,8XMaizBLfaQ.txt,0.02,0.168
2,98dRHoH1t1M.txt,0.013,0.029
3,cR1EgtHf87c.txt,0.337,0.69
4,OQSBtvsm8Ro.txt,0.06,0.723
5,wanXeH7DQ8E.txt,0.029,0.458
6,WF5hx4QMznA.txt,0.04,0.219
7,WToYJpzbdnA.txt,0.333,0.068
8,y9d1C68V1EM.txt,0.023,0.299
9,ztbl6pE6V8M.txt,0.0,0.0


In [3]:
df.round(3).to_csv('wer_stats_asr_manual.csv', index=False, encoding='utf-8')

Regular boxplots (showing outliers)

In [4]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Empty single-panel figure with a grey plot area; the legend is hidden
# because each box already carries its name on the x-axis
fig = make_subplots(rows=1, cols=1)
fig.update_layout(plot_bgcolor='#ececec', showlegend=False)

# One black box per transcript source: ASR first, then the manual transcripts
for column, label in (('asr_error', 'YouTube ASR'),
                      ('manual_error', 'YouTube Manual')):
    fig.add_trace(go.Box(y=df[column], name=label, marker_color='black'))

# Label the y-axis
fig.update_yaxes(title_text='Word Error Rate')

fig.show()

Boxplots showing all boxpoints

In [5]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.io as pio

# Styling shared by both boxes: black markers, every data point drawn, small dots
box_style = dict(marker_color='black', boxpoints='all', marker_size=3)

# Empty single-panel figure
fig = make_subplots(rows=1, cols=1)

# One box per transcript source: ASR first, then the manual transcripts
for column, label in (('asr_error', 'YouTube ASR'),
                      ('manual_error', 'YouTube Manual')):
    fig.add_trace(go.Box(y=df[column], name=label, **box_style))

# Label the y-axis
fig.update_yaxes(title_text='Word Error Rate')

# Grey background, title, no legend, figure-wide font and fixed width
fig.update_layout(
    plot_bgcolor='#ececec',
    title_text='Comparison of Word Error Rates in ASR and Manual Transcripts',
    showlegend=False,
    font=dict(family="Roboto", size=18),
    width=800,
)

fig.show()
# Optional interactive export:
# fig.write_html('asr_manual_boxplots.html')

# A higher scale gives a sharper PNG at the cost of a larger file
pio.write_image(fig, 'asr_manual_wer_boxplots.png', scale=5)

In [3]:
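# One-time setup: the PNG export (pio.write_image) relies on the kaleido package.
# Uncomment and run once, then restart the kernel before re-running the notebook:
# pip install -U kaleido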

Collecting kaleido
  Using cached kaleido-0.2.1-py2.py3-none-win_amd64.whl (65.9 MB)
Installing collected packages: kaleido
Successfully installed kaleido-0.2.1
Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.0.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip
