# Improving Amazon Transcribe transcriptions using Custom Vocabularies, Amazon Augmented AI (A2I), and Amazon SageMaker.



Visit https://github.com/aws-samples/amazon-a2i-sample-jupyter-notebooks for all A2I Sample Notebooks

## Introduction
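This notebook shows how to improve the Amazon Transcribe transcription of a technical video. It runs a baseline transcription job, sends low-confidence passages to human reviewers through Amazon Augmented AI (A2I), builds a custom vocabulary from the technical terms the reviewers typed, re-transcribes the video with that vocabulary, and finally compares the word error rate (WER) of the two transcripts.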

### Install Latest SDKs

In [None]:
# First, let's get the latest installations of our dependencies:
# pip itself, then boto3 and botocore (the AWS SDK used by every later cell)
!pip install --upgrade pip
!pip install boto3 --upgrade
!pip install -U botocore

### Setup

### Region, Bucket, and Paths
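The cell below imports the libraries used throughout the notebook. The AWS Region, the S3 bucket, and the S3 paths used by later cells must point at resources in your own account.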

In [None]:
from __future__ import print_function
import time
import boto3

### Roles and Permissions
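The notebook uses the SageMaker execution role of this notebook instance; the same role is later passed to Amazon A2I when the flow definition is created. Make sure it is allowed to call Amazon Transcribe and Amazon A2I and to read from and write to the S3 bucket used below.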

In [None]:
from sagemaker import get_execution_role

# Setting Role to the default SageMaker Execution Role.
# ROLE is passed as RoleArn when the flow definition is created later on.
ROLE = get_execution_role()
# Show the role as cell output so it can be checked before continuing
display(ROLE)

## Basic Transcription Job
Here, we use Amazon Transcribe with default settings

In [None]:
# Name of the baseline transcription job; Transcribe writes <job name>.json to the output bucket
job_name_1 = "AWS-sage-1"

# S3 bucket that holds the input media and receives the transcription output
BUCKET = "jashuang-sagemaker-5-22"

# audio file path (built from BUCKET so the two cannot drift apart)
job_uri_s3 = f"s3://{BUCKET}/transcribe-bucket/Fully-Managed Notebook Instances with Amazon SageMaker - a Deep Dive.mp4"

# Region used for the SageMaker and S3 clients created later in the notebook
REGION = boto3.session.Session().region_name

# S3 location where Amazon A2I stores the human review results (adjust the prefix if needed)
OUTPUT_PATH = f"s3://{BUCKET}/a2i-results"

In [None]:
# Here is a transcribe function
def transcribe(job_name, job_uri, out_bucket, format="mp4", vocab_name=None):
    """Start an Amazon Transcribe job and block until it completes or fails.

    Args:
        job_name (str): the name of the job that you specify;
                        the output json will be job_name.json
        job_uri (str): input path (in s3) to the file being transcribed
        out_bucket (str): s3 bucket name that you want the output json
                          to be placed in
        format (str): input file format, one of mp3, mp4, wav or flac;
                      defaults to mp4
        vocab_name (str): name of custom vocabulary used;
                          optional, defaults to None
    Returns:
        None. Progress, the final job description and any error are printed.
    """

    if format not in ['mp3', 'mp4', 'wav', 'flac']:
        print("Invalid format")
        return

    job_args = {
        "TranscriptionJobName": job_name,
        "Media": {"MediaFileUri": job_uri},
        "MediaFormat": format,
        "LanguageCode": "en-US",
        "OutputBucketName": out_bucket,
    }
    # Only send Settings when a vocabulary was requested: passing
    # VocabularyName=None is rejected by boto3's parameter validation,
    # which made the default (no vocabulary) call fail.
    if vocab_name is not None:
        job_args["Settings"] = {"VocabularyName": vocab_name}

    try:
        client = boto3.client("transcribe")
        print("------" + format)
        client.start_transcription_job(**job_args)

        # Poll every 5 seconds until the job reaches a terminal state
        while True:
            status = client.get_transcription_job(TranscriptionJobName=job_name)
            if status['TranscriptionJob']['TranscriptionJobStatus'] in ['COMPLETED', 'FAILED']:
                break
            print("Not ready yet...")
            time.sleep(5)
        print(status)

    except Exception as e:
        # Best effort in a notebook: report the problem instead of raising
        print(e)

In [None]:
# transcribe(job_name_1, job_uri_s3, BUCKET)

### Get Transcripts, Confidence Scores, and Timestamps

In [None]:
def get_transcript_text_and_timestamps(bucket_name, file_name):
    """Load a Transcribe output json from S3 and split it into sentences and words.

    Args:
        bucket_name (str): name of s3 bucket
        file_name (str): name (key) of the json file in that bucket
    Returns:
        (
        entire_transcript: the value of results.transcripts in the json,
                           returned unchanged,
        sentences_and_times: [ {start_time (sec) : float,
                                end_time (sec)   : float,
                                sentence         : str,
                                min_confidence   : float (minimum confidence score of that sentence)
                                } ],
        confidences:  [ {start_time (sec) : float,
                         end_time (sec)   : float,
                         content          : str, (single word/phrase)
                         confidence       : float (confidence score of the word/phrase)
                         } ],
        scores: list of confidence scores (float), one per word/phrase
        )
    """
    # Imported/created locally so that the function does not depend on the
    # `json` module and `s3` client that a later notebook cell sets up.
    import json

    s3_client = boto3.client("s3")
    s3_clientobj = s3_client.get_object(Bucket=bucket_name, Key=file_name)
    original = json.loads(s3_clientobj["Body"].read().decode("utf-8"))
    items = original["results"]["items"]
    entire_transcript = original["results"]["transcripts"]

    sentences_and_times = []
    confidences = []
    scores = []

    temp_sentence = ""
    temp_start_time = 0
    temp_min_confidence = 1.0
    # end time of the most recent spoken item; punctuation items carry no timestamps
    last_word_end_time = 0.0
    sentence_has_word = False
    new_sentence = True

    for item in items:
        content = item["alternatives"][0]["content"]
        is_punctuation = item["type"] == "punctuation"

        # always add the word
        if is_punctuation:
            temp_sentence = temp_sentence.strip() + content + " "
        else:
            confidence = float(item["alternatives"][0]["confidence"])
            word_end_time = float(item["end_time"])
            temp_sentence = temp_sentence + content + " "
            temp_min_confidence = min(temp_min_confidence, confidence)
            confidences.append({"start_time": float(item["start_time"]),
                                "end_time": word_end_time,
                                "content": content,
                                "confidence": confidence
                               })
            scores.append(confidence)
            last_word_end_time = word_end_time
            sentence_has_word = True

        # if this is a new sentence, and it starts with a word, save the time
        if new_sentence:
            if item["type"] == "pronunciation":
                temp_start_time = float(item["start_time"])
            new_sentence = False
        # else, keep going until you hit a punctuation other than a comma
        elif is_punctuation and content != ",":
            # end time of the sentence is the end time of its last spoken word
            sentences_and_times.append(
                {"start_time": temp_start_time,
                 "end_time": last_word_end_time,
                 "sentence": temp_sentence.strip(),
                 "min_confidence": temp_min_confidence
                }
            )
            # reset the temp sentence and relevant variables
            new_sentence = True
            temp_sentence = ""
            temp_min_confidence = 1.0
            sentence_has_word = False

    # keep trailing words that were not closed by a sentence-ending punctuation
    if sentence_has_word:
        sentences_and_times.append(
            {"start_time": temp_start_time,
             "end_time": last_word_end_time,
             "sentence": temp_sentence.strip(),
             "min_confidence": temp_min_confidence
            }
        )

    return entire_transcript, sentences_and_times, confidences, scores

In [None]:
# Read the output of the baseline job: Transcribe stores it as <job name>.json in the output bucket
entire_transcript_1, sentences_and_times_1, confidences_1, scores_1 = get_transcript_text_and_timestamps(
    BUCKET, job_name_1 + ".json"
)

In [None]:
# Show the first sentence entry (start/end time, text and minimum confidence) as a sanity check
print(sentences_and_times_1[0])

### Save the first transcript to a txt file

In [None]:
# Write one sentence per line; the context manager closes the file even if a write fails
with open("originaltranscript.txt", "w") as file0:
    for tup in sentences_and_times_1:
        file0.write(tup['sentence'] + "\n")

### Histogram of confidence scores

In [None]:
from matplotlib import pyplot as plt

# Histogram of the word-level confidence scores of the baseline transcript.
# (scores_1 is the list returned for the first transcription job.)
plt.xlim([min(scores_1)-0.1, max(scores_1)+0.1])
plt.hist(scores_1, bins=20, alpha=0.5)
plt.title('Plot of confidence scores')
plt.xlabel('Confidence score')
plt.ylabel('Frequency')

plt.show()

### Histogram of low confidence scores

In [None]:
# Keep only the baseline confidence scores below 0.9
bad_scores = [score for score in scores_1 if score < 0.9]

In [None]:
# Histogram restricted to the scores collected in bad_scores.
# NOTE(review): min(bad_scores) raises ValueError when bad_scores is empty — confirm that is acceptable
plt.xlim([min(bad_scores)-0.1, 1.0])
plt.hist(bad_scores, bins=20, alpha=0.5)
plt.title('Plot of confidence scores less than 0.9')
plt.xlabel('Confidence score')
plt.ylabel('Frequency')

plt.show()

### Workteam or Workforce
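A workteam is the group of workers that receives the human review tasks. Set `WORKTEAM_ARN` below to the ARN of a private workteam in your own account, and make sure you have invited yourself to it so that you can complete the tasks.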

In [None]:
# ARN of the workteam that receives the review tasks; replace with the ARN of your own workteam
WORKTEAM_ARN= "arn:aws:sagemaker:us-west-2:688520471316:workteam/private-crowd/jashuang-test-workforce"

### Clients

In [None]:
import io
import json
import uuid
import time
import boto3
import botocore

# Amazon SageMaker client (task UIs, flow definitions, workteams)
sagemaker = boto3.client('sagemaker', REGION)

# Amazon Augmented AI (A2I) runtime client, created in the same region as the
# SageMaker client so that the flow definition it references can be found
a2i = boto3.client('sagemaker-a2i-runtime', REGION)

# Amazon S3 client
s3 = boto3.client('s3', REGION)

## Create Control Plane Resources

### Create Human Task UI

In [3]:
# Worker task template (crowd HTML elements). It plays the media at task.input.audioPath
# between task.input.start_time and task.input.end_time, shows task.input.original_words,
# and collects the worker's text in the "transcription" field.
template = r"""
<script src="https://assets.crowd.aws/crowd-html-elements.js"></script>

<crowd-form>
    <video controls>
        <source src="{{ task.input.audioPath | grant_read_access }}#t={{ task.input.start_time }},{{ task.input.end_time }}"
            type="audio/mp4">
        Your browser does not support the audio element.
    </video>
    <p>If you need to replay the audio, please refresh the page.</p>

    <h3>Instructions</h3>
    <p>Transcribe the audio </p>
    <p>Ignore "umms", "hmms", "uhs" and other non-textual phrases. </p>
    <p>The original transcript is <strong>"{{ task.input.original_words }}"</strong>. If the text matches the audio, please retype the same transcription.</p>
    <p>Click the space below to start typing.</p>
    <crowd-text-area name="transcription" rows="2"></crowd-text-area>

    <full-instructions header="Transcription Instructions">
        <h2>Instructions</h2>
        <p>Click the play button once and listen carefully to the audio section clip. Type what you hear in the box
            below.</p>
    </full-instructions>

</crowd-form>
"""

def create_task_ui():
    """Create a Human Task UI resource from the module-level template.

    Reads the globals ``taskUIName`` (name of the new resource) and
    ``template`` (worker task HTML).

    Returns:
        The response of ``create_human_task_ui``; callers read its
        ``HumanTaskUiArn`` entry.
    """
    ui_template = {'Content': template}
    return sagemaker.create_human_task_ui(
        HumanTaskUiName=taskUIName,
        UiTemplate=ui_template,
    )

In [None]:
# Task UI name - this value is unique per account and region. You can also provide your own value here.
# A random UUID suffix avoids clashing with a UI created by an earlier run.
taskUIName = 'ui-transcribe-' + str(uuid.uuid4()) 

# Create task UI and keep its ARN; the flow definition below refers to it
humanTaskUiResponse = create_task_ui()
humanTaskUiArn = humanTaskUiResponse['HumanTaskUiArn']
print(humanTaskUiArn)

In [None]:
# Flow definition name - this value is unique per account and region. You can also provide your own value here.
flowDefinitionName = 'fd-transcribe-demo-' + str(uuid.uuid4()) 

# The flow definition ties together the workteam, the task UI and the S3 location
# (OUTPUT_PATH, expected to be an s3:// URI) that receives the review results.
# TaskCount of 1 requests a single worker answer per task.
create_workflow_definition_response = sagemaker.create_flow_definition(
        FlowDefinitionName= flowDefinitionName,
        RoleArn= ROLE,
        HumanLoopConfig= {
            "WorkteamArn": WORKTEAM_ARN,
            "HumanTaskUiArn": humanTaskUiArn,
            "TaskCount": 1,
            "TaskDescription": "Identify the word(s) spoken in the provided audio clip",
            "TaskTitle": "Determine Words/Phrases of Audio Clip"
        },
        OutputConfig={
            "S3OutputPath" : OUTPUT_PATH
        }
    )
flowDefinitionArn = create_workflow_definition_response['FlowDefinitionArn'] # let's save this ARN for future use

In [None]:
# Describe flow definition - status should be active.
# Poll every 2 seconds, for at most 60 attempts.
for _ in range(60):
    describeFlowDefinitionResponse = sagemaker.describe_flow_definition(FlowDefinitionName=flowDefinitionName)
    flow_status = describeFlowDefinitionResponse['FlowDefinitionStatus']
    print(flow_status)
    if flow_status == 'Active':
        print("Flow Definition is active")
        break
    if flow_status == 'Failed':
        # No point in waiting any longer; surface the reason reported by the service
        raise RuntimeError(
            "Flow definition creation failed: "
            + str(describeFlowDefinitionResponse.get('FailureReason'))
        )
    time.sleep(2)
else:
    # The loop ran out of attempts without ever seeing 'Active'
    raise TimeoutError("Flow definition did not become active in time; re-run this cell to keep waiting")

## Human Loops
### Sending sequences of words/phrases of low confidence for review
As we iterate through the list of words and their confidence scores, we create a HumanLoop task whenever the confidence score is below a threshold. The task consists of a sequence of words "neighboring" the word with low confidence, since it is possible that nearby words/phrases were also mis-transcribed.

In [None]:
# use this to get the words near a word with poor confidence,
# since it is possible that the transcription also mis-transcribed nearby words/phrases
def get_word_neighbors(words, index, window=3):
    """Return the entries at most ``window`` positions away from ``index``.

    Args:
        words (list): word entries, each a dict with "start_time" and
                      "end_time" keys
        index (int): position of the word of interest
        window (int): how many entries to include on each side; defaults to 3
    Returns:
        list: the entries at most ``window`` away from the input index
              (the entry at ``index`` included), clipped to the list bounds
        start time of the first entry in that list
        end time of the last entry in that list
    Raises:
        IndexError: if ``words`` is empty
        ValueError: if ``window`` is negative
    """
    if not words:
        raise IndexError("words is empty")
    if window < 0:
        raise ValueError("window must be non-negative")
    first = max(0, index - window)
    last = min(len(words) - 1, index + window)
    return words[first: last + 1], words[first]["start_time"], words[last]["end_time"]

In [None]:
# Start one human loop for every word whose confidence is below the threshold
human_loops_started = []
CONFIDENCE_SCORE_THRESHOLD = .4
for index, obj in enumerate(confidences_1):
    # Our condition for when we want to engage a human for review
    if obj["confidence"] < CONFIDENCE_SCORE_THRESHOLD:
        # the neighbors are only needed for words that are actually sent for review
        neighbors, start_time, end_time = get_word_neighbors(confidences_1, index)

        # get the original sequence of words
        sequence = " ".join(block['content'] for block in neighbors)

        humanLoopName = str(uuid.uuid4())
        # these keys are the task.input.* values read by the worker task template
        inputContent = {
            "audioPath": job_uri_s3,
            "start_time": start_time,
            "end_time": end_time,
            "original_words": sequence
        }
        start_loop_response = a2i.start_human_loop(
            HumanLoopName=humanLoopName,
            FlowDefinitionArn=flowDefinitionArn,
            HumanLoopInput={
                "InputContent": json.dumps(inputContent)
            }
        )
        human_loops_started.append(humanLoopName)
        print(f'Confidence score of {obj["confidence"]} is less than the threshold of {CONFIDENCE_SCORE_THRESHOLD}')
        print(f'Starting human loop with name: {humanLoopName}')
        print(f'Sending words from times {start_time} to {end_time} to review')
        print(f'The original transcription is ""{sequence}"" \n')

### Check Status of Human Loop

In [None]:
# Ask A2I for the current state of every started loop and collect the responses
# of the loops whose status is "Completed"
completed_human_loops = []
for human_loop_name in human_loops_started:
    resp = a2i.describe_human_loop(HumanLoopName=human_loop_name)
    print(f'HumanLoop Name: {human_loop_name}')
    print(f'HumanLoop Status: {resp["HumanLoopStatus"]}')
    print(f'HumanLoop Output Destination: {resp["HumanLoopOutput"]}')
    print('\n')
    
    if resp["HumanLoopStatus"] == "Completed":
        completed_human_loops.append(resp)

### Wait For Workers to Complete Task

In [None]:
# Wait For Workers to Complete Task
# The workteam name is the last path segment of its ARN
workteamName = WORKTEAM_ARN.rsplit('/', 1)[-1]
print("Navigate to the private worker portal and do the tasks. Make sure you've invited yourself to your workteam!")
print(f"https://{sagemaker.describe_workteam(WorkteamName=workteamName)['Workteam']['SubDomain']}")

### Check Status of Human Loop Again

In [None]:
# Same check as before the workers did the tasks: the list is rebuilt from scratch,
# so it only contains the loops that report "Completed" right now
completed_human_loops = []
for human_loop_name in human_loops_started:
    resp = a2i.describe_human_loop(HumanLoopName=human_loop_name)
    print(f'HumanLoop Name: {human_loop_name}')
    print(f'HumanLoop Status: {resp["HumanLoopStatus"]}')
    print(f'HumanLoop Output Destination: {resp["HumanLoopOutput"]}')
    print('\n')
    
    if resp["HumanLoopStatus"] == "Completed":
        completed_human_loops.append(resp)


### View Task Results

Once work is completed, Amazon A2I stores the results in your S3 bucket and sends a CloudWatch event. Your results should be available under the S3 `OUTPUT_PATH` when all work is completed.

In [None]:
import re
import pprint
from urllib.parse import urlparse

pp = pprint.PrettyPrinter(indent=4)

# Download and pretty-print the result file of every completed human loop
for resp in completed_human_loops:
    # Take bucket and key from the URI itself instead of assuming the output
    # lives in BUCKET (and instead of using the bucket name as a regex)
    output_uri = urlparse(resp['HumanLoopOutput']['OutputS3Uri'])

    response = s3.get_object(Bucket=output_uri.netloc, Key=output_uri.path.lstrip('/'))
    content = response["Body"].read()
    json_output = json.loads(content)
    pp.pprint(json_output)
    print('\n')

### Get Technical Terms
To get the technical terms identified by human review, we accumulate all human-reviewed words into a list and then keep the ones that do not appear in a corpus of English words.

In [None]:
from urllib.parse import urlparse

# All words typed by the reviewers, across every completed human loop
corrected_words = []

for resp in completed_human_loops:
    # Take bucket and key from the URI itself instead of assuming the output
    # lives in BUCKET (and instead of using the bucket name as a regex)
    output_uri = urlparse(resp['HumanLoopOutput']['OutputS3Uri'])

    response = s3.get_object(Bucket=output_uri.netloc, Key=output_uri.path.lstrip('/'))
    content = response["Body"].read()
    json_output = json.loads(content)

    # add the human-reviewed answers split on whitespace
    # (split() without an argument does not produce empty strings for repeated spaces)
    corrected_words += json_output['humanAnswers'][0]['answerContent']['transcription'].split()

In [None]:
# Create dictionary of English words
# Note that this corpus of words is not 100% exhaustive
import nltk
# Fetch the 'words' corpus so that nltk.corpus.words can be read below
nltk.download('words')
from nltk.corpus import words
# A set gives fast membership tests in the cells that follow
my_dict=set(words.words()) 

In [None]:
# Function for extracting words not in the English language corpus
def unusual_words(text):
    """Return the sorted, lower-cased words of ``text`` that are not in ``my_dict``.

    Both sides are lower-cased before the comparison, so the check is
    case-insensitive and every word appears at most once in the result.
    """
    candidates = {token.lower() for token in text}
    known = {entry.lower() for entry in my_dict}
    return sorted(candidates.difference(known))

# Function for removing contractions
# https://en.wikipedia.org/wiki/Wikipedia:List_of_English_contractions
# Only the keys are used for filtering; the values document the expansions.
contractions = { 
"ain't": "am not / are not / is not / has not / have not",
"aren't": "are not / am not",
"can't": "cannot",
"can't've": "cannot have",
"'cause": "because",
"could've": "could have",
"couldn't": "could not",
"couldn't've": "could not have",
"didn't": "did not",
"doesn't": "does not",
"don't": "do not",
"hadn't": "had not",
"hadn't've": "had not have",
"hasn't": "has not",
"haven't": "have not",
"he'd": "he had / he would",
"he'd've": "he would have",
"he'll": "he shall / he will",
"he'll've": "he shall have / he will have",
"he's": "he has / he is",
"how'd": "how did",
"how'd'y": "how do you",
"how'll": "how will",
"how's": "how has / how is / how does",
"I'd": "I had / I would",
"I'd've": "I would have",
"I'll": "I shall / I will",
"I'll've": "I shall have / I will have",
"I'm": "I am",
"I've": "I have",
"isn't": "is not",
"it'd": "it had / it would",
"it'd've": "it would have",
"it'll": "it shall / it will",
"it'll've": "it shall have / it will have",
"it's": "it has / it is",
"let's": "let us",
"ma'am": "madam",
"mayn't": "may not",
"might've": "might have",
"mightn't": "might not",
"mightn't've": "might not have",
"must've": "must have",
"mustn't": "must not",
"mustn't've": "must not have",
"needn't": "need not",
"needn't've": "need not have",
"o'clock": "of the clock",
"oughtn't": "ought not",
"oughtn't've": "ought not have",
"shan't": "shall not",
"sha'n't": "shall not",
"shan't've": "shall not have",
"she'd": "she had / she would",
"she'd've": "she would have",
"she'll": "she shall / she will",
"she'll've": "she shall have / she will have",
"she's": "she has / she is",
"should've": "should have",
"shouldn't": "should not",
"shouldn't've": "should not have",
"so've": "so have",
"so's": "so as / so is",
"that'd": "that would / that had",
"that'd've": "that would have",
"that's": "that has / that is",
"there'd": "there had / there would",
"there'd've": "there would have",
"there's": "there has / there is",
"they'd": "they had / they would",
"they'd've": "they would have",
"they'll": "they shall / they will",
"they'll've": "they shall have / they will have",
"they're": "they are",
"they've": "they have",
"to've": "to have",
"wasn't": "was not",
"we'd": "we had / we would",
"we'd've": "we would have",
"we'll": "we will",
"we'll've": "we will have",
"we're": "we are",
"we've": "we have",
"weren't": "were not",
"what'll": "what shall / what will",
"what'll've": "what shall have / what will have",
"what're": "what are",
"what's": "what has / what is",
"what've": "what have",
"when's": "when has / when is",
"when've": "when have",
"where'd": "where did",
"where's": "where has / where is",
"where've": "where have",
"who'll": "who shall / who will",
"who'll've": "who shall have / who will have",
"who's": "who has / who is",
"who've": "who have",
"why's": "why has / why is",
"why've": "why have",
"will've": "will have",
"won't": "will not",
"won't've": "will not have",
"would've": "would have",
"wouldn't": "would not",
"wouldn't've": "would not have",
"y'all": "you all",
"y'all'd": "you all would",
"y'all'd've": "you all would have",
"y'all're": "you all are",
"y'all've": "you all have",
"you'd": "you had / you would",
"you'd've": "you would have",
"you'll": "you shall / you will",
"you'll've": "you shall have / you will have",
"you're": "you are",
"you've": "you have"
}

# Lower-cased keys, built once, so that the filter below is case-insensitive
_CONTRACTION_KEYS = frozenset(key.lower() for key in contractions)


def remove_contractions(word_list):
    """Return the words of ``word_list`` that are not English contractions.

    The comparison ignores case ("Don't" is treated like "don't") and accepts
    the typographic apostrophe (U+2019) as well as the plain one.

    Args:
        word_list: iterable of words (str)
    Returns:
        list: the remaining words, unchanged and in their original order
    """
    return [
        word for word in word_list
        if word.replace("\u2019", "'").lower() not in _CONTRACTION_KEYS
    ]

### View Technical/Uncommon Words
After removing contractions, human-reviewed words that are not in the English language corpus are likely to be the technical terms we are looking for.

In [None]:
# Compare case-insensitively so that an ordinary word is not reported just because
# it is capitalized, and print every remaining term only once.
english_vocab = {entry.lower() for entry in my_dict}
seen_words = set()
for word in remove_contractions(corrected_words):
    # drop punctuation the reviewer typed around the word
    word = word.strip('.,;:!?"()')
    key = word.lower()
    if not key or key in english_vocab or key in seen_words:
        continue
    seen_words.add(key)
    print(word)

### Create Custom Vocabulary
Using the technical terms identified above, we've created a custom vocabulary table. A custom vocabulary table enables options to tell Amazon Transcribe how each technical term is pronounced and how it should be displayed.

More details on how to form a custom vocabulary table can be found here: https://docs.aws.amazon.com/transcribe/latest/dg/how-vocabulary.html#create-vocabulary-table

In [None]:
# Custom vocabulary table: the first row is the header, every following row is one term.
# Columns left as '' are written out as empty fields.
# NOTE(review): the phrase / sounds-like conventions follow the Transcribe documentation
# linked above — verify each row against it before reuse.
finalized_words=[['Phrase','IPA','SoundsLike','DisplayAs'],
                 ['E.C.-Two','','ee-see-too','EC2'],
                 ['E.C.-Two-instance','','ee-see-too-in-stunce','EC2 instance'],
                 ['lambda','','lam-duh','Lambda'],
                 ['S.D.K.','','ess-dee-kay','SDK'],
                 ['boto-three','','boe-toe-three','Boto3'],
                 ['S.-Three','','ess-three','S3'],
                 ['github','','git-hub','Github'],
                 ['sagemaker','','sage-may-ker','SageMaker'],
                 ['E.B.S.','','ee-bee-ess','EBS'],
                 ['G.P.U.','','gee-pee-you','GPU'],
                 ['git-repository','','git-ree-paw-zih-tor-ee','Git repository'],
                 ['jupyter','','joo-pih-ter','Jupyter'],
                 ['kernel','','ker-null','kernel'],
                 ['config','','con-fig','config'],
                 ['configs','','con-figs','configs'],
                 ['D.B.-pedia','','dee-bee-pee-dee-yuh','dbpedia'],
                 ['git-clone','','','git clone'],
                 ['notebook-instance','','','notebook instance'],
                 ['V.P.C.','','','VPC'],
                ]

### Write the Table to a Txt File

In [None]:
custom_vocab_file_name = "customvocab3.txt"

# One tab-separated line per table row. The format string has its own name so that
# it does not overwrite the global `template` (the worker task HTML).
row_template = '{}\t{}\t{}\t{}\n'
with open(custom_vocab_file_name, "w") as file1:
    for line in finalized_words:
        file1.write(row_template.format(line[0],
                                        line[1],
                                        line[2],
                                        line[3]))

### Upload Custom Vocabulary File to S3

In [None]:
import logging
import boto3
from botocore.exceptions import ClientError


def upload_file(file_name, bucket, object_name=None):
    """Upload a file to an S3 bucket

    :param file_name: File to upload
    :param bucket: Bucket to upload to
    :param object_name: S3 object name. If not specified then file_name is used
    :return: True if file was uploaded, else False
    """
    # boto3 reports a failed managed upload as S3UploadFailedError rather than
    # ClientError, so both are handled to honor the documented False return.
    from boto3.exceptions import S3UploadFailedError

    # If S3 object_name was not specified, use file_name
    if object_name is None:
        object_name = file_name

    # Upload the file
    s3_client = boto3.client('s3')
    try:
        s3_client.upload_file(file_name, bucket, object_name)
    except (ClientError, S3UploadFailedError) as e:
        logging.error(e)
        return False
    return True

In [None]:
# Upload the vocabulary file under its own name; the cell output shows True on success, False on failure
upload_file(custom_vocab_file_name, BUCKET)

### Create the Custom Vocabulary

In [None]:
transcribe = boto3.client("transcribe")
response = transcribe.create_vocabulary(
    VocabularyName='aws-sagemaker-vocab-4',
    LanguageCode='en-US',
    VocabularyFileUri='s3://' + BUCKET + '/' + custom_vocab_file_name
)

In [None]:
pp.pprint(response)

In [None]:
# Get the status of the vocab you created again (must wait until its VocabularyState is READY)
response2 = transcribe.get_vocabulary(
    VocabularyName='aws-sagemaker-vocab-4'
)
pp.pprint(response2)

### Re-transcribe using the Custom Vocabulary

In [None]:
# Name of the second transcription job; its output is written to <job name>.json
job_name_2 = 'AWS-sage-improved-1'
# Must be the VocabularyName that was passed to create_vocabulary
vocab_improved = 'aws-sagemaker-vocab-4'

In [None]:
# Same media file as the baseline job, this time with the custom vocabulary applied
transcribe(job_name_2, job_uri_s3, BUCKET, vocab_name=vocab_improved)

In [None]:
# Read the output of the second job, stored as <job name>.json in the bucket
(entire_transcript_2,
 sentences_and_times_2,
 confidences_2,
 scores_2) = get_transcript_text_and_timestamps(BUCKET, f"{job_name_2}.json")
                                                                                                       

### Write the Improved Transcript to Txt File

In [None]:
# save the improved transcript, one sentence per line;
# the context manager closes the file even if a write fails
with open("improvedtranscript_2.txt", "w") as file4:
    for tup in sentences_and_times_2:
        file4.write(tup['sentence'] + "\n")

## Calculating Word Error Rate (WER)
We'll be using a lightweight open-source Python library called JiWER for calculating WER between transcripts.

For more details, see: https://pypi.org/project/jiwer/

In [None]:
# Install JiWER, the library used below to compute the word error rate
!pip install jiwer

In [None]:
from jiwer import wer
import jiwer

In [None]:
# small example: the hypothesis differs from the ground truth in one of its two words
ground_truth = "hello world"
hypothesis = "hello duck"

In [None]:
# Word error rate of the small example above, shown as the cell output
wer(ground_truth, hypothesis)

In [None]:
# Create a transformation function to preprocess transcript
transformation = jiwer.Compose([
    jiwer.ToLowerCase(),
    jiwer.RemoveMultipleSpaces(),
    jiwer.RemovePunctuation(),
    jiwer.RemoveWhiteSpace(replace_by_space=True),
    jiwer.SentencesToListOfWords(),
    jiwer.SentencesToListOfWords(word_delimiter=" "),
    jiwer.RemoveEmptyStrings()
]) 

### Get the original transcript (before applying the custom vocabulary)

In [None]:
# Concatenate the lines of the original transcript, separated by spaces.
# A line containing only "--STOP--" ends the text early.
hypothesis_2_original = ""
with open("originaltranscript.txt", "r") as f3:
    for line in f3:
        if line.strip() == "--STOP--":
            break
        hypothesis_2_original += (line.strip() + " ")

### Get the "Ground Truth" transcript

In [None]:
# Concatenate the lines of the ground-truth transcript, separated by spaces.
# A line containing only "--STOP--" ends the text early.
ground_truth_2 = ""
with open("ground_truth.txt", "r") as f1:
    for line in f1:
        if line.strip() == "--STOP--":
            break
        ground_truth_2 += (line.strip() + " ")

### Get the new transcript (after applying the custom vocabulary)

In [None]:
# Concatenate the lines of the improved transcript, separated by spaces.
# A line containing only "--STOP--" ends the text early.
hypothesis_2 = ""
with open("improvedtranscript_2.txt", "r") as f2:
    for line in f2:
        if line.strip() == "--STOP--":
            break
        hypothesis_2 += (line.strip() + " ")

### Compute Baseline WER (lower is better)

In [None]:
# WER of the original transcript against the ground truth;
# both texts go through the same preprocessing pipeline
jiwer.wer(
    ground_truth_2, 
    hypothesis_2_original, 
    truth_transform=transformation, 
    hypothesis_transform=transformation
)

### Compute New WER (lower is better)

In [None]:
# WER of the transcript produced with the custom vocabulary against the ground truth;
# compare with the baseline value from the previous cell
jiwer.wer(
    ground_truth_2, 
    hypothesis_2, 
    truth_transform=transformation, 
    hypothesis_transform=transformation
)