# JsonToDictation

> Convert AWS Transcribe JSON format to Dictation Format

In [None]:
#| default_exp core

In [None]:
#| hide
from nbdev.showdoc import *
import json

In [None]:
#| export

class JsonToDictation:
    """Convert AWS Transcribe JSON output into speaker-labelled dictation text.

    On construction the transcript list, the speaker count and the speaker
    labels are read from the JSON data. `convert` then produces one line per
    speaker turn in the form ``NAME:<tab>words...``.
    """

    def __init__(self,
                 speakers, # Display names; the n-th name is used for the n-th distinct speaker label
                 data # JSON data that has been generated by AWS Transcribe
                ):
        self.transcribe_data = data
        self.titles = []        # value of results.transcripts
        self.num_speakers = 0   # value of results.speaker_labels.speakers
        self.speakers = speakers
        self.speaker_map = []   # distinct speaker labels, in order of first appearance
        self.text = ''          # result of the most recent convert() call
        self.load()

    def load(self):
        "Read the transcripts, the speaker count and the speaker labels from the JSON data"
        results = self.transcribe_data["results"]
        self.titles = results["transcripts"]
        self.num_speakers = results["speaker_labels"]["speakers"]
        self.getSpeakerMap()

    def getSpeakerMap(self):
        """Rebuild and return the list of distinct speaker labels.

        Labels are listed once each, in the order in which they first appear
        in the speaker segments, so a label's position can be used directly as
        an index into `speakers` no matter how many segments a speaker has.
        The list is rebuilt from scratch, so repeated calls do not grow it.
        """
        segments = self.transcribe_data["results"]["speaker_labels"]["segments"]
        # dict.fromkeys drops repeats while keeping first-seen order.
        self.speaker_map = list(
            dict.fromkeys(segment["speaker_label"] for segment in segments)
        )
        return self.speaker_map

    def applyFormattingRules(self, word: str) -> str:
        """Return `word` in dictation form, followed by a single space.

        "OK" (any case) becomes "Okay ", "Cuz"/"cuz" become
        "Because "/"because ", and the fillers "um" and "ah" (any case)
        become the empty string.
        """
        upper_word = word.upper()

        # translate some spellings into standardized format
        if upper_word == "OK":
            return "Okay "
        if word == "Cuz":
            return "Because "
        if word == "cuz":
            return "because "

        # remove um and ah from text output
        if upper_word in ("UM", "AH"):
            return ""

        # none of the above, then return word with a space after
        return f"{word} "

    def convert(self) -> str:
        """Build the dictation text, store it in `self.text` and return it.

        Each change of speaker starts a new line prefixed with
        ``NAME:<tab>``. Punctuation is attached to the preceding word by
        removing that word's trailing space.

        Raises ValueError if an item carries a speaker label that is not in
        `speaker_map`, and IndexError if `speakers` has fewer names than
        there are labels in use.
        """
        # Resolve labels with a dict instead of list.index() on every item;
        # setdefault keeps the first position, exactly like list.index().
        label_positions = {}
        for position, label in enumerate(self.speaker_map):
            label_positions.setdefault(label, position)

        parts = []
        current_speaker = ''
        for item in self.transcribe_data["results"]["items"]:
            speaker_label = item["speaker_label"]
            if speaker_label not in label_positions:
                raise ValueError(f"{speaker_label!r} is not in speaker_map")
            speaker = self.speakers[label_positions[speaker_label]]
            word = item["alternatives"][0]["content"]

            if current_speaker != speaker:
                if current_speaker != '':
                    parts.append("\n")
                current_speaker = speaker
                parts.append(f"{speaker}:\t")

            # Only a word's trailing space is removed; the tab after a
            # speaker header stays intact when no word precedes the mark.
            if item["type"] == "punctuation" and parts and parts[-1].endswith(" "):
                parts[-1] = parts[-1][:-1]

            formatted = self.applyFormattingRules(word)
            if formatted:
                # Dropped fillers add nothing, so the last part is always
                # the text that punctuation should attach to.
                parts.append(formatted)

        self.text = "".join(parts)
        return self.text

    def summary(self):
        "Print the speaker count, the speaker names and the speaker label list"
        print(f"{self.num_speakers=}")
        print(f"{self.speakers=}")
        print(f"{self.speaker_map=}")
        # Kept so the printed output is unchanged, but read from the instance
        # rather than from a module-level `speakers` that may not exist.
        print(f"speakers={self.speakers!r}")


In [None]:
#| hide
# Export the cells marked `#| export` via nbdev.
import nbdev

nbdev.nbdev_export()

In [None]:
# Load the sample transcript; decode it explicitly as UTF-8 instead of
# relying on the platform's default text encoding.
with open('data/transcribe_example.json', encoding='utf-8') as transcribe_file:
    transcribe_json = json.load(transcribe_file)

# Names handed to JsonToDictation for the speakers in the sample.
speakers = ("BOB", "JANE")
dictation = JsonToDictation(speakers, transcribe_json)
dictation.summary()

assert dictation.num_speakers == 2

self.num_speakers=2
self.speakers=('BOB', 'JANE')
self.speaker_map=['spk_0', 'spk_1']
speakers=('BOB', 'JANE')


In [None]:
# Convert the sample and check the result against the expected dictation text.
expected_text = "BOB:\tbeen on hold for an hour. \nJANE:\tSorry about that. "
text = dictation.convert()
print(text)
assert text == expected_text

BOB:	been on hold for an hour. 
JANE:	Sorry about that. 
