diff --git a/deepgram_captions/converters.py b/deepgram_captions/converters.py
index cf4ab20..3ca3323 100644
--- a/deepgram_captions/converters.py
+++ b/deepgram_captions/converters.py
@@ -2,13 +2,44 @@
 from .helpers import chunk_array, replace_text_with_word
 
 
+class ConverterException(Exception):
+    """Raised when a response holds no non-empty transcript."""
+
+
 class DeepgramConverter:
-    def __init__(self, dg_response):
+    def __init__(self, dg_response, use_exception: bool = True):
+        """Store the Deepgram response and optionally validate it.
+
+        Args:
+            dg_response: The response as a dict, or an object whose
+                ``to_json()`` result is the response as a JSON string.
+            use_exception: When true, raise if neither the channels nor
+                the utterances contain a non-empty transcript.
+
+        Raises:
+            ConverterException: If ``use_exception`` is true and no
+                non-empty transcript is found.
+        """
         if not isinstance(dg_response, dict):
             self.response = json.loads(dg_response.to_json())
         else:
             self.response = dg_response
 
+        if use_exception:
+            results = self.response["results"]
+            # Skip channels without alternatives instead of raising IndexError.
+            has_channel_text = any(
+                channel["alternatives"][0]["transcript"]
+                for channel in results["channels"]
+                if channel["alternatives"]
+            )
+            has_utterance_text = any(
+                utterance["transcript"]
+                for utterance in results.get("utterances") or ()
+            )
+            if not (has_channel_text or has_utterance_text):
+                raise ConverterException("No valid transcriptions found in response")
+
     def get_lines(self, line_length):
         results = self.response["results"]
         content = []
@@ -19,7 +50,6 @@ def get_lines(self, line_length):
                     content.extend(chunk_array(utterance["words"], line_length))
                 else:
                     content.append(utterance["words"])
-
         else:
             words = results["channels"][0]["alternatives"][0]["words"]
             diarize = (
diff --git a/deepgram_captions/helpers.py b/deepgram_captions/helpers.py
index 6358c56..2cd93f7 100644
--- a/deepgram_captions/helpers.py
+++ b/deepgram_captions/helpers.py
@@ -1,6 +1,10 @@
 from datetime import datetime
 
 
+class EmptyTranscriptException(Exception):
+    """Raised when a caption formatter receives no transcript lines."""
+
+
 def seconds_to_timestamp(seconds, format="%H:%M:%S.%f"):
     seconds = round(seconds, 3)
     dt = datetime.utcfromtimestamp(seconds)
diff --git a/deepgram_captions/srt.py b/deepgram_captions/srt.py
index 646cef8..567bd9c 100644
--- a/deepgram_captions/srt.py
+++ b/deepgram_captions/srt.py
@@ -1,4 +1,4 @@
-from .helpers import seconds_to_timestamp
+from .helpers import seconds_to_timestamp, EmptyTranscriptException
 
 
 def srt(converter, line_length=None):
@@ -12,6 +12,9 @@ def srt(converter, line_length=None):
 
     current_speaker = None
 
+    if not lines or not lines[0]:
+        raise EmptyTranscriptException("No transcript data found")
+
     for words in lines:
         output.append(str(entry))
         entry += 1
diff --git a/deepgram_captions/webvtt.py b/deepgram_captions/webvtt.py
index f841151..c2996e6 100644
--- a/deepgram_captions/webvtt.py
+++ b/deepgram_captions/webvtt.py
@@ -1,4 +1,4 @@
-from .helpers import seconds_to_timestamp
+from .helpers import seconds_to_timestamp, EmptyTranscriptException
 
 
 def webvtt(converter, line_length=None):
@@ -22,6 +22,9 @@ def webvtt(converter, line_length=None):
     if hasattr(converter, "get_lines") and callable(getattr(converter, "get_lines")):
         lines = converter.get_lines(line_length)
 
+    if not lines or not lines[0]:
+        raise EmptyTranscriptException("No transcript data found")
+
     speaker_labels = "speaker" in lines[0][0]
 
     for words in lines: