In [1]:
import json
from deepgram import Deepgram
import os
import asyncio
from dotenv import load_dotenv
import mimetypes
import guardrails as gd
import openai
import string

In [2]:
# Pull API keys into the process environment from a local dotenv file
# (see .env.example for the expected variable names).
ENV_FILE = "../.env.local"
if not os.path.exists(ENV_FILE):
    print("No .env.local found")
else:
    # override=True: values from the file take precedence over variables already set.
    load_dotenv(ENV_FILE, override=True)
    print("Loaded .env.local")

Loaded .env.local


## Discretize (i.e. Transcribe)

In [3]:
# Pipeline input. A name ending in ".json" is loaded as an existing transcript;
# any other name is treated as an audio file and transcribed first (see next cell).
TRANSCRIPT_INPUT_FILE = "earnings.json"

In [4]:
# Transcribe with Deepgram if needed
if not TRANSCRIPT_INPUT_FILE.endswith(".json"):
    OUTPUT_FILE = TRANSCRIPT_INPUT_FILE.replace(".mp3", ".json")
    DEEPGRAM_API_KEY = os.environ.get("DEEPGRAM_API_KEY")
    MIMETYPE, _ = mimetypes.guess_type(TRANSCRIPT_INPUT_FILE)
    audio = open(TRANSCRIPT_INPUT_FILE, 'rb')
    source = {
      'buffer': audio,
      'mimetype': MIMETYPE
    }
    deepgram = Deepgram(DEEPGRAM_API_KEY)
    transcript = await asyncio.create_task(
        deepgram.transcription.prerecorded(
        source,
        {
            'smart_format': True,
            'model': 'nova',
        }
        )
    )
    with open(OUTPUT_FILE, 'w') as outfile:
        json.dump(transcript, outfile)
    transcript_text = transcript["results"]["channels"][0]["alternatives"][0]["transcript"]
else:
    with open(TRANSCRIPT_INPUT_FILE, "r") as f:
        transcript = json.load(f)
        transcript_text = transcript["results"]["channels"][0]["alternatives"][0]["transcript"]


## Generate and Attribute

In [5]:
# Build the Guard from the RAIL spec file "audio.rail". It is invoked further
# down to wrap the OpenAI chat-completion call and return a validated output.
guard = gd.Guard.from_rail("audio.rail")

In [6]:
# Transcript text of the first alternative on the first channel; passed to the
# prompt as the `transcript` parameter below.
# NOTE(review): the transcription cell above already assigns this same value,
# so this cell looks redundant — confirm before removing.
transcript_text = transcript["results"]["channels"][0]["alternatives"][0]["transcript"]

In [7]:
# Ask the LLM a question about the transcript through the guard, which returns
# both the raw completion and the output validated against the RAIL spec.
question = "What is the revenue?"
raw_llm_output, validated_output = guard(
    openai.ChatCompletion.create,
    prompt_params={"transcript": transcript_text, "user_question": question},
    model="gpt-3.5-turbo-16k",
    temperature=0,
)

# Always (re)bind the results before branching. Without this, a failed
# validation leaves `answer`/`excerpts` undefined on a fresh kernel (NameError
# in the Visualize cell) or, worse, silently keeps values from an earlier run.
answer = None
excerpts = []

if validated_output is None or "transcript_qa" not in validated_output:
    print(f"Error: {validated_output}")
else:
    transcript_qa = validated_output["transcript_qa"]
    answer = transcript_qa["answer"]
    excerpts = [e["excerpt"] for e in transcript_qa["transcript_excerpts"]]

    print(f"Answer: {answer}")
    print(f"Excerpts: {excerpts}")


Answer: $56,200,000,000
Excerpts: ['This quarter, revenue was 56,200,000,000 dollars, up 8% 10% in constant currency.']


## Localize

In [8]:
def clean_string(text):
    """Normalize text for comparison: strip ASCII punctuation and spaces, then lowercase."""
    # A single translation table deletes every punctuation character and the
    # space character in one pass; other whitespace (tabs, newlines) is kept.
    strip_table = str.maketrans('', '', string.punctuation + " ")
    return text.translate(strip_table).lower()

def localize_chunk(transcript, chunk_text):
    """
        Find where a chunk of text occurs in a word-level transcript.

        Words are compared after normalization with clean_string, so differences
        in punctuation, spacing and letter case are ignored.

        transcript: a list of dicts, each with a 'punctuated_word' key (str).
            (Callers in this notebook also read 'start' and 'end' from the same
            entries; this function itself only uses 'punctuated_word'.)

        chunk_text: string
            whitespace-separated words expected to appear consecutively in the
            transcript list

        return (start_index, end_index), both inclusive, of the first match
        within the transcript list, or (None, None) if chunk_text is empty or
        does not occur.
    """
    # split() without an argument collapses runs of whitespace, so doubled or
    # trailing spaces do not produce empty "words" that can never match.
    chunk_words = [clean_string(word) for word in chunk_text.split()]
    if not chunk_words:
        return None, None

    # Normalize each transcript word once instead of on every comparison.
    transcript_words = [clean_string(info['punctuated_word']) for info in transcript]

    chunk_len = len(chunk_words)
    # Stop at the last position where the whole chunk still fits; scanning past
    # it would index beyond the end of the list on a partial match.
    for start in range(len(transcript_words) - chunk_len + 1):
        if transcript_words[start:start + chunk_len] == chunk_words:
            return start, start + chunk_len - 1

    return None, None

## Visualize
Many different UIs could be built to present the results of the GAL (Generate, Attribute, Localize) process, but for this notebook we will just
print the results as a formatted string.

In [9]:
# Word-level entries of the first alternative on the first channel; the indices
# returned by localize_chunk refer to positions in this list.
words = transcript["results"]["channels"][0]["alternatives"][0]["words"]

output_str = f"""
Answer: {answer}

I generated this answer based on the following excerpts:
"""
for excerpt in excerpts:
    start_index, end_index = localize_chunk(words, excerpt)
    if start_index is None or end_index is None:
        # localize_chunk returns (None, None) when the excerpt is not found in
        # the transcript (e.g. the model paraphrased it). Report that instead
        # of indexing the word list with None, which raises TypeError.
        output_str += f"\t'{excerpt}' could not be located in {TRANSCRIPT_INPUT_FILE}\n"
        continue
    # The excerpt spans from the start of its first word to the end of its last.
    start_time = words[start_index]["start"]
    end_time = words[end_index]["end"]
    output_str += f"\t'{excerpt}' found at time {start_time}s to {end_time}s in {TRANSCRIPT_INPUT_FILE}\n"

print(output_str)



Answer: $56,200,000,000

I generated this answer based on the following excerpts:
	'This quarter, revenue was 56,200,000,000 dollars, up 8% 10% in constant currency.' found at time 1483.8656s to 1489.5656s in earnings.mp3

