# Summary Chain


In [2]:
from langchain.chat_models import ChatOpenAI
from langchain.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    MessagesPlaceholder,
    HumanMessagePromptTemplate,
    PromptTemplate
)

from langchain.output_parsers import StructuredOutputParser, CommaSeparatedListOutputParser, PydanticOutputParser, CommaSeparatedListOutputParser
from langchain_core.runnables import RunnableLambda
from pydantic import BaseModel, Field
from typing import List

from pydub import AudioSegment

In [3]:
# System prompt for the labelling chain: it tells the model to tag a
# transcription with comma-separated topic labels and gives two worked
# examples. The parser's format instructions are appended to this text when
# the prompt template is built.
# NOTE: the name `role` is reassigned further down for the summary prompt, so
# this value is only valid for code that runs before that reassignment.
role = """ 
    Your receive transcriptions of audio messages. Your job is to label the subject
    of the message. You tag the message with multiple lables. You are both general and 
    specific. Your tags generally belong to a category, theme, or topic. 

    Messages could be about anything. Messages could be a boult multiple topics. In the 
    case of multiple topics tag with lables that fit the main topic. Ensure that a 
    general theme is tagged. All labels should have a general association with each other.
    
    It is likely that there are errors in the transcription, don't let that distract you.

    -- EXAMPLE 1 --
    Message: So I had this idea about a new app. The app will be a social media app that tracks
    users across the platform to determine their interests. Profile information will be used to
    present the user hyper-targeted ads.

    Output: technology, social media, advertising, computer science, software development,
    artificial intelligence, machine learning, data science, data analytics, data engineering

    -- EXAMPLE 2 --
    Message: I have a new idea for a business. I want to start a new business that sells a sweet
    and tangy beverage. The beverage will be made from a fruit that is grown in the tropics called
    lemon. Before starting the business I will need to create a financial model and marketing plan.

    Output: business, entrepreneurship, finance, marketing, economics, accounting, management

    FORMAT INSTRUCTIONS:
    You must follow these instructions for formating output....

"""

# Parser for the model's reply; it also provides the format instructions that
# are baked into the system prompt below.
output_parser = CommaSeparatedListOutputParser()

# The human turn carries nothing but the raw transcription.
query = "{transcription}"

# System prompt = labelling instructions followed by the parser's format
# instructions, which are pre-filled so callers only supply `transcription`.
system_prompt = PromptTemplate(
    input_variables=[],
    partial_variables=dict(
        format_instructions=output_parser.get_format_instructions(),
    ),
    template=f"{role}\n{{format_instructions}}",
)

# Alternative left by the author: ChatOpenAI(model="gpt-4")
model = ChatOpenAI(model="gpt-3.5-turbo")

prompt = ChatPromptTemplate(
    messages=[
        SystemMessagePromptTemplate(prompt=system_prompt),
        HumanMessagePromptTemplate.from_template(query),
    ]
)

# transcription -> chat prompt -> model -> parsed labels
label_chain = prompt | model | output_parser

###### CLEAN UP ########
from operator import itemgetter
# Prompt Template
# ---------------
class CleanUp():
    """Chain stage that patches transcription errors with the help of a chat model.

    The model is shown the labels and the transcription and asked for a list
    of (error, correction) snippets. Those snippets are then applied to the
    transcription by plain substring replacement.

    Input to ``invoke`` / ``self.chain``: a dict with the keys
    ``'transcription'`` and ``'labels'``.
    Output: a dict with ``'transcription'`` (corrected text), ``'labels'``
    (passed through) and ``'error_correction_pairs'`` (the parsed model reply).
    """

    def __init__(self):
        """Build the prompt, parser and model, and wire them into ``self.chain``."""
        role = """ 
            Your receive transcriptions of audio messages. It is likely that there are
            errors in the transcription. Your job is to correct these errors. 

            You will do this by identifying the errors and correcting them. There is no 
            need to retype the entire message. You should simple output text that can 
            be used to identify the error and the correction.

            You will be provided with labels that describe the content of the message generally.
            Some labels will be more applicable than others. These labels should help you
            identify the errors by providing context.

            Acronyms and abbreviations are commonly transcribed incorrectly. The labels should 
            be very helpful in identifying these errors. If label is helpful and an acronym of 
            the raw transcription does not make sense, then use the label to correct the error.

            REMINDER: Use minimal viable text to uniquely identify the error. Punctuation, grammar,
            spelling, capitalization, and acronyms are all fair game.

            -- EXAMPLE 1 --
            Labels: shopping, produce, fruit, grocery store, supermarket, food, shopping list
            Transcription: I went to the store early and bought fruit. Except I forgot to 
            buy orangutans.

            Error: Except I forgot to buy orangutans.

            Correction: Except I forgot to buy oranges.

            -- EXAMPLE 2 --
            Labels: technology, social media, advertising, computer science, software development
            Transcription: So I had this idea about a new app. The app will be a social media app
            that tracks users across the platform to determine their interests. The app will utilize
            a new type of AI model called LOLms that can be used to interpret users posts. I will use
            one from a company called OpenAI called GBTT-4.

            Error: AI model called LOLms

            Correction: AI model called LLMs

            Error: OpenAI called GBTT-4

            Correction: OpenAI called GPT-4

            FOLLOW OUTPUT INSTRUCTIONS CAREFULLY

        """

        # Schema the model's reply is parsed into: one entry per snippet to fix.
        class ErrorCorrectionPair(BaseModel):
            error: str = Field(description="The incorrect part of the transcription")
            correction: str = Field(description="The corrected part of the transcription")

        class ErrorCorrectionContainer(BaseModel):
            error_correction_pairs: List[ErrorCorrectionPair] = Field(description="A list of error correction pairs")

        # The parser both produces the format instructions for the prompt and
        # turns the model's reply into an ErrorCorrectionContainer.
        correction_parser = PydanticOutputParser(pydantic_object=ErrorCorrectionContainer)

        query = "Labels: {labels}\n Transcription: {transcription}"

        # Format instructions are pre-filled, so the only runtime inputs are
        # the `labels` and `transcription` used by the human message.
        system_prompt = PromptTemplate(
            template=role + "\n{format_instructions}",
            input_variables=[],
            partial_variables={"format_instructions": correction_parser.get_format_instructions()}
        )

        prompt = ChatPromptTemplate(messages=[
                    SystemMessagePromptTemplate(prompt=system_prompt),
                    HumanMessagePromptTemplate.from_template(query)
                ])

        # model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.)
        model = ChatOpenAI(model="gpt-4", temperature=0.0)

        # The dict fans the input out three ways: the original transcription
        # and labels are passed through untouched, while the same input is
        # also sent through prompt -> model -> parser to obtain the pairs.
        # apply_corrections then merges the three results.
        self.chain = (
            {
            'transcription' : itemgetter('transcription'),
            'labels' : itemgetter('labels'),
            'error_correction_pairs' : prompt | model | correction_parser
            }
            | RunnableLambda(self.apply_corrections)
        )

    @staticmethod
    def apply_corrections(input_dict):
        """Apply the model's error/correction pairs to the transcription.

        Args:
            input_dict: mapping with ``'transcription'`` (str) and
                ``'error_correction_pairs'``, an object whose
                ``error_correction_pairs`` attribute is an iterable of items
                exposing ``error`` and ``correction`` strings. Any other keys
                are passed through unchanged.

        Returns:
            A new dict with the same keys as ``input_dict`` in which
            ``'transcription'`` holds the corrected text. ``input_dict``
            itself is not modified.
        """
        transcription = input_dict['transcription']
        pairs = input_dict['error_correction_pairs'].error_correction_pairs
        for pair in pairs:
            # str.replace("", x) inserts x between every character, so an
            # empty `error` from the model would wreck the whole text.
            if not pair.error:
                continue
            transcription = transcription.replace(pair.error, pair.correction)

        # Return a copy rather than mutating the caller's dict in place.
        output_dict = {**input_dict, 'transcription': transcription}

        print("Fixed transcription: ", transcription)
        return output_dict

    def invoke(self, input_dict):
        """Run the clean-up chain on ``input_dict`` and return its output dict."""
        return self.chain.invoke(input_dict)
    


# Prompt Template
# ---------------
# System prompt for the summary chain: it asks the model to summarise the
# transcription in first person, in the context of the supplied labels.
# NOTE: this rebinds the module-level name `role` that was used earlier for
# the labelling prompt.
role = """ 
    You have mastery of the English language. You specialize in helping writers and speakers better
    communicate there ideas. You are a professional editor. You are a master of grammar and spelling.
    
    Your receive transcriptions of audio messages. You will be provided with labels that describe the
    audio message. These labels should make sense in the context of the message. 
    
    Your job is to summarize the transcription. You should summarize the message in context to one
    or more of the labels. The summary should be contain all important details of the original message.
    However, the summary should be concise. Please keep the summary to be the same length or shorter
    than the original message. Audio message is in first person, so the summary should be in first person.

    Keep in mind that the transcription may not be concise or struggle to properly articulate idea. This
    is the crux of you task. Interpret the messages intent without introducing new ideas. Interpret and 
    concolidate.

    KEEP THE SUMMARY IN THE VOICE OF THE ORIGINAL SPEAKER. KEEP IT FIRST PERSON.
"""
# Alternative left by the author: ChatOpenAI(model="gpt-3.5-turbo")
model = ChatOpenAI(model="gpt-4")

# Human turn: the labels followed by the transcription to summarise.
query = "Labels: {labels}\n Transcription: {transcription}"

prompt = ChatPromptTemplate(
    messages=[
        SystemMessagePromptTemplate.from_template(role),
        HumanMessagePromptTemplate.from_template(query),
    ]
)

# No output parser here: the chain yields the model's message object, and
# callers read its `.content`.
final_chain = prompt | model







# Execution



In [6]:
import os
import assemblyai as aai

# Get the API key from the environment variable
api_key = os.getenv('ASSEMBLYAI_API_KEY')
if not api_key:
    # Fail here with a clear message instead of handing None to the SDK and
    # getting an opaque error from the transcription call later on.
    raise RuntimeError("ASSEMBLYAI_API_KEY environment variable is not set")

# Set the API key in the AssemblyAI settings
aai.settings.api_key = api_key

transcriber = aai.Transcriber()

# Example using a URL
# audio_transcript = transcriber.transcribe("https://storage.googleapis.com/aai-web-samples/news.mp4")

# Example using a local file (uncomment the line below to use)
# audio_transcript = transcriber.transcribe("./test_audio.mp3")
# audio_transcript = transcriber.transcribe("./lattice.wav")

from pydub import AudioSegment

# Load your .m4a file
m4a_audio = AudioSegment.from_file("./vmail.m4a", format="m4a")

# Convert to mp3. export() returns the open output file handle; close it so
# the file is flushed and released before the transcriber reads it.
m4a_audio.export("./dh_one_of_n.mp3", format="mp3").close()

audio_transcript = transcriber.transcribe("./dh_one_of_n.mp3")

# A failed transcription is reported through the transcript's status rather
# than an exception; stop here so downstream chains never receive empty text.
if audio_transcript.status == aai.TranscriptStatus.error:
    raise RuntimeError(f"Transcription failed: {audio_transcript.error}")

print(audio_transcript.text)


It. Sweet. We're recording this phone call now, just so you know. So anything you say is going to be recorded. Okay. So, yeah, it's going correct in my voice now because I need the audio file from this conversation we're about to have. Okay. Are you cool with that? I'm cool with that. Okay. Give me permission. Sweet. So let me tell you about what I developed as a proof of concept over this week. And then let me tell you later what it's going to turn into, which is a little bit more grandiose and wonky, and it's going to be hard to follow initially. Okay. I mean, you got a second, right? Maybe five minutes at least. Yeah, I got five. I'm getting ready. Sweet. Sweet. And it really just needs to be me talking. So what I did was write a program that takes an audio file. That's why we're recording this. And it converts it into text, like your speech to text feature. Right. I then take the text file and I clean it up right away because there's going to be things that are misspelled and punct

In [7]:

# Correction stage; its `.chain` expects a dict with `transcription` and
# `labels` and returns the same dict with the transcription patched.
c = CleanUp()

# First step of the pipeline: keep the raw transcription and, from the same
# input, run the labelling chain to obtain the labels.
labelled_input = {
    "transcription": itemgetter('transcription'),
    "labels": label_chain,
}

# label -> clean up -> summarise
big_chain = labelled_input | c.chain | final_chain

# NOTE(review): the original carried a "joint list with '\n'" reminder here.
# Presumably the labels list was meant to be joined into a string before
# prompting - confirm with the author.

summary = big_chain.invoke({"transcription": audio_transcript.text}).content

print(f'\n\n SUMMARY: \n\n {summary} \n\n')


Fixed transcription:  It's sweet. We're recording this phone call now, just so you know. So anything you say is going to be recorded. Okay. So, yeah, it's going to correct my voice now because I need the audio file from this conversation we're about to have. Okay. Are you cool with that? I'm cool with that. Okay. Give me permission. Sweet. So let me tell you about what I developed as a proof of concept over this week. And then let me tell you later what it's going to turn into, which is a little bit more grandiose and wonky, and it's going to be hard to follow initially. Okay. I mean, you got a second, right? Maybe five minutes at least. Yeah, I got five. I'm getting ready. Sweet. Sweet. And it really just needs to be me talking. So what I did was write a program that takes an audio file. That's why we're recording this. And it converts it into text, like your speech to text feature. Right. I then take the text file and I clean it up right away because there's going to be things that a

In [22]:
# Bare expression so the notebook displays the current value of `summary`.
# NOTE(review): the recorded output below does not match the transcript shown
# earlier; presumably it comes from a later run on different audio - confirm.
summary

"Summary: \nIn Bhargav's performance review, his supervisor, who previously mentored him, notes significant challenges. Despite specialization in Design Analysis, particularly Finite Element Analysis (FEA), Bhargav struggles with producing quality work consistently and timely. His mentor-turned-supervisor has attempted to suggest improvements lightheartedly, but has seen minimal progress. Now in a supervisory role, the expectation is for Bhargav to heed the feedback and make necessary changes, which is proving difficult."