In [12]:
from langchain import hub
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder,FewShotChatMessagePromptTemplate,PromptTemplate



In [16]:
chat_template = ChatPromptTemplate.from_messages(
    [
        ("system", "You are a helpful AI bot. Your name is {name}."),
        ("human", "Hello, how are you doing?"),
        ("ai", "I'm doing well, thanks!"),
        ("human", "{user_input}"),
    ]
)

chat_template.format_messages(name="Bob", user_input="What is your name?")

[SystemMessage(content='You are a helpful AI bot. Your name is Bob.'),
 HumanMessage(content='Hello, how are you doing?'),
 AIMessage(content="I'm doing well, thanks!"),
 HumanMessage(content='What is your name?')]

In [17]:
from langchain_core.messages import SystemMessage
from langchain_core.prompts import HumanMessagePromptTemplate

chat_template = ChatPromptTemplate.from_messages(
    [
        SystemMessage(
            content=(
                "You are a helpful assistant that re-writes the user's text to "
                "sound more upbeat."
            )
        ),
        HumanMessagePromptTemplate.from_template("{text}"),
    ]
)
messages = chat_template.format_messages(text="I don't like eating tasty things")
print(messages)

[SystemMessage(content="You are a helpful assistant that re-writes the user's text to sound more upbeat."), HumanMessage(content="I don't like eating tasty things")]


In [29]:
from langchain_community.document_loaders import TextLoader

loader = TextLoader("./bella_vista.txt")
docs = loader.load()

In [40]:
docs[0]

Document(metadata={'source': './bella_vista.txt'}, page_content="Q: What are the hours of operation for Bella Vista?\nA: Bella Vista is open from 11 a.m. to 11 p.m. from Monday to Saturday. On Sundays, we welcome guests from 12 p.m. to 10 p.m.\n\nQ: What type of cuisine does Bella Vista serve?\nA: Bella Vista offers a delightful blend of Mediterranean and contemporary American cuisine. We pride ourselves on using the freshest ingredients, many of which are sourced locally.\n\nQ: Do you offer vegetarian or vegan options at Bella Vista?\nA: Absolutely! Bella Vista boasts a diverse menu that includes a variety of vegetarian and vegan dishes. Our chefs are also happy to customize dishes based on dietary needs.\n\nQ: Is Bella Vista family-friendly?\nA: Yes, Bella Vista is a family-friendly establishment. We have a dedicated kids' menu and offer high chairs and booster seats for our younger guests.\n\nQ: Can I book private events at Bella Vista?\nA: Certainly! Bella Vista has a private dinin

In [36]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
)
documents = text_splitter.split_documents(docs)

In [44]:
from langchain_core.documents import Document

test = "In this podcast, the speakers discuss the pressing issue of the environment and the need for immediate attention. Speaker_1 emphasizes the importance of reducing carbon emissions and investing in renewable energy. Speaker_2 adds that while renewable energy is crucial, we must also prioritize conserving natural habitats and protecting wildlife from the effects of climate change. Speaker_1 agrees with both points but highlights the importance of considering the economic impact of environmental policies. They stress the need for solutions that balance ecological preservation with economic growth. Overall, the speakers advocate for a holistic approach to addressing environmental challenges."
document = Document(
    page_content= test,
    metadata={"source": "audio.wav"}
)

document


Document(metadata={'source': 'audio.wav'}, page_content='In this podcast, the speakers discuss the pressing issue of the environment and the need for immediate attention. Speaker_1 emphasizes the importance of reducing carbon emissions and investing in renewable energy. Speaker_2 adds that while renewable energy is crucial, we must also prioritize conserving natural habitats and protecting wildlife from the effects of climate change. Speaker_1 agrees with both points but highlights the importance of considering the economic impact of environmental policies. They stress the need for solutions that balance ecological preservation with economic growth. Overall, the speakers advocate for a holistic approach to addressing environmental challenges.')

In [45]:
from langchain.text_splitter import RecursiveCharacterTextSplitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=100,
    chunk_overlap=20,
)
documents = text_splitter.split_documents([document])
documents



[Document(metadata={'source': 'audio.wav'}, page_content='In this podcast, the speakers discuss the pressing issue of the environment and the need for'),
 Document(metadata={'source': 'audio.wav'}, page_content='and the need for immediate attention. Speaker_1 emphasizes the importance of reducing carbon'),
 Document(metadata={'source': 'audio.wav'}, page_content='of reducing carbon emissions and investing in renewable energy. Speaker_2 adds that while renewable'),
 Document(metadata={'source': 'audio.wav'}, page_content='while renewable energy is crucial, we must also prioritize conserving natural habitats and'),
 Document(metadata={'source': 'audio.wav'}, page_content='habitats and protecting wildlife from the effects of climate change. Speaker_1 agrees with both'),
 Document(metadata={'source': 'audio.wav'}, page_content='agrees with both points but highlights the importance of considering the economic impact of'),
 Document(metadata={'source': 'audio.wav'}, page_content='economic im

In [54]:
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_openai import ChatOpenAI

llm = ChatOpenAI()


db=FAISS.from_documents(documents,OpenAIEmbeddings())


db

<langchain_community.vectorstores.faiss.FAISS at 0x10edbee30>

In [53]:
query="renewable energy is important "
result=db.similarity_search(query)
result

[Document(metadata={'source': 'audio.wav'}, page_content='of reducing carbon emissions and investing in renewable energy. Speaker_2 adds that while renewable'),
 Document(metadata={'source': 'audio.wav'}, page_content='while renewable energy is crucial, we must also prioritize conserving natural habitats and'),
 Document(metadata={'source': 'audio.wav'}, page_content='and the need for immediate attention. Speaker_1 emphasizes the importance of reducing carbon'),
 Document(metadata={'source': 'audio.wav'}, page_content='advocate for a holistic approach to addressing environmental challenges.')]

In [56]:
## Design ChatPrompt Template
from langchain_core.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template("""
Answer the following question based only on the provided context. 
Think step by step before providing a detailed answer. 
I will tip you $1000 if the user finds the answer helpful. 
<context>
{context}
</context>
Question: {input}""")

prompt

ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template='\nAnswer the following question based only on the provided context. \nThink step by step before providing a detailed answer. \nI will tip you $1000 if the user finds the answer helpful. \n<context>\n{context}\n</context>\nQuestion: {input}'))])

In [57]:
## Chain Introduction
## Create Stuff Docment Chain

from langchain.chains.combine_documents import create_stuff_documents_chain

document_chain=create_stuff_documents_chain(llm,prompt)


In [58]:
retriever=db.as_retriever()
retriever

VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x10edbee30>)

In [59]:
from langchain.chains import create_retrieval_chain
retrieval_chain=create_retrieval_chain(retriever,document_chain)

In [60]:
response=retrieval_chain.invoke({"input":"why  is renewable energy important ? "})

In [62]:
response

{'input': 'why  is renewable energy important ? ',
 'context': [Document(metadata={'source': 'audio.wav'}, page_content='of reducing carbon emissions and investing in renewable energy. Speaker_2 adds that while renewable'),
  Document(metadata={'source': 'audio.wav'}, page_content='while renewable energy is crucial, we must also prioritize conserving natural habitats and'),
  Document(metadata={'source': 'audio.wav'}, page_content='and the need for immediate attention. Speaker_1 emphasizes the importance of reducing carbon'),
  Document(metadata={'source': 'audio.wav'}, page_content='advocate for a holistic approach to addressing environmental challenges.')],
 'answer': 'Renewable energy is important because it helps reduce carbon emissions, which in turn helps combat climate change and its negative effects on the environment. Additionally, investing in renewable energy sources helps to decrease our reliance on fossil fuels, which are finite resources with harmful environmental impacts

In [4]:
from pyannote.audio import Pipeline
import time 

start = time.time()
pipeline = Pipeline.from_pretrained(
    "pyannote/speaker-diarization-3.1",
    use_auth_token="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")

# send pipeline to GPU (when available)
import torch
pipeline.to(torch.device("mps"))

# apply pretrained pipeline
diarization = pipeline("/Users/vinayak/AI/projects/voice-assistant-bot/environment_debate.wav")

# print the result
for turn, _, speaker in diarization.itertracks(yield_label=True):
    print(f"start={turn.start:.1f}s stop={turn.end:.1f}s speaker_{speaker}")
# start=0.2s stop=1.5s speaker_0
# start=1.8s stop=3.9s speaker_1
# start=4.2s stop=5.7s speaker_0

# ...

end = time.time()

print(f"Time taken by algo is {end-start}")

start=0.0s stop=3.8s speaker_SPEAKER_01
start=4.0s stop=7.8s speaker_SPEAKER_01
start=9.1s stop=14.5s speaker_SPEAKER_00
start=14.8s stop=17.6s speaker_SPEAKER_00
start=19.1s stop=20.4s speaker_SPEAKER_02
start=20.7s stop=24.6s speaker_SPEAKER_02
start=25.0s stop=29.2s speaker_SPEAKER_02
Time taken by algo is 3.493901014328003


In [8]:
sample

{'path': '1272-128104-0000.flac',
 'array': array([0.00238037, 0.0020752 , 0.00198364, ..., 0.00042725, 0.00057983,
        0.0010376 ]),
 'sampling_rate': 16000}

In [14]:
import whisper
import time 

start = time.time()
model = whisper.load_model("base")

# load audio and pad/trim it to fit 30 seconds
audio = whisper.load_audio("/Users/vinayak/AI/projects/voice-assistant-bot/environment_debate.wav")
audio = whisper.pad_or_trim(audio)

# make log-Mel spectrogram and move to the same device as the model
mel = whisper.log_mel_spectrogram(audio).to(model.device)

# detect the spoken language
_, probs = model.detect_language(mel)
print(f"Detected language: {max(probs, key=probs.get)}")

# decode the audio
options = whisper.DecodingOptions()
result = whisper.decode(model, mel, options)

# print the recognized text
print(result.text)

print(f"final time : {time.time()-start}")

Detected language: en
The environment is a pressing issue that requires immediate attention. We need to reduce carbon emissions and invest in renewable energy. While renewable energy is important, we also need to focus on conserving natural habitats and protecting wildlife from the effects of climate change. I agree with both points, but we must also consider the economic impact of environmental policies.
final time : 23.406076192855835


In [20]:
from transformers import WhisperProcessor, WhisperForConditionalGeneration
from datasets import load_dataset, Audio

model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-tiny.en")
processor = WhisperProcessor.from_pretrained("openai/whisper-tiny.en")

dataset = load_dataset("distil-whisper/librispeech_long", "clean", split="validation")
dataset = dataset.cast_column("audio", Audio(16_000))

sample = next(iter(dataset))
inputs = processor(sample["audio"]["array"], padding=True, truncation=False, return_attention_mask=True, return_tensors="pt")

outputs = model.generate(**inputs, return_segments=True)

print(outputs)

Downloading readme: 100%|██████████| 480/480 [00:00<00:00, 2.35MB/s]
Downloading data: 100%|██████████| 1.98M/1.98M [00:01<00:00, 1.87MB/s]
Generating validation split: 100%|██████████| 1/1 [00:00<00:00, 43.53 examples/s]
It is strongly recommended to pass the `sampling_rate` argument to this function. Failing to do so can result in silent errors that might be hard to debug.
The attention mask is not set and cannot be inferred from input because pad token is same as eos token.As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


{'sequences': tensor([[50363,  1770,    13,  2264,   346,   353,   318,   262, 46329,   286,
           262,  3504,  6097,    11,   290,   356,   389,  9675,   284,  7062,
           465, 21443,    13, 50687, 50687,  5414,   318,  1770,    13,  2264,
           346,   353,   338,  5642,  1342,  3499,   621,   465,  2300,    13,
         50927, 50927,   679,  4952,   514,   326,   379,   428, 43856,  1622,
           286,   262,   614,    11,   351,  6786,   290, 32595, 12023, 28236,
         51205, 51205,   878,   514,    11,   985,  2915,  7428,   422,  6600,
           290,   663,  2482,  3051,   749, 14704,   284,   262,  2000,    13,
         51551, 50363,   679,   468, 12296, 17188,  1771,  7361, 26113, 18881,
          1122,   338,   670,   318,  1107,  8312,   706,   477,    11,   290,
         50647, 50647,   460,  7073,   287,   340,   475,  1310,   286, 26898,
           314,   400, 22260,    13, 50863, 50863,  5164, 10076,   338,  5986,
           389,   257,  3297,   286,  

In [32]:
import os
from pydub import AudioSegment

def combine_audio_files(output_dir): 
    # Counter to keep track of the combined files
    count = 1 

    # Loop through each speaker folder in the output directory
    for speaker_folder in os.listdir(output_dir):
        speaker_path = os.path.join(output_dir, speaker_folder)
        
        if os.path.isdir(speaker_path):
            # List all WAV files in the speaker folder
            wav_files = [f for f in os.listdir(speaker_path) if f.endswith('.wav')]
            
            if wav_files:
                # Initialize an empty AudioSegment
                combined_audio = AudioSegment.empty()
                
                # Load and concatenate all WAV files
                for wav_file in wav_files:
                    wav_path = os.path.join(speaker_path, wav_file)
                    audio = AudioSegment.from_wav(wav_path)
                    combined_audio += audio
                
                # Export the combined audio to a new file
                output_path = os.path.join(speaker_path, f"combined_speaker_{count}.wav")
                combined_audio.export(output_path, format="wav")
                
                print(f"Combined audio saved to: {output_path}")
                count += 1 

combine_audio_files(output_dir="output")
print("All audio files have been processed.")


Combined audio saved to: output/speaker_SPEAKER_00/combined_speaker_1.wav
Combined audio saved to: output/speaker_SPEAKER_01/combined_speaker_2.wav
Combined audio saved to: output/speaker_SPEAKER_02/combined_speaker_3.wav
All audio files have been processed.


In [44]:
from pathlib import Path
from openai import OpenAI
from dotenv import load_dotenv 
import time 

start = time.time()
load_dotenv()


client = OpenAI()

file_path = "/Users/vinayak/AI/projects/podcast-chatbot/output/speaker_SPEAKER_01/combined_speaker_2.wav"

def transcribe_audio(file_path): 
  audio_file= open(file_path, "rb")
  transcript = client.audio.transcriptions.create(
    model="whisper-1",
    response_format="text", # Default output format is json,if want in json format, just comment out response format
    file=audio_file 
  )
  return transcript

transcript = transcribe_audio(file_path)
print(transcript)

def assign_speaker(file_path):
    transcript = transcribe_audio(file_path)
    # Extract the speaker folder name from the file path
    speaker_folder = os.path.basename(os.path.dirname(file_path))
    
    # Extract the speaker name
    speaker_name = speaker_folder.replace("speaker_", "")
    
    # Format the transcript with the speaker name
    formatted_transcript = f"{speaker_name}: {transcript}"
    
    return formatted_transcript


assign_speaker(transcript,file_path)



The environment is a pressing issue that requires immediate attention. We need to reduce carbon emissions and invest in renewable energy.



'SPEAKER_01: The environment is a pressing issue that requires immediate attention. We need to reduce carbon emissions and invest in renewable energy.\n'

In [45]:
# Define your transcribe_audio function
def transcribe_audio(file_path): 
    audio_file = open(file_path, "rb")
    transcript = client.audio.transcriptions.create(
        model="whisper-1",
        response_format="text",  # Default output format is json; if you want in json format, just comment out response format
        file=audio_file 
    )
    return transcript

# Update the assign_speaker function
def assign_speaker(output_dir):
    transcripts = []
    
    # Loop through each speaker folder in the output directory
    for speaker_folder in os.listdir(output_dir):
        speaker_path = os.path.join(output_dir, speaker_folder)
        
        if os.path.isdir(speaker_path):
            # Look for the combined_speaker file
            combined_files = [f for f in os.listdir(speaker_path) if f.startswith('combined_speaker') and f.endswith('.wav')]
            
            for combined_file in combined_files:
                combined_file_path = os.path.join(speaker_path, combined_file)
                
                # Transcribe the audio
                transcript = transcribe_audio(combined_file_path)
                
                # Extract the speaker folder name from the file path
                speaker_name = speaker_folder.replace("speaker_", "")
                
                # Format the transcript with the speaker name
                formatted_transcript = f"{speaker_name}: {transcript}"
                
                # Add to the list of transcripts
                transcripts.append(formatted_transcript)
    
    return transcripts


# Example usage
output_dir = "output"
transcripts = assign_speaker(output_dir)
for transcript in transcripts:
    print(transcript)

SPEAKER_00: and protecting wildlife from the effects of climate change. While renewable energy is important, we also need to focus on conserving natural habitats.

SPEAKER_01: The environment is a pressing issue that requires immediate attention. We need to reduce carbon emissions and invest in renewable energy.

SPEAKER_02: but we must also consider the economic impact of environmental policies. We need solutions that balance ecological preservation with economic growth. I agree with both points.

