In [7]:
# ✅ Step 1: Install dependencies
!pip install -q openai-whisper langchain langchain-community faiss-cpu pydub google-generativeai langchain-google-genai
!apt-get install -y ffmpeg

# ✅ Step 2: Upload an audio file
from google.colab import files
uploaded = files.upload()

# ✅ Step 3: Transcribe using Whisper
import whisper
model = whisper.load_model("base")
audio_path = list(uploaded.keys())[0]
transcription_result = model.transcribe(audio_path)
transcript = transcription_result["text"]
print("📄 Transcript (first 1000 chars):\n", transcript[:1000])

# ✅ Step 4: Set up LangChain + Gemini for Q&A
import os
import google.generativeai as genai
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings, ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA

# 🔑 Gemini API Key
os.environ["GOOGLE_API_KEY"] = "AIzaSyBMCmURqYamt-GVoZu5Fv5LaKJDQ5sUI4Q"  # ⬅️ Replace with your Gemini 1.5 API key
genai.configure(api_key=os.environ["GOOGLE_API_KEY"])

# 🔹 Split transcript into chunks
splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
docs = splitter.create_documents([transcript])

# 🔹 Embed chunks and create vectorstore
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
vectorstore = FAISS.from_documents(docs, embeddings)
retriever = vectorstore.as_retriever()

# 🔹 Build RetrievalQA chain
llm = ChatGoogleGenerativeAI(model="models/gemini-1.5-flash", temperature=0.2)
qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=retriever, return_source_documents=False)

# ✅ Step 5: Ask questions about the audio
while True:
    query = input("💬 Ask something about the audio (or type 'exit'): ")
    if query.lower() == "exit":
        break
    answer = qa_chain.run(query)
    print("🤖", answer)


Reading package lists... Done
Building dependency tree... Done
Reading state information... Done
ffmpeg is already the newest version (7:4.4.2-0ubuntu0.22.04.1).
0 upgraded, 0 newly installed, 0 to remove and 30 not upgraded.


Saving harvard.wav to harvard (1).wav




📄 Transcript (first 1000 chars):
  The stale smell of old beer lingers. It takes heat to bring out the odor. A cold dip restores health and zest. A salt pickle tastes fine with ham. Tacos al pastor are my favorite. A zestful food is the hot cross bun.
💬 Ask something about the audio (or type 'exit'): how to bring out odor in beer?


  answer = qa_chain.run(query)


🤖 The provided text states that heat brings out the odor in old beer.


KeyboardInterrupt: Interrupted by user