In [1]:
# STEP 1: Install everything needed
!pip install -q faiss-cpu sentence-transformers git+https://github.com/openai/whisper.git pydub transformers
!apt install -y ffmpeg
!pip install -q --upgrade optree

# STEP 2: Upload your audio
from google.colab import files

# Opens the Colab upload widget; the returned mapping is iterated below to get
# the name of the uploaded file.
uploaded = files.upload()
if not uploaded:
    # A cancelled/empty upload would otherwise surface as a bare StopIteration.
    raise RuntimeError("No file was uploaded; re-run this cell and choose an audio file.")
# Only the first uploaded file is used by the rest of the notebook.
file_path = next(iter(uploaded))

# STEP 3: Transcribe the audio
import whisper

print("\n🔍 Transcribing audio...")

# Load the "base" Whisper checkpoint, then run it over the uploaded file.
model = whisper.load_model(name="base")
result = model.transcribe(audio=file_path)

# Only the recognized text of the result is used downstream.
transcript = result["text"]

print("\n📝 Transcription done!")

# STEP 4: Split transcript into chunks
import re

def split_text(text, max_len=40):
    """Group the sentences of ``text`` into chunks of at most ``max_len`` words.

    The text is split after ``.``, ``!`` or ``?`` followed by whitespace, and
    consecutive sentences are packed into one chunk while the chunk's word
    count stays within ``max_len``. A single sentence that is longer than
    ``max_len`` words is kept whole as its own chunk instead of being cut.

    Args:
        text: The text to split; may be empty or ``None``.
        max_len: Maximum number of whitespace-separated words per chunk.

    Returns:
        A list of non-empty chunk strings in their original order, or ``[]``
        when the text has no content.
    """
    if not text or not text.strip():
        return []

    sentences = [s for s in re.split(r'(?<=[.!?])\s+', text.strip()) if s]

    chunks = []
    pending = []       # sentences collected for the chunk being built
    pending_words = 0  # total word count of `pending`
    for sentence in sentences:
        # Count words per sentence rather than on the concatenated string:
        # gluing "end." and "Start" together would hide one word from the count.
        n_words = len(sentence.split())
        # Only flush when something is pending, so an over-long first sentence
        # never produces an empty chunk.
        if pending and pending_words + n_words > max_len:
            chunks.append(" ".join(pending))
            pending, pending_words = [], 0
        pending.append(sentence)
        pending_words += n_words

    if pending:
        chunks.append(" ".join(pending))
    return chunks

# Drop blank chunks so the index never holds an empty passage.
chunks = [chunk for chunk in split_text(transcript) if chunk.strip()]
if not chunks:
    # An empty transcript would otherwise only fail later, at embedding or
    # answering time, with a much less obvious error.
    raise ValueError("The transcript is empty, so there is nothing to index.")

# STEP 5: Embed chunks using SentenceTransformer + Store in FAISS
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

embed_model = SentenceTransformer("all-MiniLM-L6-v2")
# Chunks are encoded in list order, so index positions can be mapped back to `chunks`.
embeddings = embed_model.encode(chunks)

# Size the L2 index from the width of the embedding matrix, then store every chunk vector.
dimension = embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
index.add(embeddings)

# STEP 6: Load a QA model (Distilled BERT for Question Answering)
from transformers import pipeline

# Question-answering pipeline built from the named DistilBERT checkpoint.
qa_pipeline = pipeline(
    task="question-answering",
    model="distilbert-base-uncased-distilled-squad",
)

# STEP 7: Ask user questions and generate refined answers
while True:
    # Strip surrounding whitespace so inputs such as "exit " or " Exit" still end the loop.
    query = input("\n💬 Ask a question about the audio (or type 'exit'): ").strip()
    if query.lower() == "exit":
        break
    if not query:
        # Nothing was typed: ask again instead of sending an empty question to the models.
        continue

    # Retrieve the single chunk whose embedding is nearest to the question's embedding.
    query_embed = embed_model.encode([query])
    _distances, neighbors = index.search(query_embed, k=1)
    best = int(neighbors[0][0])
    if best < 0:
        # A negative label means no neighbor was returned; indexing with it
        # would silently pick the last chunk.
        print("\n⚠️ No matching chunk found.")
        continue
    top_chunk = chunks[best]

    print("\n🔍 Most relevant chunk:")
    print("➡️", top_chunk)

    # Let the QA model extract the answer from the retrieved chunk only.
    print("\n🤖 Generating answer...")
    answer = qa_pipeline(question=query, context=top_chunk)
    print("✅ Answer:", answer['answer'])


  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.7/30.7 MB[0m [31m42.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m99.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m28.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m883.7/883.7 kB[0m [31m39.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m664.8/664.8 MB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m211.5/211.5 MB[0m [31m5.3 MB/s[0m eta [36m0:0

Saving harvard.wav to harvard.wav

🔍 Transcribing audio...


100%|████████████████████████████████████████| 139M/139M [00:01<00:00, 105MiB/s]



📝 Transcription done!


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/451 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/265M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

Device set to use cpu



💬 Ask a question about the audio (or type 'exit'): What is this about?

🔍 Most relevant chunk:
➡️ A zestful food is the hot cross bun.

🤖 Generating answer...
✅ Answer: hot cross bun

💬 Ask a question about the audio (or type 'exit'): what is a hot cross bun? 

🔍 Most relevant chunk:
➡️ A zestful food is the hot cross bun.

🤖 Generating answer...
✅ Answer: A zestful food

💬 Ask a question about the audio (or type 'exit'): exit


In [None]:
!pip uninstall -y optree
!pip install optree==0.10.0
!pip install --upgrade transformers


Found existing installation: optree 0.10.0
Uninstalling optree-0.10.0:
  Successfully uninstalled optree-0.10.0
Collecting optree==0.10.0
  Using cached optree-0.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (45 kB)
Using cached optree-0.10.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (287 kB)
Installing collected packages: optree
Successfully installed optree-0.10.0


In [None]:
!pip install openai-whisper




In [None]:
!pip install pydub




In [None]:
pip install -U langchain-community



In [None]:
!pip install faiss-cpu


