In [1]:
pip install numpy faiss-cpu transformers sentence-transformers SpeechRecognition gTTS pdfplumber

Collecting faiss-cpu
  Downloading faiss_cpu-1.8.0.post1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.7 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-3.1.0-py3-none-any.whl.metadata (23 kB)
Collecting SpeechRecognition
  Downloading SpeechRecognition-3.10.4-py2.py3-none-any.whl.metadata (28 kB)
Collecting gTTS
  Downloading gTTS-2.5.3-py3-none-any.whl.metadata (4.1 kB)
Collecting pdfplumber
  Downloading pdfplumber-0.11.4-py3-none-any.whl.metadata (41 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m42.0/42.0 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting pdfminer.six==20231228 (from pdfplumber)
  Downloading pdfminer.six-20231228-py3-none-any.whl.metadata (4.2 kB)
Collecting pypdfium2>=4.18.0 (from pdfplumber)
  Downloading pypdfium2-4.30.0-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (48 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.5/48.5 kB[0m [31m3.2 MB/

In [3]:
import os
import numpy as np
import faiss
from transformers import pipeline
from sentence_transformers import SentenceTransformer
import speech_recognition as sr
from gtts import gTTS
import pdfplumber

In [4]:
# Instantiate the two pretrained models shared by the rest of the notebook:
#   qa_model        - question-answering pipeline, called with a question and a context
#   embedding_model - sentence encoder, used to embed both paragraphs and queries
qa_model = pipeline(
    task='question-answering',
    model='deepset/roberta-base-squad2',
)
embedding_model = SentenceTransformer(model_name_or_path='all-MiniLM-L6-v2')

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json:   0%|          | 0.00/571 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/496M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/79.0 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/899k [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/772 [00:00<?, ?B/s]



modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [5]:
# Speech-to-Text function using SpeechRecognition
def speech_to_text(audio_file):
    """Transcribe the speech in an audio file to text.

    The whole file is read with ``sr.AudioFile`` and handed to
    ``Recognizer.recognize_google``.

    Args:
        audio_file: Audio source accepted by ``sr.AudioFile``.

    Returns:
        The transcribed text, or the sentinel string
        ``"Could not understand the audio"`` when the recognizer raises
        ``sr.UnknownValueError``. Any other exception propagates to the caller.
    """
    engine = sr.Recognizer()
    with sr.AudioFile(audio_file) as clip:
        recording = engine.record(clip)

    # Only the recognition call can produce the "unintelligible" error we translate
    # into the sentinel value that callers compare against.
    try:
        transcript = engine.recognize_google(recording)
    except sr.UnknownValueError:
        return "Could not understand the audio"

    print(f"Transcribed audio to text: {transcript}")
    return transcript

In [6]:
# Text-to-Speech function using gTTS
def text_to_speech(text, filename='output.mp3', lang='en'):
    """Synthesize ``text`` to an audio file with gTTS.

    Args:
        text: Non-empty string to speak.
        filename: Destination path for the generated audio.
        lang: Language code forwarded to ``gTTS`` (defaults to English, which is
            also gTTS's own default).

    Returns:
        ``filename``, after the audio has been saved.

    Raises:
        ValueError: If ``text`` is not a string or contains no visible characters.
            The QA model can yield an empty answer and a failed audio query yields
            ``None``; rejecting those here gives a clear error instead of a bare
            assertion failure from inside gTTS.
    """
    if not isinstance(text, str) or not text.strip():
        raise ValueError("text_to_speech requires non-empty text to synthesize")

    tts = gTTS(text, lang=lang)
    tts.save(filename)
    print(f"Saved answer as audio in file: {filename}")
    return filename

In [9]:
# Function to extract text from a PDF
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF into one string.

    Args:
        pdf_path: Path (or file object) accepted by ``pdfplumber.open``.

    Returns:
        The text of all pages that yielded any text, in page order, with a blank
        line between consecutive pages. Returns ``''`` when no page has text.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() may give None/'' for pages without extractable text.
        page_texts = [page.extract_text() for page in pdf.pages]

    # Join with a blank line rather than concatenating directly: without a separator
    # the last word of one page fuses with the first word of the next, and the
    # paragraph splitter used for indexing ('\n\n') never sees a page boundary.
    return '\n\n'.join(chunk for chunk in page_texts if chunk)

In [8]:
# Load and index the document
def index_document_with_embeddings(text):
    """Split ``text`` into paragraphs, embed them and build a FAISS L2 index.

    Args:
        text: Document text; paragraphs are delimited by blank lines (``'\\n\\n'``).

    Returns:
        A ``(paragraphs, index)`` tuple. ``paragraphs`` is the list of non-empty,
        whitespace-trimmed paragraphs; ``index`` is a ``faiss.IndexFlatL2`` holding
        one vector per paragraph, in the same order.

    Raises:
        ValueError: If ``text`` contains no non-empty paragraph.
    """
    # Drop blank chunks (produced by runs of newlines or trailing separators):
    # otherwise they get embedded and can be retrieved as an empty QA context.
    paragraphs = [chunk.strip() for chunk in text.split('\n\n') if chunk.strip()]
    if not paragraphs:
        raise ValueError("No non-empty paragraphs found in the document text")

    # FAISS expects a contiguous float32 matrix of shape (n_paragraphs, dim).
    embeddings = np.ascontiguousarray(embedding_model.encode(paragraphs), dtype=np.float32)
    index = faiss.IndexFlatL2(embeddings.shape[1])
    index.add(embeddings)
    return paragraphs, index

In [10]:
# Search the most relevant paragraph using FAISS
def search_document_faiss(query, paragraphs, index):
    """Return the paragraph whose embedding is nearest to the query.

    Args:
        query: Question text to embed with ``embedding_model``.
        paragraphs: Sequence of paragraphs, in the order they were added to ``index``.
        index: Index object exposing ``search(vectors, k)``.

    Returns:
        The single paragraph at the position of the top-1 search hit.

    Raises:
        IndexError: If the search yields no usable hit (FAISS reports a missing
            neighbour as ``-1``) or a position outside ``paragraphs``.
    """
    # FAISS expects contiguous float32 query vectors.
    query_embedding = np.ascontiguousarray(embedding_model.encode([query]), dtype=np.float32)
    _, indices = index.search(query_embedding, 1)

    best = int(indices[0][0])
    # Guard explicitly: a -1 "no result" marker would otherwise be treated as a
    # negative list index and silently return the last paragraph.
    if best < 0 or best >= len(paragraphs):
        raise IndexError(f"Search returned no valid paragraph (index {best})")
    return paragraphs[best]

In [11]:
# Get the answer from the relevant paragraph using QA model
def get_answer_from_paragraph(query, paragraph):
    """Run the QA model on one paragraph and return its answer.

    Args:
        query: The question, passed to ``qa_model`` as ``question``.
        paragraph: The text to search for the answer, passed as ``context``.

    Returns:
        The value stored under the ``'answer'`` key of the model's result.
    """
    prediction = qa_model(question=query, context=paragraph)
    best_span = prediction['answer']
    return best_span

In [12]:
# Extract text from the PDF file and index it.
# The names assigned here are module-level state: `indexed_paragraphs` and
# `faiss_index` are read by handle_text_query(), so this cell must run before
# any query is handled.
# NOTE(review): absolute path, presumably a Colab upload location — confirm before
# running elsewhere.
document_path = '/content/Career Counseling Training Guide 2023 - 24 - FREE E-Book.pdf'
pdf_text = extract_text_from_pdf(document_path)
indexed_paragraphs, faiss_index = index_document_with_embeddings(pdf_text)

In [13]:
# Function to handle a text-based query
def handle_text_query(query):
    """Answer a text question against the indexed document.

    Looks up the closest paragraph in the module-level ``indexed_paragraphs`` /
    ``faiss_index``, asks the QA helper for an answer within it, and prints the
    question together with the answer.

    Args:
        query: The question text.

    Returns:
        The answer produced by ``get_answer_from_paragraph``.
    """
    context = search_document_faiss(query, indexed_paragraphs, faiss_index)
    reply = get_answer_from_paragraph(query, context)
    print(f"Question: {query}\nAnswer: {reply}")
    return reply

In [14]:
# Function to handle an audio-based query
def handle_audio_query(audio_file):
    """Answer a spoken question stored in an audio file.

    Args:
        audio_file: Audio source forwarded to ``speech_to_text``.

    Returns:
        The answer from ``handle_text_query`` for the transcribed question, or
        ``None`` when transcription returned the "could not understand" sentinel
        (in which case the sentinel message is printed).
    """
    transcript = speech_to_text(audio_file)

    # Happy path first: a real transcript is answered like any text question.
    if transcript != "Could not understand the audio":
        return handle_text_query(transcript)

    # Transcription failed: report it and give the caller nothing to speak.
    print(transcript)
    return None

In [16]:
# Test the system with both text and audio input.
# End-to-end demo: one typed question (whose answer is also saved as speech) and one
# spoken question read from an audio file.
if __name__ == '__main__':
    # Test with a text query; handle_text_query prints the question and answer itself.
    print("Text-based Question Test:")
    text_question = "What is the purpose of career counselling?"
    answer = handle_text_query(text_question)

    # Convert the answer to speech. `audio_file` receives the saved filename returned
    # by text_to_speech; it is not used again in this cell.
    audio_file = text_to_speech(answer)

    # Test with an audio query. The return value is discarded here; the result is
    # visible through the prints made by handle_audio_query / handle_text_query.
    print("\nAudio-based Question Test:")
    # NOTE(review): absolute path, presumably a Colab upload location — confirm.
    audio_question_path = '/content/question.wav'
    handle_audio_query(audio_question_path)

Text-based Question Test:
Question: What is the purpose of career counselling?
Answer: fostering understanding, guidance, and growth
Saved answer as audio in file: output.mp3

Audio-based Question Test:
Transcribed audio to text: what is the purpose of career counseling
Question: what is the purpose of career counseling
Answer: to guide the
student
