In [36]:
!pip install PyMuPDF
!pip install SpeechRecognition
!pip install gtts
!pip install PyAudio
!pip install rogue

Collecting rogue
  Downloading rogue-0.0.2.tar.gz (5.4 kB)
  Preparing metadata (setup.py): started
  Preparing metadata (setup.py): finished with status 'done'
Building wheels for collected packages: rogue
  Building wheel for rogue (setup.py): started
  Building wheel for rogue (setup.py): finished with status 'done'
  Created wheel for rogue: filename=rogue-0.0.2-py3-none-any.whl size=7218 sha256=463a933dbcd4d9b58a8568b6ed018a81ce23a3035010095aeb6eff420867f7ae
  Stored in directory: c:\users\dhanush adithiya\appdata\local\pip\cache\wheels\88\65\0c\e2d3efe66c4b48cb42ed2a2c5b310b9b5884c42238096f4414
Successfully built rogue
Installing collected packages: rogue
Successfully installed rogue-0.0.2


In [4]:
import fitz  
import speech_recognition as sr
import torch
from transformers import AutoTokenizer, AutoModelForQuestionAnswering
from gtts import gTTS
import os
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge_score import rouge_scorer
import nltk

In [38]:
class QuestionBot:
    def __init__(self, language="en"):
        self.language = language
        if language == "en":
            self.model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"
            self.sr_lang = "en-US"
            self.tts_lang = "en"
        elif language == "hi":
            self.model_name = "deepset/xlm-roberta-base-squad2"
            self.sr_lang = "hi-IN"
            self.tts_lang = "hi"
        elif language == "es":
            self.model_name = "deepset/xlm-roberta-base-squad2"
            self.sr_lang = "es-ES"
            self.tts_lang = "es"
        else:
            raise ValueError("Unsupported language. Choose from 'en', 'hi', or 'es'.")
        
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForQuestionAnswering.from_pretrained(self.model_name)
        self.text = ""
        
        # Initialize ROUGE scorer
        self.rouge_scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
        
    def parse_pdf(self, path_to_file):
        try:
            doc = fitz.open(path_to_file)
            self.text = ""
            for page in doc:
                self.text += page.get_text()
            doc.close()
            return True
        except Exception as e:
            print(f"Error parsing PDF: {e}")
            return False
    
    def get_voice_input(self):
        r = sr.Recognizer()
        try:
            with sr.Microphone() as source:
                print("Ask your question...")
                r.adjust_for_ambient_noise(source, duration=1)
                audio = r.listen(source, timeout=10, phrase_time_limit=5)
            
            question = r.recognize_google(audio, language=self.sr_lang)
            print("You asked:", question)
            return question
        except sr.UnknownValueError:
            print("Could not understand the audio.")
            return None
        except sr.RequestError as e:
            print(f"Speech recognition error: {e}")
            return None
        except sr.WaitTimeoutError:
            print("No speech detected within timeout.")
            return None
    
    def get_answer(self, question):
        if not self.text:
            return "No document loaded. Please parse a PDF first."
        
        try:
            inputs = self.tokenizer.encode_plus(
                question,
                self.text,
                return_tensors="pt",
                truncation=True,
                max_length=512,
                padding=True
            )
            
            with torch.no_grad():
                outputs = self.model(**inputs)
            
            answer_start = torch.argmax(outputs.start_logits)
            answer_end = torch.argmax(outputs.end_logits) + 1
            
            # Handle case where answer_end is before answer_start
            if answer_end <= answer_start:
                return "Could not find a suitable answer in the document."
            
            answer = self.tokenizer.convert_tokens_to_string(
                self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:answer_end])
            )
            
            return answer.strip()
        except Exception as e:
            return f"Error generating answer: {e}"
    
    def play_answer(self, answer):
        if not answer or answer.startswith("Error") or answer.startswith("Could not"):
            print("Cannot play invalid answer:", answer)
            return False
        
        try:
            print("Answer:", answer)
            tts = gTTS(text=answer, lang=self.tts_lang)
            tts.save("answer.mp3")
            
            # Cross-platform audio playing
            if os.name == 'nt':  # Windows
                os.system("start answer.mp3")
            elif os.name == 'posix':  # Mac/Linux
                os.system("afplay answer.mp3")  # Mac
                # os.system("mpg123 answer.mp3")  # Linux alternative
            
            return True
        except Exception as e:
            print(f"Error playing answer: {e}")
            return False
    
    def evaluate_bleu(self, reference, candidate):
        try:
            # Tokenize the texts
            reference_tokens = reference.lower().split()
            candidate_tokens = candidate.lower().split()
            
            # Use smoothing function to handle edge cases
            smoothie = SmoothingFunction().method4
            
            # Calculate BLEU score
            bleu_score = sentence_bleu([reference_tokens], candidate_tokens, smoothing_function=smoothie)
            
            return round(bleu_score, 4)
        except Exception as e:
            print(f"Error calculating BLEU score: {e}")
            return 0.0
    
    def evaluate_rouge(self, reference, candidate):
        try:
            scores = self.rouge_scorer.score(reference, candidate)
            
            rouge_scores = {
                'rouge1': {
                    'precision': round(scores['rouge1'].precision, 4),
                    'recall': round(scores['rouge1'].recall, 4),
                    'fmeasure': round(scores['rouge1'].fmeasure, 4)
                },
                'rouge2': {
                    'precision': round(scores['rouge2'].precision, 4),
                    'recall': round(scores['rouge2'].recall, 4),
                    'fmeasure': round(scores['rouge2'].fmeasure, 4)
                },
                'rougeL': {
                    'precision': round(scores['rougeL'].precision, 4),
                    'recall': round(scores['rougeL'].recall, 4),
                    'fmeasure': round(scores['rougeL'].fmeasure, 4)
                }
            }
            
            return rouge_scores
        except Exception as e:
            print(f"Error calculating ROUGE scores: {e}")
            return {}
    
    def run(self, pdf_path):
        if not self.parse_pdf(pdf_path):
            return
        
        question = self.get_voice_input()
        if not question:
            print("Voice input failed or no question detected.")
            return
        
        answer = self.get_answer(question)
        print("Answer:", answer)
        self.play_answer(answer)


In [43]:
bot = QuestionBot(language="en")

bot.parse_pdf("english.pdf")

question = bot.get_voice_input()

if question:
    answer = bot.get_answer(question)
    print("Answer:", answer)

    bot.play_answer(answer)

    reference = "FinGPT is an open-source framework specifically designed for developing Large Language Models (LLMs) tailored to the finance sector. Unlike proprietary models like BloombergGPT"
    print("BLEU Score:", bot.evaluate_bleu(reference, answer))
    print("ROUGE Score:", bot.evaluate_rouge(reference, answer))


Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Ask your question...
You asked: fin GPT
Answer: [SEP]
Answer: [SEP]
BLEU Score: 0
ROUGE Score: {'rouge1': {'precision': 0.0, 'recall': 0.0, 'fmeasure': 0.0}, 'rouge2': {'precision': 0.0, 'recall': 0.0, 'fmeasure': 0.0}, 'rougeL': {'precision': 0.0, 'recall': 0.0, 'fmeasure': 0.0}}


In [None]:
bot_hi = QuestionBot(language="hi")
bot_hi.parse_pdf("hindi.pdf")

question_hi = bot_hi.get_voice_input()

if question_hi:
    answer_hi = bot_hi.get_answer(question_hi)
    print("Answer (Hindi):", answer_hi)

    bot_hi.play_answer(answer_hi)

    reference_hi = "भारत एक विविधताओं से भरा देश है जहाँ विभिन्न धर्म, भाषाएँ और संस्कृतियाँ एक साथ coexist करती हैं।"
    
    print("BLEU Score (Hindi):", bot_hi.evaluate_bleu(reference_hi, answer_hi))
    print("ROUGE Score (Hindi):", bot_hi.evaluate_rouge(reference_hi, answer_hi))


In [None]:
bot_es = QuestionBot(language="es")
bot_es.parse_pdf("spanish.pdf")

question_es = bot_es.get_voice_input()

if question_es:
    answer_es = bot_es.get_answer(question_es)
    print("Answer (Spanish):", answer_es)

    bot_es.play_answer(answer_es)

    reference_es = "España es un país con una rica historia y una diversidad cultural impresionante."
    
    print("BLEU Score (Spanish):", bot_es.evaluate_bleu(reference_es, answer_es))
    print("ROUGE Score (Spanish):", bot_es.evaluate_rouge(reference_es, answer_es))

| Model Type         | Model Name                                                      |
| ------------------ | --------------------------------------------------------------- |
| Foundation (EN)    | `bert-large-uncased-whole-word-masking-finetuned-squad`         |
| Indic (HI)         | `deepset/xlm-roberta-base-squad2` (Multilingual; handles Hindi) |
| International (ES) | `deepset/xlm-roberta-base-squad2`   |


📄 PDF Text Extraction: via PyMuPDF

🗣️ Voice Input: Google STT via speech_recognition library

🤖 QA Inference: HuggingFace Transformers with AutoModelForQuestionAnswering

🔊 Voice Output: via gTTS (supports en, hi, es languages)

| **Aspect**                  | **Foundation Model (English)** `bert-large-uncased-whole-word-masking-finetuned-squad` | **Indic Model (Hindi)** `xlm-roberta-base-squad2`          | **International Model (Spanish)** `xlm-roberta-base-squad2` |
| --------------------------- | -------------------------------------------------------------------------------------- | ---------------------------------------------------------- | ----------------------------------------------------------- |
| **Language Fluency**        | ✅✅ (Very fluent, native-level for English)                                             | ✅ (Decent fluency, occasional awkward phrasing)            | ✅✅ (Fluent for Spanish, better than Hindi)                  |
| **Named Entity Handling**   | ✅✅ (Strong entity recognition for English names, dates, etc.)                          | ✅ (Sometimes misses local named entities)                  | ✅ (Handles Spanish entities well, but not perfect)          |
| **Accuracy of Extraction**  | **High** (optimized on SQuAD)                                                          | **Medium** (xlm-roberta is generalist, not Indic-specific) | **Medium** (xlm-roberta is multilingual, decent accuracy)   |
| **Speech-to-Text Accuracy** | ✅✅ (Google Speech in `en-US` is very accurate)                                         | ✅ (Decent, but errors increase with accents or dialects)   | ✅✅ (Spanish `es-ES` recognition is well-supported)          |
| **Text-to-Speech Quality**  | **Good** (gTTS `en` has natural pronunciation and intonation)                          | **Moderate** (gTTS `hi` often sounds robotic)              | **Good** (gTTS `es` produces pleasant and accurate output)  |


In [6]:
class QuestionBot:
    def __init__(self, language="en"):
        self.language = language
        if language == "en":
            self.model_name = "bert-large-uncased-whole-word-masking-finetuned-squad"
            self.sr_lang = "en-US"
            self.tts_lang = "en"
        
        elif language == "hi":
            self.model_name = "LingoIITGN/ganga-1b"
            self.sr_lang = "hi-IN"
            self.tts_lang = "hi"
        
        elif language == "es":
            self.model_name = "mrm8488/bert-base-spanish-wwm-cased-finetuned-spa-squad2-es"
            self.sr_lang = "es-ES"
            self.tts_lang = "es"
        else:
            raise ValueError("Unsupported language. Choose from 'en', 'hi', or 'es'.")
        
        self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
        self.model = AutoModelForQuestionAnswering.from_pretrained(self.model_name)
        self.text = ""
        
        self.rouge_scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True)
        
    def parse_pdf(self, path_to_file):
        try:
            doc = fitz.open(path_to_file)
            self.text = ""
            for page in doc:
                self.text += page.get_text()
            doc.close()
            return True
        except Exception as e:
            print(f"Error parsing PDF: {e}")
            return False
    
    def get_voice_input(self):
        r = sr.Recognizer()
        try:
            with sr.Microphone() as source:
                print("Ask your question...")
                r.adjust_for_ambient_noise(source, duration=1)
                audio = r.listen(source, timeout=10, phrase_time_limit=5)
            
            question = r.recognize_google(audio, language=self.sr_lang)
            print("You asked:", question)
            return question
        except sr.UnknownValueError:
            print("Could not understand the audio.")
            return None
        except sr.RequestError as e:
            print(f"Speech recognition error: {e}")
            return None
        except sr.WaitTimeoutError:
            print("No speech detected within timeout.")
            return None
    
    def get_answer(self, question):
        if not self.text:
            return "No document loaded. Please parse a PDF first."
        
        try:
            inputs = self.tokenizer.encode_plus(
                question,
                self.text,
                return_tensors="pt",
                truncation=True,
                max_length=512,
                padding=True
            )
            
            with torch.no_grad():
                outputs = self.model(**inputs)
            
            answer_start = torch.argmax(outputs.start_logits)
            answer_end = torch.argmax(outputs.end_logits) + 1
            
            # Handle case where answer_end is before answer_start
            if answer_end <= answer_start:
                return "Could not find a suitable answer in the document."
            
            answer = self.tokenizer.convert_tokens_to_string(
                self.tokenizer.convert_ids_to_tokens(inputs["input_ids"][0][answer_start:answer_end])
            )
            
            return answer.strip()
        except Exception as e:
            return f"Error generating answer: {e}"
    
    def play_answer(self, answer):
        if not answer or answer.startswith("Error") or answer.startswith("Could not"):
            print("Cannot play invalid answer:", answer)
            return False
        
        try:
            print("Answer:", answer)
            tts = gTTS(text=answer, lang=self.tts_lang)
            tts.save("answer.mp3")
            
            if os.name == 'nt':
                os.system("start answer.mp3")
            
            return True
        except Exception as e:
            print(f"Error playing answer: {e}")
            return False
    
    def evaluate_bleu(self, reference, candidate):
        try:
            reference_tokens = reference.lower().split()
            candidate_tokens = candidate.lower().split()
            
            smoothie = SmoothingFunction().method4
            
            bleu_score = sentence_bleu([reference_tokens], candidate_tokens, smoothing_function=smoothie)
            
            return round(bleu_score, 4)
        except Exception as e:
            print(f"Error calculating BLEU score: {e}")
            return 0.0
    
    def evaluate_rouge(self, reference, candidate):
        try:
            scores = self.rouge_scorer.score(reference, candidate)
            
            rouge_scores = {
                'rouge1': {
                    'precision': round(scores['rouge1'].precision, 4),
                    'recall': round(scores['rouge1'].recall, 4),
                    'fmeasure': round(scores['rouge1'].fmeasure, 4)
                },
                'rouge2': {
                    'precision': round(scores['rouge2'].precision, 4),
                    'recall': round(scores['rouge2'].recall, 4),
                    'fmeasure': round(scores['rouge2'].fmeasure, 4)
                },
                'rougeL': {
                    'precision': round(scores['rougeL'].precision, 4),
                    'recall': round(scores['rougeL'].recall, 4),
                    'fmeasure': round(scores['rougeL'].fmeasure, 4)
                }
            }
            
            return rouge_scores
        except Exception as e:
            print(f"Error calculating ROUGE scores: {e}")
            return {}
    
    def run(self, pdf_path):
        if not self.parse_pdf(pdf_path):
            return
        
        question = self.get_voice_input()
        if not question:
            print("Voice input failed or no question detected.")
            return
        
        answer = self.get_answer(question)
        print("Answer:", answer)
        self.play_answer(answer)


In [15]:
bot_hi = QuestionBot(language="hi")
bot_hi.parse_pdf("hindi.pdf")

question_hi = bot_hi.get_voice_input()

if question_hi:
    answer_hi = bot_hi.get_answer(question_hi)
    print("Answer (Hindi):", answer_hi)

    bot_hi.play_answer(answer_hi)

    reference_hi = "भारत एक विविधताओं से भरा देश है जहाँ विभिन्न धर्म, भाषाएँ और संस्कृतियाँ एक साथ coexist करती हैं।"
    
    print("BLEU Score (Hindi):", bot_hi.evaluate_bleu(reference_hi, answer_hi))
    print("ROUGE Score (Hindi):", bot_hi.evaluate_rouge(reference_hi, answer_hi))


Some weights of MistralForQuestionAnswering were not initialized from the model checkpoint at LingoIITGN/ganga-1b and are newly initialized: ['qa_outputs.bias', 'qa_outputs.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Ask your question...
You asked: संस्कृत विवाह के बारे में बताइए
Answer (Hindi): एक विविधताओं से भरा देश है जहाँ विभिन्न धर्म, भाषाएँ और संस्कृतियाँ एक साथ coexist करती हैं। 
इसकी ऐतिहासिक धरोहर, जैसे कि ताज
Answer: एक विविधताओं से भरा देश है जहाँ विभिन्न धर्म, भाषाएँ और संस्कृतियाँ एक साथ coexist करती हैं। 
इसकी ऐतिहासिक धरोहर, जैसे कि ताज
BLEU Score (Hindi): 0.72
ROUGE Score (Hindi): {'rouge1': {'precision': 1.0, 'recall': 1.0, 'fmeasure': 1.0}, 'rouge2': {'precision': 0.0, 'recall': 0.0, 'fmeasure': 0.0}, 'rougeL': {'precision': 1.0, 'recall': 1.0, 'fmeasure': 1.0}}


In [17]:
bot_en = QuestionBot(language="en")
bot_en.parse_pdf("english.pdf")

question_en = bot_en.get_voice_input()

if question_en:
    answer_en = bot_en.get_answer(question_en)
    print("Answer (English):", answer_en)

    bot_en.play_answer(answer_en)

    reference_en = "Finance is an intricate field, it is influenced by a multitude of factors. These factors can be anything from Russia invading Ukraine to Trump’s new tariff system."
    
    print("BLEU Score (English):", bot_en.evaluate_bleu(reference_en, answer_en))
    print("ROUGE Score (English):", bot_en.evaluate_rouge(reference_en, answer_en))


Some weights of the model checkpoint at bert-large-uncased-whole-word-masking-finetuned-squad were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Ask your question...
You asked: finance and what factors
Answer (English): influenced by a multitude of factors. these factors can be anything from russia invading ukraine to trump ’ s new tariff system
Answer: influenced by a multitude of factors. these factors can be anything from russia invading ukraine to trump ’ s new tariff system
BLEU Score (English): 0.5889
ROUGE Score (English): {'rouge1': {'precision': 1.0, 'recall': 0.75, 'fmeasure': 0.8571}, 'rouge2': {'precision': 1.0, 'recall': 0.7407, 'fmeasure': 0.8511}, 'rougeL': {'precision': 1.0, 'recall': 0.75, 'fmeasure': 0.8571}}


In [18]:
bot_es = QuestionBot(language="es")  
bot_es.parse_pdf("spanish.pdf") 

question_es = "¿Cuáles son algunas ciudades famosas de España y por qué lo son?"  
if question_es:
    answer_es = bot_es.get_answer(question_es)
    print("Answer (Spanish):", answer_es)

    bot_es.play_answer(answer_es)

    reference_es = "Ciudades como Madrid, Barcelona y Sevilla son famosas por su arquitectura, gastronomía y vida nocturna."
    
    print("BLEU Score (Spanish):", bot_es.evaluate_bleu(reference_es, answer_es))
    print("ROUGE Score (Spanish):", bot_es.evaluate_rouge(reference_es, answer_es))


tokenizer_config.json:   0%|          | 0.00/135 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config.json:   0%|          | 0.00/465 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/439M [00:00<?, ?B/s]

Some weights of the model checkpoint at mrm8488/bert-base-spanish-wwm-cased-finetuned-spa-squad2-es were not used when initializing BertForQuestionAnswering: ['bert.pooler.dense.bias', 'bert.pooler.dense.weight']
- This IS expected if you are initializing BertForQuestionAnswering from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForQuestionAnswering from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


Answer (Spanish): madrid, barcelona y sevilla
Answer: madrid, barcelona y sevilla
BLEU Score (Spanish): 0.0639
ROUGE Score (Spanish): {'rouge1': {'precision': 1.0, 'recall': 0.25, 'fmeasure': 0.4}, 'rouge2': {'precision': 1.0, 'recall': 0.2, 'fmeasure': 0.3333}, 'rougeL': {'precision': 1.0, 'recall': 0.25, 'fmeasure': 0.4}}


model.safetensors:   0%|          | 0.00/439M [00:00<?, ?B/s]