In [18]:
# pip install transformers

In [19]:
# pip install torch

In [20]:
from transformers import BertForQuestionAnswering
from transformers import BertTokenizer
import torch
from torch.optim import AdamW
import numpy as np

In [21]:
import textwrap

In [22]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [23]:
from transformers import AutoModelForQuestionAnswering, AutoTokenizer, pipeline

model = AutoModelForQuestionAnswering.from_pretrained('deepset/roberta-base-squad2')
tokenizer = AutoTokenizer.from_pretrained('deepset/roberta-base-squad2')

In [24]:
model.to(device)
model.train()

RobertaForQuestionAnswering(
  (roberta): RobertaModel(
    (embeddings): RobertaEmbeddings(
      (word_embeddings): Embedding(50265, 768, padding_idx=1)
      (position_embeddings): Embedding(514, 768, padding_idx=1)
      (token_type_embeddings): Embedding(1, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): RobertaEncoder(
      (layer): ModuleList(
        (0-11): 12 x RobertaLayer(
          (attention): RobertaAttention(
            (self): RobertaSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): RobertaSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (Lay

In [25]:
# pip install datasets


In [26]:
from datasets import load_dataset

# Load the SQuAD dataset
dataset = load_dataset("squad", "plain_text", split="train")


Found cached dataset squad (/Users/shashwatbindal/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453)


In [27]:
train_features = []

for example in dataset:
    start_positions = []
    end_positions = []

    context = example["context"]
    questions = example["question"]
    answers = example["answers"]
    answer_start=0
    answer_end=0
    tokenized_context = tokenizer.tokenize(context)
    tokenized_question = tokenizer.tokenize(questions)
    tokenized_input = ["[CLS]"] + tokenized_question + ["[SEP]"] + tokenized_context + ["[SEP]"]
    inputs = tokenizer.convert_tokens_to_ids(tokenized_input)
    char_to_token = []
    token_index = 1  # Skip the [CLS] token
    for char_index, char in enumerate(context):
            if char != " " and char != "\n":
                char_to_token.append(token_index)
            if token_index < len(tokenized_input) - 1 and char_index + 1 < len(tokenized_input[token_index]):
                token_index += 1

    for answer in answers["answer_start"]:
        answer_start = answer
        token_start = char_to_token[min(answer_start, len(char_to_token) - 1)]
        token_start = min(token_start, len(inputs) - 1)
        start_positions.append(token_start)



    for text in answers["text"]:
        answer_end = answer_start + len(text) - 1
        token_end = char_to_token[min(answer_end, len(char_to_token) - 1)]
        token_end = min(token_end, len(inputs) - 1)
        end_positions.append(token_end)

    


Token indices sequence length is longer than the specified maximum sequence length for this model (513 > 512). Running this sequence through the model will result in indexing errors


In [28]:
train_features.append(
            {
                "input_ids": inputs,
                "attention_mask": [1] * len(inputs),
                "start_positions": start_positions,
                "end_positions": end_positions,
            }
        )

In [29]:
# pip install wrapt


In [30]:
import torch
from transformers import  AdamW
from torch.utils.data import DataLoader, RandomSampler
from transformers import AdamW
import torch.optim as optim

# Define the batch size and number of training epochs
batch_size = 4
num_epochs =50

# Convert the train_features list to a PyTorch DataLoader
train_dataset = torch.utils.data.TensorDataset(
    torch.tensor([f["input_ids"] for f in train_features], dtype=torch.long),
    torch.tensor([f["attention_mask"] for f in train_features], dtype=torch.long),
    torch.tensor([f["start_positions"] for f in train_features], dtype=torch.long),
    torch.tensor([f["end_positions"] for f in train_features], dtype=torch.long),
)
train_sampler = RandomSampler(train_dataset)
train_dataloader = DataLoader(train_dataset, sampler=train_sampler, batch_size=batch_size)

# Load the pre-trained BERT model for question answering
# model = BertForQuestionAnswering.from_pretrained("bert-large-uncased-whole-word-masking-finetuned-squad")
model = AutoModelForQuestionAnswering.from_pretrained('deepset/roberta-base-squad2')

# Set the model to training mode
model.train()

# Define the optimizer and learning rate
# optimizer = AdamW(model.parameters(), lr=3e-6)
optimizer = optim.Adamax(model.parameters(), lr=9e-6)


In [31]:
for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}")
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for batch in train_dataloader:
        batch = tuple(t.to(device) for t in batch)
        input_ids, attention_mask, start_positions, end_positions = batch
        
        optimizer.zero_grad()
        
        outputs = model(
            input_ids=input_ids,
            attention_mask=attention_mask,
            start_positions=start_positions,
            end_positions=end_positions
        )
        
        loss = outputs.loss
        loss.backward()
        optimizer.step()

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [32]:
save_path = "/Users/shashwatbindal/Fine_Tuned_modelRobert"

# Save the fine-tuned model
# model.save_pretrained(save_path)

In [33]:
from transformers import AutoTokenizer, AutoModelForQuestionAnswering


# Load the fine-tuned BERT model

model = AutoModelForQuestionAnswering.from_pretrained(save_path)
tokenizer = AutoTokenizer.from_pretrained('deepset/roberta-base-squad2')

# Prepare the input
question = "what is coronary artery disease  ?"
context = """Heart surgery, also known as cardiac surgery, is a complex and intricate procedure performed on the heart to treat a variety of conditions. It is a critical and highly specialized field of medicine that requires a skilled team of healthcare professionals, including cardiac surgeons, anesthesiologists, nurses, and other support staff.

There are various types of heart surgeries performed depending on the specific condition being treated. Some common types include coronary artery bypass grafting (CABG), valve replacement or repair, congenital heart defect repairs, and heart transplant.

Coronary artery bypass grafting (CABG) is one of the most common heart surgeries. It is performed to treat coronary artery disease, which occurs when the arteries that supply blood to the heart become narrowed or blocked. During this procedure, the surgeon takes a healthy blood vessel from another part of the body, usually the leg or chest, and grafts it onto the blocked coronary artery. This bypasses the blockage and restores blood flow to the heart.

Valve replacement or repair is another type of heart surgery. It is performed to treat diseased or damaged heart valves. Heart valves are responsible for ensuring proper blood flow through the heart chambers. When valves become narrowed or leaky, they can impair the heart's ability to pump blood effectively. During valve replacement surgery, the damaged valve is removed and replaced with a mechanical valve or a biological tissue valve. In some cases, the valve can be repaired rather than replaced."""
inputs = tokenizer.encode_plus(question, context, add_special_tokens=True, return_tensors="pt")

# Perform inference
with torch.no_grad():
    outputs = model(**inputs)

start_logits = outputs.start_logits
end_logits = outputs.end_logits

# Process the logits to obtain the predicted answer span
start_index = torch.argmax(start_logits)
end_index = torch.argmax(end_logits)

# Decode the predicted answer span
all_tokens = tokenizer.convert_ids_to_tokens(inputs.input_ids[0])
answer = tokenizer.convert_tokens_to_string(all_tokens[start_index:end_index+1])

# Print the predicted answer
print("Question:", question)
print("Predicted Answer:", answer)


Question: what is coronary artery disease  ?
Predicted Answer:  when the arteries that supply blood to the heart become narrowed or blocked


In [60]:

def robert_question_answer(tokenizer, model,question, context, max_len=2000):
    inputs = tokenizer.encode_plus(question, context, add_special_tokens=True, return_tensors="pt")
    input_ids = inputs["input_ids"].to(model.device)
    attention_mask = inputs["attention_mask"].to(model.device)

    with torch.no_grad():
        outputs = model(input_ids=input_ids, attention_mask=attention_mask)
        start_logits = outputs.start_logits
        end_logits = outputs.end_logits

    start_index = torch.argmax(start_logits)
    end_index = torch.argmax(end_logits)

    answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[0][start_index:end_index + 1]))
    return answer.strip()

def split_passage_and_process(tokenizer, model, question, passage, max_len=10000, chunk_size=500):
    # Split the passage into smaller chunks
    passage_chunks = textwrap.wrap(passage, width=chunk_size)

    answers = []
    # Process each chunk separately
    for chunk in passage_chunks:
        answer = robert_question_answer(tokenizer, model, question, chunk, max_len=max_len)
        if answer != "Sorry!, I could not find an answer in the passage.":
            answers.append(answer)

    # Combine the answers from all chunks
    combined_answer = " ".join(answers)
    return combined_answer

In [35]:
from datasets import load_dataset
from torch.utils.data import DataLoader
import torch

# Load the fine-tuned BERT model

model = AutoModelForQuestionAnswering.from_pretrained(save_path)
tokenizer = AutoTokenizer.from_pretrained('deepset/roberta-base-squad2')

# Load the SQuAD validation dataset
datasetv = load_dataset("squad", "plain_text", split="validation[:500]")


Found cached dataset squad (/Users/shashwatbindal/.cache/huggingface/datasets/squad/plain_text/1.0.0/d6ec3ceb99ca480ce37cdd35555d6cb2511d223b9150cce08a837ef62ffea453)


In [36]:
len(datasetv)

500

In [37]:

# # Load the fine-tuned BERT model

model = AutoModelForQuestionAnswering.from_pretrained(save_path)
tokenizer = AutoTokenizer.from_pretrained('deepset/roberta-base-squad2')
# # Prepare the input


def generate_predictions(dataset, tokenizer, model):
    predictions = []
    for example in dataset:
        inputs = tokenizer.encode_plus(example["question"], example["context"], add_special_tokens=True, return_tensors="pt")
        input_ids = inputs["input_ids"].to(model.device)
        attention_mask = inputs["attention_mask"].to(model.device)

        with torch.no_grad():
            outputs = model(input_ids=input_ids, attention_mask=attention_mask)
            start_logits = outputs.start_logits
            end_logits = outputs.end_logits

        start_index = torch.argmax(start_logits)
        end_index = torch.argmax(end_logits)

        answer = tokenizer.convert_tokens_to_string(tokenizer.convert_ids_to_tokens(input_ids[0][start_index:end_index + 1]))
        predictions.append(answer.strip())

    return predictions


In [38]:
predictions = generate_predictions(datasetv, tokenizer, model)


In [39]:
true_answers=[]
for ans in datasetv:
    true_answers.append(ans["answers"]["text"])
    # print(ans["answers"]["text"])


In [40]:
predictions[5:10],true_answers[5:10]

(['golden anniversary',
  'February 7, 2016',
  'American Football Conference',
  'golden anniversary',
  'American Football Conference'],
 [['"golden anniversary"', 'gold-themed', '"golden anniversary'],
  ['February 7, 2016', 'February 7', 'February 7, 2016'],
  ['American Football Conference',
   'American Football Conference',
   'American Football Conference'],
  ['"golden anniversary"', 'gold-themed', 'gold'],
  ['American Football Conference',
   'American Football Conference',
   'American Football Conference']])

In [68]:
import sounddevice as sd
import soundfile as sf

# Set the audio settings
sample_rate = 44100
duration = 6 # Duration in seconds
# output_file = "audio.wav"


In [69]:

# import speech_recognition as sr


In [43]:
def askquestion(audio_file,output_file):
    print("Recording audio...")
    audio = sd.rec(int(sample_rate * duration), samplerate=sample_rate, channels=1)
    sd.wait()

    # Save the audio to a file
    sf.write(output_file, audio, sample_rate)

    print(f"Audio saved to {output_file}")
    r = sr.Recognizer()

    # Path to the audio file

    # Load the audio file
    with sr.AudioFile(audio_file) as source:
        # Read the audio data
        audio = r.record(source)
    text=""    

    try:
        # Recognize speech from the audio
        text = r.recognize_google(audio)
        print("Transcription:", text)
    except sr.UnknownValueError:
        print("Speech recognition could not understand audio")
    except sr.RequestError as e:
        print("Could not request results from Google Speech Recognition service; {0}".format(e))

    return text
    

In [44]:
from gtts import gTTS

def text_to_speech(text, output_file):
    # Create a gTTS object with the text and desired language
    tts = gTTS(text=text, lang='en')

    # Save the audio to a file
    tts.save(output_file)

# Example usage

output_file = "output.mp3"

# text_to_speech(text, output_file)

In [45]:
import pygame

def play_mp3(file_path):
    pygame.mixer.init()
    pygame.mixer.music.load(file_path)
    pygame.mixer.music.play()

In [70]:
def Passageready():

    output_file2="passage.mp3"
    text_to_speech("Give the passage you want to ask questions from", output_file2)
    play_mp3(output_file2)
    passage=input("Enter the Passage: ")
    return passage


In [71]:
def QuestionAnswer(passage):
    
    output_file="Ask.mp3"
    text_to_speech("Ask the question", output_file)
    play_mp3(output_file)
    question=askquestion("question.wav","question.wav")
    ans  = robert_question_answer( tokenizer, model,question, passage)
    output_file3="answer.mp3"
    text_to_speech("The Answer to the question....: "+question+"..."+"is....:"+ans+"...", output_file3)
    play_mp3(output_file3)
    return ans


In [72]:
passage=Passageready()
passage

'hi shashwat is good'

In [73]:
ans=QuestionAnswer(passage)

Recording audio...
Audio saved to question.wav
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
Transcription: how is Shashwat


In [62]:
passage

'hi i am shashwat'

In [74]:
ans

'good'

In [52]:
# pip install opencv-python


In [53]:
# pip install --upgrade pytesseract

In [54]:
# import cv2
# import pytesseract

# # Load the image using OpenCV
# image = cv2.imread('/Users/shashwatbindal/Downloads/WhatsApp Image 2023-06-15 at 13.42.55.jpeg')

# # Preprocess the image (if required)
# # Apply any necessary preprocessing techniques such as resizing, noise removal, etc.

# # Perform OCR using Tesseract
# text = pytesseract.image_to_string(image)

# # Save the extracted text to a text file
# with open('output.txt', 'w') as file:
#     file.write(text)

# print('Text extracted and saved to output.txt')

In [55]:
true_answers[:5],predictions[:5]


([['Denver Broncos', 'Denver Broncos', 'Denver Broncos'],
  ['Carolina Panthers', 'Carolina Panthers', 'Carolina Panthers'],
  ['Santa Clara, California',
   "Levi's Stadium",
   "Levi's Stadium in the San Francisco Bay Area at Santa Clara, California."],
  ['Denver Broncos', 'Denver Broncos', 'Denver Broncos'],
  ['gold', 'gold', 'gold']],
 ['Denver Broncos',
  'Carolina Panthers',
  "Levi's Stadium in the San Francisco Bay Area at Santa Clara, California",
  'Denver Broncos',
  'gold'])

In [67]:
exact_match_count = 0
total_examples = len(true_answers)

for true_answer, prediction in zip(true_answers, predictions):
    if prediction in true_answer:
        exact_match_count += 1

exact_match_score = exact_match_count / total_examples

# Print the exact match score
print("Exact match score:", exact_match_score)

Exact match score: 0.882


In [None]:
class YourQuestionAnsweringModel:
    def __init__(self):
        self.passage = ""

    def get_passage(self):
        output_file = "passage.mp3"
        text_to_speech("Give the passage you want to ask questions from", output_file)
        play_mp3(output_file)
        self.passage = input("Enter the Passage: ")

    def generate_answer(self, question):
        # Use self.passage and question to generate the answer
        # Your question answering model implementation here
        # Replace the return statement with your own answer generation logic
        
        return "Answer to the question"

