In [38]:
!pip install transformers datasets evaluate PyPDF2




In [39]:
import os
import torch
import torch.nn as nn
from torch.utils.data import DataLoader, Dataset
from transformers import T5Tokenizer, T5ForConditionalGeneration, Trainer, TrainingArguments
import PyPDF2

In [48]:
from torch.utils.data import Dataset

class TextDataset(Dataset):
    def __init__(self, questions_answers, tokenizer, max_length=512):
        self.questions_answers = questions_answers
        self.tokenizer = tokenizer
        self.max_length = max_length

    def __len__(self):
        return len(self.questions_answers)

    def __getitem__(self, idx):
        question, answer = self.questions_answers[idx]

        # Preprocess input and output for the model
        input_text = f"question: {question} context: {pdf_text}"
        input_encoding = self.tokenizer(
            input_text, padding="max_length", truncation=True, max_length=self.max_length, return_tensors="pt"
        )
        target_encoding = self.tokenizer(
            answer, padding="max_length", truncation=True, max_length=self.max_length, return_tensors="pt"
        )

        # Return input_ids and labels (target)
        return {
            'input_ids': input_encoding['input_ids'].flatten(),  # Flatten the tensor
            'attention_mask': input_encoding['attention_mask'].flatten(),  # Flatten the tensor
            'labels': target_encoding['input_ids'].flatten()  # Flatten the tensor
        }


In [49]:
# Step 2: Load the tokenizer and model
tokenizer = T5Tokenizer.from_pretrained('t5-base')
model = T5ForConditionalGeneration.from_pretrained('t5-base')



In [50]:
# Step 3: Extract text from the PDF file
def extract_text_from_pdf(pdf_path):
    text = ""
    with open(pdf_path, 'rb') as file:
        reader = PyPDF2.PdfReader(file)
        for page in reader.pages:
            text += page.extract_text() + "\n"
    return text

# Load your PDF file
pdf_path = 'physics_for_beginners-_PDf(copy).pdf'
pdf_text = extract_text_from_pdf(pdf_path)

In [51]:
# Step 4: Define your set of questions and answers
def prepare_questions_answers(pdf_text):
    # Example set of questions based on the PDF content; adjust as necessary
    questions_answers = [
        {
        "query": "What is the definition of physics?",
        #"context": pdf_text,
        "answer": "Physics is the science that deals with matter, energy, motion, and force. It studies the fundamental building blocks of the universe and how they interact."
    },
    {
        "query": "What is an inertial frame of reference according to Newton's laws?",
        #"context": pdf_text,
        "answer": "An inertial frame of reference is one in which Newton's laws of motion are valid, meaning that objects in motion stay in motion unless acted upon by an external force. This frame does not accelerate and moves uniformly in a straight line."
    },
    {
        "query": "How did Roemer estimate the speed of light?",
        #"context": pdf_text,
        "answer": "Roemer estimated the speed of light by studying the eclipses of Jupiter’s satellites and noticing that the time between eclipses varied depending on the distance between Earth and Jupiter."
    },
    {
        "query": "What is the interference pattern of waves?",
        #"context": pdf_text,
        "answer": "When two identical waves overlap, they can interfere constructively, creating points of maximum displacement, or destructively, canceling each other out at points of zero displacement."
    },
    {
        "query": "How does special relativity redefine space and time?",
        #"context": pdf_text,
        "answer": "Special relativity views space and time as a single four-dimensional entity known as 'space-time,' where measurements of time and distance are relative to the observer’s motion."
    },
    {
        "query": "What is the cosmic speed limit in Einstein’s mechanics?",
        #"context": pdf_text,
        "answer": "The cosmic speed limit is the speed of light (c). No object can be accelerated to exceed the speed of light, as mass increases infinitely as it approaches c."
    },
    {
        "query": "How does general relativity differ from special relativity?",
        #"context": pdf_text,
        "answer": "While special relativity deals with non-accelerating frames of reference, general relativity extends the theory to include gravity and acceleration, describing gravity as the curvature of space-time caused by mass."
    },
    {
        "query": "What does the Heisenberg Uncertainty Principle state?",
        #"context": pdf_text,
        "answer": "The Heisenberg Uncertainty Principle states that it is impossible to know both the position and momentum of a particle with absolute certainty. The more precisely one is known, the less precisely the other can be known."
    },
    {
        "query": "What is the wave-particle duality concept proposed by de Broglie?",
        #"context": pdf_text,
        "answer": "De Broglie proposed that particles, such as electrons, can behave as waves, similar to how light, traditionally considered a wave, can exhibit particle-like properties."
    },
    {
        "query": "What is the Copenhagen Interpretation of quantum mechanics?",
        #"context": pdf_text,
        "answer": "The Copenhagen Interpretation, mainly attributed to Niels Bohr, suggests that particles exist in all possible states until they are observed, at which point the wave function collapses into one observable state."
    },
    {
        "query": "According to Einstein’s theories, how do different observers measure events differently?",
        #"context": pdf_text,
        "answer": "In relativity, observers moving at different velocities will measure different times and distances for the same events, and they may disagree about the simultaneity or order of events."
    },
    {
        "query": "What is Richard Feynman’s 'sum-over-histories' approach?",
        #"context": pdf_text,
        "answer": "Feynman’s 'sum-over-histories' approach suggests that particles, when moving from point A to point B, take into account every possible path, and the overall outcome is a sum of all these possible paths."
    },
    {
        "query": "How is average acceleration calculated?",
        #"context": pdf_text,
        "answer": "Average acceleration is the change in velocity divided by the time interval during which the change occurs."
    },
    {
        "query": "What is the principle of energy conservation?",
        #"context": pdf_text,
        "answer": "The principle of energy conservation states that energy cannot be created or destroyed, only converted from one form to another."
    },
    {
        "query": "How does light behave when it strikes a polished surface?",
        #"context": pdf_text,
        "answer": "When light strikes a polished surface, it is reflected, similar to a billiard ball bouncing off the side of a pool table."
    },
    {
        "query": "What is the mass-energy equivalence as described by Einstein?",
        #"context": pdf_text,
        "answer": "The mass-energy equivalence is described by Einstein's famous equation E = mc^2, which states that mass and energy are interchangeable."
    },
    {
        "query": "What did J.J. Thomson discover using cathode rays?",
        #"context": pdf_text,
        "answer": "J.J. Thomson discovered the electron as a negatively charged particle by studying the behavior of cathode rays under electric and magnetic fields."
    },
    {
        "query": "What is dark matter?",
        #"context": pdf_text,
        "answer": "Dark matter is a hypothetical form of matter that does not emit or interact with electromagnetic radiation, making it invisible, but it is believed to constitute most of the matter in the universe."
    },
    {
        "query": "What discovery was made about the expansion of the universe in 1998?",
        #"context": pdf_text,
        "answer": "In 1998, astronomers discovered that the expansion of the universe is accelerating, a finding attributed to the influence of dark energy."
    },
    {
        "query": "What dual nature does a photon exhibit?",
        #"context": pdf_text,
        "answer": "A photon exhibits both wave-like and particle-like properties, a phenomenon known as wave-particle duality."
    },
    {
        "query": "What is the goal of string theory in physics?",
        #"context": pdf_text,
        "answer": "String theory aims to unify general relativity and quantum mechanics by proposing that the fundamental constituents of reality are one-dimensional strings, not point-like particles."
    },
    {
        "query": "What challenge did Einstein pursue in his later years?",
        #"context": pdf_text,
        "answer": "Einstein spent the last decades of his life searching for a unified field theory that could describe all fundamental forces within a single framework."
    },
    {
        "query": "What happens when two waves meet in phase?",
        #"context": pdf_text,
        "answer": "When two waves meet in phase, their crests and troughs reinforce each other, creating a wave of greater amplitude."
    },
    {
        "query": "How is temperature related to the energy of particles?",
        #"context": pdf_text,
        "answer": "Temperature is a measure of the average kinetic energy of the particles in a substance. Higher temperatures correspond to higher particle motion."
    },
    {
        "query": "What is a standing wave?",
        #"context": pdf_text,
        "answer": "A standing wave is a wave that oscillates in place without traveling, with points of maximum and minimum amplitude remaining fixed."
    },
    {
        "query": "How does probability play a role in quantum mechanics?",
        #"context": pdf_text,
        "answer": "In quantum mechanics, the behavior of particles is described probabilistically, and the exact outcome of an event cannot be predicted, only the likelihood of different outcomes."
    },
    {
        "query": "What does Planck’s law state about energy?",
        #"context": pdf_text,
        "answer": "Planck’s law states that energy is quantized and can only be emitted or absorbed in discrete amounts, called quanta."
    }
    ]
    return questions_answers

questions_answers = prepare_questions_answers(pdf_text)

In [52]:
# Step 5: Create the dataset and DataLoader
train_dataset = TextDataset(questions_answers, tokenizer)
train_dataloader = DataLoader(train_dataset, batch_size=2, shuffle=True, num_workers=0)


In [53]:
# Step 6: Set up training arguments
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=2,
    save_steps=10,
    save_total_limit=2,
    logging_dir='./logs',
)

In [54]:
# Step 7: Define Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
)

  self.scaler = torch.cuda.amp.GradScaler(**kwargs)


In [55]:
# Step 8: Train the model
trainer.train()

Step,Training Loss


TrainOutput(global_step=42, training_loss=3.5777769542875744, metrics={'train_runtime': 182.9338, 'train_samples_per_second': 0.443, 'train_steps_per_second': 0.23, 'total_flos': 49325589135360.0, 'train_loss': 3.5777769542875744, 'epoch': 3.0})

In [56]:
# Step 9: Save the fine-tuned model
trainer.save_model('./fine_tuned_model')

In [58]:
# Step 10: Load fine-tuned model for inference
model = T5ForConditionalGeneration.from_pretrained('./fine_tuned_model')

# Load the tokenizer from the original pre-trained model
tokenizer = T5Tokenizer.from_pretrained('t5-base') # Use the appropriate base model name if it's different



In [59]:
# Step 11: Function to answer questions based on context
def get_answer(question, context, model, tokenizer):
    input_text = f"question: {question} context: {context}"
    input_ids = tokenizer(input_text, return_tensors='pt').input_ids

    with torch.no_grad():
        output = model.generate(input_ids)

    answer = tokenizer.decode(output[0], skip_special_tokens=True)
    return answer

# Example usage with a specific question
example_question = "What is the principle of conservation of energy?"
context = "The principle states that energy cannot be created or destroyed, only transformed from one form to another."  # Adjust the context as needed
answer = get_answer(example_question, context, model, tokenizer)

# Print the final answer only
print(f"Final Answer: {answer}")



Final Answer: that energy cannot be created or destroyed, only transformed from one form to another


In [None]:
# Step 11: Function to answer questions based on context
def get_answer(question, context, model, tokenizer):
    input_text = f"question: {question} context: {context}"
    input_ids = tokenizer(input_text, return_tensors='pt').input_ids

    with torch.no_grad():
        output = model.generate(input_ids)

    answer = tokenizer.decode(output[0], skip_special_tokens=True)
    return answer

# Test with a list of questions
test_questions = [
    "What is the principle of conservation of energy?",
    "What is discussed in the line about Newton's second law?",
]

# Iterate through the test questions and get answers
for question in test_questions:
    answer = get_answer(question, pdf_text, model, tokenizer)  # Use pdf_text as context
    print(f"Question: {question}")
    print(f"Answer: {answer}\n")
