In [None]:
from google.colab import drive

# Mount Google Drive at /content/drive so that files stored under MyDrive can be
# reached through ordinary filesystem paths in the cells that follow.
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
import os

# Google Drive folder that holds the input documents used by this notebook
folder_path = "/content/drive/MyDrive/Data"

# Make that folder the working directory; later cells open files such as
# 'inputFile.txt' by bare name and therefore depend on this call.
os.chdir(folder_path)

# List the contents of the (new) working directory as a quick sanity check (optional)
files = os.listdir()
print("Files in the folder:", files)


Files in the folder: ['paper.pdf', 'bert_embeddings.npy', 'indices.faiss', 'resume.pdf', 'inputFile.txt']


In [None]:


def read_txt(file_path):
    """
    Loads the complete contents of a UTF-8 encoded text file.

    Args:
        file_path (str): Location of the .txt file to read.

    Returns:
        str: Everything the file contains, as one string.
    """
    # The context manager closes the handle even if reading raises.
    with open(file_path, mode='r', encoding='utf-8') as handle:
        return handle.read()

# Example usage: 'inputFile.txt' is a relative path, so it is resolved against
# the current working directory.
file_path = 'inputFile.txt'
txt_text = read_txt(file_path)
print(txt_text)


Introduction to Quantum Physics

Quantum physics, also known as quantum mechanics or quantum theory, is a fundamental branch of physics that deals with the behavior of matter and energy at the smallest scales, typically at the level of atoms and subatomic particles. Developed in the early 20th century, quantum physics has revolutionized our understanding of the universe, providing insights into the nature of reality that are often counterintuitive and challenging to grasp.

Historical Background

The roots of quantum physics can be traced back to the late 19th and early 20th centuries when classical physics could not explain certain experimental observations. Key milestones in the development of quantum theory include:

 Blackbody Radiation: Max Planck's solution to the ultraviolet catastrophe in 1900 introduced the concept of quantized energy levels, laying the groundwork for quantum theory.

Photoelectric Effect: In 1905, Albert Einstein explained the photoelectric effect by proposin

In [None]:
import re
def preprocess_text(document_text):
    """
    Turns raw document text into cleaned paragraphs for embedding generation.

    The text is cut wherever two newline characters appear back to back.
    Chunks that contain only whitespace are dropped; in every remaining chunk
    each run of whitespace is squeezed to a single space, the ends are trimmed
    and the text is lowercased.

    Args:
        document_text (str): Full text of the document.

    Returns:
        list[str]: The cleaned paragraphs, in document order.
    """
    cleaned = []
    for chunk in document_text.split('\n\n'):
        # Skip chunks that are empty or whitespace-only
        if not chunk.strip():
            continue
        squeezed = re.sub(r'\s+', ' ', chunk)
        cleaned.append(squeezed.strip().lower())
    return cleaned

In [None]:
from sentence_transformers import SentenceTransformer, util
import faiss
import numpy as np

# Load the Sentence Transformers model that turns text into embedding vectors;
# the same instance is used for the document paragraphs and for queries.
model = SentenceTransformer('all-MiniLM-L6-v2')

def generate_paragraph_embeddings(document_text, encoder=None):
    """
    Generates one embedding per paragraph of the document text.

    The text is split wherever two newline characters appear back to back.
    Each piece is stripped of surrounding whitespace, and pieces that end up
    empty are discarded, so blank chunks (produced by runs of three or more
    newlines, or by leading/trailing blank lines) are never embedded or
    returned as if they were paragraphs.

    Args:
        document_text (str): Full text of the document.
        encoder: Optional object exposing ``encode(list_of_str)``. When
            omitted, the notebook-level ``model`` is used.

    Returns:
        tuple: ``(embeddings, paragraphs)``. ``embeddings`` is whatever
        ``encode`` returns for the paragraph list (expected to hold one
        vector per paragraph, in the same order); ``paragraphs`` is the list
        of non-empty, stripped paragraph strings in document order.

    Raises:
        ValueError: If the document contains no non-empty paragraph.
    """
    if encoder is None:
        encoder = model  # fall back to the globally loaded model

    stripped = (piece.strip() for piece in document_text.split('\n\n'))
    paragraphs = [piece for piece in stripped if piece]
    if not paragraphs:
        raise ValueError("document_text contains no non-empty paragraphs")

    embeddings = encoder.encode(paragraphs)
    return embeddings, paragraphs

# Example usage: embed every paragraph of the text loaded into txt_text


embeddings, paragraphs = generate_paragraph_embeddings(txt_text)

def store_embeddings_in_faiss(embeddings):
    """
    Stores the embeddings in a flat inner-product FAISS index.

    Args:
        embeddings: 2-D array-like of shape (n_vectors, dim). It is converted
            to a float32 NumPy array before being added to the index.

    Returns:
        faiss.IndexFlatIP: Index of dimensionality ``dim`` holding the
        vectors in the order given.

    Raises:
        ValueError: If ``embeddings`` is not two-dimensional (for example an
            empty list or a single 1-D vector), because the dimensionality of
            the index cannot be determined from it.
    """
    matrix = np.array(embeddings, dtype=np.float32)
    if matrix.ndim != 2:
        raise ValueError(
            f"embeddings must be a 2-D array of shape (n_vectors, dim), got shape {matrix.shape}"
        )

    # NOTE(review): inner-product scores only equal cosine similarity when the
    # vectors are L2-normalized - confirm the encoder normalizes its output.
    index = faiss.IndexFlatIP(matrix.shape[1])
    index.add(matrix)
    return index

# Build the FAISS index from the paragraph embeddings and persist it next to the
# input data. The target path is derived from folder_path instead of repeating
# the absolute Drive path, so the index follows the data folder if it changes.
index = store_embeddings_in_faiss(embeddings)
faiss.write_index(index, os.path.join(folder_path, 'indices.faiss'))

def query_document(query_text, model, index, paragraph_list, k=5):
    """
    Queries the FAISS index with the query text and returns the most similar paragraphs.

    Args:
        query_text (str): The question or search phrase.
        model: Encoder exposing ``encode(text)``; it should be the same model
            that produced the indexed embeddings.
        index: FAISS index exposing ``ntotal`` and ``search(x, k)``.
        paragraph_list (list[str]): Paragraphs in the order their embeddings
            were added to ``index``.
        k (int): Maximum number of paragraphs to return. Defaults to 5.

    Returns:
        list[str]: Up to ``k`` paragraphs, best match first. Fewer are
        returned when the index holds fewer than ``k`` vectors, and an empty
        list when the index is empty.

    Raises:
        ValueError: If ``k`` is not positive.
    """
    if k <= 0:
        raise ValueError("k must be a positive integer")

    # Never request more neighbours than the index holds: FAISS pads missing
    # results with -1, and Python would silently read paragraph_list[-1]
    # (the last paragraph) for each of those padded slots.
    k = min(k, index.ntotal)
    if k == 0:
        return []

    # FAISS expects a 2-D float32 batch, hence the wrapping list
    query_embedding = np.array([model.encode(query_text)], dtype=np.float32)
    _, neighbour_ids = index.search(query_embedding, k=k)

    # Keep the -1 filter as a second line of defence against padded results
    return [paragraph_list[i] for i in neighbour_ids[0] if i >= 0]



In [None]:
# Example query: look up the paragraphs most similar to the question and print
# them one per line, in the order query_document returned them.
query_text = "What is the black body radiation?"
similar_paragraphs = query_document(query_text, model, index, paragraphs)
print("Top 5 similar paragraphs:")
for paragraph in similar_paragraphs:
    print(paragraph)


Top 5 similar paragraphs:
 Blackbody Radiation: Max Planck's solution to the ultraviolet catastrophe in 1900 introduced the concept of quantized energy levels, laying the groundwork for quantum theory.
where is the reduced Planck constant, is the wave function, and \( \hat{H} \) is the Hamiltonian operator representing the total energy of the system.
Photoelectric Effect: In 1905, Albert Einstein explained the photoelectric effect by proposing that light is composed of discrete packets of energy called photons, each with energy proportional to its frequency.
Atomic Structure: Niels Bohr's model of the atom in 1913 introduced quantized electron orbits, explaining the stability of atoms and the emission spectra of elements.
Historical Background


In [None]:
# Join the retrieved paragraphs into a single space-separated string and show it
combined_text = ' '.join(similar_paragraphs)
print(combined_text)

 Blackbody Radiation: Max Planck's solution to the ultraviolet catastrophe in 1900 introduced the concept of quantized energy levels, laying the groundwork for quantum theory. where is the reduced Planck constant, is the wave function, and \( \hat{H} \) is the Hamiltonian operator representing the total energy of the system. Photoelectric Effect: In 1905, Albert Einstein explained the photoelectric effect by proposing that light is composed of discrete packets of energy called photons, each with energy proportional to its frequency. Atomic Structure: Niels Bohr's model of the atom in 1913 introduced quantized electron orbits, explaining the stability of atoms and the emission spectra of elements. Historical Background


In [None]:
from transformers import pipeline

# Build a text2text-generation pipeline backed by the MBZUAI/LaMini-Flan-T5-248M
# checkpoint; it is used further down to answer the query from the retrieved context.
pipe = pipeline("text2text-generation", model="MBZUAI/LaMini-Flan-T5-248M")


config.json:   0%|          | 0.00/1.53k [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/990M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/142 [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/2.50k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/2.42M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.20k [00:00<?, ?B/s]

In [None]:
def generate_responses(query_text, similar_paragraphs):
    """
    Answers the query with the T5 pipeline, using the retrieved paragraphs as context.

    Args:
        query_text (str): The user's question.
        similar_paragraphs (list[str]): Paragraphs that supply the context.

    Returns:
        list[str]: A one-element list holding the generated answer text.
    """
    # Prompt layout: the query, then the space-joined context, then the instruction
    context_text = " ".join(similar_paragraphs)
    prompt = f"Query: {query_text}. Context: {context_text} Give answer from the content given above. If you are not sure about the answer, reply NOT SURE"

    # A single sequence is requested, so only the first result is read
    outputs = pipe(prompt, max_length=150, num_return_sequences=1)
    return [outputs[0]['generated_text']]

# Generate an answer for the example query from the retrieved paragraphs
response = generate_responses(query_text, similar_paragraphs)
print(response)

["The black body radiation refers to the concept of quantized energy levels introduced by Max Planck's solution to the ultraviolet catastrophe in 1900."]


In [None]:
!pip install -qU langchain_experimental langchain_openai langchain_community langchain ragas chromadb langchain-groq fastembed pypdf

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m332.8/332.8 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m327.5/327.5 kB[0m [31m10.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m9.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m127.4/127.4 kB[0m [31m9.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m547.8/547.8 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.1/71.1 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.0/92.0 kB[0m [31m6.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━