In [1]:
import os
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_google_genai import ChatGoogleGenerativeAI

  from tqdm.autonotebook import tqdm, trange


Validation of environment variables

In [2]:
# Load variables from a local .env file into the process environment.
load_dotenv()

# Fetch environment variables from .env
LANGCHAIN_TRACING_V2 = os.getenv("LANGCHAIN_TRACING_V2")
LANGCHAIN_ENDPOINT = os.getenv("LANGCHAIN_ENDPOINT")
LANGCHAIN_API_KEY = os.getenv("LANGCHAIN_API_KEY")
LANGCHAIN_PROJECT = os.getenv("LANGCHAIN_PROJECT")
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")


def _secret_status(value):
    """Return a display-safe placeholder for a secret; never the secret itself."""
    return "<redacted>" if value else "<missing>"


# Optional print to verify. Non-secret settings are shown as-is; API keys are
# only reported as present/absent, because cell output is saved together with
# the notebook and printing the raw values would leak the credentials.
print("LANGCHAIN_TRACING_V2:", LANGCHAIN_TRACING_V2)
print("LANGCHAIN_ENDPOINT:", LANGCHAIN_ENDPOINT)
print("LANGCHAIN_API_KEY:", _secret_status(LANGCHAIN_API_KEY))
print("LANGCHAIN_PROJECT:", LANGCHAIN_PROJECT)
print("GOOGLE_API_KEY:", _secret_status(GOOGLE_API_KEY))

LANGCHAIN_TRACING_V2: true
LANGCHAIN_ENDPOINT: https://api.smith.langchain.com
LANGCHAIN_API_KEY: <redacted>
LANGCHAIN_PROJECT: pr-new-starter-46
GOOGLE_API_KEY: <redacted>


Load the Gemini model (or any model of your choice)

In [3]:
# Chat client for the Gemini model, authenticated with GOOGLE_API_KEY.
chat = ChatGoogleGenerativeAI(
    model="gemini-pro",
    temperature=0.4,
    google_api_key=GOOGLE_API_KEY,
    convert_system_message_to_human=True,
)


Function to extract text from a PDF

In [4]:
def extract_text_from_pdf(pdf_path):
    """Extract the text of every page of a PDF as a single string.

    Args:
        pdf_path: Location of the PDF, passed straight to ``PdfReader``.

    Returns:
        str: The text of all pages concatenated in page order. No separator
        is inserted between pages.
    """
    reader = PdfReader(pdf_path)
    # A page may yield no text (presumably e.g. image-only pages; whether the
    # result is "" or None depends on the PyPDF2 version). `or ""` keeps a
    # None result from raising TypeError during concatenation. A single join
    # also avoids rebuilding the string once per page.
    return "".join(page.extract_text() or "" for page in reader.pages)

Creating chunks for batching

In [5]:
def chunk_text(text, chunk_size=1000):
    """Group the sentences of ``text`` into chunks of at most ``chunk_size`` words.

    The text is split on ``'. '`` and the resulting sentences are packed
    greedily, in order, into chunks. Sentences inside a chunk are re-joined
    with a single space (the ``'. '`` separators are not restored).

    Args:
        text: The text to split.
        chunk_size: Maximum number of whitespace-separated words per chunk.
            A single sentence longer than this is kept whole as its own
            (oversized) chunk rather than being cut.

    Returns:
        list[str]: The chunks in document order. Never contains an empty
        chunk; empty or whitespace-only input yields ``[]``.
    """
    sentences = text.split('. ')
    chunks = []
    current_chunk = []
    current_length = 0

    for sentence in sentences:
        word_count = len(sentence.split())
        if word_count == 0:
            # Blank fragments (empty input, trailing '. ') carry no content.
            continue
        # Only flush when something has been collected; otherwise a first
        # sentence that alone exceeds chunk_size would emit an empty chunk.
        if current_chunk and current_length + word_count > chunk_size:
            chunks.append(" ".join(current_chunk))
            current_chunk = []
            current_length = 0
        current_chunk.append(sentence)
        current_length += word_count

    if current_chunk:
        chunks.append(" ".join(current_chunk))

    return chunks

Storing these chunks in a FAISS DB

In [6]:
def create_faiss_index(chunks, model=None):
    """Embed text chunks and store the vectors in a flat L2 FAISS index.

    Args:
        chunks: Sequence of text chunks to embed.
        model: Optional, already-loaded embedding model exposing
            ``encode(list_of_str)``. When omitted, ``'all-MiniLM-L6-v2'`` is
            loaded via ``SentenceTransformer``. Passing the same instance
            here and to the retrieval step avoids loading the model twice.

    Returns:
        tuple: ``(index, chunk_embeddings)`` where ``index`` is a
        ``faiss.IndexFlatL2`` holding one vector per chunk (row ``i``
        corresponds to ``chunks[i]``) and ``chunk_embeddings`` is the float32
        array that was added to it.

    Raises:
        ValueError: If ``chunks`` is empty; there would be no embedding
            dimension to build the index from.
    """
    chunks = list(chunks)
    if not chunks:
        raise ValueError("Cannot build a FAISS index from an empty list of chunks.")

    if model is None:
        model = SentenceTransformer('all-MiniLM-L6-v2')  # Using a smaller, efficient model

    # FAISS consumes C-contiguous float32 matrices; normalise once here so the
    # index and the returned array are the same data regardless of what the
    # encoder hands back.
    chunk_embeddings = np.ascontiguousarray(model.encode(chunks), dtype=np.float32)
    index = faiss.IndexFlatL2(chunk_embeddings.shape[1])  # L2 distance for similarity
    index.add(chunk_embeddings)
    return index, chunk_embeddings

Retrieval Function from the FAISS DB

In [7]:
def retrieve_relevant_chunks(query, index, chunks, chunk_embeddings, top_k=3, model=None):
    """Return the chunks whose embeddings are nearest to the query embedding.

    Args:
        query: The user's question.
        index: Index built over the chunk embeddings; must expose
            ``search(vectors, k)`` returning ``(distances, ids)``.
        chunks: The text chunks, in the same order they were indexed.
        chunk_embeddings: Unused; kept so existing callers keep working.
        top_k: Maximum number of chunks to return.
        model: Optional, already-loaded embedding model exposing
            ``encode(list_of_str)``. It must be the same model that produced
            the indexed embeddings. When omitted, ``'all-MiniLM-L6-v2'`` is
            loaded via ``SentenceTransformer`` on every call.

    Returns:
        list[str]: Up to ``top_k`` chunks, in the order returned by the index.
        Empty when there are no chunks or ``top_k`` is not positive.
    """
    # Never ask for more neighbours than there are chunks: FAISS pads missing
    # results with id -1, and chunks[-1] would silently return the last chunk.
    k = min(top_k, len(chunks))
    if k <= 0:
        return []

    if model is None:
        model = SentenceTransformer('all-MiniLM-L6-v2')

    query_embedding = np.ascontiguousarray(model.encode([query]), dtype=np.float32)
    _, neighbor_ids = index.search(query_embedding, k)
    # Defensive filter in case the index still reports padded/out-of-range ids.
    return [chunks[int(i)] for i in neighbor_ids[0] if 0 <= int(i) < len(chunks)]

Post Retrieval Processing

In [8]:
def answer_question(chat, retrieved_chunks, user_query):
    """Ask the chat model the user's question, grounded in the retrieved chunks.

    Args:
        chat: Chat model object exposing ``invoke(messages)``.
        retrieved_chunks: Text chunks to supply as context; they are joined
            with single spaces.
        user_query: The question to answer.

    Returns:
        Whatever ``chat.invoke`` returns for the two-message conversation
        (a system message carrying the context, then the user's question).
    """
    context = " ".join(retrieved_chunks)
    system_prompt = SystemMessage(content=f"Use the following context to answer: {context}")
    question = HumanMessage(content=user_query)
    return chat.invoke([system_prompt, question])

In [60]:

# Location of the PDF to index. Set the PDF_PATH environment variable (e.g. in
# .env) to point at a different file; the original machine-specific path is
# kept as the default so existing setups behave as before.
pdf_path = os.getenv("PDF_PATH", r"C:\Users\Arnav\Desktop\msd\attention_is_all_you_need.pdf")

In [44]:
# Read the text of the PDF at pdf_path into one string.
pdf_text = extract_text_from_pdf(pdf_path=pdf_path)

In [45]:
# Split the extracted text into chunks using the default chunk size.
chunks = chunk_text(text=pdf_text)

In [46]:
# Embed every chunk and build the FAISS index over those embeddings.
index, chunk_embeddings = create_faiss_index(chunks=chunks)



In [57]:
# Question to ask about the document, and the chunks retrieved for it.
user_query = "In brief give me the major important topics being discussed in this pdf"
retrieved_chunks = retrieve_relevant_chunks(
    query=user_query,
    index=index,
    chunks=chunks,
    chunk_embeddings=chunk_embeddings,
)



In [58]:
# Send the retrieved context and the question to the chat model.
response = answer_question(
    chat=chat,
    retrieved_chunks=retrieved_chunks,
    user_query=user_query,
)

# Display the response
print(response.content)



The paper presents a new neural network architecture called the Transformer, which is based solely on attention mechanisms and dispenses with recurrence and convolutions. The Transformer is shown to be superior in quality to existing models on two machine translation tasks, while being more parallelizable and requiring significantly less time to train. The major important topics being discussed in this paper are:

1. The Transformer architecture, which consists of an encoder and a decoder, each of which is composed of a stack of identical layers.

2. The self-attention mechanism, which allows each position in the input or output sequence to attend to all other positions.

3. The multi-head attention mechanism, which allows the model to jointly attend to information from different representation subspaces at different positions.

4. The position-wise feed-forward network, which is applied to each position in the input or output sequence separately and identically.

5. The training proce