In [1]:
import os 
import sys 
from langchain_core.prompts import PromptTemplate,ChatPromptTemplate
from langchain_google_genai  import GoogleGenerativeAI,GoogleGenerativeAIEmbeddings
from langchain_community.document_loaders import PyPDFLoader
from langchain_groq import ChatGroq
from langchain_huggingface import HuggingFaceEmbeddings

In [2]:
from dotenv import load_dotenv,find_dotenv

_ = load_dotenv(find_dotenv())
groq_api_key=os.getenv('GROQ_API_KEY')

os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")

In [3]:
llm=ChatGroq(groq_api_key=groq_api_key,
             model_name="llama-3.3-70b-versatile")

In [4]:
loader = PyPDFLoader("C:\\QNARAG\\IEEE_SMC_2025_Omar.pdf")

In [5]:
doc = loader.load()

In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
final_documents = text_splitter.split_documents(doc[:20])  # splitting

In [7]:
#embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
from langchain_community.vectorstores import FAISS

In [9]:
vectorstore = FAISS.from_documents(final_documents,embeddings)

In [10]:
prompt=ChatPromptTemplate.from_template(
"""
You are an AI research assistant with expertise in analyzing academic papers. 
Answer the user's question based only on the provided context.
If the answer is not found in the context, state that clearly.
Context: {context}
Questions:{input}

"""
)

In [11]:
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain


In [12]:
retriver=vectorstore.as_retriever()


In [13]:
import time 
document_chain=create_stuff_documents_chain(llm,prompt)
retriever=vectorstore.as_retriever()
retrieval_chain=create_retrieval_chain(retriever,document_chain)
start=time.process_time()
response=retrieval_chain.invoke({'input':"Explain the summary of the paper in detail.",
                                 'context':" ".join([doc.page_content for doc in final_documents[:3]])})
print("Response time :",time.process_time()-start)
#print(response['answer'])
if isinstance(response['answer'], list):  
    print(response['answer'][0])  # take the first response
else:
    print(response['answer'])

Response time : 0.0625
The provided context does not contain a summary of a paper. It appears to be a collection of references and a discussion of the performance of various deep learning models, including ResNet, Xception, and MobileNetV2, on a specific task, possibly related to image segmentation or object detection.

The text describes the performance of these models in terms of their Intersection over Union (IoU) scores, which is a common metric used to evaluate the performance of models in image segmentation tasks. The discussion highlights the strengths and weaknesses of each model, including their ability to generalize, converge, and trade off accuracy for efficiency.

However, without a clear summary of the paper, it is difficult to provide a detailed explanation of the paper's main contributions, methodology, or conclusions. If you could provide more context or clarify which specific paper you would like me to summarize, I would be happy to try and assist you further.


In [14]:


# The .stream() method returns a generator that yields dictionary chunks.
# We iterate through the generator to get the response as it's created.
for chunk in retrieval_chain.stream({'input': "Explain the summary of the paper in detail."}):
    # The actual generated text is usually in the 'answer' key of the chunk.
    if "answer" in chunk:
        # Print the chunk of the answer without a newline, and flush the output
        # to ensure it appears immediately in the console.
        print(chunk['answer'], end="", flush=True)

#   print("\n\nResponse time :", time.process_time() - start)

The provided context appears to be a portion of a research paper discussing the performance of various deep learning models on a specific task, likely related to computer vision and scene understanding for autonomous vehicles. Here's a detailed summary based on the given context:

1. **Data Preprocessing**: The paper mentions a preprocessing pipeline where images are normalized to a [0, 1] range, and masks retain their original color structure. A consistent seed is used to ensure proper alignment between images and masks. This step is crucial for preparing the dataset for training and validation.

2. **Model Performance Comparison**: The paper compares the performance of several deep learning models, including ResNet-50, Xception, MobileNetV2, ResNet18, and InceptionResNetV2. The performance is evaluated based on the Intersection over Union (IoU) metric, which measures the accuracy of the model in segmenting objects or scenes.

3. **Training and Validation IoU**: The results show that 

In [15]:
# import fitz  # PyMuPDF
# import os
# from langchain_core.prompts import ChatPromptTemplate
# from langchain_groq import ChatGroq

# # --- Configuration ---
# # Make sure to set your Groq API key in your environment variables
# # For example: os.environ["GROQ_API_KEY"] = "YOUR_API_KEY"
# # You can get a key from the Groq console.

# PDF_PATH = "Human_Segmentation_Research.pdf"  # IMPORTANT: Place your PDF in the same directory

# # --- 1. Text Extraction from PDF ---
# # Note: Image extraction has been removed as Groq models are not multimodal.

# def extract_pdf_text(pdf_path):
#     """Extracts text from a PDF file."""
#     if not os.path.exists(pdf_path):
#         print(f"Error: PDF file not found at {pdf_path}")
#         # Create a dummy file to avoid crashing the rest of the script
#         with open(pdf_path, "w") as f:
#             f.write("Dummy PDF content. Please replace with your actual PDF.")
    
#     doc = fitz.open(pdf_path)
#     text_content = ""
#     for page_num in range(len(doc)):
#         page = doc.load_page(page_num)
#         text_content += page.get_text()
            
#     return text_content

# # --- 2. LLM Invocation with Groq ---

# def explain_document(pdf_path, question):
#     """
#     Extracts text from a PDF and uses the Groq Llama 3 model to answer a question.
#     """
#     print(f"Analyzing text from '{pdf_path}'...")
#     text_content = extract_pdf_text(pdf_path)

#     # Initialize the Groq model.
#     # We are using Llama 3, which is a powerful text-based model.
#     # It cannot process images.
#     try:
#         llm = ChatGroq(model_name="llama-3.3-70b-versatile")
#     except Exception as e:
#         print(f"Error initializing Groq LLM. Have you set your GROQ_API_KEY? Error: {e}")
#         return

#     # A prompt template designed for text-based analysis.
#     prompt_template = ChatPromptTemplate.from_template(
#         "You are an AI research assistant. Based on the following context from a research paper, "
#         "please answer this question: {question}\n\n"
#         "--- Text Context ---\n{context}"
#     )

#     # Create a simple chain to pipe the prompt and the LLM.
#     chain = prompt_template | llm

#     # --- 3. Stream the response ---
#     print("\n--- Generating Explanation (Streaming) ---")
#     full_response = ""
#     # We use a limited context window to fit within the model's limits.
#     for chunk in chain.stream({"question": question, "context": text_content[:8000]}):
#         # The actual generated text is in the 'content' attribute of the chunk.
#         print(chunk.content, end="", flush=True)
#         full_response += chunk.content
        
#     return full_response


# # --- Example Usage ---
# if __name__ == "__main__":
#     # The question is now focused on text, not graphs.
#     user_question = "Author details of this paper."
    
#     if not os.path.exists(PDF_PATH):
#          print("\n\n-------------------------------------------------------------")
#          print(f"WARNING: The file '{PDF_PATH}' was not found.")
#          print("Please add the PDF to your project directory to run this example.")
#          print("-------------------------------------------------------------")
#     else:
#         explanation = explain_document(PDF_PATH, user_question)
#         print("\n\n--- Analysis Complete ---")