In [51]:
import os
import requests
import pdfplumber
from langchain_groq import ChatGroq
import google.generativeai as genai
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_pinecone import PineconeVectorStore
from langchain_core.embeddings import Embeddings
from langchain.schema import Document
from pinecone import Pinecone
import numpy as np
import arxiv
import wikipedia

In [52]:
# Credentials and service settings for this notebook. Replace the placeholders
# locally and never commit real keys.
# GEMINI_API_KEY is read by genai.configure; the Groq and Pinecone values are
# presumably picked up by their client libraries — verify.
# NOTE(review): 'reserach-rag' looks like a misspelling of 'research-rag' —
# confirm against the actual Pinecone configuration before changing it.
os.environ.update({
    'GROQ_API_KEY': '<YOUR API KEY>',
    "GEMINI_API_KEY": '<YOUR API KEY>',
    'PINECONE_API_KEY': '<YOUR API KEY>',
    'PINECONE_ENV': 'reserach-rag',
})

In [53]:
# Configure the google.generativeai client with the key stored in the
# GEMINI_API_KEY environment variable (raises KeyError if it is unset).
genai.configure(api_key=os.environ["GEMINI_API_KEY"])

def get_gemini_model(model_name="gemini-pro", temperature=0.4):
    """Create a Gemini ``GenerativeModel`` that uses the requested temperature.

    Args:
        model_name: Model identifier handed to ``genai.GenerativeModel``.
        temperature: Sampling temperature stored as the model's default
            generation config.
            NOTE(review): a ``generation_config`` passed per call to
            ``generate_content`` is expected to take precedence over this
            default — confirm against the SDK version in use.

    Returns:
        The constructed ``genai.GenerativeModel``.
    """
    # Previously `temperature` was accepted but never forwarded, so callers
    # asking for a specific temperature silently got the SDK default.
    return genai.GenerativeModel(
        model_name,
        generation_config={"temperature": temperature},
    )

def generate_gemini_response(model, prompt):
    """Send ``prompt`` to ``model`` and return the text of the first reply part.

    ``get_generation_config`` and ``get_safety_settings`` are called for the
    request options; they are not defined in this part of the notebook and
    must already exist in the kernel.

    Args:
        model: Object exposing ``generate_content(prompt, generation_config=...,
            safety_settings=...)``.
        prompt: Prompt text to send.

    Returns:
        The ``text`` of the first part of the first candidate, or ``''`` when
        the response has no candidates or the first candidate has no parts.
    """
    response = model.generate_content(
        prompt,
        generation_config=get_generation_config(),
        safety_settings=get_safety_settings()
    )
    candidates = response.candidates
    if not candidates:
        return ''

    # A candidate can come back without any parts (for example when the reply
    # is blocked); indexing parts[0] unguarded would raise IndexError.
    content = getattr(candidates[0], "content", None)
    parts = getattr(content, "parts", None)
    if not parts:
        return ''
    return parts[0].text


In [54]:
def download_pdf(pdf_url, save_path="temp_paper.pdf", timeout=30):
    """Download ``pdf_url`` to ``save_path``.

    Args:
        pdf_url: URL of the PDF to fetch.
        save_path: Destination file path; an existing file is overwritten.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        ``save_path`` when the server answered with HTTP 200, otherwise
        ``None`` (non-200 status, timeout, or any other request failure).
    """
    try:
        response = requests.get(pdf_url, timeout=timeout)
    except requests.RequestException:
        # Network failures are reported the same way as a bad status code so
        # callers only have to handle the single "None means failed" case.
        return None

    if response.status_code != 200:
        return None

    with open(save_path, "wb") as file:
        file.write(response.content)
    return save_path

def extract_text_from_pdf(pdf_path):
    """Return the text of every page in the PDF at ``pdf_path``.

    Pages for which ``extract_text()`` yields nothing (``None`` or an empty
    string) are skipped. The remaining page texts are separated by newlines
    and the combined string is stripped of leading/trailing whitespace.
    """
    with pdfplumber.open(pdf_path) as document:
        page_texts = [page.extract_text() for page in document.pages]
    return "\n".join(chunk for chunk in page_texts if chunk).strip()

def summarize_text(text, max_chars=5000):
    """Ask Gemini for a very concise summary of a research paper's text.

    Args:
        text: Full text of the paper.
        max_chars: Only the first ``max_chars`` characters of ``text`` are sent
            in the prompt. Pass ``None`` to send the whole text.

    Returns:
        The summary produced by ``generate_gemini_response``, or ``''`` when
        ``text`` is empty or whitespace-only (no model call is made).
    """
    # A PDF with no extractable text yields an empty string; prompting the
    # model with nothing to summarize wastes a call and invites invented output.
    if not text or not text.strip():
        return ''

    model = get_gemini_model()
    prompt_text = f"Summarize the following research paper very concisely:\n{text[:max_chars]}"
    return generate_gemini_response(model, prompt_text)

In [55]:
def _summarize_pdf(pdf_link):
    """Download the PDF at ``pdf_link``, summarize its text, and delete the download."""
    pdf_path = download_pdf(pdf_link)
    if not pdf_path:
        return "PDF could not be downloaded."

    try:
        text = extract_text_from_pdf(pdf_path)
    finally:
        # download_pdf leaves the file on disk; remove it once it has been
        # read so repeated searches do not leave stale PDFs behind.
        if os.path.exists(pdf_path):
            os.remove(pdf_path)
    return summarize_text(text)


def search_arxiv(query, max_results=2):
    """Search arXiv and wrap each hit in a ``Document`` with a PDF summary.

    Args:
        query: Search string passed to ``arxiv.Search``.
        max_results: Maximum number of results requested, sorted by relevance.

    Returns:
        A list of ``Document`` objects. Each ``page_content`` holds the title,
        authors, publication date, abstract, PDF summary and PDF link;
        ``metadata`` holds ``{"source": "arXiv", "title": <title>}``. The PDF
        summary is a fixed message when there is no PDF link or the download
        fails.
    """
    client = arxiv.Client()
    search = arxiv.Search(query=query, max_results=max_results, sort_by=arxiv.SortCriterion.Relevance)

    arxiv_docs = []

    for result in client.results(search):
        # First link whose URL contains 'pdf', if any.
        pdf_link = next((link.href for link in result.links if 'pdf' in link.href), None)
        summary = _summarize_pdf(pdf_link) if pdf_link else "No PDF available."

        content = f"""
        **Title:** {result.title}
        **Authors:** {', '.join(author.name for author in result.authors)}
        **Published:** {result.published.strftime('%Y-%m-%d')}
        **Abstract:** {result.summary}
        **PDF Summary:** {summary}
        **PDF Link:** {pdf_link if pdf_link else 'Not available'}
        """

        arxiv_docs.append(Document(page_content=content, metadata={"source": "arXiv", "title": result.title}))

    return arxiv_docs

In [56]:
def search_wikipedia(query, max_results=2):
    """Search Wikipedia and return the leading text of each matching page.

    Args:
        query: Search string passed to ``wikipedia.search``.
        max_results: Maximum number of page titles to request.

    Returns:
        A list of ``Document`` objects whose ``page_content`` is the first 2000
        characters of the page and whose ``metadata`` is
        ``{"source": "Wikipedia", "title": <title>}``. Titles that raise
        ``DisambiguationError`` or ``PageError`` are skipped; the list is empty
        when nothing could be loaded.
    """
    lookup_errors = (wikipedia.exceptions.DisambiguationError, wikipedia.exceptions.PageError)

    try:
        page_titles = wikipedia.search(query, results=max_results)
    except lookup_errors:
        return []

    docs = []
    for title in page_titles:
        # Handle failures per title: previously one ambiguous or missing page
        # threw away every other page that had loaded fine.
        try:
            page = wikipedia.page(title)
        except lookup_errors:
            continue
        docs.append(Document(page_content=page.content[:2000], metadata={"source": "Wikipedia", "title": title}))
    return docs

In [57]:
def retrieve_documents(query):
    """Gather context strings for ``query`` from three sources.

    The module-level ``retriever`` is queried first, then arXiv, then
    Wikipedia. Each document is rendered as a bold label followed by its full
    ``page_content`` and a literal ``...`` suffix; no truncation happens here.

    Returns:
        A ``(user_context, summarized_context)`` tuple of string lists:
        ``user_context`` comes from ``retriever``; ``summarized_context`` holds
        the arXiv entries followed by the Wikipedia entries.
    """
    stored_hits = retriever.get_relevant_documents(query)
    papers = search_arxiv(query)
    articles = search_wikipedia(query)

    user_context = [
        f"**{hit.metadata.get('source', 'Unknown Source')}**: {hit.page_content}..."
        for hit in stored_hits
    ]

    summarized_context = [
        f"**ArXiv - {paper.metadata.get('title', 'Unknown Title')}**:\n{paper.page_content}..."
        for paper in papers
    ]
    summarized_context.extend(
        f"**Wikipedia - {article.metadata.get('title', 'Unknown Title')}**:\n{article.page_content}..."
        for article in articles
    )

    return user_context, summarized_context

# Build the retrieval chain used by chat_with_llm. `retriever` and
# `question_answer_chain` are not defined in this part of the notebook; they
# must already exist in the kernel or this line raises NameError.
chain = create_retrieval_chain(retriever, question_answer_chain)

In [58]:
def chat_with_llm(question):
    """Answer ``question`` with the retrieval chain plus external context.

    Context is collected via ``retrieve_documents``; each list is joined with
    blank lines and passed to the module-level ``chain`` under the keys
    ``context`` and ``additional_context`` alongside ``input``.

    NOTE(review): ``create_retrieval_chain`` is documented to fill ``context``
    from its own retriever, so the ``context`` value supplied here may be
    overwritten — confirm against the LangChain version in use.

    Returns:
        A ``(chain_result, summarized_context)`` tuple, where
        ``summarized_context`` is the un-joined list of arXiv/Wikipedia strings.
    """
    user_docs, external_docs = retrieve_documents(question)

    separator = "\n\n"
    payload = dict(
        input=question,
        context=separator.join(user_docs),
        additional_context=separator.join(external_docs),
    )

    result = chain.invoke(payload)
    return result, external_docs

In [59]:
# Run the pipeline on a sample query: `out` is the chain result and `s` is the
# list of arXiv/Wikipedia context strings returned alongside it.
out, s = chat_with_llm('Black hole mergers')

In [60]:
# Display the value stored under the 'answer' key of the chain result.
out['answer']

'Here is a summary of black hole mergers based on the provided context and sources:\n\n**Black Hole Mergers:**\n\nBlack hole mergers occur when two black holes in a binary system collide, releasing an immense amount of energy in the form of gravitational waves. This event is of great interest scientifically, as it provides a means to test the 2nd Law of Black Hole Thermodynamics and offers a chance to directly detect gravitational waves.\n\n**Formation of Binary Black Holes:**\n\nBinary black holes can form through the merger of high-mass binary star systems or through dynamic processes and mutual capture. They can also result from galactic mergers, leading to the formation of binary supermassive black holes.\n\n**Effects of Mergers:**\n\nDuring the merger process, the orbiting black holes emit gravitational waves, causing the orbit to decay and the orbital period to decrease. This stage is known as binary black hole inspiral. Once the black holes are close enough, they merge, and the 

In [61]:
# Character count of the stringified external-context list, as a rough size check.
len(str(s))

7839

In [62]:
# Inspect the raw external-context strings (arXiv entries, then Wikipedia entries).
s

["**ArXiv - Evolution of massive black holes**:\n\n        **Title:** Evolution of massive black holes\n        **Authors:** Marta Volonteri\n        **Published:** 2007-09-12\n        **Abstract:** Supermassive black holes are nowadays believed to reside in most local\ngalaxies. Accretion of gas and black hole mergers play a fundamental role in\ndetermining the two parameters defining a black hole: mass and spin. I briefly\nreview here some of the physical processes that are conducive to the evolution\nof the massive black hole population. I'll discuss black hole formation\nprocesses that are likely to place at early cosmic epochs, and how massive\nblack hole evolve in a hierarchical Universe. The mass of the black holes that\nwe detect today in nearby galaxy has mostly been accumulated by accretion of\ngas. While black hole--black hole mergers do not contribute substantially to\nthe final mass of massive black holes, they influence the occupancy of galaxy\ncenters by black hole, owin