In [19]:
import os
import glob
import fitz  # PyMuPDF for PDF processing
import numpy as np
import faiss
# from langchain.llms import Groq
from langchain_groq import ChatGroq
from langchain.embeddings import HuggingFaceEmbeddings
from dotenv import load_dotenv

# Pull the variables defined in a local .env file into the process environment
load_dotenv()
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")  # None when the variable is not set

# Groq chat client; query_rag_system uses it to generate the final answer
groq_llm = ChatGroq(api_key=GROQ_API_KEY)

def load_pdfs(folder_path, chunk_size=500):
    """Extract the text of every PDF in a folder and split it into fixed-size chunks.

    Args:
        folder_path: Directory searched (non-recursively) for ``*.pdf`` files.
        chunk_size: Maximum number of characters per chunk; must be positive.

    Returns:
        A flat list of text chunks. Files are processed in sorted path order so
        that chunk positions are reproducible between runs. Chunks never span
        two files; the last chunk of a file may be shorter than ``chunk_size``.

    Raises:
        ValueError: If ``chunk_size`` is not positive.
    """
    if chunk_size <= 0:
        # A negative step makes range() yield nothing, which would silently
        # drop all text; a zero step only fails once a PDF has been opened.
        raise ValueError(f"chunk_size must be a positive integer, got {chunk_size!r}")

    # glob returns matches in arbitrary, OS-dependent order; sort so that the
    # position of each chunk in the returned list is stable across runs.
    pdf_files = sorted(glob.glob(os.path.join(folder_path, "*.pdf")))
    documents = []

    for file in pdf_files:
        with fitz.open(file) as doc:
            # Concatenate the plain text of all pages, one page per line group
            text = "\n".join(page.get_text("text") for page in doc)

        # Slice the whole-document text into consecutive character windows
        documents.extend(
            text[start:start + chunk_size]
            for start in range(0, len(text), chunk_size)
        )

    return documents


# Initialize HuggingFace embeddings
embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# NOTE(review): `documents` is not assigned anywhere in the visible notebook; presumably it
# comes from a load_pdfs(<folder>) call run earlier in this kernel -- confirm, and add that
# call here so the notebook survives Restart Kernel -> Run All.
if not documents:
    # Without this check an empty list only surfaces as an IndexError on shape[1] below
    raise ValueError("No document chunks to index; load PDFs into `documents` first.")

# Embed every chunk in one batched call (instead of one model call per chunk).
# FAISS operates on float32 vectors, so cast explicitly rather than passing float64.
embeddings = np.asarray(embedding_model.embed_documents(documents), dtype="float32")

# Store in FAISS index
d = embeddings.shape[1]  # embedding dimensionality
index = faiss.IndexFlatL2(d)  # flat index compared by L2 distance
index.add(embeddings)

def query_rag_system(user_query, k=3):
    """Retrieve the chunks most similar to a query and ask the LLM to answer from them.

    Args:
        user_query: Natural-language question.
        k: Maximum number of chunks to retrieve from the FAISS index (default 3).

    Returns:
        A ``(similar_docs, response)`` tuple: the retrieved chunk texts in the
        order returned by the index, and the text content of the LLM's reply.
        ``similar_docs`` has fewer than ``k`` entries when the index holds
        fewer than ``k`` vectors.
    """
    # FAISS searches a 2-D float32 array of shape (n_queries, dim)
    query_embedding = np.asarray(
        [embedding_model.embed_query(user_query)], dtype="float32"
    )
    _, neighbor_ids = index.search(query_embedding, k)  # Retrieve top-k similar docs

    # When the index holds fewer than k vectors, FAISS pads the id list with -1;
    # documents[-1] would silently return the last chunk, so drop those slots.
    similar_docs = [documents[int(i)] for i in neighbor_ids[0] if i >= 0]

    # Construct a prompt for the LLM
    prompt = f"Based on the following research papers, answer the query: {user_query}\n\n"
    for i, doc in enumerate(similar_docs):
        prompt += f"Research Paper {i+1}: {doc[:500]}...\n\n"
    prompt += "Provide a concise and informative response."

    # Get response from Groq LLM. `predict` is deprecated on LangChain chat
    # models; `invoke` returns a message object whose `.content` is the text.
    response = groq_llm.invoke(prompt).content

    return similar_docs, response

# Run the RAG pipeline on a sample question and show what came back
test_query = "What are the recent advancements in AI-powered drug discovery?"
similar_papers, response = query_rag_system(test_query)

# Preview the first 300 characters of each retrieved chunk
print("Relevant Papers:")
for paper in similar_papers:
    print(paper[:300], "...\n")

# Header and answer on consecutive lines (same output as two separate prints)
print("\nGenerated Response:", response, sep="\n")

Relevant Papers:
Expert Opinion on Drug Discovery
ISSN: (Print) (Online) Journal homepage: www.tandfonline.com/journals/iedc20
Induﬆrializing AI-powered drug discovery: lessons
learned from the Patrimony computing platform
Mickaël Guedj, Jack Swindle, Antoine Hamon, Sandra Hubert, Emiko
Desvaux, Jessica Laplume, Lau ...

See discussions, stats, and author profiles for this publication at: https://www.researchgate.net/publication/380731266
A COMPREHENSIVE STUDY ON AI-POWERED DRUG DISCOVERY: RAPID
DEVELOPMENT OF PHARMACEUTICAL RESEARCH
Article  in  Journal of Emerging Technologies and Innovative Research · February 20 ...

Opinion
AI-powered therapeutic target discovery
Frank W. Pun,1 Ivan V. Ozerov,1 and Alex Zhavoronkov
1,2,3,*
Disease modeling and target identiﬁcation are the most crucial initial steps in
drug discovery, and inﬂuence the probability of success at every step of drug
development. Traditional target i ...


Generated Response:
Recent advancements in AI-powered drug disco

In [22]:
# Run the RAG pipeline on a second sample question and show what came back
test_query = " how Biomedical Image Analysis is helpful"
similar_papers, response = query_rag_system(test_query)

# Preview the first 300 characters of each retrieved chunk
print("Relevant Papers:")
for paper in similar_papers:
    print(paper[:300], "...\n")

# Header and answer on consecutive lines (same output as two separate prints)
print("\nGenerated Response:", response, sep="\n")

Relevant Papers:
Expert Opinion on Drug Discovery
ISSN: (Print) (Online) Journal homepage: www.tandfonline.com/journals/iedc20
Induﬆrializing AI-powered drug discovery: lessons
learned from the Patrimony computing platform
Mickaël Guedj, Jack Swindle, Antoine Hamon, Sandra Hubert, Emiko
Desvaux, Jessica Laplume, Lau ...

Opinion
AI-powered therapeutic target discovery
Frank W. Pun,1 Ivan V. Ozerov,1 and Alex Zhavoronkov
1,2,3,*
Disease modeling and target identiﬁcation are the most crucial initial steps in
drug discovery, and inﬂuence the probability of success at every step of drug
development. Traditional target i ...

See discussions, stats, and author profiles for this publication at: https://www.researchgate.net/publication/380731266
A COMPREHENSIVE STUDY ON AI-POWERED DRUG DISCOVERY: RAPID
DEVELOPMENT OF PHARMACEUTICAL RESEARCH
Article  in  Journal of Emerging Technologies and Innovative Research · February 20 ...


Generated Response:
Biomedical Image Analysis is helpful in drug

In [23]:


# Run the RAG pipeline on a third sample question and show what came back
test_query = " What are latest advancement regarding Sustainable Energy"
similar_papers, response = query_rag_system(test_query)

# Preview the first 300 characters of each retrieved chunk
print("Relevant Papers:")
for paper in similar_papers:
    print(paper[:300], "...\n")

# Header and answer on consecutive lines (same output as two separate prints)
print("\nGenerated Response:", response, sep="\n")

Relevant Papers:
Received January 21, 2019, accepted March 3, 2019, date of current version May 29, 2019.
Digital Object Identifier 10.1109/ACCESS.2019.2906402
Towards Sustainable Energy: A Systematic
Review of Renewable Energy Sources,
Technologies, and Public Opinions
ATIKA QAZI
1, FAYAZ HUSSAIN1, NASRUDIN ABD. RA ...

1US Department of Energy, 1000 Independence Avenue SW, Washington DC 20585, USA. 
T
he industrial revolution began in the mid-eighteenth century, 
and provided humans with capabilities well beyond animal and 
human power. Steam-powered trains and ships, and then inter­
nal combustion engines transfo ...

Expert Opinion on Drug Discovery
ISSN: (Print) (Online) Journal homepage: www.tandfonline.com/journals/iedc20
Induﬆrializing AI-powered drug discovery: lessons
learned from the Patrimony computing platform
Mickaël Guedj, Jack Swindle, Antoine Hamon, Sandra Hubert, Emiko
Desvaux, Jessica Laplume, Lau ...


Generated Response:
Based on the research papers provided, the l