In [2]:
import os
from dotenv import load_dotenv

# Read key/value pairs from a .env file (python-dotenv) into the process
# environment before any key is looked up.
load_dotenv()

# API key handed to the Groq chat model; None when the variable is not set.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")

# Keys for other services, currently disabled:
# ASTRA_DB_API = os.getenv("ASTRA_DB_API")
# SERPAPI_KEY = os.getenv("SERPAPI_KEY")

In [3]:

import fitz  # PyMuPDF

def extract_text_from_pdf(pdf_path):
    """Return the plain text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Path of the PDF file; passed unchanged to ``fitz.open``.

    Returns:
        str: The ``get_text("text")`` output of each page joined with no
        separator. Empty when the document has no pages or no page yields
        any text.
    """
    # The context manager closes the document (and its underlying file
    # handle) even if extracting a page raises, so the file is not left
    # open or locked after this function returns.
    with fitz.open(pdf_path) as doc:
        # join() builds the result once instead of re-copying a growing
        # string for every page.
        return "".join(page.get_text("text") for page in doc)

In [4]:
## Step 4: Store Text as Vector Embeddings

from langchain.vectorstores import FAISS
from langchain.embeddings.openai import OpenAIEmbeddings

def _split_into_chunks(text, chunk_size, chunk_overlap):
    """Cut ``text`` into character windows of ``chunk_size`` that overlap by ``chunk_overlap``."""
    step = chunk_size - chunk_overlap
    chunks = []
    for start in range(0, len(text), step):
        piece = text[start:start + chunk_size]
        if piece.strip():  # a whitespace-only window has nothing worth embedding
            chunks.append(piece)
        if start + chunk_size >= len(text):
            # This window already reached the end of the text; another one
            # would only repeat part of the tail.
            break
    return chunks


def store_embeddings(text, save_path="vector_store", chunk_size=1000, chunk_overlap=200):
    """Embed ``text`` in chunks, build a FAISS index from them and save it.

    The text is split into overlapping character windows so that the index
    holds several passages; a similarity search can then return the most
    relevant ones instead of the whole document as a single hit. Text no
    longer than ``chunk_size`` is stored as one entry.

    Args:
        text: The text to index.
        save_path: Directory passed to ``save_local``.
        chunk_size: Maximum number of characters per chunk; must be positive.
        chunk_overlap: Characters shared by consecutive chunks; must satisfy
            ``0 <= chunk_overlap < chunk_size``.

    Returns:
        The vector store returned by ``FAISS.from_texts``.

    Raises:
        ValueError: If the chunk settings are invalid, or if ``text`` is
            empty or contains only whitespace.
    """
    if chunk_size <= 0 or not 0 <= chunk_overlap < chunk_size:
        raise ValueError(
            "chunk_size must be positive and chunk_overlap must satisfy "
            "0 <= chunk_overlap < chunk_size"
        )

    # Validate the input before creating the embedding client so that an
    # empty document fails with a clear message and no remote call.
    chunks = _split_into_chunks(text or "", chunk_size, chunk_overlap)
    if not chunks:
        raise ValueError("No text to embed: the input is empty or contains only whitespace.")

    embeddings = OpenAIEmbeddings()
    vector_store = FAISS.from_texts(chunks, embeddings)
    vector_store.save_local(save_path)
    return vector_store

In [6]:

## Step 5: Load Vector Store and Query
from langchain_groq import ChatGroq

def load_vector_store(save_path="vector_store", **load_kwargs):
    """Load a FAISS index that was previously written with ``save_local``.

    Args:
        save_path: Directory holding the saved index. Defaults to
            "vector_store", the default directory ``store_embeddings``
            saves to.
        **load_kwargs: Extra keyword arguments forwarded unchanged to
            ``FAISS.load_local``.

    Returns:
        The vector store returned by ``FAISS.load_local``.
    """
    # Queries are embedded with this object, so it should be the same
    # embedding class that built the index.
    embeddings = OpenAIEmbeddings()
    # NOTE(review): LangChain's FAISS persistence appears to be pickle-based,
    # so only load an index this application wrote itself. Recent LangChain
    # releases reportedly refuse to load unless
    # allow_dangerous_deserialization=True is passed (possible here via
    # load_kwargs) - confirm against the installed version.
    return FAISS.load_local(save_path, embeddings, **load_kwargs)

def query_rag(user_query, k=5):
    """Answer ``user_query`` with a Groq chat model, using stored passages as context.

    Args:
        user_query: The question or keywords to answer.
        k: Number of passages requested from the vector store's
            similarity search.

    Returns:
        The ``content`` of the chat model's reply.
    """
    vector_store = load_vector_store()
    retrieved_docs = vector_store.similarity_search(user_query, k=k)

    # Passages are separated by a blank line inside the prompt.
    context = "\n\n".join(doc.page_content for doc in retrieved_docs)
    prompt = f"Based on this research context, answer: {user_query}\n\nContext:\n{context}"

    # NOTE(review): no model name is passed, so whatever default the installed
    # langchain_groq provides is used - confirm that default is still served.
    llm = ChatGroq(api_key=GROQ_API_KEY)

    # invoke() is the supported chat-model entry point; predict() is
    # deprecated. The reply is a message object whose text is in .content.
    return llm.invoke(prompt).content

In [7]:
import streamlit as st

# Folder uploads are written to, and the folder the FAISS index is saved in
# ("vector_store" is the default path of store_embeddings / load_vector_store).
UPLOAD_DIR = "data"
VECTOR_STORE_DIR = "vector_store"

st.title("AI-Powered Research Assistant")
query = st.text_input("Enter your research topic or keywords:")
pdf_file = st.file_uploader("Upload Research Paper (PDF)", type=["pdf"])

if pdf_file:
    # Create the upload folder on first use; open() below fails if it is missing.
    os.makedirs(UPLOAD_DIR, exist_ok=True)
    # The file name comes from the client: keep only its last path component
    # so the upload cannot be written outside UPLOAD_DIR.
    pdf_path = os.path.join(UPLOAD_DIR, os.path.basename(pdf_file.name))
    with open(pdf_path, "wb") as f:
        f.write(pdf_file.getbuffer())

    extracted_text = extract_text_from_pdf(pdf_path)
    if extracted_text.strip():
        store_embeddings(extracted_text)
        st.success("PDF processed and stored in the database!")
    else:
        # Nothing to index (e.g. a PDF without a text layer); say so instead
        # of sending empty text to the embedding step.
        st.error("No extractable text was found in this PDF.")

if query:
    if os.path.isdir(VECTOR_STORE_DIR):
        response = query_rag(query)
        st.subheader("Generated Research Insights:")
        st.write(response)
    else:
        # No index has been saved yet, so there is nothing to search.
        st.warning("Upload a research paper first so there is something to search.")

2025-02-04 20:16:36.167 
  command:

    streamlit run c:\Users\Arvind\anaconda3\envs\groq\lib\site-packages\ipykernel_launcher.py [ARGUMENTS]
2025-02-04 20:16:36.176 Session state does not function when running a script without `streamlit run`
