In [None]:
!pip install langchain_google_vertexai langchain_community langgraph nltk

In [None]:
!pip install --upgrade google-auth google-auth-oauthlib google-api-python-client google.cloud

In [None]:
# Authenticate the gcloud CLI (interactive prompt).
# NOTE(review): presumably only needed for the gcloud commands in the following cells;
# the Python code authenticates with the service-account key file instead — confirm.
!gcloud auth login

In [None]:
# Make rag-model-448019 the default project for subsequent gcloud commands.
!gcloud config set project rag-model-448019

In [None]:
# Grant the rag-model service account the roles/aiplatform.user role on the project.
# NOTE(review): this writes to the project's IAM policy every time the cell runs;
# consider doing it once outside the notebook.
!gcloud projects add-iam-policy-binding rag-model-448019 \
    --member="serviceAccount:rag-model@rag-model-448019.iam.gserviceaccount.com" \
    --role="roles/aiplatform.user"

In [None]:
!pip install unstructured unstructured[pdf] gradio

In [None]:
# Configure Vertex AI credentials from the service-account key file
import os
import gradio as gr

from google.oauth2 import service_account
from google.auth.transport.requests import Request

# Single source of truth for the key file location and the requested OAuth scope
SERVICE_ACCOUNT_KEY_PATH = "/content/rag-model-448019-fe37e29d6e38.json"
CLOUD_PLATFORM_SCOPE = "https://www.googleapis.com/auth/cloud-platform"

# Exported so libraries that look up GOOGLE_APPLICATION_CREDENTIALS find the same key
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = SERVICE_ACCOUNT_KEY_PATH

# Explicit credentials object, refreshed right away so an access token is fetched now
credentials = service_account.Credentials.from_service_account_file(
    SERVICE_ACCOUNT_KEY_PATH,
    scopes=[CLOUD_PLATFORM_SCOPE],
)
credentials.refresh(Request())

In [None]:
from google.cloud import aiplatform as vertexai

PROJECT_ID = "rag-model-448019"  # Replace with your Google Cloud project ID
LOCATION = "us-central1"  # Replace with your preferred region

# Initialise the SDK with the project, region and the service-account credentials
# created in the credentials cell.
vertexai.init(project=PROJECT_ID, location=LOCATION, credentials=credentials)

In [None]:
from langchain_google_vertexai import ChatVertexAI

# Chat model used by generate() to produce answers.
# NOTE(review): this is a pinned model version — confirm it is still served for your project/region.
CHAT_MODEL_NAME = "gemini-1.5-pro-001"

llm = ChatVertexAI(model=CHAT_MODEL_NAME)

In [None]:
from langchain_google_vertexai import VertexAIEmbeddings

# Embedding model handed to the vector store
EMBEDDING_MODEL_NAME = "text-embedding-005"

embeddings = VertexAIEmbeddings(model=EMBEDDING_MODEL_NAME)

In [None]:
from langchain_core.vectorstores import InMemoryVectorStore
import nltk

# In-memory index built on the `embeddings` model; save_file() adds chunks to it and
# retrieve() searches it.
vector_store = InMemoryVectorStore(embeddings)

# NLTK data packages — presumably required by the unstructured document loader; TODO confirm
for nltk_resource in ("punkt_tab", "averaged_perceptron_tagger_eng"):
    nltk.download(nltk_resource)

In [None]:
import bs4
from langchain import hub
from langchain_community.document_loaders import DirectoryLoader
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langgraph.graph import START, StateGraph
from typing_extensions import List, TypedDict
import gradio as gr
import os
import shutil
import time

In [None]:
# Define the directory to save uploaded files.
# save_file() copies each upload here and loads PDFs from this same location.
# NOTE(review): the service-account key JSON is also read from "/content/", so uploads
# share a folder with credentials — consider a dedicated subdirectory.
UPLOAD_DIRECTORY = "/content/"
os.makedirs(UPLOAD_DIRECTORY, exist_ok=True)  # no-op when the directory already exists

In [None]:
def save_file(file):
    """Copy an uploaded PDF into UPLOAD_DIRECTORY and index its chunks.

    Only the file that was just uploaded is loaded and added to ``vector_store``.
    Re-loading every PDF in the directory on each upload would add the chunks of
    earlier uploads again and fill search results with duplicates.

    Args:
        file: Value delivered by the Gradio ``File`` component: an object whose
            ``.name`` is the path of the uploaded temp file, or a plain path
            string. ``None`` means nothing was uploaded.

    Returns:
        str: "File saved successfully" once the chunks are indexed, otherwise a
        message starting with "Error saving file: ".
    """
    import glob  # local import: only needed to escape the file name for the loader

    if file is None:
        return "Error saving file: no file was uploaded"
    try:
        # Accept both file-like objects (path in .name) and plain path strings
        source_path = getattr(file, "name", file)
        base_filename = os.path.basename(source_path)

        # Reject anything that is not a PDF before copying, so a non-PDF upload
        # cannot be reported as a success or overwrite other files that live in
        # the upload directory.
        if not base_filename.lower().endswith(".pdf"):
            return "Error saving file: only PDF files are supported"

        target_path = os.path.join(UPLOAD_DIRECTORY, base_filename)
        # shutil.copy raises SameFileError when source and destination coincide
        if os.path.abspath(source_path) != os.path.abspath(target_path):
            shutil.copy(source_path, target_path)

        # Load only the new file; glob.escape keeps characters such as "[" or "*"
        # in the file name from being interpreted as wildcards.
        loader = DirectoryLoader(UPLOAD_DIRECTORY, glob=glob.escape(base_filename))
        docs = loader.load()

        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        all_splits = text_splitter.split_documents(docs)
        if not all_splits:
            return "Error saving file: no text could be extracted from the PDF"

        # Index chunks (uploading the same file again will index it again)
        _ = vector_store.add_documents(documents=all_splits)
        return "File saved successfully"
    except Exception as e:
        return f"Error saving file: {str(e)}"



In [None]:
# Define prompt for question-answering: pull the "rlm/rag-prompt" template via
# langchain's hub module. generate() invokes it with "question" and "context" values.
prompt = hub.pull("rlm/rag-prompt")


# Define state for application
class State(TypedDict):
    """State dictionary handed to the graph's steps (retrieve, then generate)."""
    question: str  # read by retrieve() and generate()
    context: List[Document]  # returned by retrieve(), read by generate()
    answer: str  # returned by generate()

# Define application steps
def retrieve(state: State):
    """Search the vector store for documents similar to the question.

    Args:
        state: Current graph state; only ``state["question"]`` is read.

    Returns:
        dict: Partial state update with the matches under ``"context"``.
    """
    query = state["question"]
    matches = vector_store.similarity_search(query)
    return {"context": matches}


def generate(state: State):
    """Answer the question with the LLM, using the retrieved documents as context.

    Args:
        state: Current graph state; reads ``"question"`` and ``"context"``.

    Returns:
        dict: Partial state update with the model's reply under ``"answer"``.
    """
    # Retrieved passages are joined with a blank line between them
    passages = [doc.page_content for doc in state["context"]]
    prompt_input = {"question": state["question"], "context": "\n\n".join(passages)}
    messages = prompt.invoke(prompt_input)
    reply = llm.invoke(messages)
    return {"answer": reply.content}

In [None]:
# Assemble the two-step pipeline (retrieve, then generate) and compile it
graph_builder = StateGraph(State)
graph_builder.add_sequence([retrieve, generate])
graph_builder.add_edge(START, "retrieve")  # START feeds the first step
graph = graph_builder.compile()

In [None]:
def chatbot_interface(question):
    """Run a question through the RAG graph and return the text for the answer box.

    The handler is wired to a single Gradio output, so every path returns exactly
    one string, including the error path.

    Args:
        question: Text typed by the user; may be empty or ``None``.

    Returns:
        str: The generated answer, "No answer found." when the graph result has
        no "answer" key, a prompt to enter a question when the input is blank,
        or "Error: <details>" when the graph raises.
    """
    # Skip the retrieval/LLM round trip for blank input
    if not question or not question.strip():
        return "Please enter a question."
    try:
        response = graph.invoke({"question": question})
        return response.get("answer", "No answer found.")
    except Exception as e:
        return f"Error: {e}"

In [None]:
# Gradio Interface: one tab ingests PDFs, the other answers questions about them
with gr.Blocks() as demo:
    with gr.Tab("Upload Files"):
        upload_file = gr.File(label="Upload PDF File")
        upload_output = gr.Textbox(label="Upload Status", interactive=False)
        upload_button = gr.Button("Upload and Process")

    with gr.Tab("Ask Questions"):
        question_input = gr.Textbox(label="Ask a Question")
        answer_output = gr.Textbox(label="Answer", interactive=False)
        question_button = gr.Button("Get Answer")

    # save_file's status message is shown in the upload status box
    upload_button.click(
        fn=save_file,
        inputs=upload_file,
        outputs=upload_output,
    )
    # chatbot_interface's return value is shown in the answer box
    question_button.click(
        fn=chatbot_interface,
        inputs=question_input,
        outputs=answer_output,
    )

demo.launch()
