In [1]:
! pip install -qU google-cloud-aiplatform langchain chromadb pypdf transformers gradio

In [1]:
from google.cloud import aiplatform
# Point the Vertex AI SDK at a Google Cloud project (ID or number) and region.
# If `project` is not set the environment default is used; `location`
# defaults to us-central1.
aiplatform.init(project='ibm-keras', location='us-central1')

In [2]:
import gradio as gr
import vertexai
from google.cloud import aiplatform
from langchain.llms import VertexAI
from langchain.document_loaders import PyPDFLoader
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import VertexAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA

In [3]:
import vertexai

# Google Cloud project and region that the Vertex AI SDK is initialised with.
PROJECT_ID = "ibm-keras"
REGION = "us-central1"
vertexai.init(location=REGION, project=PROJECT_ID)

# Text model used by the QA chain to generate answers.
llm = VertexAI(
    model_name="text-bison@001",
    temperature=0.1,
    top_k=40,
    top_p=0.8,
    max_output_tokens=256,
    verbose=True,
)

# Embedding model used to vectorise the PDF chunks for the vector store.
embeddings = VertexAIEmbeddings(model_name="textembedding-gecko@001")

In [4]:
def upload_sec_file_to_vector_db(fileUrl, query, chunk_size=1500, chunk_overlap=0, k=2):
  """Load a PDF, index its chunks in Chroma, and answer `query` against it.

  Args:
    fileUrl: Location of the PDF, passed to `PyPDFLoader` after stripping
      surrounding whitespace.
    query: Question to ask about the document.
    chunk_size: Maximum characters per chunk given to the text splitter.
    chunk_overlap: Characters shared between neighbouring chunks.
    k: Number of most-similar chunks handed to the LLM.

  Returns:
    The RetrievalQA output for `query`; because the chain is built with
    `return_source_documents=True` it includes the retrieved source chunks
    alongside the generated answer.

  Raises:
    ValueError: If `fileUrl` or `query` is empty/blank, or the PDF yields no
      chunks to index.
  """
  import uuid  # local import keeps this cell self-contained

  # Validate up front so bad UI input fails before any download or API call.
  source = (fileUrl or "").strip()
  if not source:
    raise ValueError("A PDF URL or file path is required.")
  if not query or not query.strip():
    raise ValueError("A non-empty query is required.")

  pages = PyPDFLoader(source).load()
  splitter = RecursiveCharacterTextSplitter(
      chunk_size=chunk_size, chunk_overlap=chunk_overlap
  )
  docs = splitter.split_documents(pages)
  print(f"# of documents = {len(docs)}")
  if not docs:
    raise ValueError(f"No extractable text found in PDF: {source}")

  # Use a per-call collection: without an explicit name every call writes to
  # Chroma's default collection, so chunks from previously indexed PDFs could
  # be retrieved for later, unrelated queries in the same kernel.
  db = Chroma.from_documents(
      docs, embeddings, collection_name=f"sec-{uuid.uuid4().hex}"
  )
  try:
    retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": k})
    # Uses Vertex PaLM Text API for LLM to synthesize results from the search index.
    qa = RetrievalQA.from_chain_type(
        llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True
    )
    return qa({"query": query})
  finally:
    # Drop the throwaway collection so repeated calls do not accumulate data.
    db.delete_collection()

In [None]:
def upload_and_chat_pdf(file_url, query):
    """Gradio callback: answer `query` about the PDF at `file_url` as plain text.

    Delegates to `upload_sec_file_to_vector_db` and returns only the generated
    answer. The QA chain returns a dict (answer plus source documents); handing
    that dict to a "text" output would render its raw string form in the UI.

    Args:
        file_url: Location of the PDF to load.
        query: Question to ask about the document.

    Returns:
        The answer text, stripped of surrounding whitespace.
    """
    result = upload_sec_file_to_vector_db(file_url, query)
    # RetrievalQA stores the generated answer under the "result" key; fall back
    # to the whole value if the shape is ever different.
    answer = result.get("result", result) if isinstance(result, dict) else result
    return str(answer).strip()

# Two free-text inputs (PDF location, question) wired to a single text output.
sec_file_chat_app = gr.Interface(
    fn=upload_and_chat_pdf,
    inputs=[gr.Textbox(lines=3, placeholder="Please Enter the URL of the SEC Filing Here !"),
            gr.Textbox(lines=1, placeholder="Please Enter Your SEC File Query Here !")],
    outputs="text",
)
# NOTE(review): share=True requests a publicly reachable link, so anyone with
# the URL can trigger model calls against this project — confirm this is
# intended before running outside a demo. debug=True keeps the cell attached
# to the server so errors are shown in the notebook output.
sec_file_chat_app.launch(share=True,debug=True)