In [None]:
!pip install --upgrade google-cloud-aiplatform


In [None]:
import sys

# Run the interactive Google sign-in only when the notebook is hosted in
# Colab, detected by the google.colab module already being loaded.
if "google.colab" in sys.modules:
    from google.colab import auth as google_auth

    google_auth.authenticate_user()

In [None]:
# GCP
import vertexai
from vertexai.preview.generative_models import GenerativeModel, Part

# Colab form fields for the target project and region.
# NOTE(review): "Project_id" looks like a placeholder — replace it with a real
# project ID before running.
PROJECT_ID = "Project_id"   # @param {type: "string"}
LOCATION = "us-central1"  # @param {type: "string"}

# Initialise the Vertex AI SDK with the project and region chosen above.
vertexai.init(project=PROJECT_ID, location=LOCATION)

# Prompt and model used when generate() is called without arguments.
DEFAULT_PROMPT = """Answer the question: Who is the killer of John?
Based on the context: John died due to heart attack"""
DEFAULT_MODEL_NAME = "gemini-pro-vision"


def generate(prompt=DEFAULT_PROMPT, model_name=DEFAULT_MODEL_NAME):
    """Stream a model completion for ``prompt`` and print it as it arrives.

    Args:
        prompt: Text sent to the model. Defaults to the demo question that
            was previously hard-coded in this function.
        model_name: Name passed to ``GenerativeModel``. Defaults to
            "gemini-pro-vision".
            NOTE(review): the default prompt is text-only and the QA chain
            later in this notebook uses "gemini-pro" — confirm which model
            is intended here.

    Returns:
        None. The text of each streamed chunk is written to stdout with no
        separator and no trailing newline.
    """
    model = GenerativeModel(model_name)
    responses = model.generate_content(
        prompt,
        generation_config={
            "max_output_tokens": 2048,
            "temperature": 0.9,
            "top_p": 1,
        },
        safety_settings=[],
        stream=True,
    )

    # stream=True yields partial responses; print them back-to-back so the
    # output reads as one continuous answer.
    for response in responses:
        print(response.text, end="")


print("Calling generate")
generate()

Calling generate
The provided context does not mention anything about a killer, therefore I cannot answer this question.

In [None]:

# Install langchain and related libraries
!pip install langchain unstructured[pdf]


In [None]:

# Using Google Cloud Storage Directory loader from langchain
from langchain.document_loaders import GCSDirectoryLoader

In [None]:
# Load the contents of the "empdocs" bucket through the LangChain GCS loader.
loader = GCSDirectoryLoader(
    project_name=PROJECT_ID,
    bucket="empdocs",
)
documents = loader.load()

# Last expression of the cell: shows how many documents were loaded.
len(documents)

3

In [None]:
# Break the loaded documents into overlapping chunks.
from langchain.text_splitter import RecursiveCharacterTextSplitter

# Splitter settings, named so they are easy to find and tune.
CHUNK_SIZE = 10000
CHUNK_OVERLAP = 1000

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
docs = text_splitter.split_documents(documents)
print(f"# of documents = {len(docs)}")

# of documents = 15


In [None]:
from langchain.embeddings import VertexAIEmbeddings

# Request-rate setting handed to the embeddings client below.
REQUESTS_PER_MINUTE = 590

# Embedding model used to vectorise the document chunks.
embedding = VertexAIEmbeddings(
    model_name="textembedding-gecko@001",
    requests_per_minute=REQUESTS_PER_MINUTE,
)


In [None]:
# Store docs in local vectorstore as index
!pip install -q chromadb


[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m509.0/509.0 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.4/2.4 MB[0m [31m22.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m92.0/92.0 kB[0m [31m7.6 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m60.7/60.7 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.8/40.8 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.4/5.4 MB[0m [31m45.8 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m57.9/57.9 kB[0m [31m4.9 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m105.6/105.6 kB[0m [31m11.7 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━

In [None]:

# Chroma DB as Vector Store Database
from langchain.vectorstores import Chroma

# Build the vector store from the document chunks using the embedding model.
emphandbook_db = Chroma.from_documents(documents=docs, embedding=embedding)

In [None]:
# Wrap the vector store in a retriever that performs a similarity search
# with k=6.
retriever = emphandbook_db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 6},
)

In [None]:
# Create chain to answer questions
from langchain.chains import RetrievalQA
from langchain.llms import VertexAI

# LLM used to write the final answer: the "gemini-pro" model via Vertex AI.
llm = VertexAI(
    model_name="gemini-pro",
    max_output_tokens=256,
    temperature=0.1,
    top_p=0.8,
    top_k=40,
    verbose=True,
)

# Uses the LLM to synthesize an answer from the chunks returned by the
# retriever; the retrieved source documents are returned with the answer.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    return_source_documents=True,
)


In [None]:
query = "Think through the steps before you answer this question: How many days of vacation does an employee get"
result = qa({"query": query})

# Echo the question, then the synthesized answer.
print(result["query"])
print(result["result"])

# Print each retrieved chunk followed by its recorded source.
for source_doc in result["source_documents"]:
    print(source_doc.page_content)
    print(source_doc.metadata["source"])



In [None]:
# Show which fields the QA chain returned. (Calling dir() on the bound
# `result.values` method only lists attributes of the method object itself.)
list(result.keys())