In [1]:
import vertexai
from vertexai.preview import rag

# Bind the SDK to the target project, then create the RAG corpus that the
# rest of the notebook imports into and queries.
vertexai.init(project="andrewcooley-test-project")

rag_corpus = rag.create_corpus(display_name="andrewcooley-corpus-1")

In [2]:
# Keep the corpus resource name; the later rag.* calls in this notebook
# identify the corpus through this value.
corpus_name = rag_corpus.name

In [None]:
response = await rag.import_files_async(
    corpus_name=corpus_name,
    paths=["gs://andrewcooley-genai-tests/rag_data"],
    chunk_size=1024,
    chunk_overlap=200,
)

In [None]:
response.running

In [None]:
print(f"Imported {response.imported_rag_files_count} files.")

In [3]:
# Synchronous import of the Cloud Storage folder into the corpus; the returned
# value is inspected in the next cell.
result = rag.import_files(corpus_name=corpus_name, paths=["gs://andrewcooley-genai-tests/rag_data"])

In [4]:
# Show the value returned by the synchronous import (includes the file count).
result

imported_rag_files_count: 176

In [5]:
# Collect every file listed for the corpus into a list so it can be counted;
# the count is the cell's displayed value.
rag_files = [rag_file for rag_file in rag.list_files(corpus_name=corpus_name)]
len(rag_files)

176

In [6]:
# Retrieval only (no generation): query the corpus directly and keep the raw
# response for inspection in the next cell.
text = "2013 earnings"
response = rag.retrieval_query(
    text=text,
    rag_corpora=[corpus_name],
    similarity_top_k=5,
    # vector_distance_threshold=0.5,  # optional distance cut-off, left disabled
)

In [7]:
# Display the raw retrieval response (source URIs and chunk text).
response

contexts {
  contexts {
    source_uri: "gs://andrewcooley-genai-tests/rag_data/2014Q3_google_earnings_release.pdf"
    text: "except share amounts \r\nwhich are reflected in thousands and per share amounts, unaudited):\r\nThree Months Ended September 30, 2013 Three Months Ended September 30, 2014\r\nGAAP\r\nActual\r\nOperating\r\nMargin (a) Adjustments\r\nNon￾GAAP\r\nResults\r\nNon￾GAAP\r\nOperating\r\nMargin (e)\r\nGAAP\r\nActual\r\nOperating\r\nMargin (a) Adjustments\r\nNon￾GAAP\r\nResults\r\nNon-GAAP\r\nOperating\r\nMargin (e)\r\n$ 856 (b) $ 1,255 (f)\r\n— 378 (g)\r\nIncome from\r\noperations $ 3,761 27.3% $ 856 $ 4,617 33.6% $ 3,724 22.5% $ 1,633 $ 5,357 32.4%\r\n$ 856 (b) $ 1,255 (f)\r\n(200) (c) (258) (c)\r\n193 (d) 185 (d)\r\n— 378 (g)\r\nNet income $ 2,970 $ 849 $ 3,819 $ 2,813 $ 1,560 $ 4,373\r\nNet income per\r\nshare - diluted $ 4.38 $ 5.63 $ 4.09 $ 6.35\r\nShares used in per\r\nshare calculation -\r\ndiluted 678,470 678,470 688,215 688,215\r\n(a) Operating margin is define

In [8]:
import vertexai
from vertexai.preview.generative_models import Tool, Part, FinishReason, GenerativeModel
import vertexai.preview.generative_models as generative_models


In [9]:
# Tool list handed to the generative models below: a single retrieval tool
# backed by the RAG corpus, asking for the top 3 matches.
tools = [
    Tool.from_retrieval(
        retrieval=rag.Retrieval(
            source=rag.VertexRagStore(
                rag_corpora=[corpus_name],
                similarity_top_k=3,
            ),
        ),
    ),
    # Google Search grounding is left disabled:
    # Tool.from_google_search_retrieval(google_search_retrieval=generative_models.grounding.GoogleSearchRetrieval(disable_attribution=False))
]

In [21]:
# Gemini model wired to the retrieval tool, with a system instruction that
# allows falling back to the model's own knowledge.
model = GenerativeModel(
    "gemini-1.5-flash-preview-0514",
    tools=tools,
    system_instruction="If retrieved content does NOT contain relevant information, use your own knowledge to answer question.",
)

# Keep only the answer text and show it as the cell's output.
responses = model.generate_content("Has Alphabet been profitable recently?").text
responses


"In 2015, Alphabet, Google's parent company, reported net income of 21.8%. In 2017, Alphabet reported net income of $12.7 billion, with diluted net income per share of $18.00.  \n"

In [None]:
# Sampling settings for the RAG-backed model.
generation_config = {
  "temperature": 0.2,
  "max_output_tokens": 2048,
  "candidate_count": 1,
  "top_p": 0.1
}

# The config above was previously built but never handed to the model, so it
# had no effect; pass it explicitly, as the summary model further down does.
rag_model = GenerativeModel(
    model_name="gemini-1.5-flash-preview-0514",
    generation_config=generation_config,
    tools=tools
)

In [None]:
# Ask the RAG-backed model a question and keep only the text of its answer.
rag_response = rag_model.generate_content(
    "Any good pizza recommendations for Dallas, TX?"
).text

In [None]:
# Display the answer text produced by the RAG-backed model.
rag_response

In [None]:
# Answer a financial question with a plain (tool-less) model, feeding it
# context retrieved from the corpus.
generation_config = {
  "temperature": 0.2,
  "max_output_tokens": 2048,
  "candidate_count": 1,
  "top_p": 0.1
}

summary_model = GenerativeModel(
    generation_config=generation_config,
    model_name="gemini-1.5-flash-preview-0514"
)

# Retrieve context for the question that is actually being asked. Do not reuse
# `rag_response` here: it holds the answer to the pizza question, so the
# "table below" in the prompt would contain nothing about Google Cloud.
question = "was Google Cloud profitable in Q3 2023?"
retrieved = rag.retrieval_query(
    rag_corpora=[corpus_name],
    text=question,
    similarity_top_k=3,
)
# The retrieval response nests the chunks under contexts.contexts, each with a
# `text` field; join them into a single prompt section.
context_text = "\n\n".join(ctx.text for ctx in retrieved.contexts.contexts)

print(summary_model.generate_content(f"Use the table below to answer this question: {question}\n\n{context_text}").text)

In [None]:
# Retrieval only: fetch the two best-matching chunks for the FitBit question
# so they can be compared with the generated answer in a later cell.
text = "When did Google acquire FitBit?"
response = rag.retrieval_query(text=text, rag_corpora=[corpus_name], similarity_top_k=2)

In [None]:
# Display the raw retrieval response for the FitBit query.
response

In [None]:
# Same question as the retrieval cell above, this time answered by the
# RAG-backed model; only the answer text is printed.
text = "When did Google acquire FitBit?"
response = rag_model.generate_content(text)
print(response.text)

In [None]:
# Print every file currently listed for the corpus, one entry per iteration.
for rag_file in rag.list_files(corpus_name=corpus_name):
    print(rag_file)

In [None]:
# Clean-up: delete the corpus created at the top of the notebook. After this
# runs, the query and listing cells above cannot be re-run against
# `corpus_name` without creating a new corpus first.
rag.delete_corpus(corpus_name)