In [1]:
from vertexai import rag
from vertexai.generative_models import GenerativeModel, Tool
import vertexai

In [6]:
# Notebook parameters; the trailing `@param` markers are kept so form-style
# notebook front ends can still expose these values as editable fields.
PROJECT_ID = "kevin-ai-playground"  # @param {type:"string"}
REGION = "asia-northeast3"  # @param {type:"string"}
CORPUS_ID = "5764607523034234880"  # @param {type:"string"}

In [3]:
# Set up the Vertex AI SDK for this kernel session using the project and
# region from the parameters cell.
# NOTE: PROJECT_ID and REGION are read when this call runs, so re-run this
# cell after changing either of them.
vertexai.init(
    project=PROJECT_ID,
    location=REGION,
)

In [8]:
# Assemble the corpus resource name from the notebook parameters and echo it
# so the reader can check it before any API call uses it.
CORPUS_NAME = "projects/{}/locations/{}/ragCorpora/{}".format(
    PROJECT_ID, REGION, CORPUS_ID
)
print(CORPUS_NAME)

projects/kevin-ai-playground/locations/asia-northeast3/ragCorpora/5764607523034234880


In [9]:
# Fetch the already-existing RagCorpus via its full resource name, then echo
# the name and display name reported on the returned object.
rag_corpus = rag.get_corpus(name=CORPUS_NAME)
print(
    f"Successfully retrieved RAG Corpus: {rag_corpus.name}",
    f"Display Name: {rag_corpus.display_name}",
    sep="\n",
)

Successfully retrieved RAG Corpus: projects/kevin-ai-playground/locations/asia-northeast3/ragCorpora/5764607523034234880
Display Name: parsed_jsonl_docs


In [10]:
# List the files registered in the corpus; left as a bare expression so the
# returned pager is rendered as the cell's output.
rag.list_files(
    corpus_name=CORPUS_NAME,
)

ListRagFilesPager<rag_files {
  name: "projects/834471899683/locations/asia-northeast3/ragCorpora/5764607523034234880/ragFiles/5570831911093208782"
  display_name: "parsed_docs.jsonl"
  create_time {
    seconds: 1763606542
    nanos: 358625000
  }
  update_time {
    seconds: 1763606542
    nanos: 358625000
  }
  gcs_source {
    uris: "gs://kevin-vertexai-002/parsed_docs.jsonl"
  }
  file_status {
    state: ACTIVE
  }
}
>

In [11]:
# Retrieval settings for direct context retrieval; the same object is also
# passed to the VertexRagStore behind the grounding tool later in the notebook.
rag_retrieval_config = rag.RagRetrievalConfig(
    top_k=20,  # Optional
    filter=rag.Filter(vector_distance_threshold=0.3),  # Optional
    # Rerank with a Gemini model. Gemini model names belong on `LlmRanker`;
    # `RankService` targets the Vertex AI ranking API, whose models are the
    # `semantic-ranker-*` family, so a Gemini name there is a mismatch.
    ranking=rag.Ranking(
        llm_ranker=rag.LlmRanker(
            model_name="gemini-2.5-flash"
        )
    ),
)

In [12]:
# Query the corpus directly (no generation step) and print the raw response
# so the retrieved contexts can be inspected.
response = rag.retrieval_query(
    text="Did the typical family net worth increase? If so, by how much?",
    rag_retrieval_config=rag_retrieval_config,
    rag_resources=[
        # To restrict the search to particular files, also pass
        # `rag_file_ids=[...]` using IDs taken from `rag.list_files()`.
        rag.RagResource(rag_corpus=rag_corpus.name),
    ],
)
print(response)

contexts {
  contexts {
    source_uri: "gs://kevin-vertexai-002/parsed_docs.jsonl"
    text: "uri gs://kevin-ai-playground-34068/scf23.pdf\nid c24\ncontent # Net Worth\n\nThe net improvements in economic perfor- mance, including rising house and corporate equity prices that well exceeded consumer price inflation, supported substantial increases in median and mean inflation- adjusted net worth-the difference between Figure 2. Change in median and mean family net worth, 2016–22 surveys 40 Percent 35 30 25 20 15 10 5 0 2016-19 Median families\' assets and liabilities-between 2019 and 2022 (figure 2).18 Specifically, real median net worth surged 37 percent to $192,900, and real mean net worth increased 23 percent to $1,063,700 (table 2), acceler- ating the steady growth experienced over the 2013-19 period.19 The 2019–22 changes imply some narrowing of the wealth distribu- tion between surveys. Indeed, growth in median net worth was the largest increase over the history of the modern SCF, 

In [14]:
# Build a retrieval Tool backed by the RAG corpus, reusing the retrieval
# config defined earlier; the tool is handed to the GenerativeModel below.
rag_retrieval_tool = Tool.from_retrieval(
    retrieval=rag.Retrieval(
        source=rag.VertexRagStore(
            rag_retrieval_config=rag_retrieval_config,
            rag_resources=[rag.RagResource(rag_corpus=CORPUS_NAME)],
        ),
    ),
)


In [15]:
# Gemini model with the RAG retrieval tool attached via `tools`.
rag_model = GenerativeModel(
    model_name="gemini-2.5-flash",
    tools=[rag_retrieval_tool],
)



In [18]:
# Send one question to the tool-equipped model and print only the text of
# its reply.
response = rag_model.generate_content(
    "Did the amount of debt families own on their home increase between 2019 and 2022?"
)
print(response.text)

The amount of debt families owned on their primary residence was largely unchanged between 2019 and 2022. The median amount of this debt decreased by less than 1 percent to $155,600 in 2022, while the mean amount grew by 1 percent to $212,400.
