In [1]:
# Load environment variables from a .env file (via python-dotenv) so that
# the API key can be read from the process environment in the next cell.

import os
from dotenv import load_dotenv
load_dotenv()

True

In [2]:
# Retrieve the Google API key from the environment.
# os.getenv returns None when GOOGLE_API_KEY is not set.

google_api_key = os.getenv("GOOGLE_API_KEY")

In [3]:
# Validate the Google API key.
# A key that is unset (None), empty, or whitespace-only is unusable. An empty
# string is what os.getenv yields for a bare `GOOGLE_API_KEY=` entry, and the
# previous `is None` check reported that case as a successful load.

if not (google_api_key or "").strip():
    print("Google API key not found. Please set the GOOGLE_API_KEY environment variable.")
else:
    print("Google API key loaded successfully.")

Google API key loaded successfully.


In [45]:
#Import required Libraries

from llama_index.core import (VectorStoreIndex, SimpleDirectoryReader,
                              StorageContext, load_index_from_storage)
from llama_index.core import Settings
from llama_index.llms.google_genai import GoogleGenAI
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from sentence_transformers import SentenceTransformer
from google import genai
from IPython.display import Markdown, display

In [5]:
# Initialize the Google Generative AI client, authenticating with the key
# read from the environment into `google_api_key`.

client = genai.Client(api_key=google_api_key)

In [None]:
# List all available models together with their supported actions.
# The loop variable is deliberately not named `model`: loop variables stay in
# the notebook's global namespace, and `model` is the name bound to the LLM
# that is assigned to Settings.llm. Re-running this cell out of order would
# otherwise silently replace that LLM with a model-listing entry.

for available_model in client.models.list():
    display(Markdown(f" *Model*: {available_model.name} | {available_model.supported_actions}"))

In [None]:
# List only the models that support `generateContent`.
# The loop variable is not named `model` so it cannot clobber the notebook-level
# `model` (the LLM assigned to Settings.llm) when this cell is re-run.

for available_model in client.models.list():
    # Guard against entries whose supported_actions is None, which would make
    # the membership test raise TypeError.
    if 'generateContent' in (available_model.supported_actions or ()):
        display(Markdown(f" *Model*: {available_model.name} | {available_model.supported_actions}"))

In [33]:
# Create a directory reader over ../data. Note that `documents` holds the
# reader itself; the loaded documents come from calling load_data() on it.
documents = SimpleDirectoryReader(input_dir="../data")

In [34]:
# Load the documents through the reader; `docs` is indexable and each item
# exposes its content as `.text` (see the preview cell below).
docs = documents.load_data()

In [35]:
# Sanity check: render the first 600 characters of the first loaded document.
preview_markdown = Markdown(f"{docs[0].text[:600]}")
display(preview_markdown)

This dataset provides an extensive collection of general knowledge, formatted into detailed descriptive paragraphs, suitable for training or testing a comprehensive Question Answering (QA) system.

Category: History
History encompasses the systematic study of past events, particularly in human affairs, often relying on critical analysis of primary sources (such as documents, artifacts, and eyewitness accounts) and secondary sources (interpretations by historians), alongside archaeological findings and oral traditions. George Washington, a figure of immense significance in American history, 

In [16]:
# Gemini LLM wrapper; it is installed as Settings.llm further down.
# NOTE(review): no api_key is passed here — presumably the integration reads
# GOOGLE_API_KEY from the environment; confirm.
model = GoogleGenAI(model="gemini-2.5-flash")

In [None]:
# Hugging Face embedding model; it is installed as Settings.embed_model and
# also passed explicitly when the index is built.
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

In [47]:
# Global LlamaIndex defaults, grouped by concern.

# Chunking parameters.
Settings.chunk_size = 1000
Settings.chunk_overlap = 50

# Models: the LLM and the embedding model.
Settings.llm = model
Settings.embed_model = embed_model

In [50]:
# Build the vector index from the loaded documents.
# Only `embed_model` is forwarded. `VectorStoreIndex.from_documents` has no
# `model`, `chunk_size`, or `chunk_overlap` parameters, so the previous
# `model=model, chunk_size=5000, chunk_overlap=50` arguments were swallowed by
# **kwargs and never applied. They also contradicted the chunking configured on
# the global `Settings` object (chunk_size=1000, chunk_overlap=50), which is
# what actually governs splitting, along with Settings.llm.
index = VectorStoreIndex.from_documents(docs, embed_model=embed_model)

In [51]:
# Write the index to disk. "./storage" is the library's default persist
# directory, spelled out here so the output location is visible to the reader.
index.storage_context.persist(persist_dir="./storage")

In [52]:
# Create a query engine over the index using its default configuration.
query_engine = index.as_query_engine()

In [55]:
# Run a sample query against the index-backed query engine.
response = query_engine.query("What is the capital of France?")

In [61]:
# Render the most recent answer as Markdown.
answer_markdown = Markdown(f"*Query Response*: {response}")
display(answer_markdown)

*Query Response*: Paris is the capital and most populous city of France.

In [62]:
# Run a second sample query; this rebinds `response` to the new answer.
response = query_engine.query("What is Machine Learning?")

In [63]:
# Render the most recent answer as Markdown.
answer_markdown = Markdown(f"*Query Response*: {response}")
display(answer_markdown)

*Query Response*: Machine learning is a subset of artificial intelligence that enables systems to learn from data without explicit programming. It uses algorithms to identify patterns and make predictions.