# Setup

In [None]:
# %pip install -r requirements.txt

# Environment Variables

In [None]:
# Get environment variables
#
# Later cells in this notebook read PROJECT_ID, GOOGLE_API_KEY, LOCATION_ID,
# DATA_STORE_ID and GOOGLE_LLM_MODEL_NAME from os.environ, so they must be
# defined either in the .env file or in the process environment.

from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Google Colab Auth

In [None]:
# Detect the runtime: Google Colab or a local Python environment

import sys

if "google.colab" not in sys.modules:
	# Outside Colab this cell performs no authentication itself; credentials
	# are presumably supplied through GOOGLE_APPLICATION_CREDENTIALS pointing
	# at the service account credentials file.
	print("Running locally")
else:
	# Inside Colab, use the permissions of the currently authenticated user
	print('Running in Google Colab')

	from google.colab import auth

	auth.authenticate_user()

# Utilities

In [None]:
# Import the utils module

from utils import *

# Environment Checks

In [None]:
# Check GCP permissions

import os

import google.auth
from google.cloud import storage

PROJECT_ID = os.environ["PROJECT_ID"]
creds, _ = google.auth.default(quota_project_id=PROJECT_ID)

# Pass the project explicitly: without it the client falls back to whatever
# project it can infer from the environment, which may be a different project
# or none at all (in which case the client cannot be created).
client = storage.Client(project=PROJECT_ID, credentials=creds)

# List all buckets in PROJECT_ID; this raises if the credentials lack access
buckets = list(client.list_buckets())
print(buckets)

In [None]:
# Check Google API key

import os

import requests

GOOGLE_API_KEY = os.environ['GOOGLE_API_KEY']
url = 'https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent'

# Send the key in a header rather than in the query string so that it does
# not end up in printed URLs, exception messages or proxy logs.
headers = {
	"Content-Type": "application/json",
	"x-goog-api-key": GOOGLE_API_KEY,
}

data = {"contents":[{"parts":[{"text":"What was Uber's annual revenue for 2022?"}]}]}

# A timeout keeps the cell from hanging forever if the endpoint is unreachable
response = requests.post(url, headers=headers, json=data, timeout=60)
print(response.text)

In [None]:
# Check Vertex AI Agent Builder data store

import json

import google.auth
from google.auth.transport.requests import Request
import requests

def query_chunks(query: str, page_size: int, access_token: str) -> dict:
	"""Search the Vertex AI Agent Builder data store in chunk mode.

	The target data store is taken from the PROJECT_ID, LOCATION_ID and
	DATA_STORE_ID environment variables.

	Args:
		query: The search query text.
		page_size: Value sent as "pageSize" in the search request.
		access_token: OAuth access token sent as a Bearer token.

	Returns:
		The decoded JSON body of the search response. On a non-200 status the
		error is printed and the decoded body is still returned.

	Raises:
		KeyError: If one of the required environment variables is missing.
	"""
	project_id = os.environ['PROJECT_ID']
	location_id = os.environ['LOCATION_ID']
	data_store_id = os.environ['DATA_STORE_ID']

	# Multi-region data stores are served from a location-prefixed host;
	# everything else goes through the global endpoint.
	if location_id in ('us', 'eu'):
		api_endpoint = f'{location_id}-discoveryengine.googleapis.com'
	else:
		api_endpoint = 'discoveryengine.googleapis.com'

	serving_config = f"projects/{project_id}/locations/{location_id}/collections/default_collection/dataStores/{data_store_id}/servingConfigs/default_search"

	url = f"https://{api_endpoint}/v1alpha/{serving_config}:search"
	print(url)

	headers = {
		"Authorization": f"Bearer {access_token}",
		"Content-Type": "application/json",
	}

	post_data = {
		"servingConfig": serving_config,
		"pageSize": page_size,
		"query": query,
		# Ask for matching chunks instead of whole documents
		"contentSearchSpec": {"searchResultMode": "CHUNKS"},
	}

	# A timeout keeps the call from blocking forever on a stalled connection
	response = requests.post(url, headers=headers, json=post_data, timeout=60)

	if response.status_code != 200:
		print(f"Error retrieving search results: {response.status_code} - {response.text}")

	return response.json()

#####

def test_query_chunks():
	"""Run one sample search against the data store and print the JSON result.

	Obtains an access token from the application default credentials and
	passes it to query_chunks.
	"""
	# Request the cloud-platform scope explicitly: service account credentials
	# cannot be refreshed into an access token without a scope.
	creds, _ = google.auth.default(scopes=["https://www.googleapis.com/auth/cloud-platform"])
	creds.refresh(Request())
	access_token = creds.token
	# The token is a live credential, so it is deliberately not printed:
	# notebook output is saved and shared together with the notebook.
	response = query_chunks(query = "What is the annual revenue of Uber?", page_size=3, access_token=access_token)
	print(json.dumps(response, indent=4))

test_query_chunks()

# Test 2 - Gemini and Vertex AI

In [None]:
# Create chain using Vertex AI Agent Builder datastore
#
# create_retriever_vertexai, create_chat_prompt_template and
# create_chain_vertexai are not defined in this notebook; presumably they come
# from the wildcard import of the utils module.

# NOTE: the variable name is misspelled ("retreiver"); it is kept as is
# because it is a notebook global.
retreiver = create_retriever_vertexai()

chat_prompt_template = create_chat_prompt_template()

# The model name is read from the GOOGLE_LLM_MODEL_NAME environment variable
chain = create_chain_vertexai(model_name=os.environ["GOOGLE_LLM_MODEL_NAME"], prompt_template=chat_prompt_template, retriever=retreiver)

In [None]:
# Test the chain with a few questions
#
# Each question is sent to the chain on its own; the raw result and the
# content of its "response" entry are printed for manual inspection.

questions = [
	"What is the annual revenue of Uber?",
	"What is the annual revenue of Lyft?",
	"How does Uber's revenue compare to Lyft's revenue?",
	"Summarize Lyft's risk factors",
	]

for question in questions:
	print(question)
	# The chain is invoked with a dict carrying the text under "question"
	result = chain.invoke({"question" : question})
	print(result)
	# result["response"] is presumably the model's answer message; its
	# .content attribute is printed separately for readability
	print(result["response"].content)
	# Separator between questions
	print("\n*****")

In [None]:
# Evaluate the chain using Ragas

import os

ragas_results, ragas_results_df = run_ragas_evaluation(
	chain,
	testset_name="testsets/unittest_testset-gemini-2.0.csv",
	eval_metrics=[
		answer_correctness,
		answer_relevancy,
		context_precision,
		context_recall,
		faithfulness,
	],
	use_google=True,
)

# Make sure the output directory exists: to_csv does not create missing
# directories, and a failure here would discard the evaluation just computed.
os.makedirs("evaluations", exist_ok=True)

# Write the results to disk
ragas_results_df.to_csv(f"evaluations/10x_test2_testset_evaluation_{get_time_string()}.csv")

# Show the results
print(ragas_results)

In [None]:
# Build a heatmap showing the Ragas results

import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap

# One column per Ragas metric, one row per evaluated question
heatmap_data = ragas_results_df[
	[
		"answer_correctness",
		"answer_relevancy",
		"context_precision",
		"context_recall",
		"faithfulness",
	]
]

# Colour scale running from red (low values) to green (high values)
cmap = LinearSegmentedColormap.from_list("green_red", ["red", "green"])

# Row labels: the first 100 characters of each question
row_labels = ragas_results_df["user_input"].str[:100].tolist()

plt.figure(figsize=(10, 8))
# Hand the labels to seaborn so they are centred on the rows; ticks placed at
# 0, 1, 2, ... would sit on the cell edges instead of the cell centres.
sns.heatmap(heatmap_data, annot=True, fmt=".2f", linewidths=0.5, cmap=cmap, yticklabels=row_labels)
plt.yticks(rotation=0)
plt.show()