In [None]:
import os
from dotenv import load_dotenv
from google import genai
from google.genai import types

# Embedding model identifiers, keyed by a short label.
embedding_models = {
	"Legacy_1": "models/embedding-001",
	"Legacy_2": "models/text-embedding-004",
	"Current": "models/gemini-embedding-001",
}

# Chat model identifiers, keyed by a short label.
chat_models = {
	"FLASH_LATEST": "gemini-2.5-flash-preview-09-2025",
	"PRO": "gemini-2.5-pro",
}


class GeminiModel:
	"""Small wrapper around a `genai.Client` for chat and embedding calls.

	Also provides two static helpers that rewrite questions of the form
	"Does the privacy policy affirm ...?" into declarative statements.
	"""

	DEFAULT_CHAT_MODEL = chat_models["FLASH_LATEST"]
	DEFAULT_EMBEDDING_MODEL = embedding_models["Current"]
	# Task type used by getSemanticEmbedding when the caller passes None.
	# "SEMANTIC_SIMILARITY" was the alternative noted in the original code.
	DEFAULT_TASK_TYPE = "FACT_VERIFICATION"

	def __init__(self, *, api_key=None):
		"""Create the underlying client.

		Args:
			api_key: API key to use. When None, the key is taken from the
				GEMINI_API_KEY environment variable (after loading a .env
				file); see getEnvAPIKey.
		"""
		# getEnvAPIKey already calls load_dotenv(), so it is not repeated here.
		GOOGLE_API_KEY = self.getEnvAPIKey() if api_key is None else api_key
		self.client = genai.Client(api_key=GOOGLE_API_KEY)

	def getEnvAPIKey(self):
		"""Load .env via load_dotenv() and return GEMINI_API_KEY (None if unset)."""
		load_dotenv()
		return os.environ.get("GEMINI_API_KEY")

	def startChat(self, model_name=None):
		"""Create and return a chat via `client.chats.create`.

		Args:
			model_name: Model identifier; defaults to DEFAULT_CHAT_MODEL.
		"""
		if model_name is None:
			model_name = self.DEFAULT_CHAT_MODEL

		chat = self.client.chats.create(model=model_name)
		return chat

	def send_prompt(self, chat, prompt):
		"""Send `prompt` through `chat.send_message` and return the response's `.text`."""
		response = chat.send_message(prompt)
		return response.text

	def getSemanticEmbedding(self, semantic_datum, *, model_name=None, task_type=None):
		"""Embed `semantic_datum` and return the values of the first embedding.

		Args:
			semantic_datum: Content passed as `contents` to `embed_content`.
			model_name: Embedding model; defaults to DEFAULT_EMBEDDING_MODEL.
			task_type: Task type placed in the EmbedContentConfig. None does
				NOT mean "no task type": it is replaced by DEFAULT_TASK_TYPE.

		Returns:
			`result.embeddings[0].values` from the embed_content response.
		"""
		if task_type is None:
			task_type = self.DEFAULT_TASK_TYPE
		if model_name is None:
			model_name = self.DEFAULT_EMBEDDING_MODEL
		result = self.client.models.embed_content(
			model=model_name,
			contents=semantic_datum,
			config=types.EmbedContentConfig(task_type=task_type),
		)
		return result.embeddings[0].values

	@staticmethod
	def _statementBody(statement, prefix):
		"""Return `statement` without its leading `prefix` and trailing "?"/"."/whitespace.

		The prefix is matched case-insensitively after stripping surrounding
		whitespace from `statement`.

		Raises:
			ValueError: if `statement` does not start with `prefix`.
		"""
		text = statement.strip()
		if text[: len(prefix)].casefold() != prefix.casefold():
			raise ValueError(f"Expected a question starting with {prefix!r}, got {statement!r}")
		# Only drop terminal punctuation; the original sliced off the last
		# character unconditionally, which ate a letter when "?" was missing.
		return text[len(prefix) :].rstrip("?. \t\r\n")

	@staticmethod
	def preprocessStatement(statement):
		"""Rewrite "Does the privacy policy affirm X?" as "The privacy policy affirms X.".

		Raises:
			ValueError: if `statement` does not start with the expected prefix.
		"""
		prefix = "Does the privacy policy affirm"
		replacement = "The privacy policy affirms"
		processed_text = replacement + GeminiModel._statementBody(statement, prefix) + "."

		return processed_text

	@staticmethod
	def preprocessStatement_alt(statement):
		"""Rewrite "Does the privacy policy affirm that X?" as "X." with X's first letter upper-cased.

		Raises:
			ValueError: if `statement` does not start with the expected prefix.
		"""
		prefix = "Does the privacy policy affirm that "
		body = GeminiModel._statementBody(statement, prefix)
		# Upper-case only the first character: str.capitalize() would also
		# lower-case the rest, mangling acronyms and proper nouns.
		processed_text = body[:1].upper() + body[1:] + "."
		return processed_text

In [None]:
import numpy as np


# Questions under comparison. question_1_prime says "user data" where
# question_1 says "personal data"; question_2 says "processing is" where
# question_1 says "transfers are".
question_1 = "Does the privacy policy affirm that personal data transfers are automatically consented to by using the service?"
question_1_prime = "Does the privacy policy affirm that user data transfers are automatically consented to by using the service?"
question_2 = "Does the privacy policy affirm that personal data processing is automatically consented to by using the service?"
print("Original Qs:", "---", question_1, question_1_prime, question_2, "---", sep="\n", end="\n\n")

# Preprocessing A: GeminiModel.preprocessStatement applied to each question.
s_1, s_1_p, s_2 = (
	GeminiModel.preprocessStatement(question)
	for question in (question_1, question_1_prime, question_2)
)
print("Preprocessing A:", "---", s_1, s_1_p, s_2, "---", sep="\n", end="\n\n")

# Preprocessing B: GeminiModel.preprocessStatement_alt applied to each question.
s_1_2, s_1_p_2, s_2_2 = (
	GeminiModel.preprocessStatement_alt(question)
	for question in (question_1, question_1_prime, question_2)
)
print("Preprocessing B:", "---", s_1_2, s_1_p_2, s_2_2, "---", sep="\n", end="\n\n")

# No api_key is passed, so the constructor looks the key up itself.
model = GeminiModel()


def process_sim(emb_a, emb_b):
	"""Return the cosine similarity of two embedding vectors.

	Args:
		emb_a: First vector (any sequence convertible to a numeric array).
		emb_b: Second vector, same length as `emb_a`.

	Returns:
		dot(a, b) / (|a| * |b|). When either vector has zero norm the
		similarity is undefined; 0.0 is returned by convention instead of
		dividing by zero (which previously produced NaN and a RuntimeWarning).
	"""
	vec_a = np.asarray(emb_a, dtype=float)
	vec_b = np.asarray(emb_b, dtype=float)
	norm_product = np.linalg.norm(vec_a) * np.linalg.norm(vec_b)
	if norm_product == 0:
		return 0.0
	return np.dot(vec_a, vec_b) / norm_product


# Task type forwarded to every embedding request below. The trailing notes on
# each candidate are the figures recorded by the original author.
# NOTE(review): getSemanticEmbedding replaces a None task_type with
# "FACT_VERIFICATION", so None here currently runs the same task type as the
# FACT_VERIFICATION candidate; the recorded figures may predate that default — confirm.
task_type = None	# Preprocessing B comparison 0.028234191625243987, however None give 1,1_prime of 99.2%
# task_type = "QUESTION_ANSWERING"	# Preprocessing B  0.06292713395639515 , similar to None = 0.06241341674965495
# task_type = (
# 	"FACT_VERIFICATION"	# Preprocessing B 0.06857297559057307, None  0.04625901551552625
# )
# # task_type = "CLASSIFICATION"	# Preprocessing B 0.051221128737101496, None
# # task_type = "CLUSTERING"	# B 0.02915895877534147, A is very bad 0.009454020561961651
# # task_type = "RETRIEVAL_DOCUMENT"	# None  0.03241810464957673
# task_type = "RETRIEVAL_QUERY"	# B  0.062026106845692985

# --- Unprocessed questions ---
q_1_embed, q_1_prime_embed, q_2_embed = (
	model.getSemanticEmbedding(text, task_type=task_type)
	for text in (question_1, question_1_prime, question_2)
)
sim_a = process_sim(q_1_embed, q_1_prime_embed)
sim_b = process_sim(q_1_embed, q_2_embed)
print(
	"Original Q comparison:",
	"---",
	f"1 vs 1 prime: {sim_a}",
	f"1 vs 2: {sim_b}",
	f"delta = {sim_a-sim_b}",
	"---",
	sep="\n",
	end="\n\n",
)

# --- Preprocessing A statements (header is printed before the requests run) ---
print("Preprocessing A comparison:", "---", sep="\n")
s_1_embed, s_1_prime_embed, s2_embed = (
	model.getSemanticEmbedding(text, task_type=task_type)
	for text in (s_1, s_1_p, s_2)
)
sim_c = process_sim(s_1_embed, s_1_prime_embed)
sim_d = process_sim(s_1_embed, s2_embed)
print(
	f"statement 1 vs 1 prime: {sim_c}",
	f"statement 1 vs 2: {sim_d}",
	f"delta = {sim_c-sim_d}",
	"---",
	sep="\n",
	end="\n\n",
)

# --- Preprocessing B statements (header is printed before the requests run) ---
print("Preprocessing B comparison:", "---", sep="\n")
s_1_embed_2, s_1_prime_embed_2, s2_embed_2 = (
	model.getSemanticEmbedding(text, task_type=task_type)
	for text in (s_1_2, s_1_p_2, s_2_2)
)
sim_c_2 = process_sim(s_1_embed_2, s_1_prime_embed_2)
sim_d_2 = process_sim(s_1_embed_2, s2_embed_2)
print(
	f"statement 1 vs 1 prime: {sim_c_2}",
	f"statement 1 vs 2: {sim_d_2}",
	f"delta = {sim_c_2-sim_d_2}",
	"---",
	sep="\n",
	end="\n\n",
)

Original Qs:
---
Does the privacy policy affirm that personal data transfers are automatically consented to by using the service?
Does the privacy policy affirm that user data transfers are automatically consented to by using the service?
Does the privacy policy affirm that personal data processing is automatically consented to by using the service?
---

Preprocessing A:
---
The privacy policy affirms that personal data transfers are automatically consented to by using the service.
The privacy policy affirms that user data transfers are automatically consented to by using the service.
The privacy policy affirms that personal data processing is automatically consented to by using the service.
---

Preprocessing B:
---
Personal data transfers are automatically consented to by using the service.
User data transfers are automatically consented to by using the service.
Personal data processing is automatically consented to by using the service.
---

Original Q comparison:
---
1 vs 1 prime: 