In [9]:
import os
import numpy as np
from scipy.spatial.distance import cosine
from google import genai
from google.genai import types

def get_text_similarity(text1, text2, api_key=None):
    """
    Calculate the semantic similarity between two texts using Google's Generative AI 
    embedding model.
    
    Args:
        text1 (str): First text input
        text2 (str): Second text input
        api_key (str, optional): Your Google AI API key. If None, will look for
                                GOOGLE_API_KEY environment variable
    
    Returns:
        float: Similarity score between 0 and 1 (higher means more similar)
    """
    # Configure the API key
    if api_key is None:
        api_key = os.environ.get("GOOGLE_API_KEY")
    
    if not api_key:
        raise ValueError("API key must be provided either as an argument or as GOOGLE_API_KEY environment variable")
    
    client = genai.Client(api_key=api_key)
    
    # Get the embedding model
    
    # Generate embeddings for both texts
    response1 = client.models.embed_content(
        model='text-embedding-004',
        contents=text1,
    )
    print(response1)
    response2 = client.models.embed_content(
        model='text-embedding-004',
        contents=text2,
    )
        
    # Extract the embedding values
    vector1 = np.array(response1["embedding"])
    vector2 = np.array(response2["embedding"])
    
    # Calculate cosine similarity (1 - cosine distance)
    similarity = 1 - cosine(vector1, vector2)
    
    return similarity




In [10]:
import dotenv
dotenv.load_dotenv()
API_KEY = os.getenv("GOOGLE_API_KEY") 

text1 = "The quick brown fox jumps over the lazy dog."
text2 = "A fast auburn fox leaps above the sleepy canine."

try:
    similarity_score = get_text_similarity(text1, text2, API_KEY)
    print(f"Similarity score: {similarity_score:.4f}")
    
    # Additional examples
    text3 = "Machine learning is a subfield of artificial intelligence."
    text4 = "Deep learning is a type of machine learning using neural networks."
    
    similarity_score2 = get_text_similarity(text3, text4, API_KEY)
    print(f"Similarity score: {similarity_score2:.4f}")

except Exception as e:
    print(f"Error: {e}")

embeddings=[ContentEmbedding(values=[-0.06261901, 0.008358474, 0.020931892, 0.023453966, -0.03660129, 0.033054803, 0.016852979, 0.036087364, 0.047807004, 0.0036566, -0.031072818, 0.0331268, 0.021608587, 0.057072442, 0.0018407678, -3.885652e-05, 0.012503638, 0.08669894, -0.054491583, -0.0005350425, 0.028513968, -0.0044726413, -0.0064341724, -0.042229954, 0.0042517767, -0.00911962, 0.004954402, -0.013468084, 0.008117534, -0.010380683, 0.021130672, 0.09230528, 0.012990522, -0.008506946, 0.042690475, 0.029525528, -0.04068141, 0.035252534, 0.06356755, 0.012869843, -0.08559919, -0.01675492, -0.073462166, 0.02371569, 0.016049176, -0.027477734, 0.00493844, 0.044440545, -0.021486944, 0.040557824, 0.04165276, 0.017904084, -0.028954932, 0.0040466893, -0.014781696, 0.034403805, -0.011937411, 0.020024376, -0.018938418, -0.022551354, 0.026943836, 0.0090188235, 0.0108811, 0.011751853, 0.012611736, -0.043945126, -0.06382962, -0.03539745, 0.013086459, 0.0082184505, -6.327254e-05, 0.04279918, -0.0162921