In [None]:
import os

import google.generativeai as genai
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity


# Gemini API key, read from the GOOGLE_API_KEY environment variable so the
# secret never has to be pasted into (and saved with) the notebook.
# Falls back to an empty string when the variable is unset.
google_api_key = os.environ.get("GOOGLE_API_KEY", "")
# Embedding model identifier passed to genai.embed_content by the helpers below.
MODEL = "models/embedding-001"

def gen_standard_embeddings(standard_diagnosis_list):
    """Embed every reference diagnosis string.

    Configures the Gemini client with the module-level ``google_api_key`` and
    then calls ``genai.embed_content`` once per string, using the module-level
    ``MODEL`` and the ``"retrieval_document"`` task type.

    Args:
        standard_diagnosis_list: Iterable of reference strings to embed.

    Returns:
        A list holding, for each input string in order, the value stored under
        the ``"embedding"`` key of the corresponding ``embed_content`` response.
    """
    genai.configure(api_key=google_api_key)

    embeddings = []
    for diagnosis_text in standard_diagnosis_list:
        response = genai.embed_content(
            model=MODEL,
            content=diagnosis_text,
            task_type="retrieval_document",
        )
        embeddings.append(response["embedding"])
    return embeddings



def gen_matched_strings(standard_embeddings, standard_diagnosis_list, to_be_matched_strings):
    """Match each input string to its most similar reference diagnosis.

    Every input string is embedded with ``genai.embed_content`` (module-level
    ``MODEL``, task type ``"retrieval_query"``) and compared against all rows
    of ``standard_embeddings`` by cosine similarity; the reference entry with
    the highest score is selected.

    Args:
        standard_embeddings: Sequence of reference embedding vectors, e.g. the
            list returned by ``gen_standard_embeddings``.
        standard_diagnosis_list: Reference strings, indexed in the same order
            as ``standard_embeddings``.
        to_be_matched_strings: Iterable of strings to match.

    Returns:
        A ``(similarities, matched_strings)`` tuple of two lists, both ordered
        like ``to_be_matched_strings``: the best cosine score for each input
        and the reference string that achieved it.

    Raises:
        ValueError: If there is at least one string to match but
            ``standard_embeddings`` is empty.
    """
    genai.configure(api_key=google_api_key)

    # Materialise once so emptiness can be checked even for one-shot iterables.
    queries = list(to_be_matched_strings)
    if not queries:
        return [], []

    # Fail fast with a clear message before spending one embedding API call
    # per query on a comparison that cannot succeed.
    if len(standard_embeddings) == 0:
        raise ValueError("standard_embeddings is empty; there is nothing to match against")

    similarities = []
    matched_strings = []
    for query in queries:
        query_embedding = genai.embed_content(
            model=MODEL,
            content=query,
            task_type="retrieval_query",
        )["embedding"]

        # cosine_similarity expects 2-D inputs; wrap the single query vector
        # in a list and take row 0 to get one score per reference entry.
        scores = cosine_similarity([query_embedding], standard_embeddings)[0]
        best_index = int(np.argmax(scores))

        similarities.append(scores[best_index])
        matched_strings.append(standard_diagnosis_list[best_index])

    return similarities, matched_strings
    

In [36]:
# Reference vocabulary: the diagnosis names that inputs are matched against.
standard_diagnosis = [
    "Hypertension, primary",
    "Type 2 diabetes mellitus",
    "Acute upper respiratory infection",
    "Major depressive disorder",
]

# Abbreviated / free-text inputs to resolve to one of the reference entries.
to_be_matched_strings = [
    "Depression",
    "T2 diab",
    "resp inf",
]

standard_embeddings = gen_standard_embeddings(standard_diagnosis)
similarities, matched_strings = gen_matched_strings(
    standard_embeddings, standard_diagnosis, to_be_matched_strings
)

# `similarities` holds the winning cosine score for each input, in the same
# order as `matched_strings`; it is kept available but not printed here.
# The loop variable is named `query` rather than `input` so the builtin
# input() is not rebound in the kernel namespace for later cells.
for query, matched in zip(to_be_matched_strings, matched_strings):
    print(f"Test Input: {query}")
    print(f"Best Match: {matched}")

Test Input: Depression
Best Match: Major depressive disorder
Test Input: T2 diab
Best Match: Type 2 diabetes mellitus
Test Input: resp inf
Best Match: Acute upper respiratory infection
