In [None]:
# Imports first, configuration second.
from dotenv import dotenv_values
from langchain_openai import AzureOpenAIEmbeddings

# Settings come from the local ".env" file, so no credentials are written in the notebook.
config = dotenv_values(".env")

# Embedding client used by the cells below; endpoint and key are looked up in `config`
# (`config.get` yields None when a key is absent).
embeddings = AzureOpenAIEmbeddings(
    api_key=config.get("AZURE_EMBEDDING_API_KEY"),
    azure_endpoint=config.get("AZURE_EMBEDDING_ENDPOINT"),
    model="text-embedding-3-small",
)

In [8]:
import numpy as np

In [9]:
# Embed the three greetings in one pass. The generator is consumed left to
# right, so the requests are issued in the order hello, bye, hi.
hello_embed, bye_embed, hi_embed = (
    embeddings.embed_query(greeting)
    for greeting in ("Hello, world!", "Bye, world!", "Hi, world!")
)

In [11]:
# Wrap each embedding in a NumPy array so the norm / dot-product cells can operate on it.
hello_embed, hi_embed, bye_embed = (
    np.array(vector) for vector in (hello_embed, hi_embed, bye_embed)
)

In [15]:
# Print the norm of each greeting embedding, one line per vector.
print(
    "\n".join(
        f"{label} magnitude: {np.linalg.norm(vector)}"
        for label, vector in (
            ("hello", hello_embed),
            ("hi", hi_embed),
            ("bye", bye_embed),
        )
    )
)

hello magnitude: 1.0000000406837233
hi magnitude: 0.9999999324950786
bye magnitude: 1.0000000197725742


In [13]:
# Print the dot product for every pair of greeting embeddings.
print(
    "\n".join(
        f"{label}: {np.dot(left, right)}"
        for label, left, right in (
            ("Hi and hello", hello_embed, hi_embed),
            ("Bye and hello", hello_embed, bye_embed),
            ("Hi and bye", hi_embed, bye_embed),
        )
    )
)



Hi and hello: 0.8020760969287266
Bye and hello: 0.4932629256090393
Hi and bye: 0.6295159724598807


In [19]:
# A longer passage on an unrelated topic, used for comparison with the greetings.
# Adjacent literals are concatenated by Python into the same single string.
lorem_text = (
    "Uganda is a landlocked country in East Africa whose diverse landscape encompasses the snow-capped Rwenzori Mountains and immense Lake Victoria. "
    "Its abundant wildlife includes chimpanzees as well as rare birds. "
    "Remote Bwindi Impenetrable National Park is a renowned mountain gorilla sanctuary. "
    "Murchison Falls National Park in the northwest is known for its 43m-tall waterfall and wildlife such as hippos."
)

# Embed the passage and keep it as a NumPy array, like the greeting embeddings.
lorem_embed = np.array(embeddings.embed_query(lorem_text))

In [20]:
# Print the dot product of the passage embedding with each greeting embedding.
print(
    "\n".join(
        f"lorem_embed and {label}: {np.dot(lorem_embed, vector)}"
        for label, vector in (
            ("hello", hello_embed),
            ("hi", hi_embed),
            ("bye", bye_embed),
        )
    )
)

lorem_embed and hello: 0.04240616560348247
lorem_embed and hi: 0.09444964782851624
lorem_embed and bye: 0.07227772703831098


In [22]:
# Small in-notebook corpus. Each entry is one document; the parenthesised
# adjacent literals are concatenated by Python into a single string.
store_docs = [
    (
        "Uganda is a landlocked country in East Africa whose diverse landscape "
        "encompasses the snow-capped Rwenzori Mountains and immense Lake Victoria. "
        "Its abundant wildlife includes chimpanzees as well as rare birds. "
    ),
    (
        "Formula One is the highest class of worldwide racing for open-wheel "
        "single-seater formula racing cars sanctioned by the "
        "Fédération Internationale de l'Automobile."
    ),
    (
        "Kampala is Uganda's national and commercial capital bordering Lake Victoria, "
        "Africa's largest lake. Hills covered with red-tile villas and trees "
        "surround an urban centre of contemporary skyscrapers"
    ),
    (
        "Germany is a Western European country with a landscape of forests, rivers, "
        "mountain ranges and North Sea beaches. It has over 2 millennia of history."
    ),
]

# Embed the whole corpus with a single embed_documents call.
store_embeddings = embeddings.embed_documents(store_docs)


In [24]:
# Collect the document embeddings into one NumPy array (its displayed shape is (4, 1536): one row per document).
store = np.array(store_embeddings)

In [25]:
# Bare last expression: the notebook renders the array's repr as the cell output.
store

array([[ 0.01806934, -0.01006123,  0.04370892, ...,  0.00443219,
        -0.05161   ,  0.03146809],
       [-0.03841206,  0.04251046,  0.01043942, ...,  0.04125585,
        -0.01247294,  0.01403075],
       [-0.00436758, -0.01459705,  0.07780901, ..., -0.00992012,
        -0.03556983,  0.02571263],
       [-0.01069064,  0.0028758 ,  0.07976863, ..., -0.03156284,
        -0.01689748, -0.00552643]], shape=(4, 1536))

In [35]:
def get_query_results(query, docs=None, doc_embeddings=None, embedder=None):
    """Score every document against ``query`` and pick the best match.

    The query is embedded, scaled to unit length, and compared with each
    document embedding via a dot product. Document embeddings are used as
    given (they are not re-normalised here).

    Args:
        query: Text to search for.
        docs: Documents to rank. Defaults to the notebook-level ``store_docs``.
        doc_embeddings: One embedding per entry of ``docs``, in the same
            order. Defaults to the notebook-level ``store_embeddings``.
        embedder: Object exposing ``embed_query(text)``. Defaults to the
            notebook-level ``embeddings`` client.

    Returns:
        A ``(results, best_doc)`` tuple. ``results`` maps each score to its
        document and ``best_doc`` is the document with the highest score.
        Because ``results`` is keyed by score, documents whose scores are
        exactly equal collapse into a single entry.

    Raises:
        ValueError: If there are no documents, or if ``docs`` and
            ``doc_embeddings`` differ in length.
    """
    # Fall back to the notebook globals only when the caller supplies nothing,
    # so existing one-argument calls keep working unchanged.
    if docs is None:
        docs = store_docs
    if doc_embeddings is None:
        doc_embeddings = store_embeddings
    if embedder is None:
        embedder = embeddings

    # Validate before embedding the query so a bad store never costs a request.
    if len(docs) == 0:
        raise ValueError("no documents to search")
    doc_matrix = np.asarray(doc_embeddings)
    if len(doc_matrix) != len(docs):
        # zip() below would otherwise silently drop the unmatched tail.
        raise ValueError(
            f"got {len(docs)} documents but {len(doc_matrix)} embeddings"
        )

    query_embed = np.array(embedder.embed_query(query))
    query_embed = query_embed / np.linalg.norm(query_embed)

    # One dot product per document row.
    scores = np.dot(doc_matrix, query_embed)
    best_index = np.argmax(scores)

    results = dict(zip(scores, docs))
    return results, docs[best_index]





In [40]:
# Free-text question to run against the in-notebook document store.
query = "Who is Jovery Musevenee?"

# get_query_results returns a (score -> document dict, best-matching document) pair, so `results` holds that tuple.
results = get_query_results(query)

In [41]:
# Bare last expression: display the value returned by get_query_results.
results

({np.float64(0.2630237884321371): 'Uganda is a landlocked country in East Africa whose diverse landscape encompasses the snow-capped Rwenzori Mountains and immense Lake Victoria. Its abundant wildlife includes chimpanzees as well as rare birds. ',
  np.float64(0.018525482999005617): "Formula One is the highest class of worldwide racing for open-wheel single-seater formula racing cars sanctioned by the Fédération Internationale de l'Automobile.",
  np.float64(0.23078057396309537): "Kampala is Uganda's national and commercial capital bordering Lake Victoria, Africa's largest lake. Hills covered with red-tile villas and trees surround an urban centre of contemporary skyscrapers",
  np.float64(-0.018048681673581864): 'Germany is a Western European country with a landscape of forests, rivers, mountain ranges and North Sea beaches. It has over 2 millennia of history.'},
 'Uganda is a landlocked country in East Africa whose diverse landscape encompasses the snow-capped Rwenzori Mountains and 

In [42]:
# Dimensions of the document-embedding array.
store.shape

(4, 1536)

In [47]:
import chromadb

# Chroma client created with default settings.
# NOTE(review): presumably an in-memory (non-persistent) client — confirm against the Chroma docs if the data must survive a kernel restart.
chroma_client = chromadb.Client()

In [72]:
from chromadb.utils import embedding_functions

# Embedding function handed to the Chroma collection; the base URL and key
# are read from `config` (keys AZURE_EMBEDDING_BASE / AZURE_EMBEDDING_API_KEY).
azure_openai_ef = embedding_functions.OpenAIEmbeddingFunction(
    api_type="azure",
    api_base=config.get("AZURE_EMBEDDING_BASE"),
    api_key=config.get("AZURE_EMBEDDING_API_KEY"),
    api_version="2023-05-15",  # adjust to the API version your Azure resource supports
    deployment_id="text-embedding-3-small",
    model_name="text-embedding-3-small",  # model name as deployed in Azure
)

In [74]:
# Fetch the "test2" collection (get_or_create), wiring in the Azure embedding function.
collection = chroma_client.get_or_create_collection(
    name="test2",
    embedding_function=azure_openai_ef,
)

In [75]:
# Add every document under a positional id: "id0", "id1", ...
collection.add(
    ids=[f"id{position}" for position, _ in enumerate(store_docs)],
    documents=store_docs,
)

In [76]:
# Request the 2 closest documents, restricted to entries whose `source` metadata is not "wikipedia".
# NOTE(review): the add() call in this notebook attaches no metadata, so confirm this filter is intended.
collection.query(
    query_texts=["What is Uganda?"],
    n_results=2,
    where={"source": {"$ne": "wikipedia"}},
)

{'ids': [['id0', 'id2']],
 'embeddings': None,
 'documents': [['Uganda is a landlocked country in East Africa whose diverse landscape encompasses the snow-capped Rwenzori Mountains and immense Lake Victoria. Its abundant wildlife includes chimpanzees as well as rare birds. ',
   "Kampala is Uganda's national and commercial capital bordering Lake Victoria, Africa's largest lake. Hills covered with red-tile villas and trees surround an urban centre of contemporary skyscrapers"]],
 'uris': None,
 'included': ['metadatas', 'documents', 'distances'],
 'data': None,
 'metadatas': [[None, None]],
 'distances': [[0.6812670230865479, 0.9465869665145874]]}