In [2]:
import os
from openai import OpenAI

In [3]:
from dotenv import load_dotenv
# Load environment variables (e.g. the GITHUB_TOKEN read in a later cell) from a .env file, if one exists.
load_dotenv()

True

In [4]:
# Read the access token from the environment so the secret never appears in the notebook;
# a missing GITHUB_TOKEN raises KeyError here.
token = os.environ["GITHUB_TOKEN"]
# Base URL handed to the OpenAI-compatible clients created in later cells.
endpoint = "https://models.inference.ai.azure.com"
# Embedding model requested through the raw `client.embeddings.create` call.
model_name = "text-embedding-3-small"

In [5]:
# Build the API client that talks to the inference server, authenticating
# with the token read from the environment.
client = OpenAI(api_key=token, base_url=endpoint)

In [6]:
# Generate embeddings with an embedding model through the client library.
# An embedding is a numerical representation of an input (here, text) in a
# high-dimensional space; embeddings are commonly used in machine-learning
# tasks such as semantic similarity, clustering, and search.

# Embed a fixed list of phrases using the cloud-hosted embedding model.
response = client.embeddings.create(
    model=model_name,
    input=[
        "first phrase",
        "second phrase",
        "third phrase",
    ],
)

# Show the raw embedding objects returned for the phrases.
print(response.data)
 




[Embedding(embedding=[-0.00721184303984046, 0.007491494063287973, -0.0189374890178442, -0.010083189234137535, 0.025334011763334274, 0.008326508104801178, 0.023348884657025337, 0.02906794287264347, -0.02553882636129856, 0.045563410967588425, 0.03174629062414169, -0.05214899033308029, 0.009445112198591232, -0.01767709106206894, 0.004600455053150654, 0.01758255995810032, -0.009326949715614319, 0.01511690579354763, 0.018401820212602615, 0.05426016077399254, 0.03648854047060013, 0.004596516024321318, -0.0828712061047554, 0.04379885271191597, -0.016999626532197, -0.00883854553103447, 0.0030013241339474916, 0.03863121569156647, 0.00830287579447031, -0.016810566186904907, 0.04644569009542465, -0.04691833630204201, -0.008090183138847351, -0.02470381185412407, 0.023616718128323555, 0.017393501475453377, -0.03655155748128891, -0.01780313067138195, -0.004974635783582926, -0.04742249846458435, 0.026169026270508766, -0.039481986314058304, 0.021568570286035538, 0.026925265789031982, -0.01419523917138

In [7]:
# Summarise each embedding: its index in the request, its length, and its
# first two and last two components.
for item in response.data:
    vector = item.embedding
    length = len(vector)
    head = f"{vector[0]}, {vector[1]}"
    tail = f"{vector[-2]}, {vector[-1]}"
    print(f"data[{item.index}]: length={length}, [{head}, ..., {tail}]")

# Token usage reported by the API for this request.
print(response.usage)

data[0]: length=1536, [-0.00721184303984046, 0.007491494063287973, ..., 0.01611734740436077, -0.004887983202934265]
data[1]: length=1536, [-0.003025691257789731, 0.009231699630618095, ..., 0.029947662726044655, 0.020937401801347733]
data[2]: length=1536, [-0.013795719482004642, 0.031857650727033615, ..., 0.017506178468465805, 0.0226223636418581]
Usage(prompt_tokens=6, total_tokens=6)


The embeddings have been created; the next step is to store them in a database.

pip install chromadb (for a cloud-based store, use azure-ai-search-service instead)
pip install pypdf
pip install langchain-openai
pip install langchain-community

The LangChain framework is used for the orchestration part.

In [8]:
from langchain_community.document_loaders  import DirectoryLoader ,PyPDFLoader
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings

In [9]:
# Directory (relative path) containing the documents to load.
# NOTE(review): this name shadows Python's built-in dir(); it is kept because a later cell passes `dir` to load_docs.
dir = "docs/"

In [10]:
# Load the documents from a directory so they can be split into chunks.
# Whole documents are not sent to the model directly because an LLM has a context limit.
def load_docs(dir, max_concurrency=120, silent_errors=True, show_progress=True):
    """Load the files found under ``dir`` with ``PyPDFLoader``.

    Args:
        dir: Path of the directory to scan. The name shadows the built-in
            ``dir`` but is kept so existing callers keep working.
        max_concurrency: Forwarded to ``DirectoryLoader`` as the cap on
            concurrent loader threads. Defaults to 120, the value that was
            previously hard-coded.
        silent_errors: Forwarded to ``DirectoryLoader``. The default ``True``
            preserves the previous behaviour; pass ``False`` to have load
            failures raised instead of being suppressed.
        show_progress: Forwarded to ``DirectoryLoader``; ``True`` (the
            previous behaviour) displays a progress bar while loading.

    Returns:
        The documents returned by ``DirectoryLoader.load()``.
    """
    loader = DirectoryLoader(
        dir,
        loader_cls=PyPDFLoader,
        use_multithreading=True,
        max_concurrency=max_concurrency,
        show_progress=show_progress,
        silent_errors=silent_errors,
    )
    return loader.load()


In [15]:
# Split the loaded documents into smaller, overlapping chunks.
# Chunking reduces the amount of context sent per request, which lowers the
# chance of hitting a rate limit.
# The overlap repeats the end of one chunk at the start of the next -- like
# "to be continued on the next page".
def split_docs(documents,chunk_size=300,chunk_overlap=100):
    """Split ``documents`` with a ``RecursiveCharacterTextSplitter``.

    Args:
        documents: The documents to split; passed to ``split_documents``.
        chunk_size: Forwarded to the splitter as ``chunk_size`` (default 300).
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``
            (default 100).

    Returns:
        The chunks returned by ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_overlap=chunk_overlap,
        chunk_size=chunk_size,
    )
    return splitter.split_documents(documents)

In [16]:
# Load everything under the configured directory.
documents = load_docs(dir)

100%|██████████| 1/1 [00:00<00:00, 33.39it/s]


In [17]:
# Number of document objects returned by the loader.
print(len(documents))

1


In [18]:
# Split the loaded documents using the default chunk size and overlap.
# NOTE(review): `doc` holds the list of chunks, not a single document; later cells use this name.
doc=split_docs(documents)
# Number of chunks produced by the splitter.
print(len(doc))

2


In [19]:
# LangChain embeddings client: the middleman between the embeddings API and
# the application, exposing the interface the framework expects (it is handed
# to the Chroma vector store in later cells).
# NOTE(review): this requests 'text-embedding-3-large', not the `model_name`
# ('text-embedding-3-small') defined earlier -- confirm that is intentional.
clientopen = OpenAIEmbeddings(
    api_key=token,
    base_url=endpoint,
    model='text-embedding-3-large',
)

In [20]:
# Embed the chunks and persist them in a local Chroma collection.
# Each chunk gets a deterministic id built from its source file, page number,
# and position in `doc`, so re-running this cell overwrites the same records
# instead of appending duplicates under fresh random ids.
# If ./ai-tkt-dir was populated by an earlier run without explicit ids, delete
# it once so the old randomly-keyed copies do not linger.
chunk_ids = [
    f"{chunk.metadata.get('source', 'unknown')}:{chunk.metadata.get('page', 0)}:{position}"
    for position, chunk in enumerate(doc)
]
save_to = Chroma.from_documents(
    documents=doc,
    embedding=clientopen,
    ids=chunk_ids,
    persist_directory='./ai-tkt-dir',
)

In [21]:
# Natural-language question used for the similarity search in the next cell.
query = "what is an AI Toolkit"

In [22]:
# Re-open the persisted collection from disk, using the same embedding client
# so the query is embedded with the model that produced the stored vectors.
db1 = Chroma(persist_directory='./ai-tkt-dir',embedding_function=clientopen)

# Retrieve the chunks most similar to the query and show the full result list.
results = db1.similarity_search(query)
print(results)

# Show the text of the best match; guard against an empty result list
# (e.g. an empty collection), which would otherwise raise IndexError.
if results:
    print(results[0].page_content)
else:
    print("No matching chunks were found in ./ai-tkt-dir")

# Retrieval phase completed

  db1 = Chroma(persist_directory='./ai-tkt-dir',embedding_function=clientopen)
Number of requested results 4 is greater than number of elements in index 2, updating n_results = 2


[Document(metadata={'creationdate': '2025-03-20T17:57:48+05:30', 'creator': 'PyPDF', 'page': 0, 'page_label': '1', 'producer': 'cairo 1.16.0 (https://cairographics.org)', 'source': 'docs/tkt.pdf', 'total_pages': 1}, page_content='File: /home/akv/Documents/tkt.pdf Page 1 of 1\nThe AI Toolkit for VS Code (AI Toolkit) is a VS Code extension that enables you to\ndownload, test, fine-tune, and deploy AI models with your apps or in the cloud. For more\ninformation, see the AI Toolkit overview.\nInstall the AI Toolkit for VS Code'), Document(metadata={'creationdate': '2025-03-20T17:57:48+05:30', 'creator': 'PyPDF', 'page': 0, 'page_label': '1', 'producer': 'cairo 1.16.0 (https://cairographics.org)', 'source': 'docs/tkt.pdf', 'total_pages': 1}, page_content='information, see the AI Toolkit overview.\nInstall the AI Toolkit for VS Code\nDownload a model from the catalog\nRun the model locally using the playground\nIntegrate an AI model into your application using REST or the ONNX Runtime')]
Fil