In [3]:
from langchain_openai import OpenAIEmbeddings
import keyring

OPENAI_API_KEY = keyring.get_password('openai', 'key_for_mac')

embeddings = OpenAIEmbeddings(model='text-embedding-3-large',  dimensions=1024, api_key=OPENAI_API_KEY)
text = "This is a test document."
query_result = embeddings.embed_query(text)
print(query_result)

[-0.019731642678380013, -0.03733621910214424, -0.027476655319333076, 0.07862627506256104, -0.030604690313339233, 0.02947859838604927, -0.03183088079094887, 0.08793530613183975, -0.02298479713499546, 0.025975199416279793, 0.025399640202522278, 0.03388287127017975, -0.02145831659436226, -0.0665145292878151, -0.009665627032518387, 0.05315156280994415, -0.03203107416629791, -0.0016938307089731097, -0.017817284911870956, -0.03230633959174156, 0.02273455448448658, 0.0061716120690107346, -0.05670500919222832, 0.06246059387922287, 0.021783633157610893, 0.023147456347942352, -0.0003038103459402919, 0.010985657572746277, 0.02506181225180626, 0.005633590277284384, 0.022071411833167076, 0.06266079097986221, -0.03223126754164696, -0.0383872389793396, 0.07312093675136566, -0.005774351768195629, 0.06140957400202751, 0.08192948251962662, 0.021320683881640434, -0.02336016297340393, 0.04221595451235771, -0.01655355840921402, -0.03077986091375351, 0.03703593090176582, 0.03578471392393112, -0.001976917730

In [4]:
len(query_result)

1024

In [5]:
# Embed a single text
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(
    model='text-embedding-3-large',
    dimensions=1024,
    api_key=OPENAI_API_KEY
)
text = "LangChain is the framework for building context-aware reasoning applications"
single_vector = embeddings.embed_query(text)

# Display the first 100 characters of the embedding vector for readability
print("Embedding for single text (first 100 characters):")
print(str(single_vector)[:100])

Embedding for single text (first 100 characters):
[-0.026146935299038887, 0.005094355903565884, -0.04464210569858551, 0.005107142962515354, 0.01105823


In [6]:
# Emebed multiple texts
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(
    model='text-embedding-3-large',
    dimensions=1024,
    api_key=OPENAI_API_KEY
)

text = "LangChain is the framework for building context-aware reasoning applications"

# Define an additional text to embed
text2 = "LangGraph is a library for building stateful, multi-actor applications with LLMs"

# Embed the texts
two_vestors = embeddings.embed_documents([text, text2])

# Display the first 100 characters of each embedding vector
print("Embeddings for multiple texts (first 100 characters of each):")
for i, vector in enumerate(two_vestors, start=1):
    print(f"Embedding {i}:", str(vector)[:100])

Embeddings for multiple texts (first 100 characters of each):
Embedding 1: [-0.026129402220249176, 0.005087253171950579, -0.044647105038166046, 0.005156311206519604, 0.0111106
Embedding 2: [-0.013998507522046566, 0.03207070752978325, -0.057838719338178635, -0.0020668695215135813, -0.03220


In [7]:
# Indexing and retrieval
from langchain_openai import OpenAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore

# Intialize embedding model
embeddings = OpenAIEmbeddings(
    model='text-embedding-3-large',
    dimensions=1024,
    api_key=OPENAI_API_KEY
)

# Sample document to index
text = 'LangChain is the framework for building context-aware reasoning apllications.'

# index the document in the InMemory Vectorstore
vectorstore = InMemoryVectorStore.from_texts(
    [text],
    embedding=embeddings
)
print("Document indexed succesfully.")

Document indexed succesfully.


In [8]:
# Using a vector store as a retriever
# Convert the vector store into a retriever
retriever = vectorstore.as_retriever()

# Convert the vector store into a retriever
retriever = vectorstore.as_retriever()

# Define a sample query
query = "What is LangChain?"

# Retrieve the most similar document(s)
retrieved_documents = retriever.invoke(query)

# Display the content of the retrieved document
print("Retrieved document content:")
print(retrieved_documents[0].page_content)

Retrieved document content:
LangChain is the framework for building context-aware reasoning apllications.


In [9]:
# Cosine similarity
from langchain_openai import OpenAIEmbeddings
import numpy as np

# Initialize embedding model
embeddings = OpenAIEmbeddings(
    model='text-embedding-3-large',
    dimensions=1024,
    api_key=OPENAI_API_KEY
)

# Define texts to embed
text1 = "LangGraph is a library for building stateful, multi-actor applications with LLMs."
text2 = "LangChain is a framework for building context-aware reasoning applications."
text3 = "The quick brown fox jumps over the lazy dog."

# Embed single and multiple texts
embedding1 = embeddings.embed_query(text1)
embedding2, embedding3 = embeddings.embed_documents([text2, text3])

# display first 10 values of each embedding for readability
print("Embedding for text1 (first 10 values):", embedding1[:10])
print("Embedding for text2 (first 10 values):", embedding2[:10])
print("Embedding for text3 (first 10 values):", embedding3[:10])

Embedding for text1 (first 10 values): [-0.020116712898015976, 0.030621476471424103, -0.051706984639167786, -0.004699123557657003, -0.03421171009540558, -0.02294711023569107, -0.015966098755598068, 0.024903694167733192, -0.021978316828608513, -0.0016965760150924325]
Embedding for text2 (first 10 values): [-0.036500561982393265, 0.004984983243048191, -0.04653669148683548, 0.006315839011222124, 0.0017685367492958903, -0.022209767252206802, -0.008509333245456219, 0.03749806806445122, -0.021354762837290764, 0.000759580172598362]
Embedding for text3 (first 10 values): [-0.01729467138648033, 0.01311655342578888, -0.015627989545464516, 0.02812810055911541, 0.0017836913466453552, -0.029269661754369736, 0.0137672433629632, 0.08548019826412201, -0.023744499310851097, 0.009286610409617424]


In [10]:
# Calculating cosine similarity
# Define a fucntion to calculate cosine similarity
def cosine_similarity(vec1, vec2):
    dot_product = np.dot(vec1, vec2)
    norm_vec1 = np.linalg.norm(vec1)
    norm_vec2 = np.linalg.norm(vec2)
    return dot_product / (norm_vec1 * norm_vec2)

# Calculte similarity scores
similarity_1_2 = cosine_similarity(embedding1, embedding2)
similarity_1_3 = cosine_similarity(embedding1, embedding3)
similarity_2_3 = cosine_similarity(embedding2, embedding3)

# Display similarity scores
print("Cosine Similarity between text1 and text2:", similarity_1_2)
print("Cosine Similarity between text1 and text3:", similarity_1_3)
print("Cosine Similarity between text2 and text3:", similarity_2_3)

Cosine Similarity between text1 and text2: 0.534822993225475
Cosine Similarity between text1 and text3: 0.18252651833157857
Cosine Similarity between text2 and text3: 0.18567230491463935


In [17]:
# Using Chroma as a free, local Vector Store
from langchain_community.vectorstores import Chroma

db = Chroma(embedding_function=embeddings)

db.add_texts([text1, text2, text3])

# Define a query to search
query = "What is LangChain?"

# Perform similarity search in Chroma
docs = db.similarity_search(query)

# Display the content of the retrieved document
print("Most similar dodument to the query:")
print(docs[0].page_content)

Number of requested results 4 is greater than number of elements in index 3, updating n_results = 3


Most similar dodument to the query:
LangChain is a framework for building context-aware reasoning applications.
