In [1]:
from langchain_openai import OpenAIEmbeddings
import keyring

# Fetch the OpenAI API key that keyring stores under service 'openai', entry 'key_for_windows'
OPENAI_API_KEY = keyring.get_password('openai', 'key_for_windows')

# Embedding client: text-embedding-3-large with the requested output size set to 1024
embeddings = OpenAIEmbeddings(
    model='text-embedding-3-large',
    dimensions=1024,
    api_key=OPENAI_API_KEY,
)

# Embed one sample sentence and print the resulting vector
text = "This is a test document."
query_result = embeddings.embed_query(text)
print(query_result)

[-0.019716789945960045, -0.03733178973197937, -0.027448374778032303, 0.07856690138578415, -0.030551016330718994, 0.029400035738945007, -0.031877145171165466, 0.08787482976913452, -0.02299458160996437, 0.026022160425782204, 0.025321563705801964, 0.03390387073159218, -0.02150581404566765, -0.06645659357309341, -0.009633203037083149, 0.05304517224431038, -0.03200225159525871, -0.0016607892466709018, -0.01776512898504734, -0.03232752904295921, 0.022706836462020874, 0.0061959014274179935, -0.056698281317949295, 0.06240313872694969, 0.021768536418676376, 0.023132199421525, -0.00028891791589558125, 0.010996864177286625, 0.025058839470148087, 0.005595389753580093, 0.022056281566619873, 0.06260330975055695, -0.0321773998439312, -0.03838268667459488, 0.07301217317581177, -0.005814326461404562, 0.06140228733420372, 0.08191975951194763, 0.021393217146396637, -0.023282326757907867, 0.04216090217232704, -0.016564104706048965, -0.030751187354326248, 0.03705655783414841, 0.035755448043346405, -0.00200

In [2]:
# Number of values in the embedding that embed_query returned for the test text
len(query_result)

1024

In [3]:
# Single-text embedding with embed_query
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model='text-embedding-3-large', dimensions=1024, api_key=OPENAI_API_KEY)

text = "LangChain is the framework for building context-aware reasoning applications"
single_vector = embeddings.embed_query(text)

# Show only the first 100 characters of the vector's string form to keep the output short
print(
    "Embedding for single text (first 100 characters):",
    str(single_vector)[:100],
    sep="\n",
)

Embedding for single text (first 100 characters):
[-0.02610638178884983, 0.005122559145092964, -0.04468365013599396, 0.00509954197332263, 0.0110174659


In [4]:
# Embed several texts in one call with embed_documents
from langchain_openai import OpenAIEmbeddings

embeddings = OpenAIEmbeddings(model='text-embedding-3-large', dimensions=1024, api_key=OPENAI_API_KEY)

# The two sentences to embed
text = "LangChain is the framework for building context-aware reasoning applications"
text2 = "LangGraph is a library for building stateful, multi-actor applications with LLMs"

# embed_documents takes the list of texts and yields one vector per text
two_vestors = embeddings.embed_documents([text, text2])

# Print a 100-character preview of each vector, numbered from 1
print("Embeddings for multiple texts (first 100 characters of each):")
for i, vector in enumerate(two_vestors, start=1):
    print(f"Embedding {i}: {str(vector)[:100]}")

Embeddings for multiple texts (first 100 characters of each):
Embedding 1: [-0.02610638178884983, 0.005122559145092964, -0.04468365013599396, 0.00509954197332263, 0.0110174659
Embedding 2: [-0.013918550685048103, 0.03208278864622116, -0.05786428600549698, -0.002065209439024329, -0.0321980


In [5]:
# Indexing: store one document in an in-memory vector store
from langchain_openai import OpenAIEmbeddings
from langchain_core.vectorstores import InMemoryVectorStore

# Embedding model handed to the vector store
embeddings = OpenAIEmbeddings(model='text-embedding-3-large', dimensions=1024, api_key=OPENAI_API_KEY)

# The single sample document to index
text = 'LangChain is the framework for building context-aware reasoning apllications.'

# Build the store from the one-element text list, using `embeddings` as its embedding model
vectorstore = InMemoryVectorStore.from_texts([text], embedding=embeddings)
print("Document indexed succesfully.")

Document indexed succesfully.


In [6]:
# Using a vector store as a retriever
# Convert the vector store into a retriever
retriever = vectorstore.as_retriever()

# Define a sample query
query = "What is LangChain?"

# Ask the retriever for the document(s) matching the query
retrieved_documents = retriever.invoke(query)

# Display the content of the first retrieved document
print("Retrieved document content:")
print(retrieved_documents[0].page_content)

Retrieved document content:
LangChain is the framework for building context-aware reasoning apllications.


In [7]:
# Cosine similarity: prepare three embeddings to compare
from langchain_openai import OpenAIEmbeddings
import numpy as np

# Embedding model
embeddings = OpenAIEmbeddings(model='text-embedding-3-large', dimensions=1024, api_key=OPENAI_API_KEY)

# Sentences to compare
text1 = "LangGraph is a library for building stateful, multi-actor applications with LLMs."
text2 = "LangChain is a framework for building context-aware reasoning applications."
text3 = "The quick brown fox jumps over the lazy dog."

# text1 goes through embed_query; text2 and text3 are embedded together via embed_documents
embedding1 = embeddings.embed_query(text1)
embedding2, embedding3 = embeddings.embed_documents([text2, text3])

# Preview the first 10 values of each embedding
print(f"Embedding for text1 (first 10 values): {embedding1[:10]}")
print(f"Embedding for text2 (first 10 values): {embedding2[:10]}")
print(f"Embedding for text3 (first 10 values): {embedding3[:10]}")

Embedding for text1 (first 10 values): [-0.02010262757539749, 0.030552953481674194, -0.05171961337327957, -0.0047073969617486, -0.03423906862735748, -0.02297171577811241, -0.016017500311136246, 0.02490977756679058, -0.021907683461904526, -0.0016578016802668571]
Embedding for text2 (first 10 values): [-0.036500561982393265, 0.004984983243048191, -0.04653669148683548, 0.006315839011222124, 0.0017685367492958903, -0.022209767252206802, -0.008509333245456219, 0.03749806806445122, -0.021354762837290764, 0.000759580172598362]
Embedding for text3 (first 10 values): [-0.01729467138648033, 0.01311655342578888, -0.015627989545464516, 0.02812810055911541, 0.0017836913466453552, -0.029269661754369736, 0.0137672433629632, 0.08548019826412201, -0.023744499310851097, 0.009286610409617424]


In [8]:
# Calculating cosine similarity
# Define a function to calculate cosine similarity
def cosine_similarity(vec1, vec2):
    """Return the cosine similarity between two vectors.

    Args:
        vec1: First vector (a sequence or array of numbers).
        vec2: Second vector, with the same length as ``vec1``.

    Returns:
        The dot product of the vectors divided by the product of their
        Euclidean norms. If either vector has zero norm the similarity is
        undefined; 0.0 is returned in that case instead of dividing by zero
        (which would produce ``nan`` and a RuntimeWarning).
    """
    dot_product = np.dot(vec1, vec2)
    denominator = np.linalg.norm(vec1) * np.linalg.norm(vec2)
    # Guard the 0/0 case: a zero-length vector has no direction to compare.
    if denominator == 0:
        return 0.0
    return dot_product / denominator

# Compute the cosine similarity for each pair of embeddings
similarity_1_2, similarity_1_3, similarity_2_3 = (
    cosine_similarity(first, second)
    for first, second in (
        (embedding1, embedding2),
        (embedding1, embedding3),
        (embedding2, embedding3),
    )
)

# Print the three pairwise scores
print(f"Cosine Similarity between text1 and text2: {similarity_1_2}")
print(f"Cosine Similarity between text1 and text3: {similarity_1_3}")
print(f"Cosine Similarity between text2 and text3: {similarity_2_3}")

Cosine Similarity between text1 and text2: 0.5348994244992196
Cosine Similarity between text1 and text3: 0.18250263323910468
Cosine Similarity between text2 and text3: 0.1856723049146393


In [9]:
# # Using Chroma as a free, local Vector Store
# from langchain_chroma import Chroma

# db = Chroma(embedding_function=embeddings)

# db.add_texts([text1, text2, text3])

# # Define a query to search
# query = "What is LangChain?"

# # Perform similarity search in Chroma
# docs = db.similarity_search(query)

# # Display the content of the retrieved document
# print("Most similar document to the query:")
# print(docs[0].page_content)

In [12]:
# Using DeepLake as the vector store
# Requires the `deeplake` package; without it this cell fails with
# "ImportError: Could not import deeplake python package"
# (install hint from that error: `pip install deeplake[enterprise]`).
from langchain.vectorstores.deeplake import DeepLake

# NOTE(review): no dataset_path is passed, so the store location is DeepLake's
# default — confirm whether re-running this cell appends duplicate texts.
db = DeepLake(embedding=embeddings)

# Index the three sample texts
db.add_texts([text1, text2, text3])

# Define a query to search
query = "What is LangChain?"

# Perform similarity search in DeepLake
docs = db.similarity_search(query)

# Display the content of the first returned document
print("Most similar document to the query:")
print(docs[0].page_content)


ImportError: Could not import deeplake python package. Please install it with `pip install deeplake[enterprise]`.