In [1]:
from langchain_openai import OpenAIEmbeddings

# Initialize
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

In [10]:
docs = [
    "Machine learning is a subset of AI",
    "Deep learning uses neural networks",
    "The weather is sunny today"
]

doc_vectors = embeddings.embed_documents(docs)
print(f"Query: {docs}")
print(f"\nVector dimensions: {len(doc_vectors)} documents")
print(f"First 5 values: {doc_vectors}")

Query: ['Machine learning is a subset of AI', 'Deep learning uses neural networks', 'The weather is sunny today']

Vector dimensions: 3 documents
First 5 values: [[-0.02326645515859127, -0.056493259966373444, -0.009426760487258434, -0.02003607153892517, 0.05049397796392441, -0.009998807683587074, 0.017478683963418007, 0.04637908190488815, -0.02728520706295967, 0.02042064070701599, -0.03257303684949875, -0.03753398358821869, -0.03491891175508499, 0.0044634100049734116, 0.0023639006540179253, -0.0020105773583054543, -0.010633346624672413, 0.008696078322827816, 0.04195653274655342, -0.010998687706887722, 0.00537916598841548, 0.038610778748989105, 0.022381944581866264, 0.0016416311264038086, 0.01980532892048359, -0.034957367926836014, 0.018920820206403732, 0.03761089965701103, -0.005859877914190292, 0.014729011803865433, 0.032880693674087524, -0.016911443322896957, -0.06360779702663422, -0.04522537440061569, 0.009883436374366283, 0.044725432991981506, 0.025650786235928535, -0.0019312599906

In [11]:
query = "What is machine learning?"
query_vector = embeddings.embed_query(query)

print(f"Query: {query}")
print(f"Vector dimensions: {len(query_vector)}")
print(f"First 5 values: {query_vector[:5]}")


Query: What is machine learning?
Vector dimensions: 1536
First 5 values: [-0.002476818859577179, -0.012755980715155602, -0.006645360495895147, -0.03157883137464523, 0.028759293258190155]


In [12]:
import numpy as np

def cosine_similarity(vec1, vec2):
    """Calculate cosine similarity between two vectors"""
    return np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))

# Test similarity
texts = [
    "I love programming",
    "I enjoy coding",
    "The weather is nice"
]

vectors = [embeddings.embed_query(text) for text in texts]

print("Similarity Scores:")
print(f"'{texts[0]}' vs '{texts[1]}': {cosine_similarity(vectors[0], vectors[1]):.4f}")
print(f"'{texts[0]}' vs '{texts[2]}': {cosine_similarity(vectors[0], vectors[2]):.4f}")
print("\nSimilar meanings = Higher similarity score!")

Similarity Scores:
'I love programming' vs 'I enjoy coding': 0.7529
'I love programming' vs 'The weather is nice': 0.2328

Similar meanings = Higher similarity score!
