## open ai embedding

In [None]:
import os
import numpy as np
from dotenv import load_dotenv
from langchain_openai import OpenAIEmbeddings

In [12]:
load_dotenv()
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")

In [13]:
embeddings=OpenAIEmbeddings(model="text-embedding-3-small")

In [18]:
## single text embedding
text = "Hello, I am Learning about embedding"
text_embedding = embeddings.embed_query(text)
print(f"Text: {text}")
print(f"Embedding length: {len(text_embedding)}")
print(text_embedding)

Text: Hello, I am Learning about embedding
Embedding length: 1536
[-0.0004855790757574141, -0.030097153037786484, 0.035780616104602814, 0.0005166386254131794, 0.0035189171321690083, -0.0007336181006394327, 0.03093707375228405, 0.0052810003980994225, -0.04319991171360016, 0.03751645237207413, 0.017932303249835968, -0.02067604474723339, -0.02171194553375244, -0.056526653468608856, 0.0077902632765471935, 0.02476365678012371, 0.0005980059504508972, 0.02838931418955326, 0.02574356459081173, 0.07777664065361023, 0.02791335992515087, -0.014768603257834911, -0.03552863746881485, 0.06025030091404915, 0.006866350304335356, -0.06836953014135361, 0.03421276435256004, 0.06086624413728714, -0.005120015703141689, -0.0032669410575181246, 0.01646244339644909, -0.031665004789829254, -0.05294299125671387, -0.011261934414505959, -0.013991676270961761, 0.012052860110998154, 0.0052740010432899, 0.032140959054231644, -0.023447781801223755, 0.03396078571677208, 0.03519267216324806, -0.014390639029443264, -0.0

In [None]:
## multiple embedding
sentences = [
    "Hello, I am Learning about embedding",
    "Machine learning is fascinating",
    "Python is great for data science",
    "I love coding in python",
    "Embeddings convert text to vectors"
]

sentences_embedding = embeddings.embed_documents(sentences)
print(f"Text: {sentences}")
print(f"Embedding length: {len(sentences_embedding)}")
for i in range(len(sentences_embedding)):
    print(sentences_embedding[i])


Text: ['Hello, I am Learning about embedding', 'Machine learning is fascinating', 'Python is great for data science', 'Embeddings convert text to vectors']
Embedding length: 4
[-0.0004855790757574141, -0.030097153037786484, 0.035780616104602814, 0.0005166386254131794, 0.0035189171321690083, -0.0007336181006394327, 0.03093707375228405, 0.0052810003980994225, -0.04319991171360016, 0.03751645237207413, 0.017932303249835968, -0.02067604474723339, -0.02171194553375244, -0.056526653468608856, 0.0077902632765471935, 0.02476365678012371, 0.0005980059504508972, 0.02838931418955326, 0.02574356459081173, 0.07777664065361023, 0.02791335992515087, -0.014768603257834911, -0.03552863746881485, 0.06025030091404915, 0.006866350304335356, -0.06836953014135361, 0.03421276435256004, 0.06086624413728714, -0.005120015703141689, -0.0032669410575181246, 0.01646244339644909, -0.031665004789829254, -0.05294299125671387, -0.011261934414505959, -0.013991676270961761, 0.012052860110998154, 0.0052740010432899, 0.03

## Cosine Similairty

In [23]:
## Cosine Similarity
def cosine_similarity(vec1, vec2):
    dot_product=np.dot(vec1, vec2)
    norm_a=np.linalg.norm(vec1)
    norm_b=np.linalg.norm(vec2)
    return dot_product/(norm_a * norm_b)

In [27]:
## Calculating the similarity
for i in range(len(sentences)):
    for j in range(i+1, len(sentences)):
        similarity = cosine_similarity(sentences_embedding[i], sentences_embedding[j])
        print(f"{sentences[i]} and {sentences[j]} have {similarity:.3f} similarity")

Hello, I am Learning about embedding and Machine learning is fascinating have 0.354 similarity
Hello, I am Learning about embedding and Python is great for data science have 0.182 similarity
Hello, I am Learning about embedding and Embeddings convert text to vectors have 0.468 similarity
Machine learning is fascinating and Python is great for data science have 0.392 similarity
Machine learning is fascinating and Embeddings convert text to vectors have 0.256 similarity
Python is great for data science and Embeddings convert text to vectors have 0.142 similarity
