# Embedding Model
### This model turns words and sentences into numbers (vectors). It's used to help computers understand the meaning and relationship between different pieces of text, which is great for tasks like search and recommendation.

In [5]:
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv

load_dotenv()

embeddings = OpenAIEmbeddings(model='text-embedding-3-large', dimensions=32)

result = embeddings.embed_query("Delhi is the capital of India.")

print(str(result))


[-0.06837411969900131, 0.19173479080200195, -0.012945176102221012, 0.4353785216808319, -0.17737263441085815, 0.05344773828983307, -0.14198017120361328, 0.0560637041926384, -0.13931292295455933, -0.13336287438869476, -0.12392488867044449, 0.13859480619430542, 0.00964957382529974, -0.20035208761692047, -0.22158755362033844, -0.10515150427818298, -0.20014691352844238, 0.1846562922000885, 0.08171041309833527, -0.22692206501960754, -0.0025085685774683952, -0.03785454109311104, -0.08591647446155548, -0.04267612099647522, -0.1638311743736267, 0.4837994873523712, 0.28765347599983215, -0.05590982362627983, -0.015195674262940884, 0.024172022938728333, 0.21009783446788788, 0.06611721217632294]


In [6]:
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv

load_dotenv()

embeddings = OpenAIEmbeddings(model='text-embedding-3-large', dimensions=32)

document = [
    "Delhi is capital of India.",
    "Bhopal is the capital of MP",
    "Mumbai is the capital of Maharashtra."
]

result = embeddings.embed_documents("Delhi is the capital of India.")

print(str(result))


[[0.11011563986539841, -0.11362789571285248, -0.04523652046918869, 0.2158084362745285, 0.23545104265213013, 0.028976088389754295, 0.03613067790865898, 0.32676962018013, -0.20227976143360138, 0.5091466307640076, -0.016333602368831635, -0.09444058686494827, -0.062049806118011475, 0.024211781099438667, -0.34758296608924866, 0.15649038553237915, -0.005126100964844227, 0.10003416985273361, -0.23102819919586182, 0.09099337458610535, 0.09463571012020111, -0.07805006951093674, 0.00565049983561039, 0.3056960999965668, 0.1510268896818161, 0.036390844732522964, 0.0771394819021225, 0.003888275707140565, -0.05196833610534668, 0.18367783725261688, 0.06627751886844635, 0.22751595079898834], [-0.07116422057151794, -0.08085905760526657, 0.0032058279030025005, 0.0673137903213501, 0.09199780225753784, 0.10341158509254456, -0.030906587839126587, 0.30005860328674316, -0.12328255921602249, 0.21713683009147644, -0.04503629356622696, -0.10994356870651245, -0.4491253197193146, -0.1823454201221466, -0.111800022

In [8]:
# from langchain_huggingface import HuggingFaceEmbeddings

# embeddings = HuggingFaceEmbeddings(
#     model_name ='sentence-transformer/all-MiniLM-L6-v2'
# )

# text = "I love to listen music"

# vector = embeddings.embed_query(text)

# print(str(vector))


# Document similarity 


In [10]:
from langchain_openai import OpenAIEmbeddings
from dotenv import load_dotenv
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

load_dotenv()

embedding = OpenAIEmbeddings(model = 'text-embedding-3-large', dimensions=300)

documents = [
    "Doraemon is a blue robotic cat from the 22nd century.",
    "He travels back in time to help a boy named Nobita Nobi.",
    "Doraemon has a magical pocket called the '4D pocket.'",
    "He uses gadgets from the future to solve problems.",
    "Nobita often gets into trouble due to laziness and poor grades.",
    "Doraemon always tries to help Nobita make better choices.",
    "Shizuka is Nobita’s kind and intelligent friend.",
    "Gian and Suneo often bully Nobita but are part of the friend group.",
    "Doraemon is scared of mice because he lost his ears to one.",
    "The cartoon teaches lessons about friendship, honesty, and hard work."
]


query = "What is the name of the futuristic robotic cat who helps Nobita using gadgets from his 4D pocket? "

doc_embedding = embedding.embed_documents(documents)

query_embedding = embedding.embed_query(query)

print(cosine_similarity([query_embedding], doc_embedding))



[[0.67690686 0.66038133 0.67626903 0.42819386 0.52127394 0.55332628
  0.5954287  0.45863574 0.42923128 0.28520346]]


In [11]:
scores =cosine_similarity([query_embedding], doc_embedding)[0]

index, score = (sorted(list(enumerate(scores)),key=lambda x :x[1])[-1])

print(query)

print(documents[index])


What is the name of the futuristic robotic cat who helps Nobita using gadgets from his 4D pocket? 
Doraemon is a blue robotic cat from the 22nd century.
