# Embeddings OpenAI

In [1]:
from langchain_openai import OpenAIEmbeddings

# Build the OpenAI embedding client. No arguments are passed, so the model and
# every other setting come from the library defaults.
# NOTE(review): presumably the API key is picked up from the environment — confirm.
embedding_model = OpenAIEmbeddings()

# Embedding Documents

In [2]:
# Three short Portuguese sentences: two about liking dogs/animals and one
# unrelated sentence about the weather, used to compare embedding similarity.
sample_sentences = [
    'Eu gosto de cachorros',
    'Eu gosto de animais',
    'O tempo está ruim lá fora',
]

# One embedding vector is requested per sentence, in the same order.
embeddings = embedding_model.embed_documents(sample_sentences)

In [4]:
# Number of embedding vectors returned; expected to match the number of
# sentences that were embedded.
len(embeddings)

3

In [6]:
# Preview only the first ten components of the first sentence's embedding
# rather than dumping the whole vector.
embeddings[0][:10]

[-0.011923524551093578,
 0.00309882964938879,
 -0.008522930555045605,
 -0.028405683115124702,
 -0.021347152069211006,
 0.00406539486721158,
 0.007407780271023512,
 -0.014337639324367046,
 0.00388464261777699,
 -0.007848938927054405]

In [8]:
# For every embedding, print its dimensionality followed by its largest and
# smallest component.
for vector in embeddings:
    dimension, largest, smallest = len(vector), max(vector), min(vector)
    print(dimension, largest, smallest)

1536 0.24646034836769104 -0.6615411639213562
1536 0.23291762173175812 -0.6546251773834229
1536 0.23204083740711212 -0.6500335931777954


In [9]:
import numpy as np

# Dot product between the first two sentence embeddings, used here as a
# similarity score (higher means more similar).
first_vector, second_vector = embeddings[0], embeddings[1]
np.dot(first_vector, second_vector)

0.9291416904446919

In [10]:
# Print the pairwise dot-product matrix of all embeddings, one row per
# sentence, with each score rounded to two decimals.
for row_vector in embeddings:
    for col_vector in embeddings:
        score = np.dot(row_vector, col_vector)
        print(round(score, 2), end=' | ')
    # Terminate the current row.
    print()

1.0 | 0.93 | 0.81 | 
0.93 | 1.0 | 0.8 | 
0.81 | 0.8 | 1.0 | 


# Embedding Query

In [11]:
# Embed a single question with embed_query (the single-text counterpart of
# embed_documents used above).
pergunta = 'O que é um cachorro?'
emb_query = embedding_model.embed_query(pergunta)

# Show only the first ten components of the query embedding.
emb_query[:10]

[0.005100993439555168,
 0.003876505186781287,
 -0.0046824184246361256,
 -0.006441058591008186,
 -0.017842544242739677,
 0.013619309291243553,
 0.0015954270493239164,
 -0.002005412010475993,
 -0.002075695199891925,
 0.005591413471847773]

# Embedding HuggingFace

In [12]:
# all-MiniLM-l6-v2 is a general sentence-transformers model, not a BGE model,
# so it is loaded through the generic HuggingFace wrapper. The BGE-specific
# wrapper prepends a BGE retrieval instruction to texts passed to embed_query,
# which would skew query embeddings for a model not trained with that prompt.
from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings

# NOTE: this rebinds `embedding_model`, so every cell below uses the local
# HuggingFace model instead of the OpenAI one created at the top.
embedding_model = HuggingFaceEmbeddings(model_name='all-MiniLM-l6-v2')

  from .autonotebook import tqdm as notebook_tqdm


In [14]:
# Re-embed the same three Portuguese sentences with the currently active
# embedding model so the results can be compared with the earlier ones.
# NOTE: this overwrites the previous contents of `embeddings`.
comparison_sentences = [
    'Eu gosto de cachorros',
    'Eu gosto de animais',
    'O tempo está ruim lá fora',
]
embeddings = embedding_model.embed_documents(comparison_sentences)

In [16]:
# Print the pairwise dot-product matrix, one row per sentence. Each row is
# emitted with a single print call: scores rounded to two decimals, separated
# by ' | ' and followed by a trailing ' | ' before the newline.
for row_vector in embeddings:
    row_scores = [round(np.dot(row_vector, col_vector), 2) for col_vector in embeddings]
    print(*row_scores, sep=' | ', end=' | \n')

1.0 | 0.68 | 0.4 | 
0.68 | 1.0 | 0.49 | 
0.4 | 0.49 | 1.0 | 


In [17]:
# Report, for each embedding, its length and the extremes of its components.
for vector in embeddings:
    size = len(vector)
    high, low = max(vector), min(vector)
    print(size, high, low)

384 0.1526719480752945 -0.13653476536273956
384 0.1252877563238144 -0.1534135788679123
384 0.12505865097045898 -0.13441649079322815
