# Ollama Embedding

In [1]:
from langchain_ollama import OllamaEmbeddings

# Embedding client backed by the local Ollama "gemma:2b" model.
# NOTE(review): the old "defaults to llama2" remark matches the legacy
# langchain_community class; langchain_ollama appears to require `model`
# to be passed explicitly - confirm against the installed version.
embeddings = OllamaEmbeddings(model="gemma:2b")
embeddings

OllamaEmbeddings(model='gemma:2b', validate_model_on_init=False, base_url=None, client_kwargs={}, async_client_kwargs={}, sync_client_kwargs={}, mirostat=None, mirostat_eta=None, mirostat_tau=None, num_ctx=None, num_gpu=None, keep_alive=None, num_thread=None, repeat_last_n=None, repeat_penalty=None, temperature=None, stop=None, tfs_z=None, top_k=None, top_p=None)

In [2]:
# Embed two short sentences in one call; the result holds one vector per
# input sentence, in the same order.
r1 = embeddings.embed_documents(texts=[
    "Alpha is the first letter of Greek alphabet",
    "Beta is the second letter of Greek alphabet",
])
r1

[[-0.035815116,
  -0.013962773,
  0.0052150865,
  0.043568313,
  -0.0055166003,
  0.0045032105,
  -0.011376285,
  -0.006312422,
  -0.0014207044,
  -0.015014854,
  0.009569753,
  0.008638977,
  0.013403333,
  -0.0146921,
  -0.011642571,
  -0.0017025727,
  0.06274001,
  -0.010987698,
  0.017832292,
  0.0033997174,
  0.007944931,
  -0.0065994123,
  0.0038537516,
  0.0033534549,
  -0.016253492,
  -0.010046466,
  -0.009217481,
  0.0017356202,
  0.0044503044,
  0.024190327,
  -0.0035104128,
  0.00015908925,
  0.0191279,
  0.009758478,
  -0.023555536,
  -0.0012452665,
  -0.017024608,
  0.012699231,
  0.0048338133,
  -0.004126083,
  0.021962846,
  0.00957391,
  0.015496152,
  -0.0014647716,
  -0.00084738474,
  -0.02366197,
  0.024617458,
  0.00033993565,
  -0.022350337,
  0.0039692856,
  -0.26383677,
  -0.15769972,
  -0.019034714,
  -0.0037712734,
  -0.028734174,
  -0.010687475,
  -0.016549073,
  0.0057868897,
  -0.0112727685,
  0.012421704,
  -0.0033519655,
  -0.008743162,
  -0.017213149,
  -

In [3]:
# Length of the first document vector, i.e. the embedding dimensionality
# produced by the model configured above.
len(r1[0])

2048

In [4]:
# Embed a single query string and show the length of the resulting vector.
text = "What is the second letter of Greek alphabet?"
query_result = embeddings.embed_query(text=text)
len(query_result)

2048

In [5]:
## Text Loader
from langchain_community.document_loaders import TextLoader

# Read the speech text file into LangChain documents.
# The path is relative to the notebook's working directory.
loader = TextLoader(file_path='../resources/speech.txt')
text_documents = loader.load()
text_documents

[Document(metadata={'source': '../resources/speech.txt'}, page_content='In deep learning, transformer is a neural network architecture based on the multi-head attention mechanism, in which text is converted to numerical representations called tokens, and each token is converted into a vector via lookup from a word embedding table.[1] At each layer, each token is then contextualized within the scope of the context window with other (unmasked) tokens via a parallel multi-head attention mechanism, allowing the signal for key tokens to be amplified and less important tokens to be diminished.\n\nTransformers have the advantage of having no recurrent units, therefore requiring less training time than earlier recurrent neural architectures (RNNs) such as long short-term memory (LSTM).[2] Later variations have been widely adopted for training large language models (LLMs) on large (language) datasets.[3]\n\nThe modern version of the transformer was proposed in the 2017 paper "Attention Is All Y

In [6]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Break the loaded documents into chunks of at most 500 characters,
# with 50 characters shared between neighbouring chunks.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=500,
    chunk_overlap=50,
)
final_documents = text_splitter.split_documents(documents=text_documents)
final_documents

[Document(metadata={'source': '../resources/speech.txt'}, page_content='In deep learning, transformer is a neural network architecture based on the multi-head attention mechanism, in which text is converted to numerical representations called tokens, and each token is converted into a vector via lookup from a word embedding table.[1] At each layer, each token is then contextualized within the scope of the context window with other (unmasked) tokens via a parallel multi-head attention mechanism, allowing the signal for key tokens to be amplified and less important'),
 Document(metadata={'source': '../resources/speech.txt'}, page_content='for key tokens to be amplified and less important tokens to be diminished.'),
 Document(metadata={'source': '../resources/speech.txt'}, page_content='Transformers have the advantage of having no recurrent units, therefore requiring less training time than earlier recurrent neural architectures (RNNs) such as long short-term memory (LSTM).[2] Later varia

In [7]:
## Vector Embedding and Vector Store DB
from langchain_community.vectorstores import Chroma

# Build a Chroma vector store from the chunks, embedding each one with
# the `embeddings` client defined earlier in the notebook.
db = Chroma.from_documents(
    documents=final_documents,
    embedding=embeddings,
)
db

<langchain_community.vectorstores.chroma.Chroma at 0x118a7a6e0>

In [8]:
## Query the vector store and show the most similar chunks
# NOTE(review): the query is Spanish while the indexed text is English -
# presumably intentional, to try cross-language retrieval; confirm.
query = "Que es un Transformer?"
retrieved_results = db.similarity_search(query=query)
retrieved_results

[Document(metadata={'source': '../resources/speech.txt'}, page_content='Transformers have the advantage of having no recurrent units, therefore requiring less training time than earlier recurrent neural architectures (RNNs) such as long short-term memory (LSTM).[2] Later variations have been widely adopted for training large language models (LLMs) on large (language) datasets.[3]'),
 Document(metadata={'source': '../resources/speech.txt'}, page_content='even playing chess.[10] It has also led to the development of pre-trained systems, such as generative pre-trained transformers (GPTs)[11] and BERT[12] (bidirectional encoder representations from transformers).'),
 Document(metadata={'source': '../resources/speech.txt'}, page_content='for key tokens to be amplified and less important tokens to be diminished.'),
 Document(metadata={'source': '../resources/speech.txt'}, page_content='The modern version of the transformer was proposed in the 2017 paper "Attention Is All You Need" by researc

In [9]:
### Other Embedding Models
### https://ollama.com/blog/embedding-models

# Note: this rebinds `embeddings`, `text` and `query_result` from earlier
# cells, so cells above re-run after this one will pick up the new values.
embeddings = OllamaEmbeddings(model="mxbai-embed-large")
text = "This is a test document."
query_result = embeddings.embed_query(text=text)
query_result

[0.007115886,
 -0.01859572,
 0.055750184,
 0.024308423,
 -0.051249996,
 -0.0021895592,
 -0.0024503348,
 -0.008703062,
 0.022067511,
 0.055213656,
 0.013423865,
 0.026502533,
 0.026831796,
 0.007171958,
 -0.020773634,
 0.012967359,
 -0.02250218,
 0.03116162,
 -0.030890204,
 -0.028512252,
 -0.042156376,
 0.014364254,
 -0.053492423,
 -0.019483844,
 -0.025103685,
 0.03528237,
 -0.012705377,
 0.0042947484,
 0.06716325,
 0.018897109,
 -0.004126084,
 0.015779322,
 -0.0359606,
 -0.047145445,
 0.020437004,
 -0.043359198,
 0.06231037,
 -0.035126634,
 0.0022438436,
 -0.031958126,
 0.004531385,
 0.00976794,
 0.04288005,
 -0.034264904,
 -0.08437538,
 -0.044229124,
 -0.014799712,
 -0.00810004,
 -0.027713168,
 -0.027542407,
 0.014831756,
 0.030830864,
 -0.010884217,
 -0.035572667,
 0.0006072619,
 -0.020553628,
 -0.024287587,
 -0.024687767,
 -0.036252476,
 0.06537489,
 0.052428693,
 0.0099111395,
 0.018032875,
 -0.039189182,
 -0.023859845,
 0.011019015,
 -0.011203443,
 0.0054785535,
 0.01142023,
 -0.0

In [10]:
# Length of the query vector produced by the "mxbai-embed-large" model
# in the previous cell.
len(query_result)

1024