# Experiments with the LlamaIndex framework

For production use, see the LlamaIndex OpenVINO embeddings example: https://docs.llamaindex.ai/en/stable/examples/embeddings/openvino/

In [1]:
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

# Hugging Face model id of the embedding model used by the cells below.
MODEL_NAME = "intfloat/multilingual-e5-small"
# NOTE(review): the E5 model card asks for "query: " / "passage: " input prefixes;
# no query/text instruction is passed here — confirm whether one is needed.
embedding_model = HuggingFaceEmbedding(model_name=MODEL_NAME)

  from .autonotebook import tqdm as notebook_tqdm


## Testing sentence similarity

### Get text embedding

In [2]:
# Embed one short string to inspect what the model returns.
text = "Hi!"
emb = embedding_model.get_text_embedding(text)
# Show the container type and the embedding length.
type(emb), len(emb)

  return forward_call(*args, **kwargs)


(list, 384)

### Multilingual sentence similarity

#### Mock query and documents

In [3]:
# English query used for every similarity test in this notebook.
query = "What are the benefits of regular physical activity on mental health?"

# Candidate documents written in several languages; the trailing comment on
# each line is its English translation.
documents = [
    "La actividad física regular ayuda a reducir los síntomas de ansiedad y depresión, mejorando el estado de ánimo gracias a la liberación de endorfinas.",  # Regular physical activity helps reduce symptoms of anxiety and depression, improving mood thanks to the release of endorphins.
    "Die industrielle Revolution hatte erhebliche Auswirkungen auf die Urbanisierung und die Massenproduktion.",  # The industrial revolution had significant impacts on urbanization and mass production.
    "Les principales attractions touristiques de Rome incluent le Colisée, le Vatican et la Piazza Navona.",  # The main tourist attractions in Rome include the Colosseum, the Vatican, and Piazza Navona.
    "Photosynthesis is the process by which plants convert solar energy into chemical energy.",  # Photosynthesis is the process by which plants convert solar energy into chemical energy.
    "L'attività culturale svolge un ruolo importante nello sviluppo personale e sociale.",  # Cultural activities play an important role in personal and social development.
    "日常の散歩は健康維持に非常に効果的です。",  # Daily walking is very effective for maintaining health.
    "La revolución tecnológica ha cambiado la forma en que vivimos y trabajamos.",  # The technological revolution has changed the way we live and work.
    "Das italienische Rechtssystem sieht drei Instanzen vor.",  # The Italian legal system provides for three levels of jurisdiction.
    "Le sport améliore souvent la concentration et la discipline.",  # Sports often improve concentration and discipline.
    "健康的な食生活は長寿に寄与します。",  # A healthy diet contributes to longevity.
]



#### Calculate embeddings

In [4]:
import torch

def tt(li: list) -> torch.Tensor:
    """
    Convert a (possibly nested) Python list into a ``torch.Tensor``.

    Args:
        li: List of numbers, or a list of equally sized lists of numbers.

    Returns:
        A new tensor built by ``torch.tensor(li)``; its dtype and shape are
        inferred from the list contents.
    """
    # ``torch.Tensor`` is the tensor class used in the annotation above;
    # ``torch.tensor`` is only the factory function that builds one.
    return torch.tensor(li)

# The query goes through the query-side embedding API and the documents through
# the text-side batch API, so any model-specific query/passage handling is
# applied to the right kind of input.
query_emb = tt(embedding_model.get_query_embedding(query))
docs_emb = tt(embedding_model.get_text_embedding_batch(documents))

#### Retrieve the most similar document

In [5]:
# Dot-product score of the query embedding against every document embedding.
scores = query_emb @ docs_emb.T
# Highest score together with the position of the document that produced it.
max_score, max_idx = torch.max(scores, dim=0)

In [6]:
# Look up the best-scoring document, converting the index to a plain int first.
documents[int(max_idx)]

'La actividad física regular ayuda a reducir los síntomas de ansiedad y depresión, mejorando el estado de ánimo gracias a la liberación de endorfinas.'

## FAISS Vector Store

### Create a FAISS index

In [7]:
import faiss
# Vector dimensionality, read from the length of the query embedding.
d = query_emb.size(0)
# Flat L2-distance index sized for vectors of that dimensionality.
faiss_index = faiss.IndexFlatL2(d)

### Import the vector store components

In [8]:
from llama_index.core import (
    Document,
    load_index_from_storage,
    VectorStoreIndex,
    StorageContext,
)
from llama_index.vector_stores.faiss import FaissVectorStore
from IPython.display import Markdown, display

### Create documents to load

In [9]:
# Wrap every raw string in a LlamaIndex Document. A comprehension replaces the
# append loop and avoids leaving a stray loop variable in the notebook namespace.
documents_to_load = [Document(text=doc_text) for doc_text in documents]

In [10]:
# Inspect the first wrapped document.
documents_to_load[0]

Document(id_='8355c554-1183-44b2-bf87-95e8e2e5fef4', embedding=None, metadata={}, excluded_embed_metadata_keys=[], excluded_llm_metadata_keys=[], relationships={}, metadata_template='{key}: {value}', metadata_separator='\n', text_resource=MediaResource(embeddings=None, data=None, text='La actividad física regular ayuda a reducir los síntomas de ansiedad y depresión, mejorando el estado de ánimo gracias a la liberación de endorfinas.', path=None, url=None, mimetype=None), image_resource=None, audio_resource=None, video_resource=None, text_template='{metadata_str}\n\n{content}')

### Create FAISS vector store from documents

In [None]:
# Wrap the FAISS index created earlier in a LlamaIndex vector store.
# NOTE(review): this reuses the existing `faiss_index` object, so re-running the
# cell presumably adds the same vectors again — confirm, or recreate the index first.
vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
# Build the index from the wrapped documents, embedding them with `embedding_model`.
index = VectorStoreIndex.from_documents(
    documents_to_load, storage_context=storage_context, embed_model=embedding_model
)

### Retrieve

In [22]:
# Despite its name, `engine` is the retriever returned by `as_retriever`,
# configured with similarity_top_k=1.
engine = index.as_retriever(similarity_top_k=1)
# Text of the first result retrieved for the query.
engine.retrieve(query)[0].text

  return forward_call(*args, **kwargs)


'La actividad física regular ayuda a reducir los síntomas de ansiedad y depresión, mejorando el estado de ánimo gracias a la liberación de endorfinas.'