# Embeddings Applications

Explore possible applications of embeddings.

# Setup Notebook

## Imports

In [2]:
# Import Standard Libraries
from transformers import AutoTokenizer, AutoModel
from sentence_transformers.util import semantic_search

## Data

In [3]:
# List of sentences
sentences = [
    "Python is a great programming language for Machine Learning projects.",
    "Java is a programming language mainly used for backend applications.",
    "HTML and CSS are used to develop web applications.",
]

# Semantic Search

## Sentence Transformers

In [4]:
# Tokenise
tokeniser = AutoTokenizer.from_pretrained("bert-base-uncased")
tokens = tokeniser(sentences, return_tensors="pt", padding=True)

In [5]:
# Create embeddings
transformer = AutoModel.from_pretrained("bert-base-uncased")
embeddings = transformer(**tokens).last_hidden_state

In [6]:
# Define the query
query = "What is Python language suited for?"
query_tokens = tokeniser(query,
                         return_tensors="pt",
                         padding='max_length',
                         max_length=len(tokens.input_ids[0]))
query_embeddings = transformer(**query_tokens).last_hidden_state

In [11]:
# Mean pooling the tokens' embeddings
embeddings = embeddings.mean(dim=1)
query_embeddings = query_embeddings.mean(dim=1)

In [12]:
# Fine the most relevant sentences
search_results = semantic_search(query_embeddings, embeddings, top_k=3)

In [13]:
search_results

[[{'corpus_id': 0, 'score': 0.7585159540176392},
  {'corpus_id': 1, 'score': 0.6306092739105225},
  {'corpus_id': 2, 'score': 0.610126793384552}]]