In [14]:
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import os
import numpy as np
import pickle

To find how similar a user request is to each of the embedded elements, we must first embed the user input and then compare the resulting vector with the stored vectors. This can be done with `cosine_similarity` from [sklearn](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.pairwise.cosine_similarity.html). 

<img src="media/cosine.png" width="450">

Similarity values range from -1 to 1. A value of -1 means the vectors point in opposite directions, 0 means they are orthogonal (unrelated), and 1 means they point in the same direction (most similar).

The following function, `find_closest_sentence`, finds the stored embedding with the highest cosine similarity to the user input and returns the sentence associated with it.

In [15]:
def find_closest_sentence(input_sentence, sentences, embeddings):
    """Return the entry of `sentences` whose embedding best matches the input.

    The input is encoded with the module-level `model` (it must already be
    defined when this function is called), compared against every row of
    `embeddings` by cosine similarity, and the entry of `sentences` at the
    position of the highest score is returned.

    Args:
        input_sentence: Text to look up.
        sentences: Indexable collection of candidates; presumably aligned
            position-by-position with `embeddings` (verify against the
            notebook that generates them).
        embeddings: 2-D array-like with one embedding per row, as accepted
            by `cosine_similarity`.

    Returns:
        The element of `sentences` at the index of the most similar embedding.
    """
    # Encode as a one-element batch and keep the single resulting vector.
    query_vector = model.encode([input_sentence])[0]
    # cosine_similarity yields one row per query; there is only one query here.
    scores = cosine_similarity([query_vector], embeddings)[0]
    return sentences[np.argmax(scores)]

In [None]:
# Sentence-embedding model. `find_closest_sentence` reads it through the
# module-level name `model`, so it has to exist before the loop below runs.
model = SentenceTransformer('paraphrase-multilingual-MiniLM-L12-v2')

embeddings_file = 'embeddings/embeddings.npy'
sentences_file = 'embeddings/sentences.npy'

if os.path.exists(embeddings_file) and os.path.exists(sentences_file):
    embeddings = np.load(embeddings_file)
    # NOTE(security): despite its .npy name, this file is read with pickle, and
    # unpickling can execute arbitrary code. Only load files you trust (for
    # example, ones you generated yourself).
    with open(sentences_file, 'rb') as f:
        sentences = pickle.load(f)

    # Simple interactive loop: type a request, get the closest stored element.
    while True:
        try:
            # Strip so that inputs such as "exit " or " exit" are still recognised.
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed stdin or an interrupted kernel ends the session cleanly
            # instead of leaving a traceback in the cell output.
            break

        if not user_input:
            # Nothing to search for; ask again rather than embedding an empty string.
            continue

        if user_input.lower() == 'exit':
            break

        closest_sentence = find_closest_sentence(user_input, sentences, embeddings)

        print(f"Here is the element that may meet your requirements: \n{closest_sentence}")
else:
    print("The embeddings have not been generated, execute the embeddings_generator notebook.")