In [9]:
from sentence_transformers import SentenceTransformer, util
import torch

# Checkpoint id of the sentence-embedding model; the single `embedder`
# instance built from it is what `semantic_search` and the corpus-encoding
# cells use.
MODEL_NAME = 'sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2'
embedder = SentenceTransformer(MODEL_NAME)

def semantic_search(queries, corpus_embeddings, top_k, corpus_sentences=None):
    """Print the ``top_k`` corpus sentences most similar to each query.

    Each query is encoded with the notebook-level ``embedder`` and scored
    against ``corpus_embeddings`` through ``util.semantic_search``. The
    matching sentences and their scores are printed; nothing is returned.

    Args:
        queries: A single query string or an iterable of query strings.
        corpus_embeddings: Embeddings of the corpus sentences, e.g. the
            result of ``embedder.encode(corpus, convert_to_tensor=True)``.
        top_k: Maximum number of hits requested per query.
        corpus_sentences: The sentences that ``corpus_embeddings`` was built
            from, in the same order. When omitted, the notebook-level
            ``corpus`` variable is used, which is the original behaviour.
            Passing it explicitly avoids printing text from a ``corpus``
            that was reassigned after the embeddings were computed.
    """
    # A bare string would otherwise be iterated character by character.
    if isinstance(queries, str):
        queries = [queries]

    # Prefer the explicit sentence list; fall back to the global for callers
    # that still rely on the three-argument form.
    sentences = corpus if corpus_sentences is None else corpus_sentences

    for query in queries:
        print('searching for similarities with: {}'.format(query))
        query_embedding = embedder.encode(query, convert_to_tensor=True)
        # Only one query is submitted per call, so take the first hit list.
        hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=top_k)[0]
        for hit in hits:
            print('- ', sentences[hit['corpus_id']], "(Score: {:.4f})".format(hit['score']))

In [15]:
# English query searched against an English corpus.
queries = ["A man is eating pasta."]

corpus = [
    "A man is eating food.",
    "A man is eating a piece of bread.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
    "A woman is playing violin.",
    "Two men pushed carts through the woods.",
    "A man is riding a white horse on an enclosed ground.",
    "A monkey is playing drums.",
    "A cheetah is running behind its prey.",
]

# Embed the corpus defined in this cell so the hit indices returned by the
# search line up with the `corpus` list that `semantic_search` prints from.
corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
semantic_search(queries, corpus_embeddings, top_k=5)

searching for similarities with: A man is eating pasta.
-  A man is eating food. (Score: 0.6734)
-  A man is eating a piece of bread. (Score: 0.4269)
-  A man is riding a horse. (Score: 0.2086)
-  A man is riding a white horse on an enclosed ground. (Score: 0.1020)
-  A cheetah is running behind its prey. (Score: 0.0566)


In [22]:
# English queries searched against an Arabic corpus.
queries = [
    "a man is eating pasta",
    "horseback riding",
    "stomach ache",
]

corpus = [
    "الرجل يأكل طعام",
    "الرجل يأكل خبز",
    "البنت تحمل طفلها",
    "الرجل يمتطي الحصان",
    "المرأة تعزف على الغيتار",
    "المرأة لديها وجع في الرأس",
    "السعدان يأكل موزة",
    "السعدان يلعب مع القطة",
]

# Embed the corpus defined in this cell so the hit indices returned by the
# search line up with the `corpus` list that `semantic_search` prints from.
corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
semantic_search(queries, corpus_embeddings, top_k=3)

searching for similarities with: a man is eating pasta
-  الرجل يأكل طعام (Score: 0.6864)
-  الرجل يأكل خبز (Score: 0.4017)
-  السعدان يأكل موزة (Score: 0.1844)
searching for similarities with: horseback riding
-  الرجل يمتطي الحصان (Score: 0.7628)
-  السعدان يأكل موزة (Score: 0.2227)
-  السعدان يلعب مع القطة (Score: 0.0845)
searching for similarities with: stomach ache
-  المرأة لديها وجع في الرأس (Score: 0.2064)
-  الرجل يأكل طعام (Score: 0.1998)
-  السعدان يأكل موزة (Score: 0.1638)


In [23]:
# Arabic queries searched against an English corpus.
queries = [
    "الرجل يأكل طعام",
    "الرجل يمتطي الحصان",
    "وجع في الرأس",
]

corpus = [
    "A man is eating food.",
    "A man is eating a piece of bread.",
    "The girl is carrying a baby.",
    "A man is riding a horse.",
    "A woman is playing violin.",
    "Two men pushed carts through the woods.",
    "A man is riding a white horse on an enclosed ground.",
    "A monkey is playing drums.",
    "A cheetah is running behind its prey.",
]

# Embed the corpus defined in this cell so the hit indices returned by the
# search line up with the `corpus` list that `semantic_search` prints from.
corpus_embeddings = embedder.encode(corpus, convert_to_tensor=True)
semantic_search(queries, corpus_embeddings, top_k=5)

searching for similarities with: الرجل يأكل طعام
-  A man is eating food. (Score: 0.9802)
-  A man is eating a piece of bread. (Score: 0.7294)
-  A man is riding a horse. (Score: 0.2755)
-  A man is riding a white horse on an enclosed ground. (Score: 0.1723)
-  A monkey is playing drums. (Score: 0.0750)
searching for similarities with: الرجل يمتطي الحصان
-  A man is riding a horse. (Score: 0.8233)
-  A man is riding a white horse on an enclosed ground. (Score: 0.6816)
-  A cheetah is running behind its prey. (Score: 0.2546)
-  Two men pushed carts through the woods. (Score: 0.2129)
-  A man is eating food. (Score: 0.1383)
searching for similarities with: وجع في الرأس
-  A monkey is playing drums. (Score: 0.2033)
-  A man is eating a piece of bread. (Score: 0.1074)
-  A cheetah is running behind its prey. (Score: 0.0895)
-  A man is riding a white horse on an enclosed ground. (Score: 0.0477)
-  Two men pushed carts through the woods. (Score: 0.0431)
