In [1]:
import pandas as pd
import numpy as np

In [2]:
# Candidate mentee bios, one free-text description per mentee.
# The list order matters: a later cell indexes back into this list by position.
mentees = [
    (
        "Artificial intelligence (AI) specialist out of Denver, Colorado "
        "with skills in engineering and computer science to create machines "
        "and software programs that can think for you."
    ),
    (
        "Software Developer at IBM experienced in design, programming, "
        "testing, and providing industry-leading solutions that make the "
        "world run."
    ),
    (
        "Application Package Specialist with experience in helping clients "
        "in the selection, implementation, and production support of "
        "application packaged solutions."
    ),
]

In [3]:
# Mentor bio: the reference text that every mentee bio is compared against.
mentor = (
    "Director of engineering in Silver Spring, Maryland "
    "with experience in Artificial intelligence"
)

In [4]:
# Build the batch to embed: the mentor goes first (index 0), followed by the
# mentee bios in their original order (indices 1..n).
sentences = [mentor, *mentees]
sentences

['Director of engineering in Silver Spring, Maryland with experience in Artificial intelligence',
 'Artificial intelligence (AI) specialist out of Denver, Colorado with skills in engineering and computer science to create machines and software programs that can think for you.',
 'Software Developer at IBM experienced in design, programming, testing, and providing industry-leading solutions that make the world run.',
 'Application Package Specialist with experience in helping clients in the selection, implementation, and production support of application packaged solutions.']

Initialize the model

In [5]:
from sentence_transformers import SentenceTransformer

# Load the sentence-embedding model identified by 'bert-base-nli-mean-tokens'.
# NOTE(review): this checkpoint is reportedly flagged as deprecated upstream —
# confirm against its model card before relying on the similarity scores.
model = SentenceTransformer('bert-base-nli-mean-tokens')

  from .autonotebook import tqdm as notebook_tqdm


Encode sentences

In [6]:
# Embed the whole batch (mentor first, then the mentees) in one encode call.
sentence_embeddings = model.encode(sentences)

In [7]:
# Sanity check: there should be one embedding row per input sentence.
sentence_embeddings.shape

(4, 768)

Find most similar sentence

In [8]:
from sklearn.metrics.pairwise import cosine_similarity

Let's calculate the cosine similarity between sentence 0 (the mentor) and each of the remaining sentences (the mentees)

In [9]:
# Score the mentor embedding (row 0) against every mentee embedding (rows 1+).
# The query is wrapped in a list so it is passed as a batch of one sample; the
# similarity matrix therefore has a single row, which is pulled out and turned
# into a plain Python list holding one score per mentee.
mentor_embedding = [sentence_embeddings[0]]
mentee_embeddings = sentence_embeddings[1:]
result = cosine_similarity(mentor_embedding, mentee_embeddings)[0].tolist()
result

[0.6850104331970215, 0.5846598148345947, 0.421392023563385]

Get the indices that sort the similarity scores in descending order

In [10]:
# Rank mentee positions from the highest similarity score to the lowest.
# sorted() is stable, so mentees with equal scores keep their original order.
ranked_pairs = sorted(enumerate(result), key=lambda pair: pair[1], reverse=True)
sorted_indices = [position for position, _score in ranked_pairs]

In [11]:
# Show the ranking: positions into `mentees`, best match first.
sorted_indices

[0, 1, 2]

In [12]:
# Reorder the mentee bios so they follow the similarity ranking (best match first).
sorted_elements = [mentees[mentee_idx] for mentee_idx in sorted_indices]


In [13]:
# End the cell on the bare expression, like the other display cells in this
# notebook, so the ranked bios render as a cell output with one entry per line
# instead of a single long printed line.
sorted_elements

['Artificial intelligence (AI) specialist out of Denver, Colorado with skills in engineering and computer science to create machines and software programs that can think for you.', 'Software Developer at IBM experienced in design, programming, testing, and providing industry-leading solutions that make the world run.', 'Application Package Specialist with experience in helping clients in the selection, implementation, and production support of application packaged solutions.']


Download pre-trained model

In [1]:
from transformers import AutoTokenizer, AutoModel

In [2]:
# Load the tokenizer and the model from the same checkpoint identifier so the
# two always stay in sync.
checkpoint = 'sentence-transformers/bert-base-nli-mean-tokens'
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModel.from_pretrained(checkpoint)

Downloading (…)okenizer_config.json: 100%|██████████| 399/399 [00:00<00:00, 1.52MB/s]
Downloading (…)lve/main/config.json: 100%|██████████| 625/625 [00:00<00:00, 6.44MB/s]
Downloading (…)solve/main/vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 6.65MB/s]
Downloading (…)/main/tokenizer.json: 100%|██████████| 466k/466k [00:00<00:00, 21.4MB/s]
Downloading (…)in/added_tokens.json: 100%|██████████| 2.00/2.00 [00:00<00:00, 10.1kB/s]
Downloading (…)cial_tokens_map.json: 100%|██████████| 112/112 [00:00<00:00, 1.02MB/s]
Downloading pytorch_model.bin: 100%|██████████| 438M/438M [00:06<00:00, 66.6MB/s] 


In [3]:
# Save the tokenizer and the model into the same local directory.
save_dir = '../src/models/'
tokenizer.save_pretrained(save_dir)
model.save_pretrained(save_dir)