In [1]:
%pip install sentence_transformers ipywidgets einops datasets

Note: you may need to restart the kernel to use updated packages.


In [13]:
from sentence_transformers import SentenceTransformer
from pathlib import Path
import pandas as pd
import random
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from IPython.display import display
from collections import defaultdict
from tqdm import tqdm

In [3]:
# Baseline model: https://huggingface.co/sentence-transformers/all-mpnet-base-v2
# Downloaded weights are cached under ~/tmp_cache; the same folder is reused
# when the comparison models are loaded.
cache_folder = Path.home().joinpath("tmp_cache")
model = SentenceTransformer(
    model_name_or_path="all-mpnet-base-v2",
    cache_folder=cache_folder,
)

In [4]:
# Smoke test for the baseline model: two sentences about the weather and one
# unrelated sentence, encoded in a single batch.
example_sentences = [
    "The weather is lovely today.",
    "It's so sunny outside!",
    "He drove to the stadium.",
]
embeddings = model.encode(example_sentences)

In [5]:
# Compare every example embedding with every other one (including itself);
# the bare expression on the last line makes the notebook display the result.
similarities = model.similarity(embeddings, embeddings)
similarities

tensor([[1.0000, 0.6817, 0.0492],
        [0.6817, 1.0000, 0.0421],
        [0.0492, 0.0421, 1.0000]])

Top-performing original Sentence Transformers model for semantic search (see https://www.sbert.net/docs/sentence_transformer/pretrained_models.html):
multi-qa-mpnet-base-dot-v1

Multilingual semantic similarity model
distiluse-base-multilingual-cased-v1
https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v1

Instructor model (instruction-tuned: each text is embedded together with a task instruction):
hkunlp/instructor-large
https://huggingface.co/hkunlp/instructor-large

NV-Embed-v2 (not loaded in the comparison below because of its memory requirements):
https://huggingface.co/nvidia/NV-Embed-v2

# Generate test data

In [6]:
# Sample data pools
# Each synthetic employee record draws one value per field from these lists.
# Locations and departments deliberately mix languages and scripts.
first_names = [
    "Alice", "Bob", "Carlos", "Dmitry", "Emma", "Fatima", "George",
    "Hiroshi", "Isla", "Juan", "Katerina", "Luis", "Mikhail", "Natalia",
    "Omar", "Paula", "Quentin", "Ravi", "Sofia", "Tariq", "Ursula",
    "Victor", "Wei", "Xenia", "Yusuf", "Zara",
]
last_names = [
    "Anderson", "Brown", "Chen", "Diaz", "Evans", "Fernández", "Gonzalez",
    "Hernandez", "Ivanov", "Johnson", "Kowalski", "Lopez", "Martinez",
    "Novikov", "O'Connor", "Petrov", "Quintero", "Rodriguez", "Sanchez",
    "Taylor", "Ulyanov", "Vega", "Wang", "Xiao", "Yamamoto", "Zhukov",
]
job_titles = [
    "Data Scientist",
    "Software Engineer",
    "ML Engineer",
    "AI Researcher",
    "Financial Analyst",
    "Product Manager",
    "DevOps Engineer",
    "Cloud Architect",
    "Security Specialist",
    "QA Engineer",
    "Frontend Developer",
    "Backend Developer",
]
seniorities = ["Junior", "Senior", "Lead", "Principal", "Staff", "Intern"]
locations = [
    "New York, NY",
    "San Francisco, CA",
    "London, UK",
    "Madrid, España",
    "Berlin, DE",
    "Москва, Россия",
    "Paris, FR",
    "Home Office",
    "HQ Building 3",
    "Sao Paulo, BR",
    "Toronto, CA",
]
departments = [
    "AI", "Eng", "Finanzas", "R&D", "IT", "DevOps",
    "Infra", "Data", "CyberSec", "Маркетинг", "BizOps",
]

def introduce_variation(text, rng=None):
    """Return ``text`` after applying one randomly selected variation rule.

    Exactly one rule is chosen per call. Most rules only fire when their
    keyword is present and an additional random draw succeeds, so the input
    is frequently returned unchanged.

    Args:
        text: The string to perturb (a job title, location or department).
        rng: Optional source of randomness exposing ``random()`` and
            ``choice()``, e.g. a seeded ``random.Random``. When omitted, the
            module-level ``random`` generator is used, as before.

    Returns:
        The perturbed string, or ``text`` itself when the chosen rule did not
        apply.
    """
    rng = random if rng is None else rng
    # Every rule works on its own argument ``t`` so the rules stay
    # self-contained and can be reused or composed independently.
    variations = [
        lambda t: t.replace("Engineer", "Eng."),  # Abbreviation
        lambda t: t.replace("Data", "D."),  # Shorten "Data"
        lambda t: t.lower() if rng.random() < 0.1 else t,  # Random lowercase
        lambda t: t + " (remote)" if "Home Office" in t and rng.random() < 0.5 else t,  # Clarify remote work
        lambda t: t.replace("Senior", "Sr.") if "Senior" in t else t,  # Abbreviate Senior
        lambda t: t + " " + rng.choice(["Jr.", "II", "III"]) if "Intern" in t and rng.random() < 0.3 else t,  # Add suffix to Interns
        lambda t: t.replace("Analyst", "Analista") if "Analyst" in t and rng.random() < 0.2 else t,  # Spanish variation
        lambda t: t.replace("Manager", "Mgr.") if "Manager" in t and rng.random() < 0.2 else t,  # Shorten Manager
        lambda t: t[:-1] + rng.choice("xyz") if len(t) > 6 and rng.random() < 0.05 else t,  # Introduce small typo
    ]
    return rng.choice(variations)(text)

# Generate dataset
# Seed the global generator so that Restart & Run All rebuilds the same
# synthetic employees; unseeded, every run yields different records and
# therefore different retrieval results further down.
SEED = 42
N_EMPLOYEES = 100
random.seed(SEED)

employees = []
for _ in range(N_EMPLOYEES):
    first_name = random.choice(first_names)
    last_name = random.choice(last_names)
    name = f"{first_name} {last_name}"
    # Each field is perturbed independently, so a record may carry zero or
    # several variations.
    job_title = introduce_variation(f"{random.choice(seniorities)} {random.choice(job_titles)}")
    location = introduce_variation(random.choice(locations))
    department = introduce_variation(random.choice(departments))

    employees.append({"name": name, "job_title": job_title, "location": location, "department": department})

# One row per employee; columns follow the key order of the dicts above.
df = pd.DataFrame(employees)
df

Unnamed: 0,name,job_title,location,department
0,Bob Hernandez,Senior QA Engineer,"Berlin, DE",Infra
1,Zara Diaz,Junior DevOps Engineer,"Berlin, DE",R&D
2,Hiroshi Hernandez,Principal Backend Developer,"London, UK",Infra
3,Luis Novikov,Intern Data Scientist,"Berlin, DE",Маркетинг
4,Carlos Petrov,Principal AI Researcher,"New York, NY",R&D
...,...,...,...,...
95,Juan Zhukov,Staff Data Scientist,"Москва, Россия",IT
96,Juan Ivanov,Principal Software Engineer,"New York, NY",DevOps
97,Ravi Gonzalez,Junior Financial Analyst,"Москва, Россия",Infra
98,Ursula Quintero,Intern Backend Developer,"Sao Paulo, Bx",Маркетинг


In [7]:
# Flatten each record into the labelled, newline-separated text that the
# models embed. The employee name is intentionally not part of it.
df["text"] = (
    "Job title: " + df["job_title"]
    + "\nLocation: " + df["location"]
    + "\nDepartment: " + df["department"]
)

In [8]:
# Show the embedded text of the row labelled 0 (print renders the newlines).
print(df.loc[0, "text"])

Job title: Senior QA Engineer
Location: Berlin, DE
Department: Infra


In [9]:
# Free-form search queries that are run against every model below.
# The list mixes English, Spanish and Russian and contains abbreviations
# ("engs", "Sr.", "ppl"), a misspelling ("anaylst" - presumably deliberate,
# to probe robustness) and places that do not occur in the generated
# location pool (e.g. "México", "Argentina", "building 12").
questions = [
    "Who are the senior engineers working in NY?",
    "Find me all data scientists in London HQ",
    "People in AI dept. working remotely?",
    "Is there any anaylst in Madrid?",
    "Show all engs in SF office",
    "Lista de empleados en Finanzas en México",
    "Кто работает в отделе ИИ в Москве?",
    "Software devs in EU, any location",
    "Any Sr. Analyst in the Finance dept?",
    "Who’s working in AI at the Berlin office?",
    "Data scientists in remote, any country",
    "List of engineers in building 12?",
    "Can you find me a ML eng in US?",
    "Who are the people working in IT at Paris HQ?",
    "Finance folks in LatAm region?",
    "Empleados en el depto de ingeniería en Argentina",
    "Senior managers in tech, UK or Germany",
    "All ppl in eng, except remote",
    "Кто в Лондоне в отделе Финансов?",
    "Are there any junior devs in Madrid?"
]

In [10]:
# Embedding models compared in this notebook.
model_names = [
    "multi-qa-mpnet-base-dot-v1",
    "distiluse-base-multilingual-cased-v1",
    "hkunlp/instructor-large",
    # "nvidia/NV-Embed-v2", # This model requires a lot of memory
    "Alibaba-NLP/gte-Qwen2-1.5B-instruct"
]

# `trust_remote_code=True` executes Python files downloaded from the model
# repository, so it is enabled only for repositories known to ship custom
# modeling/tokenizer code instead of being switched on for every model.
# Add a repository here only after reviewing its code; pinning a `revision`
# additionally prevents silently picking up new versions of those files.
models_requiring_remote_code = {
    "Alibaba-NLP/gte-Qwen2-1.5B-instruct",
}

# All models are loaded eagerly and kept in memory, keyed by their name.
models = {
    name: SentenceTransformer(
        name,
        cache_folder=cache_folder,
        trust_remote_code=name in models_requiring_remote_code,
    )
    for name in model_names
}

The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/284 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/145k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/55.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/901 [00:00<?, ?B/s]

modeling_qwen.py:   0%|          | 0.00/65.2k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct:
- modeling_qwen.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors.index.json:   0%|          | 0.00/27.8k [00:00<?, ?B/s]

Downloading shards:   0%|          | 0/2 [00:00<?, ?it/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/2.11G [00:00<?, ?B/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

tokenizer_config.json:   0%|          | 0.00/1.31k [00:00<?, ?B/s]

tokenization_qwen.py:   0%|          | 0.00/10.8k [00:00<?, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/Alibaba-NLP/gte-Qwen2-1.5B-instruct:
- tokenization_qwen.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


vocab.json:   0%|          | 0.00/2.78M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/1.67M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/7.03M [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/80.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/370 [00:00<?, ?B/s]

1_Pooling/config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]

In [19]:
def get_top_matches(top_k=5):
    """Rank the employee texts against every question, once per loaded model.

    Reads the notebook-level ``models``, ``questions`` and ``df`` (its
    ``text`` column). For each model both the questions and the texts are
    encoded, compared with cosine similarity, and the best-scoring rows are
    kept per question.

    Args:
        top_k: Number of best-scoring rows kept per question and model.
            Defaults to 5, the previously hard-coded value.

    Returns:
        A ``defaultdict`` mapping ``question -> {model_name: (rows, scores)}``
        where ``rows`` is the ``df.iloc`` selection of the best rows and
        ``scores`` holds their cosine similarities, both ordered from most to
        least similar.

    Raises:
        ValueError: If ``top_k`` is smaller than 1.
    """
    if top_k < 1:
        # A zero or negative value would silently slice the wrong rows.
        raise ValueError("top_k must be a positive integer")

    # The texts are identical for every model, so build the list only once.
    texts = df['text'].tolist()

    top_matches = defaultdict(dict)
    for model_name, model in tqdm(models.items()):
        # Only the instructor model is given task instructions; every other
        # model is encoded without a prompt.
        # NOTE(review): instruction-tuned models such as gte-Qwen2 may expect
        # a query prompt of their own - confirm against the model card.
        is_instructor = model_name == "hkunlp/instructor-large"
        question_instruction = "Represent search query for retrieval of employee contacts: " if is_instructor else None
        text_instruction = "Represent employee attributes for retrieval: " if is_instructor else None

        # Encode the questions and the text column
        question_embeddings = model.encode(questions, prompt=question_instruction, show_progress_bar=True)
        text_embeddings = model.encode(texts, prompt=text_instruction, show_progress_bar=True)

        # Compute cosine similarity (one row per question, one column per text)
        cosine_similarities = cosine_similarity(question_embeddings, text_embeddings)

        # Retrieve the top_k matches for each question
        for i, question in enumerate(questions):
            # argsort is ascending, so reverse it before taking the head.
            top_indices = np.argsort(cosine_similarities[i])[::-1][:top_k]
            top_similarities = cosine_similarities[i][top_indices]
            top_matches[question][model_name] = (df.iloc[top_indices], top_similarities)

    return top_matches

top_matches = get_top_matches()

  0%|          | 0/4 [00:00<?, ?it/s]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

 25%|██▌       | 1/4 [00:05<00:17,  5.77s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

 50%|█████     | 2/4 [00:10<00:10,  5.21s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

 75%|███████▌  | 3/4 [00:17<00:05,  5.98s/it]

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

100%|██████████| 4/4 [03:24<00:00, 51.15s/it]


In [21]:
def display_top_matches(top_matches):
    """Print each question and show one styled result table per model.

    ``top_matches`` maps ``question -> {model_name: (rows, scores)}``. For
    every model the rows are copied, the scores are attached as a
    ``cosine_similarity`` column, and the table is rendered with centred
    cells and a highlighted score column. A ruler line closes each question.
    """
    shown_columns = ['name', 'job_title', 'location', 'department', 'cosine_similarity']
    table_styles = [
        {'selector': 'th', 'props': [('font-size', '120%'), ('text-align', 'center')]},
        {'selector': 'td', 'props': [('font-size', '110%'), ('text-align', 'center')]},
    ]
    score_highlight = {'background-color': 'lightyellow'}
    ruler = "\n" + "=" * 80 + "\n"

    for question, per_model in top_matches.items():
        print(f"Question: {question}\n")
        for model_name, (rows, scores) in per_model.items():
            print(f"Model: {model_name}\n")
            # Work on a copy so the stored selection is not modified.
            table = rows.copy()
            table['cosine_similarity'] = scores
            styler = table[shown_columns].style
            styler = styler.set_table_styles(table_styles)
            styler = styler.set_properties(subset=['cosine_similarity'], **score_highlight)
            display(styler)
        print(ruler)

# Render the result tables for every question and every model.
display_top_matches(top_matches)

Question: Who are the senior engineers working in NY?

Model: multi-qa-mpnet-base-dot-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
39,Victor Novikov,Lead ML Engineer,"New York, NY",R&D,0.683339
96,Juan Ivanov,Principal Software Engineer,"New York, NY",DevOps,0.668728
10,Natalia Chen,Senior ML Engineer,Home Office,IT,0.658557
12,Ravi Quintero,Staff Backend Developer,"New York, NY",Infra,0.640238
92,Victor Diaz,Junior ML Engineer,Home Office,IT,0.627096


Model: distiluse-base-multilingual-cased-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
39,Victor Novikov,Lead ML Engineer,"New York, NY",R&D,0.516727
96,Juan Ivanov,Principal Software Engineer,"New York, NY",DevOps,0.505807
44,Luis O'Connor,Staff DevOps Engineer,"San Francisco, CA",Eng,0.417064
89,George Novikov,Senior DevOps Engineer,"Москва, Россия",IT,0.413501
94,Natalia Hernandez,Staff DevOps Engineer,"Toronto, CA",BizOps,0.410772


Model: hkunlp/instructor-large



Unnamed: 0,name,job_title,location,department,cosine_similarity
96,Juan Ivanov,Principal Software Engineer,"New York, NY",DevOps,0.921197
12,Ravi Quintero,Staff Backend Developer,"New York, NY",Infra,0.910388
39,Victor Novikov,Lead ML Engineer,"New York, NY",R&D,0.901808
56,Bob Martinez,Lead Product Manager,"New York, NY",BizOps,0.89443
4,Carlos Petrov,Principal AI Researcher,"New York, NY",R&D,0.888664


Model: Alibaba-NLP/gte-Qwen2-1.5B-instruct



Unnamed: 0,name,job_title,location,department,cosine_similarity
96,Juan Ivanov,Principal Software Engineer,"New York, NY",DevOps,0.485743
39,Victor Novikov,Lead ML Engineer,"New York, NY",R&D,0.482007
4,Carlos Petrov,Principal AI Researcher,"New York, NY",R&D,0.46765
10,Natalia Chen,Senior ML Engineer,Home Office,IT,0.46448
12,Ravi Quintero,Staff Backend Developer,"New York, NY",Infra,0.463722




Question: Find me all data scientists in London HQ

Model: multi-qa-mpnet-base-dot-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
24,Alice Anderson,Lead Data Scientist,Home Office,Eng,0.677842
28,Victor O'Connor,Senior Data Scientist,Home Office,Finanzas,0.641377
29,Ravi Martinez,Lead Data Scientist,"San Francisco, CA",R&D,0.640458
42,Sofia Kowalski,Lead AI Researcher,"London, UK",Маркетинг,0.624645
3,Luis Novikov,Intern Data Scientist,"Berlin, DE",Маркетинг,0.618943


Model: distiluse-base-multilingual-cased-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
29,Ravi Martinez,Lead Data Scientist,"San Francisco, CA",R&D,0.373258
83,Tariq Taylor,Principal QA Engineer,"London, UK",CyberSec,0.356056
42,Sofia Kowalski,Lead AI Researcher,"London, UK",Маркетинг,0.339767
77,Mikhail Novikov,Lead Data Scientist,"Paris, FR",BizOps,0.32728
19,Victor Hernandez,Intern Data Scientist,"Paris, FR",Eng,0.320155


Model: hkunlp/instructor-large



Unnamed: 0,name,job_title,location,department,cosine_similarity
2,Hiroshi Hernandez,Principal Backend Developer,"London, UK",Infra,0.893701
24,Alice Anderson,Lead Data Scientist,Home Office,Eng,0.893477
42,Sofia Kowalski,Lead AI Researcher,"London, UK",Маркетинг,0.891435
32,Juan Brown,Principal DevOps Eng.,"London, UK",IT,0.889541
29,Ravi Martinez,Lead Data Scientist,"San Francisco, CA",R&D,0.887633


Model: Alibaba-NLP/gte-Qwen2-1.5B-instruct



Unnamed: 0,name,job_title,location,department,cosine_similarity
24,Alice Anderson,Lead Data Scientist,Home Office,Eng,0.574466
95,Juan Zhukov,Staff Data Scientist,"Москва, Россия",IT,0.541486
77,Mikhail Novikov,Lead Data Scientist,"Paris, FR",BizOps,0.540368
42,Sofia Kowalski,Lead AI Researcher,"London, UK",Маркетинг,0.527562
29,Ravi Martinez,Lead Data Scientist,"San Francisco, CA",R&D,0.521346




Question: People in AI dept. working remotely?

Model: multi-qa-mpnet-base-dot-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
54,Xenia Chen,Staff AI Researcher,Home Office,Eng,0.67153
65,Dmitry Vega,Intern AI Researcher,Home Office,CyberSec,0.637132
4,Carlos Petrov,Principal AI Researcher,"New York, NY",R&D,0.614724
27,George Kowalski,Intern Backend Developer,HQ Building 3,AI,0.606558
31,Ravi Brown,Principal Security Specialist,Home Office,AI,0.603362


Model: distiluse-base-multilingual-cased-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
54,Xenia Chen,Staff AI Researcher,Home Office,Eng,0.363669
65,Dmitry Vega,Intern AI Researcher,Home Office,CyberSec,0.360697
76,Alice O'Connor,Staff Frontend Developer,"Berlin, DE",AI,0.343949
35,Wei Ulyanov,Lead Backend Developer,"Москва, Россия",AI,0.330015
23,Tariq Yamamoto,Staff AI Researcher,"Sao Paulo, BR",DevOps,0.325932


Model: hkunlp/instructor-large



Unnamed: 0,name,job_title,location,department,cosine_similarity
54,Xenia Chen,Staff AI Researcher,Home Office,Eng,0.918683
65,Dmitry Vega,Intern AI Researcher,Home Office,CyberSec,0.914538
27,George Kowalski,Intern Backend Developer,HQ Building 3,AI,0.909319
23,Tariq Yamamoto,Staff AI Researcher,"Sao Paulo, BR",DevOps,0.908108
69,Sofia Chen,Intern AI Researcher,"Sao Paulo, BR",Eng,0.905316


Model: Alibaba-NLP/gte-Qwen2-1.5B-instruct



Unnamed: 0,name,job_title,location,department,cosine_similarity
54,Xenia Chen,Staff AI Researcher,Home Office,Eng,0.577924
65,Dmitry Vega,Intern AI Researcher,Home Office,CyberSec,0.53357
23,Tariq Yamamoto,Staff AI Researcher,"Sao Paulo, BR",DevOps,0.51839
69,Sofia Chen,Intern AI Researcher,"Sao Paulo, BR",Eng,0.487526
27,George Kowalski,Intern Backend Developer,HQ Building 3,AI,0.486444




Question: Is there any anaylst in Madrid?

Model: multi-qa-mpnet-base-dot-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
45,Ravi Anderson,Intern Frontend Developer,"Madrid, España",R&D,0.559383
93,Alice Johnson,Staff Financial Analyst,"Madrid, España",Маркетинг,0.555805
86,Dmitry Sanchez,Junior Cloud Architect,"Madrid, España",BizOps,0.532251
40,Ursula Zhukov,Intern Data Scientist,"Paris, FR",Маркетинг,0.477701
19,Victor Hernandez,Intern Data Scientist,"Paris, FR",Eng,0.474202


Model: distiluse-base-multilingual-cased-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
93,Alice Johnson,Staff Financial Analyst,"Madrid, España",Маркетинг,0.255764
45,Ravi Anderson,Intern Frontend Developer,"Madrid, España",R&D,0.255763
86,Dmitry Sanchez,Junior Cloud Architect,"Madrid, España",BizOps,0.227365
42,Sofia Kowalski,Lead AI Researcher,"London, UK",Маркетинг,0.11192
67,Emma Xiao,Intern AI Researcher,"Berlin, DE",BizOps,0.046538


Model: hkunlp/instructor-large



Unnamed: 0,name,job_title,location,department,cosine_similarity
93,Alice Johnson,Staff Financial Analyst,"Madrid, España",Маркетинг,0.905859
86,Dmitry Sanchez,Junior Cloud Architect,"Madrid, España",BizOps,0.877622
45,Ravi Anderson,Intern Frontend Developer,"Madrid, España",R&D,0.871849
3,Luis Novikov,Intern Data Scientist,"Berlin, DE",Маркетинг,0.844978
76,Alice O'Connor,Staff Frontend Developer,"Berlin, DE",AI,0.842614


Model: Alibaba-NLP/gte-Qwen2-1.5B-instruct



Unnamed: 0,name,job_title,location,department,cosine_similarity
93,Alice Johnson,Staff Financial Analyst,"Madrid, España",Маркетинг,0.530975
86,Dmitry Sanchez,Junior Cloud Architect,"Madrid, España",BizOps,0.434462
45,Ravi Anderson,Intern Frontend Developer,"Madrid, España",R&D,0.425259
28,Victor O'Connor,Senior Data Scientist,Home Office,Finanzas,0.336813
75,Natalia Martinez,Intern Financial Analyst,"Paris, FR",Eng,0.335857




Question: Show all engs in SF office

Model: multi-qa-mpnet-base-dot-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
71,Xenia Chen,Intern DevOps Eng.,Home Office,Eng,0.49333
70,Carlos O'Connor,Intern Financial Analyst,"San Francisco, CA",Eng,0.451919
27,George Kowalski,Intern Backend Developer,HQ Building 3,AI,0.434647
44,Luis O'Connor,Staff DevOps Engineer,"San Francisco, CA",Eng,0.433433
13,Yusuf Quintero,Intern Software Engineer,"San Francisco, CA",AI,0.433417


Model: distiluse-base-multilingual-cased-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
29,Ravi Martinez,Lead Data Scientist,"San Francisco, CA",R&D,0.272169
70,Carlos O'Connor,Intern Financial Analyst,"San Francisco, CA",Eng,0.268594
55,Fatima Gonzalez,Lead Cloud Architect,"San Francisco, CA",CyberSec,0.261758
61,Victor Rodriguez,Intern Cloud Architect,"San Francisco, CA",Finanzas,0.26001
62,Ursula Kowalski,Principal Financial Analyst,"San Francisco, CA",DevOps,0.259764


Model: hkunlp/instructor-large



Unnamed: 0,name,job_title,location,department,cosine_similarity
44,Luis O'Connor,Staff DevOps Engineer,"San Francisco, CA",Eng,0.899834
57,Victor Ulyanov,Staff Software Engineer,"San Francisco, Cx",Маркетинг,0.892574
84,Mikhail Ivanov,Staff Software Engineer,"San Francisco, CA",Data,0.888436
46,George Taylor,Lead Software Engineer,"San Francisco, CA",BizOps,0.886333
59,Xenia Kowalski,Staff Backend Developer,"San Francisco, CA",Finanzas,0.874861


Model: Alibaba-NLP/gte-Qwen2-1.5B-instruct



Unnamed: 0,name,job_title,location,department,cosine_similarity
44,Luis O'Connor,Staff DevOps Engineer,"San Francisco, CA",Eng,0.528877
84,Mikhail Ivanov,Staff Software Engineer,"San Francisco, CA",Data,0.484601
57,Victor Ulyanov,Staff Software Engineer,"San Francisco, Cx",Маркетинг,0.477668
70,Carlos O'Connor,Intern Financial Analyst,"San Francisco, CA",Eng,0.476841
25,Dmitry Evans,Staff ML Engineer,"sao paulo, br",Eng,0.459731




Question: Lista de empleados en Finanzas en México

Model: multi-qa-mpnet-base-dot-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
45,Ravi Anderson,Intern Frontend Developer,"Madrid, España",R&D,0.406733
93,Alice Johnson,Staff Financial Analyst,"Madrid, España",Маркетинг,0.401198
7,Paula Zhukov,Principal Backend Developer,"Paris, FR",Finanzas,0.389472
85,Alice Lopez,Staff Frontend Developer,"Berlin, DE",Finanzas,0.389082
75,Natalia Martinez,Intern Financial Analyst,"Paris, FR",Eng,0.370905


Model: distiluse-base-multilingual-cased-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
59,Xenia Kowalski,Staff Backend Developer,"San Francisco, CA",Finanzas,0.387295
93,Alice Johnson,Staff Financial Analyst,"Madrid, España",Маркетинг,0.363093
51,Ursula Brown,Staff Cloud Architect,"San Francisco, CA",Finanzas,0.349915
85,Alice Lopez,Staff Frontend Developer,"Berlin, DE",Finanzas,0.348124
26,Xenia O'Connor,Lead Financial Analyst,Home Office,Data,0.343957


Model: hkunlp/instructor-large



Unnamed: 0,name,job_title,location,department,cosine_similarity
93,Alice Johnson,Staff Financial Analyst,"Madrid, España",Маркетинг,0.88384
85,Alice Lopez,Staff Frontend Developer,"Berlin, DE",Finanzas,0.860282
86,Dmitry Sanchez,Junior Cloud Architect,"Madrid, España",BizOps,0.859103
98,Ursula Quintero,Intern Backend Developer,"Sao Paulo, Bx",Маркетинг,0.858551
28,Victor O'Connor,Senior Data Scientist,Home Office,Finanzas,0.857769


Model: Alibaba-NLP/gte-Qwen2-1.5B-instruct



Unnamed: 0,name,job_title,location,department,cosine_similarity
59,Xenia Kowalski,Staff Backend Developer,"San Francisco, CA",Finanzas,0.488569
51,Ursula Brown,Staff Cloud Architect,"San Francisco, CA",Finanzas,0.471572
81,Carlos Taylor,Lead Product Manager,"London, UK",Finanzas,0.465875
85,Alice Lopez,Staff Frontend Developer,"Berlin, DE",Finanzas,0.463491
93,Alice Johnson,Staff Financial Analyst,"Madrid, España",Маркетинг,0.458797




Question: Кто работает в отделе ИИ в Москве?

Model: multi-qa-mpnet-base-dot-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
41,Juan Petrov,Lead Backend Developer,"Москва, Россия",Data,0.461404
35,Wei Ulyanov,Lead Backend Developer,"Москва, Россия",AI,0.438696
95,Juan Zhukov,Staff Data Scientist,"Москва, Россия",IT,0.431201
89,George Novikov,Senior DevOps Engineer,"Москва, Россия",IT,0.425074
15,Natalia Brown,Principal QA Eng.,"Москва, Россия",Маркетинг,0.418949


Model: distiluse-base-multilingual-cased-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
66,Omar Petrov,Intern QA Engineer,"Москва, Россия",IT,0.436771
35,Wei Ulyanov,Lead Backend Developer,"Москва, Россия",AI,0.43021
95,Juan Zhukov,Staff Data Scientist,"Москва, Россия",IT,0.428021
65,Dmitry Vega,Intern AI Researcher,Home Office,CyberSec,0.425307
89,George Novikov,Senior DevOps Engineer,"Москва, Россия",IT,0.412031


Model: hkunlp/instructor-large



Unnamed: 0,name,job_title,location,department,cosine_similarity
95,Juan Zhukov,Staff Data Scientist,"Москва, Россия",IT,0.902042
89,George Novikov,Senior DevOps Engineer,"Москва, Россия",IT,0.901306
41,Juan Petrov,Lead Backend Developer,"Москва, Россия",Data,0.900926
17,Juan Xiao,Junior Frontend Developer,"Москва, Россия",Infra,0.896129
47,Emma O'Connor,Senior Cloud Architect,"Москва, Россия",Eng,0.895054


Model: Alibaba-NLP/gte-Qwen2-1.5B-instruct



Unnamed: 0,name,job_title,location,department,cosine_similarity
95,Juan Zhukov,Staff Data Scientist,"Москва, Россия",IT,0.618057
66,Omar Petrov,Intern QA Engineer,"Москва, Россия",IT,0.597745
15,Natalia Brown,Principal QA Eng.,"Москва, Россия",Маркетинг,0.581292
89,George Novikov,Senior DevOps Engineer,"Москва, Россия",IT,0.579515
52,Fatima Lopez,Senior QA Engineer,"Москва, Россия",Data,0.55476




Question: Software devs in EU, any location

Model: multi-qa-mpnet-base-dot-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
45,Ravi Anderson,Intern Frontend Developer,"Madrid, España",R&D,0.628263
38,Ravi Anderson,Lead Frontend Developer,"London, UK",Маркетинг,0.613955
85,Alice Lopez,Staff Frontend Developer,"Berlin, DE",Finanzas,0.612244
76,Alice O'Connor,Staff Frontend Developer,"Berlin, DE",AI,0.6033
5,Ravi Petrov,Staff DevOps Engineer,"Paris, FR",IT,0.59147


Model: distiluse-base-multilingual-cased-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
96,Juan Ivanov,Principal Software Engineer,"New York, NY",DevOps,0.347557
6,Mikhail Rodriguez,Senior DevOps Eng.,"London, UK",IT,0.315075
32,Juan Brown,Principal DevOps Eng.,"London, UK",IT,0.313522
13,Yusuf Quintero,Intern Software Engineer,"San Francisco, CA",AI,0.3077
46,George Taylor,Lead Software Engineer,"San Francisco, CA",BizOps,0.295772


Model: hkunlp/instructor-large



Unnamed: 0,name,job_title,location,department,cosine_similarity
85,Alice Lopez,Staff Frontend Developer,"Berlin, DE",Finanzas,0.903958
5,Ravi Petrov,Staff DevOps Engineer,"Paris, FR",IT,0.901517
1,Zara Diaz,Junior DevOps Engineer,"Berlin, DE",R&D,0.896513
76,Alice O'Connor,Staff Frontend Developer,"Berlin, DE",AI,0.895967
59,Xenia Kowalski,Staff Backend Developer,"San Francisco, CA",Finanzas,0.894277


Model: Alibaba-NLP/gte-Qwen2-1.5B-instruct



Unnamed: 0,name,job_title,location,department,cosine_similarity
5,Ravi Petrov,Staff DevOps Engineer,"Paris, FR",IT,0.501857
1,Zara Diaz,Junior DevOps Engineer,"Berlin, DE",R&D,0.476947
53,Tariq Diaz,Staff Frontend Developer,Home Office,IT,0.47592
85,Alice Lopez,Staff Frontend Developer,"Berlin, DE",Finanzas,0.475408
45,Ravi Anderson,Intern Frontend Developer,"Madrid, España",R&D,0.475169




Question: Any Sr. Analyst in the Finance dept?

Model: multi-qa-mpnet-base-dot-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
97,Ravi Gonzalez,Junior Financial Analyst,"Москва, Россия",Infra,0.639927
26,Xenia O'Connor,Lead Financial Analyst,Home Office,Data,0.618639
70,Carlos O'Connor,Intern Financial Analyst,"San Francisco, CA",Eng,0.59725
62,Ursula Kowalski,Principal Financial Analyst,"San Francisco, CA",DevOps,0.596304
58,Ravi Gonzalez,Junior Financial Analyst,"Berlin, DE",BizOps,0.591061


Model: distiluse-base-multilingual-cased-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
75,Natalia Martinez,Intern Financial Analyst,"Paris, FR",Eng,0.410893
70,Carlos O'Connor,Intern Financial Analyst,"San Francisco, CA",Eng,0.409268
64,Victor Quintero,Lead Financial Analyst,"Toronto, CA",Infra,0.409256
26,Xenia O'Connor,Lead Financial Analyst,Home Office,Data,0.391544
62,Ursula Kowalski,Principal Financial Analyst,"San Francisco, CA",DevOps,0.391133


Model: hkunlp/instructor-large



Unnamed: 0,name,job_title,location,department,cosine_similarity
93,Alice Johnson,Staff Financial Analyst,"Madrid, España",Маркетинг,0.899043
64,Victor Quintero,Lead Financial Analyst,"Toronto, CA",Infra,0.894934
62,Ursula Kowalski,Principal Financial Analyst,"San Francisco, CA",DevOps,0.893844
58,Ravi Gonzalez,Junior Financial Analyst,"Berlin, DE",BizOps,0.888747
97,Ravi Gonzalez,Junior Financial Analyst,"Москва, Россия",Infra,0.888303


Model: Alibaba-NLP/gte-Qwen2-1.5B-instruct



Unnamed: 0,name,job_title,location,department,cosine_similarity
97,Ravi Gonzalez,Junior Financial Analyst,"Москва, Россия",Infra,0.610902
28,Victor O'Connor,Senior Data Scientist,Home Office,Finanzas,0.600618
70,Carlos O'Connor,Intern Financial Analyst,"San Francisco, CA",Eng,0.593961
58,Ravi Gonzalez,Junior Financial Analyst,"Berlin, DE",BizOps,0.584063
62,Ursula Kowalski,Principal Financial Analyst,"San Francisco, CA",DevOps,0.579086




Question: Who’s working in AI at the Berlin office?

Model: multi-qa-mpnet-base-dot-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
76,Alice O'Connor,Staff Frontend Developer,"Berlin, DE",AI,0.76357
67,Emma Xiao,Intern AI Researcher,"Berlin, DE",BizOps,0.746092
54,Xenia Chen,Staff AI Researcher,Home Office,Eng,0.720162
20,Bob Fernández,Intern ML Engineer,"Berlin, DE",CyberSec,0.69052
65,Dmitry Vega,Intern AI Researcher,Home Office,CyberSec,0.689256


Model: distiluse-base-multilingual-cased-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
67,Emma Xiao,Intern AI Researcher,"Berlin, DE",BizOps,0.521121
76,Alice O'Connor,Staff Frontend Developer,"Berlin, DE",AI,0.506387
20,Bob Fernández,Intern ML Engineer,"Berlin, DE",CyberSec,0.399009
0,Bob Hernandez,Senior QA Engineer,"Berlin, DE",Infra,0.39369
54,Xenia Chen,Staff AI Researcher,Home Office,Eng,0.385997


Model: hkunlp/instructor-large



Unnamed: 0,name,job_title,location,department,cosine_similarity
76,Alice O'Connor,Staff Frontend Developer,"Berlin, DE",AI,0.938484
67,Emma Xiao,Intern AI Researcher,"Berlin, DE",BizOps,0.935625
74,Mikhail Xiao,Intern ML Engineer,"Berlin, DE",Infra,0.906713
54,Xenia Chen,Staff AI Researcher,Home Office,Eng,0.905402
20,Bob Fernández,Intern ML Engineer,"Berlin, DE",CyberSec,0.903189


Model: Alibaba-NLP/gte-Qwen2-1.5B-instruct



Unnamed: 0,name,job_title,location,department,cosine_similarity
67,Emma Xiao,Intern AI Researcher,"Berlin, DE",BizOps,0.587796
76,Alice O'Connor,Staff Frontend Developer,"Berlin, DE",AI,0.540707
74,Mikhail Xiao,Intern ML Engineer,"Berlin, DE",Infra,0.511613
78,Fatima O'Connor,Junior Security Specialist,"Berlin, DE",Eng,0.508413
20,Bob Fernández,Intern ML Engineer,"Berlin, DE",CyberSec,0.498027




Question: Data scientists in remote, any country

Model: multi-qa-mpnet-base-dot-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
24,Alice Anderson,Lead Data Scientist,Home Office,Eng,0.637526
95,Juan Zhukov,Staff Data Scientist,"Москва, Россия",IT,0.627123
29,Ravi Martinez,Lead Data Scientist,"San Francisco, CA",R&D,0.621343
28,Victor O'Connor,Senior Data Scientist,Home Office,Finanzas,0.602148
40,Ursula Zhukov,Intern Data Scientist,"Paris, FR",Маркетинг,0.600548


Model: distiluse-base-multilingual-cased-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
95,Juan Zhukov,Staff Data Scientist,"Москва, Россия",IT,0.329103
24,Alice Anderson,Lead Data Scientist,Home Office,Eng,0.3073
29,Ravi Martinez,Lead Data Scientist,"San Francisco, CA",R&D,0.303833
19,Victor Hernandez,Intern Data Scientist,"Paris, FR",Eng,0.2981
77,Mikhail Novikov,Lead Data Scientist,"Paris, FR",BizOps,0.287495


Model: hkunlp/instructor-large



Unnamed: 0,name,job_title,location,department,cosine_similarity
19,Victor Hernandez,Intern Data Scientist,"Paris, FR",Eng,0.884478
95,Juan Zhukov,Staff Data Scientist,"Москва, Россия",IT,0.883679
40,Ursula Zhukov,Intern Data Scientist,"Paris, FR",Маркетинг,0.879339
24,Alice Anderson,Lead Data Scientist,Home Office,Eng,0.879332
3,Luis Novikov,Intern Data Scientist,"Berlin, DE",Маркетинг,0.878665


Model: Alibaba-NLP/gte-Qwen2-1.5B-instruct



Unnamed: 0,name,job_title,location,department,cosine_similarity
24,Alice Anderson,Lead Data Scientist,Home Office,Eng,0.45562
28,Victor O'Connor,Senior Data Scientist,Home Office,Finanzas,0.441091
19,Victor Hernandez,Intern Data Scientist,"Paris, FR",Eng,0.436641
29,Ravi Martinez,Lead Data Scientist,"San Francisco, CA",R&D,0.431831
77,Mikhail Novikov,Lead Data Scientist,"Paris, FR",BizOps,0.426506




Question: List of engineers in building 12?

Model: multi-qa-mpnet-base-dot-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
0,Bob Hernandez,Senior QA Engineer,"Berlin, DE",Infra,0.570905
48,Ursula Rodriguez,Intern Frontend Developer,HQ Building 3,R&D,0.57015
16,Alice Ivanov,Intern Cloud Architect,HQ Building 3,DevOps,0.563229
53,Tariq Diaz,Staff Frontend Developer,Home Office,IT,0.56192
10,Natalia Chen,Senior ML Engineer,Home Office,IT,0.561015


Model: distiluse-base-multilingual-cased-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
1,Zara Diaz,Junior DevOps Engineer,"Berlin, DE",R&D,0.287932
63,Quentin Gonzalez,Principal DevOps Engineer,Home Office,IT,0.286777
16,Alice Ivanov,Intern Cloud Architect,HQ Building 3,DevOps,0.286349
39,Victor Novikov,Lead ML Engineer,"New York, NY",R&D,0.283278
74,Mikhail Xiao,Intern ML Engineer,"Berlin, DE",Infra,0.281705


Model: hkunlp/instructor-large



Unnamed: 0,name,job_title,location,department,cosine_similarity
16,Alice Ivanov,Intern Cloud Architect,HQ Building 3,DevOps,0.863833
48,Ursula Rodriguez,Intern Frontend Developer,HQ Building 3,R&D,0.862739
44,Luis O'Connor,Staff DevOps Engineer,"San Francisco, CA",Eng,0.857472
27,George Kowalski,Intern Backend Developer,HQ Building 3,AI,0.856726
71,Xenia Chen,Intern DevOps Eng.,Home Office,Eng,0.855276


Model: Alibaba-NLP/gte-Qwen2-1.5B-instruct



Unnamed: 0,name,job_title,location,department,cosine_similarity
25,Dmitry Evans,Staff ML Engineer,"sao paulo, br",Eng,0.386276
27,George Kowalski,Intern Backend Developer,HQ Building 3,AI,0.367786
16,Alice Ivanov,Intern Cloud Architect,HQ Building 3,DevOps,0.346716
48,Ursula Rodriguez,Intern Frontend Developer,HQ Building 3,R&D,0.346178
54,Xenia Chen,Staff AI Researcher,Home Office,Eng,0.345992




Question: Can you find me a ML eng in US?

Model: multi-qa-mpnet-base-dot-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
25,Dmitry Evans,Staff ML Engineer,"sao paulo, br",Eng,0.623037
39,Victor Novikov,Lead ML Engineer,"New York, NY",R&D,0.614168
10,Natalia Chen,Senior ML Engineer,Home Office,IT,0.581409
92,Victor Diaz,Junior ML Engineer,Home Office,IT,0.571501
74,Mikhail Xiao,Intern ML Engineer,"Berlin, DE",Infra,0.563721


Model: distiluse-base-multilingual-cased-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
39,Victor Novikov,Lead ML Engineer,"New York, NY",R&D,0.223886
10,Natalia Chen,Senior ML Engineer,Home Office,IT,0.166094
92,Victor Diaz,Junior ML Engineer,Home Office,IT,0.165003
74,Mikhail Xiao,Intern ML Engineer,"Berlin, DE",Infra,0.157869
20,Bob Fernández,Intern ML Engineer,"Berlin, DE",CyberSec,0.140014


Model: hkunlp/instructor-large



Unnamed: 0,name,job_title,location,department,cosine_similarity
25,Dmitry Evans,Staff ML Engineer,"sao paulo, br",Eng,0.903506
39,Victor Novikov,Lead ML Engineer,"New York, NY",R&D,0.898488
74,Mikhail Xiao,Intern ML Engineer,"Berlin, DE",Infra,0.893446
20,Bob Fernández,Intern ML Engineer,"Berlin, DE",CyberSec,0.890745
99,Ursula Ivanov,Principal ML Engineer,"Paris, FR",CyberSec,0.888083


Model: Alibaba-NLP/gte-Qwen2-1.5B-instruct



Unnamed: 0,name,job_title,location,department,cosine_similarity
25,Dmitry Evans,Staff ML Engineer,"sao paulo, br",Eng,0.472214
39,Victor Novikov,Lead ML Engineer,"New York, NY",R&D,0.470081
92,Victor Diaz,Junior ML Engineer,Home Office,IT,0.437673
10,Natalia Chen,Senior ML Engineer,Home Office,IT,0.425731
74,Mikhail Xiao,Intern ML Engineer,"Berlin, DE",Infra,0.402949




Question: Who are the people working in IT at Paris HQ?

Model: multi-qa-mpnet-base-dot-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
5,Ravi Petrov,Staff DevOps Engineer,"Paris, FR",IT,0.685855
7,Paula Zhukov,Principal Backend Developer,"Paris, FR",Finanzas,0.66149
9,George Brown,Lead Security Specialist,"Paris, FR",IT,0.661308
79,Mikhail Martinez,Staff Cloud Architect,"Paris, FR",R&D,0.659702
18,George Johnson,Senior Product Manager,"Paris, FR",Infra,0.656114


Model: distiluse-base-multilingual-cased-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
5,Ravi Petrov,Staff DevOps Engineer,"Paris, FR",IT,0.535571
9,George Brown,Lead Security Specialist,"Paris, FR",IT,0.484569
87,Hiroshi Anderson,Principal Security Specialist,"Paris, FR",IT,0.470027
33,Dmitry Quintero,Principal Security Specialist,"Paris, FR",IT,0.470027
99,Ursula Ivanov,Principal ML Engineer,"Paris, FR",CyberSec,0.437655


Model: hkunlp/instructor-large



Unnamed: 0,name,job_title,location,department,cosine_similarity
5,Ravi Petrov,Staff DevOps Engineer,"Paris, FR",IT,0.924758
87,Hiroshi Anderson,Principal Security Specialist,"Paris, FR",IT,0.908136
33,Dmitry Quintero,Principal Security Specialist,"Paris, FR",IT,0.908136
68,Mikhail Sanchez,Principal Backend Developer,"Paris, FR",Data,0.907793
7,Paula Zhukov,Principal Backend Developer,"Paris, FR",Finanzas,0.906951


Model: Alibaba-NLP/gte-Qwen2-1.5B-instruct



Unnamed: 0,name,job_title,location,department,cosine_similarity
5,Ravi Petrov,Staff DevOps Engineer,"Paris, FR",IT,0.563153
14,Dmitry Fernández,Staff Security Specialist,"Paris, FR",Data,0.554134
33,Dmitry Quintero,Principal Security Specialist,"Paris, FR",IT,0.553567
87,Hiroshi Anderson,Principal Security Specialist,"Paris, FR",IT,0.553567
9,George Brown,Lead Security Specialist,"Paris, FR",IT,0.549948




Question: Finance folks in LatAm region?

Model: multi-qa-mpnet-base-dot-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
93,Alice Johnson,Staff Financial Analyst,"Madrid, España",Маркетинг,0.452102
97,Ravi Gonzalez,Junior Financial Analyst,"Москва, Россия",Infra,0.423492
64,Victor Quintero,Lead Financial Analyst,"Toronto, CA",Infra,0.421854
26,Xenia O'Connor,Lead Financial Analyst,Home Office,Data,0.416249
73,Isla Sanchez,Lead Backend Developer,"Sao Paulo, BR",BizOps,0.41272


Model: distiluse-base-multilingual-cased-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
64,Victor Quintero,Lead Financial Analyst,"Toronto, CA",Infra,0.283948
70,Carlos O'Connor,Intern Financial Analyst,"San Francisco, CA",Eng,0.259358
59,Xenia Kowalski,Staff Backend Developer,"San Francisco, CA",Finanzas,0.258026
75,Natalia Martinez,Intern Financial Analyst,"Paris, FR",Eng,0.250554
97,Ravi Gonzalez,Junior Financial Analyst,"Москва, Россия",Infra,0.242837


Model: hkunlp/instructor-large



Unnamed: 0,name,job_title,location,department,cosine_similarity
93,Alice Johnson,Staff Financial Analyst,"Madrid, España",Маркетинг,0.875576
73,Isla Sanchez,Lead Backend Developer,"Sao Paulo, BR",BizOps,0.868865
75,Natalia Martinez,Intern Financial Analyst,"Paris, FR",Eng,0.860552
98,Ursula Quintero,Intern Backend Developer,"Sao Paulo, Bx",Маркетинг,0.86055
97,Ravi Gonzalez,Junior Financial Analyst,"Москва, Россия",Infra,0.859874


Model: Alibaba-NLP/gte-Qwen2-1.5B-instruct



Unnamed: 0,name,job_title,location,department,cosine_similarity
59,Xenia Kowalski,Staff Backend Developer,"San Francisco, CA",Finanzas,0.440298
51,Ursula Brown,Staff Cloud Architect,"San Francisco, CA",Finanzas,0.432574
81,Carlos Taylor,Lead Product Manager,"London, UK",Finanzas,0.432074
73,Isla Sanchez,Lead Backend Developer,"Sao Paulo, BR",BizOps,0.42804
25,Dmitry Evans,Staff ML Engineer,"sao paulo, br",Eng,0.418558




Question: Empleados en el depto de ingeniería en Argentina

Model: multi-qa-mpnet-base-dot-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
98,Ursula Quintero,Intern Backend Developer,"Sao Paulo, Bx",Маркетинг,0.511509
25,Dmitry Evans,Staff ML Engineer,"sao paulo, br",Eng,0.482299
45,Ravi Anderson,Intern Frontend Developer,"Madrid, España",R&D,0.478646
66,Omar Petrov,Intern QA Engineer,"Москва, Россия",IT,0.466934
69,Sofia Chen,Intern AI Researcher,"Sao Paulo, BR",Eng,0.464996


Model: distiluse-base-multilingual-cased-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
66,Omar Petrov,Intern QA Engineer,"Москва, Россия",IT,0.411547
89,George Novikov,Senior DevOps Engineer,"Москва, Россия",IT,0.40975
92,Victor Diaz,Junior ML Engineer,Home Office,IT,0.393139
5,Ravi Petrov,Staff DevOps Engineer,"Paris, FR",IT,0.387251
63,Quentin Gonzalez,Principal DevOps Engineer,Home Office,IT,0.387213


Model: hkunlp/instructor-large



Unnamed: 0,name,job_title,location,department,cosine_similarity
93,Alice Johnson,Staff Financial Analyst,"Madrid, España",Маркетинг,0.890058
86,Dmitry Sanchez,Junior Cloud Architect,"Madrid, España",BizOps,0.884153
45,Ravi Anderson,Intern Frontend Developer,"Madrid, España",R&D,0.879663
25,Dmitry Evans,Staff ML Engineer,"sao paulo, br",Eng,0.874465
98,Ursula Quintero,Intern Backend Developer,"Sao Paulo, Bx",Маркетинг,0.871485


Model: Alibaba-NLP/gte-Qwen2-1.5B-instruct



Unnamed: 0,name,job_title,location,department,cosine_similarity
25,Dmitry Evans,Staff ML Engineer,"sao paulo, br",Eng,0.496991
66,Omar Petrov,Intern QA Engineer,"Москва, Россия",IT,0.416405
69,Sofia Chen,Intern AI Researcher,"Sao Paulo, BR",Eng,0.415654
23,Tariq Yamamoto,Staff AI Researcher,"Sao Paulo, BR",DevOps,0.395415
98,Ursula Quintero,Intern Backend Developer,"Sao Paulo, Bx",Маркетинг,0.390635




Question: Senior managers in tech, UK or Germany

Model: multi-qa-mpnet-base-dot-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
6,Mikhail Rodriguez,Senior DevOps Eng.,"London, UK",IT,0.663953
38,Ravi Anderson,Lead Frontend Developer,"London, UK",Маркетинг,0.658557
47,Emma O'Connor,Senior Cloud Architect,"Москва, Россия",Eng,0.647366
89,George Novikov,Senior DevOps Engineer,"Москва, Россия",IT,0.638126
60,Hiroshi Vega,Principal Product Manager,"Berlin, DE",Маркетинг,0.636184


Model: distiluse-base-multilingual-cased-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
6,Mikhail Rodriguez,Senior DevOps Eng.,"London, UK",IT,0.446703
32,Juan Brown,Principal DevOps Eng.,"London, UK",IT,0.358169
0,Bob Hernandez,Senior QA Engineer,"Berlin, DE",Infra,0.351226
83,Tariq Taylor,Principal QA Engineer,"London, UK",CyberSec,0.34978
89,George Novikov,Senior DevOps Engineer,"Москва, Россия",IT,0.34431


Model: hkunlp/instructor-large



Unnamed: 0,name,job_title,location,department,cosine_similarity
6,Mikhail Rodriguez,Senior DevOps Eng.,"London, UK",IT,0.903761
0,Bob Hernandez,Senior QA Engineer,"Berlin, DE",Infra,0.900284
60,Hiroshi Vega,Principal Product Manager,"Berlin, DE",Маркетинг,0.900214
81,Carlos Taylor,Lead Product Manager,"London, UK",Finanzas,0.898658
1,Zara Diaz,Junior DevOps Engineer,"Berlin, DE",R&D,0.891626


Model: Alibaba-NLP/gte-Qwen2-1.5B-instruct



Unnamed: 0,name,job_title,location,department,cosine_similarity
0,Bob Hernandez,Senior QA Engineer,"Berlin, DE",Infra,0.517212
60,Hiroshi Vega,Principal Product Manager,"Berlin, DE",Маркетинг,0.501513
18,George Johnson,Senior Product Manager,"Paris, FR",Infra,0.500984
78,Fatima O'Connor,Junior Security Specialist,"Berlin, DE",Eng,0.497779
6,Mikhail Rodriguez,Senior DevOps Eng.,"London, UK",IT,0.496546




Question: All ppl in eng, except remote

Model: multi-qa-mpnet-base-dot-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
80,Ravi Evans,Junior Frontend Developer,Home Office,Eng,0.382502
6,Mikhail Rodriguez,Senior DevOps Eng.,"London, UK",IT,0.377159
71,Xenia Chen,Intern DevOps Eng.,Home Office,Eng,0.36769
38,Ravi Anderson,Lead Frontend Developer,"London, UK",Маркетинг,0.364539
54,Xenia Chen,Staff AI Researcher,Home Office,Eng,0.342867


Model: distiluse-base-multilingual-cased-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
1,Zara Diaz,Junior DevOps Engineer,"Berlin, DE",R&D,0.088072
71,Xenia Chen,Intern DevOps Eng.,Home Office,Eng,0.086657
25,Dmitry Evans,Staff ML Engineer,"sao paulo, br",Eng,0.082883
5,Ravi Petrov,Staff DevOps Engineer,"Paris, FR",IT,0.082852
74,Mikhail Xiao,Intern ML Engineer,"Berlin, DE",Infra,0.082514


Model: hkunlp/instructor-large



Unnamed: 0,name,job_title,location,department,cosine_similarity
71,Xenia Chen,Intern DevOps Eng.,Home Office,Eng,0.856692
96,Juan Ivanov,Principal Software Engineer,"New York, NY",DevOps,0.843539
32,Juan Brown,Principal DevOps Eng.,"London, UK",IT,0.841739
25,Dmitry Evans,Staff ML Engineer,"sao paulo, br",Eng,0.841604
44,Luis O'Connor,Staff DevOps Engineer,"San Francisco, CA",Eng,0.840812


Model: Alibaba-NLP/gte-Qwen2-1.5B-instruct



Unnamed: 0,name,job_title,location,department,cosine_similarity
25,Dmitry Evans,Staff ML Engineer,"sao paulo, br",Eng,0.382148
71,Xenia Chen,Intern DevOps Eng.,Home Office,Eng,0.3719
54,Xenia Chen,Staff AI Researcher,Home Office,Eng,0.368938
32,Juan Brown,Principal DevOps Eng.,"London, UK",IT,0.350041
44,Luis O'Connor,Staff DevOps Engineer,"San Francisco, CA",Eng,0.347538




Question: Кто в Лондоне в отделе Финансов?

Model: multi-qa-mpnet-base-dot-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
41,Juan Petrov,Lead Backend Developer,"Москва, Россия",Data,0.448398
17,Juan Xiao,Junior Frontend Developer,"Москва, Россия",Infra,0.42044
38,Ravi Anderson,Lead Frontend Developer,"London, UK",Маркетинг,0.420399
35,Wei Ulyanov,Lead Backend Developer,"Москва, Россия",AI,0.419776
90,Katerina Brown,Senior Cloud Architect,"Москва, Россия",Data,0.412112


Model: distiluse-base-multilingual-cased-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
81,Carlos Taylor,Lead Product Manager,"London, UK",Finanzas,0.512708
2,Hiroshi Hernandez,Principal Backend Developer,"London, UK",Infra,0.395052
32,Juan Brown,Principal DevOps Eng.,"London, UK",IT,0.384867
6,Mikhail Rodriguez,Senior DevOps Eng.,"London, UK",IT,0.381171
37,Isla Chen,Junior Frontend Developer,"London, UK",R&D,0.375539


Model: hkunlp/instructor-large



Unnamed: 0,name,job_title,location,department,cosine_similarity
89,George Novikov,Senior DevOps Engineer,"Москва, Россия",IT,0.897267
95,Juan Zhukov,Staff Data Scientist,"Москва, Россия",IT,0.896272
41,Juan Petrov,Lead Backend Developer,"Москва, Россия",Data,0.895081
66,Omar Petrov,Intern QA Engineer,"Москва, Россия",IT,0.892049
57,Victor Ulyanov,Staff Software Engineer,"San Francisco, Cx",Маркетинг,0.89129


Model: Alibaba-NLP/gte-Qwen2-1.5B-instruct



Unnamed: 0,name,job_title,location,department,cosine_similarity
97,Ravi Gonzalez,Junior Financial Analyst,"Москва, Россия",Infra,0.496881
81,Carlos Taylor,Lead Product Manager,"London, UK",Finanzas,0.484941
93,Alice Johnson,Staff Financial Analyst,"Madrid, España",Маркетинг,0.473681
26,Xenia O'Connor,Lead Financial Analyst,Home Office,Data,0.455106
28,Victor O'Connor,Senior Data Scientist,Home Office,Finanzas,0.436814




Question: Are there any junior devs in Madrid?

Model: multi-qa-mpnet-base-dot-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
45,Ravi Anderson,Intern Frontend Developer,"Madrid, España",R&D,0.629108
86,Dmitry Sanchez,Junior Cloud Architect,"Madrid, España",BizOps,0.597054
17,Juan Xiao,Junior Frontend Developer,"Москва, Россия",Infra,0.562141
1,Zara Diaz,Junior DevOps Engineer,"Berlin, DE",R&D,0.553392
93,Alice Johnson,Staff Financial Analyst,"Madrid, España",Маркетинг,0.541805


Model: distiluse-base-multilingual-cased-v1



Unnamed: 0,name,job_title,location,department,cosine_similarity
45,Ravi Anderson,Intern Frontend Developer,"Madrid, España",R&D,0.389803
86,Dmitry Sanchez,Junior Cloud Architect,"Madrid, España",BizOps,0.38782
37,Isla Chen,Junior Frontend Developer,"London, UK",R&D,0.26369
1,Zara Diaz,Junior DevOps Engineer,"Berlin, DE",R&D,0.262512
93,Alice Johnson,Staff Financial Analyst,"Madrid, España",Маркетинг,0.259075


Model: hkunlp/instructor-large



Unnamed: 0,name,job_title,location,department,cosine_similarity
45,Ravi Anderson,Intern Frontend Developer,"Madrid, España",R&D,0.9258
86,Dmitry Sanchez,Junior Cloud Architect,"Madrid, España",BizOps,0.920603
93,Alice Johnson,Staff Financial Analyst,"Madrid, España",Маркетинг,0.894144
98,Ursula Quintero,Intern Backend Developer,"Sao Paulo, Bx",Маркетинг,0.885054
76,Alice O'Connor,Staff Frontend Developer,"Berlin, DE",AI,0.884418


Model: Alibaba-NLP/gte-Qwen2-1.5B-instruct



Unnamed: 0,name,job_title,location,department,cosine_similarity
86,Dmitry Sanchez,Junior Cloud Architect,"Madrid, España",BizOps,0.56186
45,Ravi Anderson,Intern Frontend Developer,"Madrid, España",R&D,0.526486
1,Zara Diaz,Junior DevOps Engineer,"Berlin, DE",R&D,0.495154
37,Isla Chen,Junior Frontend Developer,"London, UK",R&D,0.473749
17,Juan Xiao,Junior Frontend Developer,"Москва, Россия",Infra,0.456811






In [27]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
from IPython.display import display

# Rank every row of `df` against every question with the multi-qa model and
# show the TOP_K closest rows per question.
TOP_K = 5  # number of best-matching rows kept per question

# Encode the questions and the text column
question_embeddings = model_multi_qa.encode(questions)
text_embeddings = model_multi_qa.encode(df['text'].tolist())

# Compute cosine similarity: row i holds the scores of questions[i] against
# every row of df (same positional order as df['text']).
cosine_similarities = cosine_similarity(question_embeddings, text_embeddings)

# Retrieve the TOP_K matches for each question.
# NOTE: the dict is keyed by question text, so a repeated question keeps only
# its last entry.
top_matches = {}
for question, question_scores in zip(questions, cosine_similarities):
    # argsort is ascending; reverse it and keep the first TOP_K positions.
    top_indices = np.argsort(question_scores)[::-1][:TOP_K]
    top_matches[question] = (df.iloc[top_indices], question_scores[top_indices])

# Styling is identical for every table, so build it once outside the loop.
match_table_styles = [
    {'selector': 'th', 'props': [('font-size', '120%'), ('text-align', 'center')]},
    {'selector': 'td', 'props': [('font-size', '110%'), ('text-align', 'center')]},
]
match_display_columns = ['name', 'job_title', 'location', 'department', 'cosine_similarity']

# Display the results
# Loop variables are deliberately not called `similarities`, so the tensor of
# that name computed in the sentence-similarity demo cell is not overwritten.
for question, (top_rows, top_scores) in top_matches.items():
    # .assign returns a new frame, leaving the slice stored in top_matches untouched.
    ranked_rows = top_rows.assign(cosine_similarity=top_scores)
    print(f"Question: {question}\n")
    display(
        ranked_rows[match_display_columns]
        .style.set_table_styles(match_table_styles)
        .set_properties(**{'background-color': 'lightyellow'}, subset=['cosine_similarity'])
    )
    print("\n" + "="*80 + "\n")

Question: Who are the senior engineers working in NY?



Unnamed: 0,name,job_title,location,department,cosine_similarity
79,Yusuf Vega,Staff Frontend Developer,"New York, NY",Eng,0.688557
6,Emma Diaz,Staff ML Engineer,"New York, NY",Маркетинг,0.682026
72,Luis Taylor,Lead QA Engineer,"New York, NY",Infra,0.667823
42,Hiroshi Anderson,Lead Backend Developer,"New York, NY",Data,0.654799
95,Fatima Kowalski,Senior Product Managex,"New York, NY",AI,0.637155




Question: Find me all data scientists in London HQ



Unnamed: 0,name,job_title,location,department,cosine_similarity
9,Hiroshi Taylor,Intern Data Scientist,"London, UK",R&D,0.730971
44,Juan Zhukov,Junior Data Scientist,"London, UK",Data,0.712025
56,Bob Petrov,Staff DevOps Engineer,"London, UK",Data,0.658398
61,Paula Diaz,Staff Software Engineer,HQ Building 3,Data,0.646586
60,Isla Ivanov,Intern AI Researcher,"London, UK",Eng,0.641826




Question: People in AI dept. working remotely?



Unnamed: 0,name,job_title,location,department,cosine_similarity
76,Isla Yamamoto,Staff AI Researcher,Home Office,Infra,0.672181
90,Omar Ulyanov,Lead AI Researcher,HQ Building 3,BizOps,0.619787
31,Ravi Wang,Junior AI Researcher,"Москва, Россия",Infra,0.615886
60,Isla Ivanov,Intern AI Researcher,"London, UK",Eng,0.609392
17,Emma Lopez,Junior AI Researcher,"New York, NY",Маркетинг,0.605038




Question: Is there any anaylst in Madrid?



Unnamed: 0,name,job_title,location,department,cosine_similarity
91,Sofia Sanchez,Junior Product Manager,"Madrid, España",Eng,0.567188
28,Wei Ivanov,Staff Data Scientist,"Madrid, España",CyberSec,0.522311
13,Tariq Zhukov,Staff Security Specialist,"Madrid, España",R&D,0.510996
62,Bob Fernández,Intern Cloud Architect,"Paris, FR",Data,0.467355
47,Wei Gonzalez,intern backend developer,"Paris, FR",IT,0.462667




Question: Show all engs in SF office



Unnamed: 0,name,job_title,location,department,cosine_similarity
46,Omar Wang,Staff DevOps Engineer,Home Office,Eng,0.456442
36,Bob Chen,Staff Software Engineer,HQ Building 3,D.,0.433521
79,Yusuf Vega,Staff Frontend Developer,"New York, NY",Eng,0.431767
61,Paula Diaz,Staff Software Engineer,HQ Building 3,Data,0.425926
88,Xenia Lopez,Junior Backend Developer,HQ Building 3,Eng,0.424512




Question: Lista de empleados en Finanzas en México



Unnamed: 0,name,job_title,location,department,cosine_similarity
91,Sofia Sanchez,Junior Product Manager,"Madrid, España",Eng,0.391753
13,Tariq Zhukov,Staff Security Specialist,"Madrid, España",R&D,0.375129
47,Wei Gonzalez,intern backend developer,"Paris, FR",IT,0.343714
28,Wei Ivanov,Staff Data Scientist,"Madrid, España",CyberSec,0.337789
89,Ravi Xiao,Lead Data Scientist,"Toronto, CA",Finanzas,0.333416




Question: Кто работает в отделе ИИ в Москве?



Unnamed: 0,name,job_title,location,department,cosine_similarity
26,Natalia Brown,Senior D. Scientist,"Москва, Россия",Infra,0.427641
57,Alice Quintero,Principal Frontend Developer,"Москва, Россия",DevOps,0.424796
23,George Ulyanov,Junior ML Engineer,"Москва, Россия",IT,0.419596
71,Sofia Anderson,Lead AI Researcher,"Москва, Россия",BizOps,0.416588
67,Yusuf Brown,Staff DevOps Engineer,"Москва, Россия",Маркетинг,0.416019




Question: Software devs in EU, any location



Unnamed: 0,name,job_title,location,department,cosine_similarity
32,Quentin Martinez,Staff Software Engineer,"Paris, FR",R&D,0.634955
51,Isla Sanchez,Staff Frontend Developer,"Berlin, DE",AI,0.6033
11,Juan Taylor,Sr. Frontend Developer,"Berlin, DE",IT,0.602169
92,George Petrov,Junior Frontend Developer,"Berlin, DE",R&D,0.59435
56,Bob Petrov,Staff DevOps Engineer,"London, UK",Data,0.586139




Question: Any Sr. Analyst in the Finance dept?



Unnamed: 0,name,job_title,location,department,cosine_similarity
25,Luis Hernandez,Staff Financial Analyst,"London, UK",IT,0.605363
55,Victor Ulyanov,Principal Financial Analyst,"Sao Paulo, BR",AI,0.591078
10,Yusuf O'Connor,Staff Financial Analyst,"New York, NY",Маркетинг,0.587632
83,Omar Sanchez,Intern Financial Analyst,"San Francisco, CA",it,0.582826
49,Ravi Lopez,Junior ML Engineer,"Sao Paulo, BR",R&D,0.5111




Question: Who’s working in AI at the Berlin office?



Unnamed: 0,name,job_title,location,department,cosine_similarity
50,Ravi Yamamoto,Staff AI Researcher,"Berlin, DE",Data,0.780474
59,Quentin Brown,Principal AI Researcher,"Berlin, DE",Data,0.763635
51,Isla Sanchez,Staff Frontend Developer,"Berlin, DE",AI,0.76357
37,Zara Quintero,Intern Data Scientist,"Berlin, DE",AI,0.748477
90,Omar Ulyanov,Lead AI Researcher,HQ Building 3,BizOps,0.710782




Question: Data scientists in remote, any country



Unnamed: 0,name,job_title,location,department,cosine_similarity
70,Victor Johnson,Principal Data Scientist,"sao paulo, br",Маркетинг,0.608857
9,Hiroshi Taylor,Intern Data Scientist,"London, UK",R&D,0.603742
28,Wei Ivanov,Staff Data Scientist,"Madrid, España",CyberSec,0.600839
26,Natalia Brown,Senior D. Scientist,"Москва, Россия",Infra,0.595021
93,Victor Kowalski,junior data scientist,"Москва, Россия",CyberSec,0.594399




Question: List of engineers in building 12?



Unnamed: 0,name,job_title,location,department,cosine_similarity
36,Bob Chen,Staff Software Engineer,HQ Building 3,D.,0.623717
66,Luis Ulyanov,Principal Frontend Developer,HQ Building 3,D.,0.600698
33,Omar Ulyanov,Principal Frontend Developer,HQ Building 3,Eng,0.597179
61,Paula Diaz,Staff Software Engineer,HQ Building 3,Data,0.594276
11,Juan Taylor,Sr. Frontend Developer,"Berlin, DE",IT,0.573058




Question: Can you find me a ML eng in US?



Unnamed: 0,name,job_title,location,department,cosine_similarity
23,George Ulyanov,Junior ML Engineer,"Москва, Россия",IT,0.617794
75,Alice Fernández,Lead ML Eng.,Home Office,Data,0.613012
85,Isla Brown,Lead ML Engineer,"San Francisco, CA",R&D,0.609355
84,Zara Xiao,Staff ML Engineer,"San Francisco, CA",Маркетинг,0.609101
63,Mikhail Brown,Principal ML Engineer,"Москва, Россия",CyberSec,0.605824




Question: Who are the people working in IT at Paris HQ?



Unnamed: 0,name,job_title,location,department,cosine_similarity
14,Ravi Diaz,Senior Backend Developer,"Paris, FR",IT,0.710749
47,Wei Gonzalez,intern backend developer,"Paris, FR",IT,0.688745
32,Quentin Martinez,Staff Software Engineer,"Paris, FR",R&D,0.682374
39,Juan O'Connor,Principal Backend Developer,"Paris, FR",CyberSec,0.655676
62,Bob Fernández,Intern Cloud Architect,"Paris, FR",Data,0.645719




Question: Finance folks in LatAm region?



Unnamed: 0,name,job_title,location,department,cosine_similarity
91,Sofia Sanchez,Junior Product Manager,"Madrid, España",Eng,0.415576
55,Victor Ulyanov,Principal Financial Analyst,"Sao Paulo, BR",AI,0.411434
83,Omar Sanchez,Intern Financial Analyst,"San Francisco, CA",it,0.389186
89,Ravi Xiao,Lead Data Scientist,"Toronto, CA",Finanzas,0.386087
64,Quentin Petrov,Staff QA Engineer,"Sao Paulo, BR",Infra,0.385668




Question: Empleados en el depto de ingeniería en Argentina



Unnamed: 0,name,job_title,location,department,cosine_similarity
91,Sofia Sanchez,Junior Product Manager,"Madrid, España",Eng,0.47796
45,Ravi Fernández,Staff ML Engineer,"Sao Paulo, BR",D.,0.475875
22,Isla Yamamoto,Intern Software Engineer,"Paris, FR",BizOps,0.471698
64,Quentin Petrov,Staff QA Engineer,"Sao Paulo, BR",Infra,0.457099
12,Omar Evans,Staff ML Engineer,"Paris, FR",маркетинг,0.454415




Question: Senior managers in tech, UK or Germany



Unnamed: 0,name,job_title,location,department,cosine_similarity
14,Ravi Diaz,Senior Backend Developer,"Paris, FR",IT,0.639809
7,Bob Sanchez,Senior Security Specialist,"London, UK",Маркетинг,0.637434
11,Juan Taylor,Sr. Frontend Developer,"Berlin, DE",IT,0.632364
32,Quentin Martinez,Staff Software Engineer,"Paris, FR",R&D,0.63231
98,Ravi Lopez,Senior ML Engineer,"Paris, FR",DevOps,0.62925




Question: All ppl in eng, except remote



Unnamed: 0,name,job_title,location,department,cosine_similarity
88,Xenia Lopez,Junior Backend Developer,HQ Building 3,Eng,0.38983
21,George Hernandez,Lead QA Engineer,Home Office (remote),DevOps,0.379902
74,Ursula Novikov,Staff QA Engineer,Home Office,Маркетинг,0.360302
46,Omar Wang,Staff DevOps Engineer,Home Office,Eng,0.358982
36,Bob Chen,Staff Software Engineer,HQ Building 3,D.,0.356168




Question: Кто в Лондоне в отделе Финансов?



Unnamed: 0,name,job_title,location,department,cosine_similarity
57,Alice Quintero,Principal Frontend Developer,"Москва, Россия",DevOps,0.423991
26,Natalia Brown,Senior D. Scientist,"Москва, Россия",Infra,0.412721
41,Luis Ivanov,Staff AI Researcher,"Paris, FR",Маркетинг,0.40104
93,Victor Kowalski,junior data scientist,"Москва, Россия",CyberSec,0.396991
97,Carlos Johnson,Staff Cloud Architect,"Москва, Россия",DevOps,0.390706




Question: Are there any junior devs in Madrid?



Unnamed: 0,name,job_title,location,department,cosine_similarity
91,Sofia Sanchez,Junior Product Manager,"Madrid, España",Eng,0.641094
28,Wei Ivanov,Staff Data Scientist,"Madrid, España",CyberSec,0.55518
82,Alice Chen,Junior DevOps Engineer,"Berlin, DE",Infra,0.547909
92,George Petrov,Junior Frontend Developer,"Berlin, DE",R&D,0.540904
88,Xenia Lopez,Junior Backend Developer,HQ Building 3,Eng,0.529776




