In [1]:
!pip install faiss-cpu

Collecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (31.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m31.4/31.4 MB[0m [31m55.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: faiss-cpu
Successfully installed faiss-cpu-1.12.0


In [2]:
import time
import numpy as np
import faiss
from sentence_transformers import SentenceTransformer

In [3]:
# ------------------------------
# OFFLINE JOB: Build embeddings & index
# ------------------------------

# Example corpus of job titles. The position of each title in this list is
# what search results are mapped back to later (jobs[idx]), so order matters.
jobs = [
    "software engineer",
    "doctor",
    "teacher",
    "farmer",
    "driver",
    "AI researcher",
    "data scientist",
    "graphic designer",
    "lawyer",
    "accountant",
    "machine learning engineer",
    "web developer",
    "UX designer",
    "nurse",
    "project manager",
]

In [4]:
# Load embedding model (offline job: heavy task, runs once).
# On a cold cache this appears to fetch the model files: the recorded output
# shows progress bars for config/tokenizer files and a 90.9M model.safetensors,
# so the first run of this cell needs network access.
print("Loading model...")
model = SentenceTransformer("all-MiniLM-L6-v2")

Loading model...


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [5]:
# Generate embeddings (offline preprocessing step).
# The timer wraps only the encode call below; index construction happens in a
# later cell and is not included in the reported duration.
print("Generating embeddings...")
# perf_counter is the monotonic, high-resolution clock intended for measuring
# elapsed intervals; time.time() is a wall clock and can jump or be coarse.
start = time.perf_counter()
job_vectors = model.encode(jobs, convert_to_numpy=True).astype("float32")
end = time.perf_counter()

# Sanity check: one embedding row per job title, so that row ids returned by
# the index can be mapped back to `jobs` by position.
assert job_vectors.shape[0] == len(jobs), "expected one embedding per job title"

print("=== OFFLINE JOB ===")
print(f"Generated embeddings + built dataset in {end - start:.4f} seconds\n")


Generating embeddings...
=== OFFLINE JOB ===
Generated embeddings + built dataset in 0.1875 seconds



In [6]:
# Create FAISS index
# The index width is taken from the second axis of job_vectors (the embedding
# size). Vectors are added in the same order as `jobs`; later cells rely on
# that when they map a returned id back to a title with jobs[idx].
dim = job_vectors.shape[1]
index = faiss.IndexFlatL2(dim)
index.add(job_vectors)

In [7]:
# ------------------------------
# ONLINE JOB: User query lookup
# ------------------------------

# Example query
query = "AI career opportunities"

# Number of nearest neighbours to request from the index
top_k = 5

# Encode query (the model is already loaded, so only inference runs here)
query_vector = model.encode([query], convert_to_numpy=True).astype("float32")

# Perform search. Only the index lookup is timed — query encoding above is
# NOT part of the reported duration. perf_counter is the monotonic,
# high-resolution clock intended for short intervals like this one.
start = time.perf_counter()
distances, indices = index.search(query_vector, top_k)
end = time.perf_counter()

print("=== ONLINE JOB ===")
print(f"Query: {query}")
print(f"Processed in {end - start:.6f} seconds\n")

print("Top matches:")
for rank, (idx, dist) in enumerate(zip(indices[0], distances[0]), start=1):
    # FAISS pads the result with id -1 when the index holds fewer than top_k
    # vectors; jobs[-1] would silently print the last title, so stop there.
    if idx < 0:
        break
    print(f"{rank}. {jobs[idx]} (distance: {dist:.4f})")


=== ONLINE JOB ===
Query: AI career opportunities
Processed in 0.009798 seconds

Top matches:
1. AI researcher (distance: 0.4872)
2. machine learning engineer (distance: 0.9916)
3. software engineer (distance: 0.9992)
4. data scientist (distance: 1.1588)
5. graphic designer (distance: 1.3038)


In [8]:
# ------------------------------
# ONLINE JOB: User query lookup
# ------------------------------

# Example query (an exact title from the corpus, so the best match should
# come back with distance ~0)
query = "graphic designer"

# Number of nearest neighbours to request from the index
top_k = 5

# Encode query (the model is already loaded, so only inference runs here)
query_vector = model.encode([query], convert_to_numpy=True).astype("float32")

# Perform search. Only the index lookup is timed — query encoding above is
# NOT part of the reported duration. perf_counter is the monotonic,
# high-resolution clock intended for short intervals like this one.
start = time.perf_counter()
distances, indices = index.search(query_vector, top_k)
end = time.perf_counter()

print("=== ONLINE JOB ===")
print(f"Query: {query}")
print(f"Processed in {end - start:.6f} seconds\n")

print("Top matches:")
for rank, (idx, dist) in enumerate(zip(indices[0], distances[0]), start=1):
    # FAISS pads the result with id -1 when the index holds fewer than top_k
    # vectors; jobs[-1] would silently print the last title, so stop there.
    if idx < 0:
        break
    print(f"{rank}. {jobs[idx]} (distance: {dist:.4f})")


=== ONLINE JOB ===
Query: graphic designer
Processed in 0.000203 seconds

Top matches:
1. graphic designer (distance: 0.0000)
2. UX designer (distance: 0.5897)
3. software engineer (distance: 0.8413)
4. web developer (distance: 0.9574)
5. data scientist (distance: 1.0264)
