In [5]:
!pip install faiss-cpu




In [6]:
import pandas as pd
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss

# Step 1: Load the Dataset
# Ensure shl_assessments.csv is in your Colab working directory.
# NOTE(review): the path below is absolute and Colab-specific (/content);
# it has to be edited when the notebook is run anywhere else.
# `df` is the module-level frame every later cell reads from.
df = pd.read_csv("/content/shl_assessments.csv")

# Step 2: Combine relevant text fields into one string per assessment.
def combine_text(row, fields=("Assessment Name", "Description", "Job Levels", "Test Type")):
    """Join the descriptive fields of one assessment into a single string.

    Args:
        row: A mapping-like record (e.g. a DataFrame row) exposing ``.get``.
        fields: Column names to concatenate, in output order.

    Returns:
        The string values of the available fields separated by single
        spaces. Fields that are absent from ``row`` or hold a missing value
        (None/NaN) are skipped; an empty string is returned if none remain.
    """
    parts = []
    for field in fields:
        # .get() without a default yields None for an absent column, which
        # pd.notna() rejects just like NaN, so a missing column can no longer
        # fall through to a failing row[field] lookup.
        value = row.get(field)
        if pd.notna(value):
            parts.append(str(value))
    return " ".join(parts)

# Add one concatenated text string per assessment; this is what gets embedded.
df["combined_text"] = df.apply(combine_text, axis=1)

# Step 3: Generate Embeddings using Sentence Transformers
model_name = "all-MiniLM-L6-v2"  # Efficient and effective for semantic similarity
# `model` is kept at module level because search_assessments() reuses it to
# embed queries into the same space.
model = SentenceTransformer(model_name)

# Encode the combined text for each assessment
texts = df["combined_text"].tolist()
embeddings = model.encode(texts, convert_to_numpy=True)

# Normalize embeddings for cosine similarity (L2 normalization)
# The return value is not used: the array is normalized in place.
faiss.normalize_L2(embeddings)

# Step 4: Build a FAISS Index for fast retrieval
# Vectors are added in DataFrame row order, so index positions returned by a
# search are used as positional row numbers (df.iloc) later on.
dimension = embeddings.shape[1]
index = faiss.IndexFlatIP(dimension)  # Using inner product as cosine similarity after normalization
index.add(embeddings)
print(f"Indexed {index.ntotal} assessments.")

# Step 5: Define a function for query-based search
def search_assessments(query, top_k=10):
    """Return the assessments most similar to a free-text query.

    Args:
        query: Free-text description of the hiring need.
        top_k: Maximum number of results. Coerced to ``int`` and clamped to
            the number of vectors held by the module-level ``index``.

    Returns:
        A copy of the matching rows of the module-level ``df``, in the order
        returned by the index, with an extra ``"similarity"`` column holding
        the inner-product score. An empty frame (same columns) is returned
        when ``top_k`` is below 1 or the index is empty.
    """
    # UI sliders and JSON clients may hand over floats; FAISS requires an
    # integer k that is at least 1.
    k = min(int(top_k), index.ntotal)
    if k < 1:
        empty = df.iloc[0:0].copy()
        empty["similarity"] = np.empty(0, dtype=np.float32)
        return empty

    # Encode the query into the same embedding space as the indexed texts
    query_embedding = model.encode([query], convert_to_numpy=True)
    faiss.normalize_L2(query_embedding)

    # Search the FAISS index
    distances, indices = index.search(query_embedding, k)

    # FAISS marks missing hits with label -1; df.iloc[-1] would silently
    # return the last row, so drop those slots before the lookup.
    found = indices[0] >= 0
    results = df.iloc[indices[0][found]].copy()
    results["similarity"] = distances[0][found]
    return results

# Step 6: Run an example query and print the results
# `query` and `results` stay at module level; the following cell prints
# `results` again together with the URL column.
query = "Looking to hire mid-level professionals who are proficient in Python, SQL and Java Script. Need an assessment package that can test all skills with max duration of 30 minutes"
results = search_assessments(query, top_k=10)
print("Top Recommendations:")
# Only the name and the similarity score are shown here.
print(results[["Assessment Name", "similarity"]])


Indexed 525 assessments.
Top Recommendations:
                                       Assessment Name  similarity
268                                       Python (New)    0.562657
157                         Job Control Language (New)    0.549362
361         Virtual Assessment and Development Centers    0.516284
382                       Verify - Deductive Reasoning    0.513048
346                       Verify - Deductive Reasoning    0.513048
514  Technology Professional 8.0 Job Focused Assess...    0.510788
472                        Professional + 7.0 Solution    0.507895
360              Verify Interactive Process Monitoring    0.504287
387            Apprentice + 8.0 Job Focused Assessment    0.493981
473                      Professional + 7.1 (Americas)    0.492788


In [7]:
 # Display recommended assessments along with their URLs
pd.set_option('display.max_colwidth', None)
# Try the known URL column names in order and print with the first one that
# exists in the dataset; fall back to a hint when none is present.
for url_column in ("Detail URL", "Link", "Assessment URL"):
    if url_column in df.columns:
        print(results[["Assessment Name", url_column, "similarity"]])
        break
else:
    print("URL column not found in dataset. Please check the column name.")


                                        Assessment Name  \
268                                        Python (New)   
157                          Job Control Language (New)   
361          Virtual Assessment and Development Centers   
382                        Verify - Deductive Reasoning   
346                        Verify - Deductive Reasoning   
514  Technology Professional 8.0 Job Focused Assessment   
472                         Professional + 7.0 Solution   
360               Verify Interactive Process Monitoring   
387             Apprentice + 8.0 Job Focused Assessment   
473                       Professional + 7.1 (Americas)   

                                                                                                          Detail URL  \
268                                          https://www.shl.com/solutions/products/product-catalog/view/python-new/   
157                            https://www.shl.com/solutions/products/product-catalog/view/job-control-languag

In [8]:
!pip install Gradio

Collecting Gradio
  Downloading gradio-5.24.0-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<25.0,>=22.0 (from Gradio)
  Downloading aiofiles-24.1.0-py3-none-any.whl.metadata (10 kB)
Collecting fastapi<1.0,>=0.115.2 (from Gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from Gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from Gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from Gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from Gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from Gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from Gradio)
  Downloading ruff-0.11.5-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 (

In [9]:
import gradio as gr

# Gradio-compatible function
def gradio_search(query, top_k):
    """Adapt search_assessments() to the row format of the Gradio table.

    Args:
        query: Text entered by the user.
        top_k: Number of results requested (slider value; coerced to int).

    Returns:
        A list of rows ``[name, score, url, remote_testing, adaptive_irt]``,
        matching the header order of the output Dataframe. An empty list is
        returned for a missing or blank query.
    """
    if not query or not query.strip():
        return []

    results = search_assessments(query, top_k=int(top_k))
    display_data = []

    for _, row in results.iterrows():
        # Use the first URL-like column that is present and actually filled.
        # NaN is truthy, so a plain `or` chain would pass a NaN cell through.
        url = "No URL found"
        for column in ("Detail URL", "Link", "Assessment URL"):
            value = row.get(column)
            if pd.notna(value) and value:
                url = value
                break

        display_data.append([
            row["Assessment Name"],
            f"{row['similarity']:.4f}",
            url,
            # Each value is read from the column its header names (these two
            # were previously swapped).
            row.get("Remote Testing"),
            row.get("Adaptive/IRT"),
        ])

    return display_data


# Build the web UI around gradio_search and start it immediately.
gr.Interface(
    fn=gradio_search,
    # One input component per gradio_search parameter, in order: query, top_k.
    inputs=[
        gr.Textbox(lines=3, placeholder="e.g., Python, SQL, mid-level, 30 minutes", label="Query"),
        gr.Slider(minimum=1, maximum=20, value=10, step=1, label="Top K Results"),
    ],
    outputs=gr.Dataframe(
        # Header order must match the order of the cells in each row
        # returned by gradio_search.
        headers=[
            "Assessment Name",
            "Similarity Score",
            "URL",
            "Remote Testing",
            "Adaptive/IRT"
        ],
        label="Top Recommendations"
    ),
    title="🔍 SHL Assessment Recommendation Tool",
    description="Enter your hiring needs and get smart assessment recommendations.",
# share=True publishes the app on a temporary public gradio.live URL (see the
# cell output), so anyone with the link can reach it.
).launch(share=True)

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://92297dcc00af36d7a4.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




In [None]:
!pip install fastapi uvicorn nest-asyncio

import nest_asyncio
import uvicorn
from fastapi import FastAPI, Query
from pydantic import BaseModel
from typing import List

# Allow FastAPI to run inside Jupyter/Colab
# (the notebook already runs an asyncio event loop; nest_asyncio patches
# asyncio so that a nested loop can be started from within it).
nest_asyncio.apply()

# Application object; the endpoint below registers itself on it via decorator
# and uvicorn serves it at the end of this cell.
app = FastAPI(title="SHL Assessment Recommender API")

# Request body accepted by POST /recommend.
class QueryRequest(BaseModel):
    # Free-text description of the hiring need; forwarded to search_assessments.
    query: str
    # Number of recommendations requested; 10 when the field is omitted.
    # No range constraint is declared on this field.
    top_k: int = 10

@app.post("/recommend")
def recommend_assessments(req: QueryRequest):
    """Return the top matching assessments for a query as JSON.

    Args:
        req: Request body carrying ``query`` and ``top_k``.

    Returns:
        ``{"results": [...]}`` where every item has the keys
        "Assessment Name", "Similarity Score" (rounded to 4 decimals) and
        "URL" ("N/A" when no URL column holds a value), or
        ``{"error": <message>}`` when the query is blank or ``top_k`` is
        smaller than 1.
    """
    query = req.query
    top_k = req.top_k

    if not query.strip():
        return {"error": "Empty query"}
    # A non-positive k cannot be searched for; report it in the same style as
    # the empty-query case instead of failing inside the index lookup.
    if top_k < 1:
        return {"error": "top_k must be at least 1"}

    results = search_assessments(query, top_k)
    response = []
    for _, row in results.iterrows():
        # Use the first URL-like column that is present and filled. NaN is
        # truthy, so an `or` chain would put NaN into the payload, and NaN is
        # not valid JSON.
        url = "N/A"
        for column in ("Detail URL", "Link", "Assessment URL"):
            value = row.get(column)
            if pd.notna(value) and value:
                url = value
                break

        response.append({
            "Assessment Name": row["Assessment Name"],
            # float() also converts the NumPy scalar into a plain Python float.
            "Similarity Score": round(float(row["similarity"]), 4),
            "URL": url,
        })

    return {"results": response}

# Run the API (hosted locally inside Colab)
# This call blocks the cell while the server is running (the log prints
# "Press CTRL+C to quit"); host 0.0.0.0 listens on all network interfaces,
# port 8000.
uvicorn.run(app, host="0.0.0.0", port=8000)




INFO:     Started server process [441]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit)


INFO:     172.31.62.201:0 - "GET /theme.css?v=63194d3741d384f9f85db890247b6c0ef9e7abac0f297f40a15c59fe4baba916 HTTP/1.1" 200 OK
INFO:     172.31.62.201:0 - "GET /static/fonts/IBMPlexMono/IBMPlexMono-Regular.woff2 HTTP/1.1" 200 OK
INFO:     172.31.62.201:0 - "GET /static/fonts/ui-sans-serif/ui-sans-serif-Regular.woff2 HTTP/1.1" 404 Not Found
INFO:     172.31.62.201:0 - "GET /static/fonts/IBMPlexMono/IBMPlexMono-Bold.woff2 HTTP/1.1" 200 OK
INFO:     172.31.62.201:0 - "GET /static/fonts/ui-sans-serif/ui-sans-serif-Bold.woff2 HTTP/1.1" 404 Not Found
INFO:     172.31.62.201:0 - "GET /static/fonts/system-ui/system-ui-Bold.woff2 HTTP/1.1" 404 Not Found
INFO:     172.31.62.201:0 - "GET /static/fonts/system-ui/system-ui-Regular.woff2 HTTP/1.1" 404 Not Found
INFO:     172.31.37.205:0 - "POST /gradio_api/queue/join HTTP/1.1" 200 OK
INFO:     172.31.37.205:0 - "GET /gradio_api/queue/data?session_hash=dxb0etlktw9 HTTP/1.1" 200 OK
