In [1]:
import torch
torch.cuda.is_available()

True

In [None]:
hr_knowledge = [

"Human Resources professionals are responsible for recruitment, onboarding, and employee relations.",
"HR departments manage payroll, benefits administration, and compliance with labor laws.",
"Human Resources teams support performance evaluation and workforce planning.",
"HR professionals handle conflict resolution and employee engagement initiatives.",
"Talent acquisition is a core function of Human Resources.",
"HR specialists often coordinate training and professional development programs.",
"Human Resources supports organizational culture and diversity initiatives.",
"HR roles involve maintaining employee records and ensuring policy compliance.",
"Recruitment and hiring are central responsibilities in HR positions.",
"HR professionals work closely with management to support strategic staffing decisions.",
"Key skills for HR professionals include communication, interpersonal skills, and organizational ability.",

"HR roles require knowledge of employment law and workplace regulations.",
"Strong problem-solving and conflict management skills are important in HR.",
"Human Resources professionals need attention to detail and confidentiality.",
"HR positions require administrative and coordination skills.",
"People management and employee support skills are essential in HR careers.",

"Entry-level HR roles include HR Assistant, HR Coordinator, and Recruiting Intern.",
"Aspiring HR professionals often start in talent acquisition or people operations roles.",
"Internships in recruitment or employee engagement are common entry paths into HR.",
"HR assistants support senior HR managers with administrative tasks.",
"People Operations roles are closely related to Human Resources functions.",

"Job titles such as HR Generalist, HR Assistant, and Recruiter are directly related to Human Resources.",
"People Operations Specialist roles are often equivalent to HR positions.",
"Talent Acquisition Specialist is a common HR-related title.",
"Employee Relations Coordinator works within the HR function.",
"Compensation and Benefits Analyst is part of Human Resources."
]


In [None]:
!pip install sentence-transformers faiss-cpu
!pip install -q \
    transformers>=4.38.0 \
    sentence-transformers \
    faiss-cpu \
    accelerate \
    bitsandbytes




In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

print("CUDA available:", torch.cuda.is_available())
print("Device:", torch.cuda.get_device_name(0))

model_name = "Qwen/Qwen2.5-7B-Instruct"

tokenizer = AutoTokenizer.from_pretrained(model_name)

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16
)

model.eval()
print("Generator model loaded.")


CUDA available: False


AssertionError: Torch not compiled with CUDA enabled

In [None]:
import pandas as pd
from google.colab import drive

drive.mount('/content/drive')
df = pd.read_csv("/content/drive/MyDrive/Apziva/ProjectC.csv")
df = df.drop_duplicates(subset=['job_title'])
df.head()

In [None]:
from sentence_transformers import SentenceTransformer
import numpy as np
import faiss

embed_model = SentenceTransformer("all-MiniLM-L6-v2")

doc_embeddings = embed_model.encode(hr_knowledge)
doc_embeddings = np.array(doc_embeddings).astype("float32")

index = faiss.IndexFlatL2(doc_embeddings.shape[1])
index.add(doc_embeddings)

print("Vector index built.")
print("Total documents indexed:", len(hr_knowledge))


In [None]:
def retrieve_context(query, k=3):
    query_embedding = embed_model.encode([query]).astype("float32")
    distances, indices = index.search(query_embedding, k)
    return [hr_knowledge[i] for i in indices[0]]

def build_equivalence_prompt(title_a, title_b):
    prompt = f"""
You are an expert HR recruiter.

Determine whether the following two job titles refer to the same or nearly the same role.

Consider:
- Abbreviations (e.g., HR = Human Resources)
- Synonyms
- Minor wording differences
- Singular/plural differences

If they describe essentially the same job function in practice, answer Yes.
Otherwise answer No.

IMPORTANT:
- Output ONLY one word: Yes or No.
- Do not explain.

A: {title_a}
B: {title_b}

Answer:
"""
    return prompt

def build_rag_pair_prompt(title_a, title_b, target_role):

    context_docs = retrieve_context(target_role, k=10)
    context = "\n".join(context_docs)

    prompt = f"""
You are an expert HR recruiter.

Target Role:
{target_role}

Relevant Background Information:
{context}

Compare the following job titles:

A: {title_a}
B: {title_b}

Which title is more suitable?
Answer guidance:
- The first line must be one of: A or B
- Choose A if it is more relevant to the target role.
- Choose B if it is more relevant to the target role.
- Even if both are weakly relevant, choose the more suitable one.
- Output ONLY one character: A or B.
- Do not default to A; follow the suitability logic strictly.
Answer:
"""
    return prompt

def generate_answer(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to("cuda")

    outputs = model.generate(
        **inputs,
        max_new_tokens=3,
        do_sample=False,
        temperature=0.0
    )

    generated_tokens = outputs[0][inputs["input_ids"].shape[-1]:]
    result = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
    return result

def rag_pairwise_winner(title_a, title_b):
    target_role = "Aspiring Human Resources"
    eq_prompt = build_equivalence_prompt(title_a, title_b)
    eq_result = generate_answer(eq_prompt)

    if eq_result.startswith("Yes"):
        return "Tie"

    comp_prompt = build_rag_pair_prompt(title_a, title_b, target_role)
    comp_result = generate_answer(comp_prompt)

    if comp_result.startswith("A"):
        return "A：" + title_a
    elif comp_result.startswith("B"):
        return "B：" + title_b
    else:
        # fallback 安全机制
        return "Error"

In [None]:
title_a = "HR Assistant"
title_b = "Human Resource Assistant"

winner = rag_pairwise_winner(title_a, title_b)
print("Winner:", winner)

title_a = "English Teacher"
title_b = "Marketing Data Analyst"

winner = rag_pairwise_winner(title_a, title_b)
print("Winner:", winner)

title_a = "People Operations Intern"
title_b = "Marketing Data Analyst"

winner = rag_pairwise_winner(title_a, title_b)
print("Winner:", winner)

In [None]:
import pandas as pd
from itertools import combinations

def rag_pairwise_sort(df, title_col="job_title"):

    titles = df[title_col].tolist()
    n = len(titles)
    scores = [0] * n
    pairwise_results = []

    for i in range(n):
        print(f"Processing {i+1}/{n} titles...")
        for j in range(i+1, n):

            winner = rag_pairwise_winner(titles[i], titles[j])

            pairwise_results.append({
                "A": titles[i],
                "B": titles[j],
                "winner": winner
            })

            if winner == titles[i]:
                scores[i] += 1
            elif winner == titles[j]:
                scores[j] += 1
            else:
                scores[i] += 0.5
                scores[j] += 0.5

    sorted_df = pd.DataFrame({
        "job_title": titles,
        "score": scores
    }).sort_values(by="score", ascending=False).reset_index(drop=True)

    pairwise_df = pd.DataFrame(pairwise_results)

    return sorted_df, pairwise_df

sorted_df, pairwise_df = rag_pairwise_sort(df, title_col="job_title")
print(sorted_df)


Processing 1/52 titles...
Processing 2/52 titles...
Processing 3/52 titles...
Processing 4/52 titles...
Processing 5/52 titles...
Processing 6/52 titles...
Processing 7/52 titles...
Processing 8/52 titles...
Processing 9/52 titles...
Processing 10/52 titles...
Processing 11/52 titles...
Processing 12/52 titles...
Processing 13/52 titles...
Processing 14/52 titles...
Processing 15/52 titles...
Processing 16/52 titles...
Processing 17/52 titles...
Processing 18/52 titles...
Processing 19/52 titles...
Processing 20/52 titles...
Processing 21/52 titles...
Processing 22/52 titles...
Processing 23/52 titles...
Processing 24/52 titles...
Processing 25/52 titles...
Processing 26/52 titles...
Processing 27/52 titles...
Processing 28/52 titles...
Processing 29/52 titles...
Processing 30/52 titles...
Processing 31/52 titles...
Processing 32/52 titles...
Processing 33/52 titles...
Processing 34/52 titles...
Processing 35/52 titles...
Processing 36/52 titles...
Processing 37/52 titles...
Processing

In [None]:
print(sorted_df)

NameError: name 'sorted_df' is not defined