In [None]:
import os

import numpy as np
import polars as pl
import torch
from huggingface_hub import login
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

from hiring_cv_bias.config import HARD_SOFT_SKILLS
from hiring_cv_bias.hard_soft_skills_labelling.utils import batch_classify_skills
from hiring_cv_bias.utils import load_data

# Authenticate against the Hugging Face Hub (presumably required to download
# the meta-llama checkpoint loaded further down — confirm).
# The access token is read from the HF_TOKEN environment variable instead of
# being pasted into the notebook, where it could be committed or shared by
# accident. When the variable is unset, login() receives None and falls back
# to prompting for the token interactively.
login(token=os.environ.get("HF_TOKEN"))

In [None]:
# Load the cleaned skills table, keep only the rows typed as professional
# skills, and collect the distinct skill names into a plain Python list.
cv_skills = load_data("/kaggle/input/skills-cleaned/Skills_cleaned.csv")
is_professional_skill = pl.col("Skill_Type") == "Professional_Skill"
professional_skill_rows = cv_skills.filter(is_professional_skill)
skills = professional_skill_rows["Skill"].unique().to_list()

In [None]:
# Checkpoint to load from the Hugging Face Hub.
model_name = "meta-llama/Llama-3.1-8B-Instruct"

# 4-bit NF4 quantization with double quantization and float16 as the compute
# dtype.
# NOTE(review): the compute dtype here (float16) differs from the torch_dtype
# passed to from_pretrained below (bfloat16) — confirm the mismatch is intended.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.float16,
)

# Left-padded tokenizer; the end-of-sequence token doubles as the padding token
# (presumably because the checkpoint's tokenizer defines no pad token — confirm).
tokenizer = AutoTokenizer.from_pretrained(model_name, padding_side="left")
tokenizer.pad_token = tokenizer.eos_token

# Load the quantized causal LM and let the library place it on the available
# devices.
# NOTE(review): trust_remote_code=True permits running code shipped with the
# model repository — confirm it is actually needed for this checkpoint.
model_load_kwargs = {
    "device_map": "auto",
    "torch_dtype": torch.bfloat16,
    "trust_remote_code": True,
    "quantization_config": bnb_config,
}
model = AutoModelForCausalLM.from_pretrained(model_name, **model_load_kwargs)

In [None]:
# Label every unique professional skill with the project's batch classifier,
# called with batch_size=64 (presumably the number of skills per model call).
# The result is later counted for "Hard" / "Soft" / "Unknown" and paired with
# `skills` column-wise, so one label per skill, in the same order, is expected.
# NOTE(review): the model and tokenizer loaded above are not passed in here —
# presumably batch_classify_skills obtains them some other way; confirm.
hard_soft_labels = batch_classify_skills(skills, batch_size=64)

In [None]:
# Number of skills assigned to each label, in the order (Hard, Soft, Unknown).
tuple(hard_soft_labels.count(label) for label in ("Hard", "Soft", "Unknown"))

In [None]:
# Pair each skill with its predicted label and persist the table as a CSV file
# in the current working directory.
labelled_columns = {"Skill": skills, "label": hard_soft_labels}
output_df = pl.DataFrame(labelled_columns)
output_df.write_csv("hard_soft_skills.csv")

### Estimating Accuracy

In this section, we estimated the model's accuracy on a sample of 100 skills: 50 that the model labelled as hard skills and 50 that it labelled as soft skills.  
We then compared the model's predictions with evaluations provided by ChatGPT, which served as the reference labels.
From this comparison, we derived separate accuracy estimates for hard and soft skills, as well as an overall accuracy score.

In [None]:
# Read the labelled skills table from the path configured as HARD_SOFT_SKILLS
# (presumably the CSV of skill labels produced earlier — confirm) and print how
# many skills carry each of the three expected labels.
hard_soft_df = pl.read_csv(HARD_SOFT_SKILLS)
print(
    hard_soft_df["label"]
    .value_counts()
    # is_in takes ONE collection of candidate values; passing each value as its
    # own positional argument raises a TypeError.
    .filter(pl.col("label").is_in(["Hard", "Soft", "Unknown"]))
)

In [None]:
# Fixed seed so the evaluation sample can be regenerated exactly on a re-run.
# NOTE(review): any accuracy figures recorded from an earlier, unseeded draw
# will not correspond to the sample this seed produces.
SAMPLE_SEED = 42
# Number of skills drawn for each predicted label.
SAMPLE_SIZE_PER_LABEL = 50

# Draw the same number of rows from the skills predicted as "Hard" and from
# those predicted as "Soft"; each draw is converted to a NumPy array of rows.
hard_skills = (
    hard_soft_df.filter(pl.col("label") == "Hard")
    .sample(SAMPLE_SIZE_PER_LABEL, shuffle=True, seed=SAMPLE_SEED)
    .to_numpy()
)
soft_skills = (
    hard_soft_df.filter(pl.col("label") == "Soft")
    .sample(SAMPLE_SIZE_PER_LABEL, shuffle=True, seed=SAMPLE_SEED)
    .to_numpy()
)

# Stack both groups, then shuffle the rows in place (along the first axis) so
# that hard and soft rows are interleaved. A dedicated seeded generator keeps
# the shuffle reproducible without touching NumPy's global random state.
skills_sample = np.concatenate((hard_skills, soft_skills))
np.random.default_rng(SAMPLE_SEED).shuffle(skills_sample)

**Accuracy of the 3-example model:**

- HARD: 49/50

- SOFT: 18/50

- OVERALL: 67/100

In [None]:
# Display the shuffled evaluation sample; as the cell's last expression it is
# rendered as the cell output.
skills_sample