In [1]:
import torch, gc
gc.collect()
torch.cuda.empty_cache()


In [2]:
import spacy

# load model
nlp = spacy.load("en_core_web_sm")

text = "Hello world. This is a test sentence."

# tokenize
doc = nlp(text)
tokens = [token.text for token in doc]

print("spaCy result:", tokens)

import nltk
#  nltk.download("wordnet")
#  nltk.download("omw-1.4") 


spaCy result: ['Hello', 'world', '.', 'This', 'is', 'a', 'test', 'sentence', '.']


In [3]:
# Import all necessary libraries
import pandas as pd
import numpy as np
import torch
import os
import sys
from datasets import Dataset
from transformers import (
    AutoTokenizer, AutoModelForCausalLM, 
    BitsAndBytesConfig, TrainingArguments,
    Trainer, DataCollatorForLanguageModeling
)
import nltk
from nltk.corpus import wordnet
from nltk.tokenize import word_tokenize
import random
import nlpaug.augmenter.word as naw
import random
import re
from peft import prepare_model_for_kbit_training, get_peft_model, LoraConfig, PeftModel
import gradio as gr
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from transformers import AutoModelForSequenceClassification
from sklearn.metrics import classification_report
# Set constant
MODEL_PATH = "./gender-bias-detector"
MODEL_NAME = "Qwen/Qwen1.5-1.8B"
TOKEN = "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"

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Creating a unified quantitative configuration
def get_bnb_config():
    return BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_use_double_quant=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch.bfloat16,
        load_in_8bit_fp32_cpu_offload=True 
    )

In [5]:

def build_fewshot_prompt(user_sentence: str) -> str:
    cot_examples = [
        (
            "Women belong in the kitchen.",
            "Step 1: This sentence states 'Women belong in the kitchen.'\n"
            "Step 2: It enforces a gender stereotype suggesting women should stay in domestic roles.\n"
            "Step 3: This is a clear example of explicit bias.\n"
            "Answer: Explicit"
        ),
        (
            "Women are more nurturing than men.",
            "Step 1: The sentence claims 'Women are more nurturing than men.'\n"
            "Step 2: It implies a generalization about women’s behavior.\n"
            "Step 3: This is an implicit stereotype.\n"
            "Answer: Implicit"
        ),
        (
            "He earned the job through hard work.",
            "Step 1: The sentence says 'He earned the job through hard work.'\n"
            "Step 2: It contains no mention of gender or stereotype.\n"
            "Step 3: This is a neutral statement.\n"
            "Answer: Non"
        ),
    ]
    
    # Splicing CoT example
    fewshot = "\n\n".join(
        f"Sentence: {sent}\nReasoning:\n{reason}"
        for sent, reason in cot_examples
    )
    
    return (
        "You are a gender bias classifier. For each sentence, follow the reasoning steps before predicting.\n"
        "Only output one line in the format: Answer: <label>, where <label> is one of:\n"
        "- Explicit\n- Implicit\n- Non\n\n"
        "If none of these three labels apply, output Answer: Non.\n"
        "Do not output anything else.\n\n"
        f"{fewshot}\n\n"
        f"Sentence: {user_sentence.strip()}\nReasoning:\n"
    )

In [6]:
class WordNetAugmenter:
    def __init__(self, spacy_model="en_core_web_sm"):
        self.nlp = spacy.load(spacy_model)

    def get_synonyms(self, word, pos=None):
        synonyms = set()
        for syn in wordnet.synsets(word, pos=pos):
            for lemma in syn.lemmas():
                synonym = lemma.name().replace("_", " ").lower()
                if synonym != word.lower():
                    synonyms.add(synonym)
        return list(synonyms)

    def spacy_pos_to_wordnet(self, spacy_pos):
        if spacy_pos.startswith("N"):
            return wordnet.NOUN
        elif spacy_pos.startswith("V"):
            return wordnet.VERB
        elif spacy_pos.startswith("J"):
            return wordnet.ADJ
        elif spacy_pos.startswith("R"):
            return wordnet.ADV
        else:
            return None

    def augment(self, text, n_replace=1):
        doc = self.nlp(text)
        words = [token.text for token in doc]
        candidates = []

        for i, token in enumerate(doc):
            if not token.is_alpha:
                continue
            wn_pos = self.spacy_pos_to_wordnet(token.tag_)
            if wn_pos and wordnet.synsets(token.text, pos=wn_pos):
                candidates.append((i, token.text, wn_pos))

        if not candidates:
            return text

        random.shuffle(candidates)
        replaced = 0
        for idx, word, pos in candidates:
            synonyms = self.get_synonyms(word, pos)
            if synonyms:
                words[idx] = random.choice(synonyms)
                replaced += 1
            if replaced >= n_replace:
                break

        return " ".join(words)

In [7]:

wordnet_augmenter = None 
# Define enhancement function
def augment_text(text, label, augment_rate=0.3):
    global wordnet_augmenter
    if label == "implicit" and random.random() < augment_rate and wordnet_augmenter:
        return wordnet_augmenter.augment(text)
    else:
        return text

# %%
def train_model(debug_mode=True):
    """Train the model and save it to"""
    # load dataset
    try:
        df = pd.read_csv("dataset.csv")
        print("Loading a dataset from a CSV file")
        df["label"] = df["label"].str.lower().str.replace("-", "_").str.strip()
        df["label"] = df["label"].replace({
            "implicit_sexist": "implicit",
            "explicit_sexist": "explicit",
            "non_sexist": "non",
            "non-sexist": "non",
            "nonsexist": "non",
            "sexist_implicit": "implicit",
            "sexist_explicit": "explicit"
        })

     #   if debug_mode:
      #      print(" Debug mode on: only 50 data points are sampled for each category")
     #       df = df.groupby("label").apply(lambda x: x.sample(n=30, random_state=42)).reset_index(drop=True)
        
        global wordnet_augmenter
        wordnet_augmenter = WordNetAugmenter()
        

    except Exception as e:
        print(f"Unable to load dataset: {e}")
        print("Create a sample dataset")
        data = {
            'text': ["She got the promotion because she's attractive."] * 1000 + 
                    ["Women belong in the kitchen"] * 1000 + 
                    ["He earned the promotion through hard work"] * 1000,
            'label': ["implicit"] * 1000 + 
                     ["explicit"] * 1000 + 
                     ["non"] * 1000
        }
        df = pd.DataFrame(data)
        
    # Application enhancement: After group sampling, enhance the implicit class
    df["text"] = df.apply(lambda row: augment_text(row["text"], row["label"]), axis=1)

    # Creating the Hugging Face dataset
    dataset = Dataset.from_pandas(df)
    
    # check dataset
    print(f"Data set size: {len(dataset)}")
    print(f"Label distribution:\n{df['label'].value_counts()}")
    
    # split dataset
    split_dataset = dataset.train_test_split(test_size=0.2, seed=42)
    train_dataset = split_dataset["train"]
    eval_dataset = split_dataset["test"]
    
    # load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(
        MODEL_NAME,
        trust_remote_code=True,
        token=TOKEN
    )
    tokenizer.pad_token = tokenizer.eos_token
    tokenizer.padding_side = "right"  
    
    # data progress
    def build_prompt(example):
        prompt = build_fewshot_prompt(example["text"])
        # Update label mapping
        label_mapping = {
            "implicit": "implicit",
            "explicit": "explicit",
            "non": "non",
        }
        
        # Get the label 
        target_label = str(example["label"]).lower().strip()
        full_text = prompt + f"Answer: {target_label.capitalize()}{tokenizer.eos_token}"

        tokenized = tokenizer(full_text, truncation=True, max_length=900, padding="max_length")
        input_ids = tokenized["input_ids"]

        prompt_len = len(tokenizer(prompt, add_special_tokens=False)["input_ids"])
        labels = [-100] * prompt_len + input_ids[prompt_len:]
        labels = labels[:900]
        if len(labels) < 900:
            labels += [-100] * (900 - len(labels))

        tokenized["labels"] = labels
        return tokenized

    # Application prompt project
    tokenized_train = train_dataset.map(build_prompt, remove_columns=['text', 'label'])
    tokenized_eval = eval_dataset.map(build_prompt, remove_columns=['text', 'label'])
    
    # Use a unified quantization configuration
    bnb_config = get_bnb_config()

    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        device_map={"": 0},
        trust_remote_code=True,
        quantization_config=bnb_config,
        token=TOKEN
    )

    # LoRA configuration
    model = prepare_model_for_kbit_training(model)
    lora_config = LoraConfig(
        r=16,
        lora_alpha=32,
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
        lora_dropout=0.05,
        bias="none",
        task_type="CAUSAL_LM"
    )
    model = get_peft_model(model, lora_config)
    
    # Print trainable parameters
    model.print_trainable_parameters()
    
    # training configuration
    training_args = TrainingArguments(
        output_dir=MODEL_PATH,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        gradient_accumulation_steps=2,
        eval_strategy="epoch",
        logging_steps=20,
        num_train_epochs=3,
        save_strategy="epoch",
        learning_rate=2e-4,
        bf16=False,
        fp16=True,
        optim="paged_adamw_8bit",
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
        report_to="none",
        save_total_limit=2
    )

    data_collator = DataCollatorForLanguageModeling(
        tokenizer=tokenizer,
        mlm=False
    )

    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=tokenized_train,
        eval_dataset=tokenized_eval,
        tokenizer=tokenizer,
        data_collator=data_collator,
        
    )
    
    # train model
    print("start train model...")
    trainer.train()




    def extract_answer(txt):
        import re
        m = re.findall(r"answer:\s*(explicit|implicit|non)", txt, re.I)
        return (m[-1] if m else "non").lower()

    predictions = []
    true_labels = []


    model.eval()
    for example in eval_dataset:
        prompt = build_fewshot_prompt(example["text"])
        inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=900).to("cuda")
        with torch.no_grad():
            output = model.generate(
                **inputs,
                max_new_tokens=16,
                temperature=0.0,
                do_sample=False,
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.eos_token_id,
                num_return_sequences=1,
                return_dict_in_generate=False
            )

            
  
        

        # Clean up punctuation and trailing spaces
        
        decoded = tokenizer.decode(output[0], skip_special_tokens=True).strip()
        label_candidate = extract_answer(decoded)
        
        predictions.append(label_candidate)
        true_labels.append(example["label"])

    #    print(f" model result: {decoded}")
    


    eval_results = trainer.evaluate()
    print("Verify assessment results:")
    for k, v in eval_results.items():
        if isinstance(v, (float, int)):
            print(f"{k}: {v:.4f}")
        else:
            print(f"{k}: {v}")



    # save model
    model.save_pretrained(MODEL_PATH)
    tokenizer.save_pretrained(MODEL_PATH)
    print(f"Training complete! Model saved to {MODEL_PATH}")
    
    # Evaluate model performance
    print("Evaluate model performance...")
    eval_results = trainer.evaluate()
    print(f"Validation loss: {eval_results['eval_loss']}")

     # Clean memory
    del model
    del trainer
    torch.cuda.empty_cache()

In [8]:
# %%
# Uncomment the following line to run the training
#train_model()

In [8]:
import os
MODEL_PATH = "./gender-bias-detector"
MODEL_NAME = "Qwen/Qwen1.5-1.8B"
TOKEN = "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"
# %%
def load_model():
    """Loading a trained model from disk"""

    if not os.path.exists(MODEL_PATH):
        raise FileNotFoundError(f"Model catalog {MODEL_PATH} does not exist. Please run the training part first.")

    print("Load base model...")
    bnb_config = get_bnb_config()

    base_model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        device_map={"": 0},  
        trust_remote_code=True,
        quantization_config=bnb_config,
        token=TOKEN
    )


    print("load adapter...")
    model = PeftModel.from_pretrained(base_model, MODEL_PATH,is_trainable=False).eval()
 

    

    tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
    tokenizer.pad_token = tokenizer.eos_token 
    print("Number of trainable parameters:")
    model.print_trainable_parameters()
    print("Current Peft configuration：", model.peft_config)
    print("activated LoRA:", model.active_adapters)
    return model, tokenizer

# %%
# load model
try:
    model, tokenizer = load_model()
    print("Model loaded successfully!")
except Exception as e:
    print(f"Failed to load model: {e}")
    print("Please run the training part first or make sure the model is saved correctly")

Load base model...
load adapter...
Number of trainable parameters:
trainable params: 0 || all params: 1,843,120,128 || trainable%: 0.0000
Current Peft configuration： {'default': LoraConfig(task_type='CAUSAL_LM', peft_type=<PeftType.LORA: 'LORA'>, auto_mapping=None, base_model_name_or_path='Qwen/Qwen1.5-1.8B', revision=None, inference_mode=True, r=16, target_modules={'q_proj', 'o_proj', 'v_proj', 'k_proj'}, exclude_modules=None, lora_alpha=32, lora_dropout=0.05, fan_in_fan_out=False, bias='none', use_rslora=False, modules_to_save=None, init_lora_weights=True, layers_to_transform=None, layers_pattern=None, rank_pattern={}, alpha_pattern={}, megatron_config=None, megatron_core='megatron.core', trainable_token_indices=None, loftq_config={}, eva_config=None, corda_config=None, use_dora=False, use_qalora=False, qalora_group_size=16, layer_replication=None, runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=False), lora_bias=False)}
activated LoRA: ['default']
Model loaded successfully!


In [9]:
#Manually prepare eval_dataset
from datasets import Dataset
import pandas as pd

df = pd.read_csv("dataset.csv")


#df = df.groupby("label").apply(lambda x: x.sample(n=100, random_state=42)).reset_index(drop=True)



df["label"] = df["label"].str.lower().str.replace("-", "_").str.strip()
df["label"] = df["label"].replace({
    "implicit_sexist": "implicit",
    "explicit_sexist": "explicit",
    "non_sexist": "non",
    "non-sexist": "non",
    "nonsexist": "non",
    "sexist_implicit": "implicit",
    "sexist_explicit": "explicit"
})

# Apply enhancement logic
df["text"] = df.apply(lambda row: augment_text(row["text"], row["label"]), axis=1)

# split dataset
from sklearn.model_selection import train_test_split
train_df, eval_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df["label"])
eval_dataset = Dataset.from_pandas(eval_df)


In [None]:
import re
from sklearn.metrics import classification_report

VALID = {"explicit", "implicit", "non"}

def extract_answer(text: str) -> str:
    """Grab the last Answer: tag from the generated text, and use it as a backup."""
    m = re.findall(r"answer:\s*(explicit|implicit|non)", text, re.I)
    return m[-1].lower() if m else "non"

def evaluate_model_on_test_set(model, tokenizer, eval_dataset):
    model.eval()
    preds, truths = [], []

    for ex in eval_dataset:
        prompt = build_fewshot_prompt(ex["text"])          #  same to train parse
        ids = tokenizer(
            prompt,
            return_tensors="pt",
            truncation=True,
            max_length=900,
            padding=False         
        ).to(model.device)

        with torch.no_grad():
            out = model.generate(
                **ids,
                max_new_tokens=16,          
                do_sample=False,           
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.eos_token_id
                
            )

   
        decoded = tokenizer.decode(out[0], skip_special_tokens=True)
        pred = extract_answer(decoded)



        preds.append(pred)
        truths.append(ex["label"].lower())

    print("\n classification_report（Explicit / Implicit / Non）")
    print(classification_report(
        truths, preds,
        labels=["explicit", "implicit", "non"],
        digits=4,
        zero_division=0
    ))
evaluate_model_on_test_set(model, tokenizer, eval_dataset)


 分类报告（Explicit / Implicit / Non）
              precision    recall  f1-score   support

    explicit     0.8904    0.9112    0.9007       214
    implicit     0.9126    0.8350    0.8721       200
         non     0.8779    0.9303    0.9034       201

    accuracy                         0.8927       615
   macro avg     0.8936    0.8922    0.8920       615
weighted avg     0.8935    0.8927    0.8923       615



In [10]:
# %%

def classify_gender_bias(text):
    """Using the model for gender-biased classification)"""
    prompt = build_fewshot_prompt(text)

    inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=900).to(model.device)

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=16,
            num_beams=1,
            early_stopping=False,
            pad_token_id=tokenizer.eos_token_id,
            temperature=0.0,
            do_sample=False,
            eos_token_id=tokenizer.eos_token_id
        )

    decoded = tokenizer.decode(outputs[0], skip_special_tokens=True).strip()

    # Extract lines starting with "Answer:"
    answer_lines = [line for line in decoded.splitlines() if line.lower().startswith("answer:")]
    if answer_lines:
        content = answer_lines[-1][len("answer:"):].strip().lower()

        # Match in order to avoid early false hits of non
        if re.fullmatch(r"explicit", content, re.IGNORECASE):
            pred = "explicit"
        elif re.fullmatch(r"implicit", content, re.IGNORECASE):
            pred = "implicit"
        elif re.fullmatch(r"non", content, re.IGNORECASE):
            pred = "non"
        else:
            print(" Unmatched tag content：", content)
            pred = "unknown"
    else:
        print("Unable to find the Answer line, original output：", decoded)
        pred = "unknown"

    return pred.capitalize() if pred != "unknown" else "Unknown"



In [11]:
# %%
# Test the classification function
if 'model' in locals() and 'tokenizer' in locals():
    test_texts = [
        "She only got the job because she's pretty",
        "Women are really bad drivers.",
        "He earned the promotion through hard work",
        "Men are better leaders than women",
        "Act like a lady , think like a man ? .."
    ]

    for text in test_texts:
        result = classify_gender_bias(text)
        print(f"text: '{text}'")
        print(f"result: {result}\n")
        
else:
    print("Model not loaded, cannot be tested")



The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.
The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


text: 'She only got the job because she's pretty'
result: Implicit



The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


text: 'Women are really bad drivers.'
result: Explicit



The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


text: 'He earned the promotion through hard work'
result: Non



The following generation flags are not valid and may be ignored: ['temperature']. Set `TRANSFORMERS_VERBOSITY=info` for more details.


text: 'Men are better leaders than women'
result: Explicit

text: 'Act like a lady , think like a man ? ..'
result: Implicit



In [12]:

def create_gradio_interface():
    """Creating the Gradio Interactive Interface"""
    # Define a list of sample text
    example_texts = [
        "She only got the job because she's pretty",
        "Women are really bad drivers.",
        "Real women don't go along w/ crap! It's a fake cause manufactured by corporations.",
        "Men are better leaders than women",
        "The nurse took care of the patient while the doctor performed surgery",
        "Act like a lady , think like a man ? .."
    ]
    
    # Create interface
    with gr.Blocks(title="Gender Bias Detector", theme=gr.themes.Soft()) as demo:
        gr.Markdown("#  Gender Bias Detection")
        gr.Markdown("Identify implicit or explicit gender bias in text")
        
        with gr.Row():
            input_text = gr.Textbox(
                label="Enter text to analyze",
                placeholder="Type a sentence here...",
                lines=3
            )
            
        with gr.Row():
            submit_btn = gr.Button("Analyze", variant="primary")
            
        with gr.Row():
            output_label = gr.Label(label="Classification Result")
            
        with gr.Row():
            gr.Examples(
                examples=example_texts, 
                inputs=[input_text],  
                label="Example Sentences"
            )
        
        submit_btn.click(
            fn=classify_gender_bias,
            inputs=input_text,
            outputs=output_label
        )
    
    return demo


In [13]:
# %%
# Create and display the interface
if 'model' in locals() and 'tokenizer' in locals():
    demo = create_gradio_interface()
    demo.launch(share=False, inline=True) 
else:
    print("The model is not loaded and the interface cannot be created.")

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.
