In [1]:
import os
import torch
import pandas as pd
import gradio as gr
from datasets import Dataset
from transformers import (
    AutoTokenizer, AutoModelForSequenceClassification,
    TrainingArguments, Trainer
)
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_recall_fscore_support, classification_report
)

# Hugging Face checkpoint that gets fine-tuned, and the directory the
# fine-tuned model is written to / read back from.
MODEL_NAME = "bert-base-uncased"
MODEL_PATH = "./bert-baseline-detector"

# Class name <-> integer id lookup tables for the three-way classifier.
LABEL2ID = dict(explicit=0, implicit=1, non=2)
ID2LABEL = {class_id: class_name for class_name, class_id in LABEL2ID.items()}


  from .autonotebook import tqdm as notebook_tqdm


In [2]:




# data loading and cleaning
def load_and_prepare_dataframe(debug_mode=True):
    """
    Load dataset.csv, normalise its labels and attach integer class ids.

    If dataset.csv does not exist, a synthetic frame (three example sentences,
    one per class, repeated 100 times) is built instead. Any other read or
    parse error is allowed to propagate so a broken dataset is not silently
    replaced by toy data.

    Args:
        debug_mode: Accepted for backward compatibility. Per-class
            down-sampling is currently disabled, so the full dataset is
            returned whatever this value is.

    Returns:
        pandas.DataFrame with a cleaned "label" column and an integer
        "label_id" column obtained by mapping "label" through LABEL2ID.

    Raises:
        ValueError: if, after normalisation, any row has a missing label or a
            label that is not a key of LABEL2ID.
    """
    try:
        df = pd.read_csv("dataset.csv")
        print(" dataset.csv loaded")
    except FileNotFoundError as e:
        print(f" dataset.csv not found ({e}), automatically create example data")
        data = {
            "text": [
                "She only got the job because she's pretty",
                "Women belong in the kitchen",
                "He earned the promotion through hard work"
            ] * 100,
            "label": ["implicit", "explicit", "non"] * 100
        }
        df = pd.DataFrame(data)

    # Unified label format: lower-case, hyphens -> underscores, trim, then
    # collapse the known aliases onto the three LABEL2ID keys.
    df["label"] = (
        df["label"]
        .str.lower()
        .str.replace("-", "_", regex=False)
        .str.strip()
        .replace({
            "implicit_sexist": "implicit",
            "explicit_sexist": "explicit",
            "non_sexist": "non",
            "nonsexist": "non",
            "sexist_implicit": "implicit",
            "sexist_explicit": "explicit"
        })
    )

    df["label_id"] = df["label"].map(LABEL2ID)

    # Series.map leaves NaN for anything outside LABEL2ID; fail here with a
    # readable message instead of letting NaN class ids reach training.
    unmapped = df["label_id"].isna()
    if unmapped.any():
        unknown = sorted(df.loc[unmapped, "label"].dropna().unique())
        raise ValueError(
            f"{int(unmapped.sum())} row(s) have a missing or unrecognised label; "
            f"unrecognised values: {unknown}"
        )
    df["label_id"] = df["label_id"].astype("int64")

    print("Label distribution:\n", df["label"].value_counts())
    return df

# train
def train_model(debug_mode=True):
    """Fine-tune the BERT baseline and save it to MODEL_PATH.

    Steps: load the cleaned frame, make a stratified 80/20 train/eval split,
    tokenize, fine-tune for 3 epochs with evaluation and checkpointing every
    epoch, reload the checkpoint with the lowest eval loss, save model and
    tokenizer to MODEL_PATH, then print a per-class report on the eval split.

    Args:
        debug_mode: Forwarded unchanged to load_and_prepare_dataframe.

    Returns:
        None.
    """
    df = load_and_prepare_dataframe(debug_mode)

    #  split dataset
    train_df, eval_df = train_test_split(
        df, test_size=0.2, stratify=df["label"], random_state=42
    )
    # preserve_index=False keeps the shuffled pandas index from becoming an
    # extra "__index_level_0__" column in the datasets.
    train_ds = Dataset.from_pandas(train_df[["text", "label_id"]], preserve_index=False)
    eval_ds = Dataset.from_pandas(eval_df[["text", "label_id"]], preserve_index=False)

    # tokenizer
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)

    def tokenize_fn(batch):
        # Only truncate here. Padding is left to the Trainer, which pads each
        # batch to its longest member when it is given the tokenizer, instead
        # of padding every example to 512 tokens up front.
        tok = tokenizer(
            batch["text"],
            truncation=True,
            max_length=512
        )
        tok["labels"] = batch["label_id"]
        return tok

    train_ds = train_ds.map(tokenize_fn, batched=True)
    eval_ds = eval_ds.map(tokenize_fn, batched=True)

    # model
    model = AutoModelForSequenceClassification.from_pretrained(
        MODEL_NAME,
        num_labels=len(LABEL2ID),
        id2label=ID2LABEL,
        label2id=LABEL2ID
    )

    # training parameters
    training_args = TrainingArguments(
        output_dir=MODEL_PATH,
        per_device_train_batch_size=4,
        per_device_eval_batch_size=4,
        gradient_accumulation_steps=2,
        num_train_epochs=3,
        learning_rate=2e-5,
        eval_strategy="epoch",
        save_strategy="epoch",
        save_total_limit=2,
        # Keep the epoch with the lowest validation loss, not the last one.
        load_best_model_at_end=True,
        metric_for_best_model="eval_loss",
        greater_is_better=False,
        logging_steps=20,
        report_to="none"
    )

    # eval metrics
    def compute_metrics(eval_pred):
        preds = eval_pred.predictions.argmax(axis=-1)
        labels = eval_pred.label_ids
        acc = accuracy_score(labels, preds)
        p, r, f1, _ = precision_recall_fscore_support(
            labels, preds, average="weighted", zero_division=0
        )
        return {"accuracy": acc, "precision": p, "recall": r, "f1": f1}

    # Trainer
    # NOTE(review): newer transformers releases appear to rename `tokenizer`
    # to `processing_class` — confirm against the installed version before
    # switching.
    trainer = Trainer(
        model=model,
        args=training_args,
        train_dataset=train_ds,
        eval_dataset=eval_ds,
        tokenizer=tokenizer,
        compute_metrics=compute_metrics
    )

    print(" Start training BERT baseline...")
    trainer.train()

    # save model and tokenizer
    model.save_pretrained(MODEL_PATH)
    tokenizer.save_pretrained(MODEL_PATH)
    print(f" train finished, model save to {MODEL_PATH}")

    # print eval report (predictions were previously computed and discarded)
    print("\n eval dataset report:")
    eval_output = trainer.predict(eval_ds)
    y_true = eval_output.label_ids
    y_pred = eval_output.predictions.argmax(axis=-1)
    class_ids = sorted(ID2LABEL)
    # Passing `labels` keeps the report well-formed even if a class is absent
    # from the eval split or never predicted.
    print(classification_report(
        y_true,
        y_pred,
        labels=class_ids,
        target_names=[ID2LABEL[i] for i in class_ids],
        zero_division=0
    ))




In [None]:

# Run the fine-tuning job; debug_mode is spelled out but equals the default.
train_model(debug_mode=True)

 已加载 dataset.csv
标签分布:
 label
explicit    1067
non         1004
implicit    1000
Name: count, dtype: int64


Map: 100%|██████████| 2456/2456 [00:00<00:00, 4377.96 examples/s]
Map: 100%|██████████| 615/615 [00:00<00:00, 5711.02 examples/s]
Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.
  trainer = Trainer(


 开始训练 BERT baseline...


  return forward_call(*args, **kwargs)


Epoch,Training Loss,Validation Loss,Accuracy,Precision,Recall,F1
1,0.4926,0.420776,0.842276,0.842063,0.842276,0.841969
2,0.2316,0.436816,0.861789,0.865898,0.861789,0.862195
3,0.2433,0.484554,0.865041,0.867669,0.865041,0.865196


  return forward_call(*args, **kwargs)
  return forward_call(*args, **kwargs)


 训练完成，模型已保存到 ./bert-baseline-detector

 验证集分类报告：


  return forward_call(*args, **kwargs)


              precision    recall  f1-score   support

    explicit       0.83      0.80      0.82       214
    implicit       0.82      0.84      0.83       200
         non       0.87      0.89      0.88       201

    accuracy                           0.84       615
   macro avg       0.84      0.84      0.84       615
weighted avg       0.84      0.84      0.84       615



In [3]:
import os
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# Checkpoint name and model directory; these repeat the values assigned in
# the notebook's first cell.
MODEL_NAME = "bert-base-uncased"
MODEL_PATH = "./bert-baseline-detector"

def load_model():
    """Read the fine-tuned classifier and its tokenizer back from MODEL_PATH.

    The model is moved to CUDA when torch reports it as available (CPU
    otherwise) and switched to eval mode.

    Returns:
        (model, tokenizer, device) tuple.

    Raises:
        FileNotFoundError: if MODEL_PATH does not exist.
    """
    path_missing = not os.path.exists(MODEL_PATH)
    if path_missing:
        raise FileNotFoundError(f"Model directory {MODEL_PATH} does not exist. Please run the training first.")

    print(" Loading tokenizer...")
    loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)

    print(" Loading BERT classification model...")
    classifier = AutoModelForSequenceClassification.from_pretrained(MODEL_PATH)

    # Pick the accelerator if there is one, then put the model in inference mode.
    if torch.cuda.is_available():
        target_device = torch.device("cuda")
    else:
        target_device = torch.device("cpu")
    classifier = classifier.to(target_device)
    classifier = classifier.eval()

    print(f" Model loaded on device: {target_device}")
    print(f" Model type: {type(classifier).__name__}")
    return classifier, loaded_tokenizer, target_device

# Safe loader
# Bind the three names to None first so that later cells which test
# `model is None` / `tokenizer is None` see a defined name even when loading
# fails, instead of raising NameError.
model = tokenizer = device = None
try:
    model, tokenizer, device = load_model()
    print(" BERT model loaded successfully!")
except Exception as e:
    # Best-effort load: report the problem and leave the names as None.
    print(f" Failed to load BERT model: {e}")



 Loading tokenizer...
 Loading BERT classification model...
 Model loaded on device: cuda
 Model type: BertForSequenceClassification
 BERT model loaded successfully!


In [5]:
from datasets import Dataset
import pandas as pd
from sklearn.model_selection import train_test_split

# load dataset
df = pd.read_csv("dataset.csv")

# Normalise label spelling: lower-case, hyphens -> underscores, trim, then
# collapse the known aliases onto the three LABEL2ID keys. (A "non-sexist"
# alias is unnecessary: hyphens are already underscores at this point.)
df["label"] = (
    df["label"]
    .str.lower()
    .str.replace("-", "_", regex=False)
    .str.strip()
    .replace({
        "implicit_sexist": "implicit",
        "explicit_sexist": "explicit",
        "non_sexist": "non",
        "nonsexist": "non",
        "sexist_implicit": "implicit",
        "sexist_explicit": "explicit"
    })
)

# label map
df["label_id"] = df["label"].map(LABEL2ID)

# Series.map leaves NaN for anything outside LABEL2ID; stop here with a
# readable message rather than passing NaN class ids to the evaluation.
unmapped_rows = df["label_id"].isna()
if unmapped_rows.any():
    raise ValueError(
        f"{int(unmapped_rows.sum())} row(s) have a missing or unrecognised label; "
        f"unrecognised values: {sorted(df.loc[unmapped_rows, 'label'].dropna().unique())}"
    )
df["label_id"] = df["label_id"].astype("int64")

# split dataset (same test_size, seed and stratification as in train_model)
train_df, eval_df = train_test_split(df, test_size=0.2, random_state=42, stratify=df["label"])
eval_dataset = Dataset.from_pandas(eval_df[["text", "label_id"]])


In [8]:
from sklearn.metrics import classification_report

def evaluate_model_on_test_set(model, tokenizer, device, eval_dataset, batch_size=32):
    """Run the classifier over eval_dataset and print a per-class report.

    Args:
        model: Sequence-classification model; called as model(**encoded) and
            read through its `.logits` output.
        tokenizer: Tokenizer used to encode the texts.
        device: Device the encoded inputs are moved to; must be where the
            model lives.
        eval_dataset: Sized iterable of records with "text" and "label_id" keys.
        batch_size: Number of texts encoded and scored per forward pass.

    Returns:
        None. The report is printed.

    Raises:
        ValueError: if batch_size is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    model.eval()

    print("Start validation set evaluation...")
    print("Number of evaluation samples:", len(eval_dataset))

    # Gather inputs once so they can be scored in batches instead of one
    # forward pass per sample.
    texts, truths = [], []
    for ex in eval_dataset:
        texts.append(ex["text"])
        truths.append(ex["label_id"])

    preds = []
    for start in range(0, len(texts), batch_size):
        encoded = tokenizer(
            texts[start:start + batch_size],
            return_tensors="pt",
            truncation=True,
            max_length=512,
            padding=True  # pad to the longest text in this batch
        ).to(device)

        with torch.no_grad():
            logits = model(**encoded).logits

        preds.extend(torch.argmax(logits, dim=-1).tolist())

    print("\n BERT classification report:")
    print(classification_report(
        truths, preds,
        # Explicit ids keep target_names aligned even when a class is absent
        # from both truths and preds.
        labels=[0, 1, 2],
        target_names=["explicit", "implicit", "non"],
        digits=4,
        zero_division=0
    ))
# Score the loaded model on the 20% evaluation split built in the previous cell.
evaluate_model_on_test_set(
    model=model,
    tokenizer=tokenizer,
    device=device,
    eval_dataset=eval_dataset,
)

Start validation set evaluation...
Number of evaluation samples: 615


  return forward_call(*args, **kwargs)



 BERT classification report:
              precision    recall  f1-score   support

    explicit     0.8341    0.7991    0.8162       214
    implicit     0.8235    0.8400    0.8317       200
         non     0.8689    0.8905    0.8796       201

    accuracy                         0.8423       615
   macro avg     0.8422    0.8432    0.8425       615
weighted avg     0.8421    0.8423    0.8420       615



In [11]:


def classify_gender_bias(text: str) -> str:
    """
    Classify one piece of text with the loaded BERT model.

    Args:
        text: Raw input sentence.

    Returns:
        The capitalised ID2LABEL entry for the highest logit ("Explicit",
        "Implicit" or "Non"); "Model not loaded" when no model/tokenizer is
        available; "Empty input" when text is None, empty or whitespace only.
    """
    # Look the notebook globals up by name: if loading never succeeded they
    # may not exist at all, and a bare `model is None` would raise NameError.
    active_model = globals().get("model")
    active_tokenizer = globals().get("tokenizer")
    if active_model is None or active_tokenizer is None:
        return "Model not loaded"

    # Blank input would otherwise still get an arbitrary class label.
    if text is None or not text.strip():
        return "Empty input"

    # Send the inputs to wherever the model's weights actually are.
    model_device = next(active_model.parameters()).device

    # encode input
    encoded = active_tokenizer(
        text,
        truncation=True,
        padding=True,
        max_length=512,
        return_tensors="pt"
    ).to(model_device)

    # inference
    with torch.no_grad():
        logits = active_model(**encoded).logits
        pred_id = torch.argmax(logits, dim=-1).item()

    # Mapping predictions to labels
    return ID2LABEL[pred_id].capitalize()







In [12]:
# Quick smoke test: classify a handful of sentences, provided both the model
# and the tokenizer names exist in this namespace.
if not {"model", "tokenizer"}.issubset(locals()):
    print("Model not loaded, cannot be tested")
else:
    test_texts = [
        "She only got the job because she's pretty",
        "Women are really bad drivers.",
        "He earned the promotion through hard work",
        "Men are better leaders than women",
        "Act like a lady , think like a man ? .."
    ]

    for text in test_texts:
        result = classify_gender_bias(text)
        # One write per sample: the text line, the result line, then a blank line.
        print(f"text: '{text}'\nresult: {result}\n")

text: 'She only got the job because she's pretty'
result: Implicit

text: 'Women are really bad drivers.'
result: Explicit

text: 'He earned the promotion through hard work'
result: Non

text: 'Men are better leaders than women'
result: Non

text: 'Act like a lady , think like a man ? ..'
result: Implicit



In [13]:
#Gradio UI
def create_gradio_interface():
    """Build (but do not launch) the Gradio Blocks demo for the classifier.

    Layout, top to bottom: two Markdown headings, a three-line text box, a
    "Classify" button, a Label for the prediction, and clickable example
    sentences that fill the text box. Clicking the button sends the text box
    content to classify_gender_bias and shows the result in the Label.

    Returns:
        The assembled gr.Blocks object.
    """
    sample_sentences = [
        "She only got the job because she's pretty",
        "Women are really bad drivers.",
        "Real women don't go along w/ crap! It's a fake cause manufactured by corporations.",
        "Men are better leaders than women",
        "The nurse took care of the patient while the doctor performed surgery",
        "Act like a lady , think like a man ? .."
    ]

    with gr.Blocks(title="BERT Gender Bias Detector", theme=gr.themes.Soft()) as demo:
        gr.Markdown("#  BERT Gender Bias Classifier")
        gr.Markdown("Classify text as **Explicit / Implicit / Non** bias")

        # Each widget sits in its own row, in display order.
        with gr.Row():
            sentence_box = gr.Textbox(
                lines=3,
                label="Enter a sentence",
                placeholder="Type a sentence here...",
            )

        with gr.Row():
            classify_btn = gr.Button("Classify", variant="primary")

        with gr.Row():
            prediction = gr.Label(label="Prediction")

        with gr.Row():
            gr.Examples(
                label="Example Sentences",
                examples=sample_sentences,
                inputs=[sentence_box],
            )

        # Wire the button: text box -> classifier -> prediction label.
        classify_btn.click(
            classify_gender_bias,
            inputs=sentence_box,
            outputs=prediction,
        )

    return demo





In [14]:

if __name__ == "__main__":
    # `model` exists only if an earlier cell managed to create it. Treat
    # "never defined" the same as None instead of raising NameError.
    if globals().get("model") is None:
        train_model(debug_mode=True)
        model, tokenizer, device = load_model()

    # create & launch UI
    demo = create_gradio_interface()
    demo.launch(share=False, inline=False)

* Running on local URL:  http://127.0.0.1:7861
* To create a public link, set `share=True` in `launch()`.
