In [1]:
pip install transformers




In [2]:
pip install pandas torch



In [3]:
pip install datasets



In [4]:
pip install transformers[torch]



In [5]:
from transformers import pipeline, Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoTokenizer
import nltk
import pandas as pd
import torch
from datasets import Dataset, DatasetDict
from sklearn.model_selection import train_test_split
nltk.download('punkt_tab')
from nltk.tokenize import sent_tokenize
import os


[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


In [6]:
# Load zero-shot classification pipeline
classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli", device=-1)

tokenizer = AutoTokenizer.from_pretrained("facebook/bart-large-mnli")

# Define bias explanations
bias_explanations = {
    "political bias": "This statement reflects a viewpoint favoring one political ideology or party.",
    "gender bias": "This statement generalizes or discriminates based on gender.",
    "racial bias": "This statement shows prejudice or discrimination based on race.",
    "religious bias": "This statement shows preference or prejudice based on religious beliefs.",
    "economic bias": "This statement reflects an unfair opinion about economic status or policies.",
    "cultural bias": "This statement makes assumptions about a particular culture or societal norms."
}

def detect_bias(paragraph):
    sentences = sent_tokenize(paragraph)
    results = []

    for sentence in sentences:
        result = classifier(sentence, list(bias_explanations.keys()), multi_label=True)
        detected_biases = [
            (label, score) for label, score in zip(result['labels'], result['scores']) if score > 0.5
        ]

        if detected_biases:
            results.append({
                "sentence": sentence,
                "biases": [(bias, score, bias_explanations[bias]) for bias, score in detected_biases]
            })

    return results

# Sample paragraph for testing
paragraph = "The leader claimed that immigrants are the primary reason for economic downfall. Women are less capable of handling technical roles. Religious beliefs should dictate government policies."

# Detect and print biases
bias_results = detect_bias(paragraph)

for item in bias_results:
    print(f"Sentence: {item['sentence']}")
    for bias, score, explanation in item['biases']:
        print(f"  - Bias Detected: {bias} (Confidence: {score:.2f})")
        print(f"    Explanation: {explanation}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.
Device set to use cpu


Sentence: The leader claimed that immigrants are the primary reason for economic downfall.
  - Bias Detected: economic bias (Confidence: 0.96)
    Explanation: This statement reflects an unfair opinion about economic status or policies.
  - Bias Detected: cultural bias (Confidence: 0.83)
    Explanation: This statement makes assumptions about a particular culture or societal norms.
  - Bias Detected: political bias (Confidence: 0.74)
    Explanation: This statement reflects a viewpoint favoring one political ideology or party.
  - Bias Detected: racial bias (Confidence: 0.56)
    Explanation: This statement shows prejudice or discrimination based on race.
Sentence: Women are less capable of handling technical roles.
  - Bias Detected: gender bias (Confidence: 0.99)
    Explanation: This statement generalizes or discriminates based on gender.
  - Bias Detected: cultural bias (Confidence: 0.77)
    Explanation: This statement makes assumptions about a particular culture or societal norms

In [11]:
import pandas as pd
import os

# Folder containing the CSV files
input_folder = "/content/dataset_fewshot_original"  # Update this path
output_folder = "/content/dataset_fewshot_modified"  # Update this path

# Ensure output folder exists
os.makedirs(input_folder, exist_ok=True)  # Create input folder if not exists
os.makedirs(output_folder, exist_ok=True)

# Column rename mapping
rename_mapping = {
    "comment_text": "Sentence",
    "category": "Bias Detected",
    "rationale": "Explanation"
}

# Columns to remove
columns_to_remove = ["identity_attack_x", "bias", "target"]  # Update with actual column names

# Process all CSV files in the folder
for file_name in os.listdir(input_folder):
    if file_name.endswith(".csv"):  # Process only CSV files
        file_path = os.path.join(input_folder, file_name)

        # Load the CSV file
        df = pd.read_csv(file_path)

        # Rename columns
        df = df.rename(columns=rename_mapping)

        # Remove specified columns
        df = df.drop(columns=[col for col in columns_to_remove if col in df.columns], errors='ignore')

        # Save the modified file
        output_file = os.path.join(output_folder, f"modified_{file_name}")
        df.to_csv(output_file, index=False)

        print(f"Processed: {file_name} â†’ Saved as: modified_{file_name}")

print("All files processed successfully!")


Processed: toxicbias_train_updated.csv â†’ Saved as: modified_toxicbias_train_updated.csv
Processed: toxicbias_test_updated.csv â†’ Saved as: modified_toxicbias_test_updated.csv
All files processed successfully!


In [12]:
import pandas as pd

# Load datasets
train_dataset_path = "/content/dataset_fewshot_modified/modified_toxicbias_train_updated.csv"
test_dataset_path = "/content/dataset_fewshot_modified/modified_toxicbias_train_updated.csv"

train_df = pd.read_csv(train_dataset_path)
test_df = pd.read_csv(test_dataset_path)

# Display the first few rows
train_df.head()



Unnamed: 0,Sentence,Bias Detected,Explanation
0,We are still having their water put at risk in...,"race,gender",prejudice against white men
1,This whole tragedy is the face and body and ph...,religion,prejudice against muslims
2,Black on black killings. Stop this madness ma...,race,prejudice against blacks
3,The world would be at peace with out islam.,religion,prejudice against islam
4,Another Muslim commits a terrorist act and dem...,religion,prejudice against muslims


In [13]:
import pandas as pd
from datasets import Dataset, DatasetDict
from transformers import pipeline, Trainer, TrainingArguments, AutoModelForSequenceClassification, AutoTokenizer
from sklearn.model_selection import train_test_split
import nltk
import torch

# Load datasets
train_dataset_path = "/content/dataset_fewshot_modified/modified_toxicbias_train_updated.csv"
test_dataset_path = "/content/dataset_fewshot_modified/modified_toxicbias_test_updated.csv"

train_df = pd.read_csv(train_dataset_path)
test_df = pd.read_csv(test_dataset_path)

# Combine unique labels from both datasets for consistent mapping
unique_labels = sorted(set(train_df["Bias Detected"].unique()).union(set(test_df["Bias Detected"].unique())))

# Create a label mapping dictionary (starting from 1 instead of 0)
label_mapping = {label: i + 1 for i, label in enumerate(unique_labels)}

# Apply mapping to both datasets
train_df["bias_label"] = train_df["Bias Detected"].map(label_mapping)
test_df["bias_label"] = test_df["Bias Detected"].map(label_mapping)

# Save updated datasets (optional)
train_df.to_csv("/content/dataset_fewshot_modified/labeled_toxicbias_train.csv", index=False)
test_df.to_csv("/content/dataset_fewshot_modified/labeled_toxicbias_test.csv", index=False)


# Display the first few rows to verify
train_df.head()



Unnamed: 0,Sentence,Bias Detected,Explanation,bias_label
0,We are still having their water put at risk in...,"race,gender",prejudice against white men,37
1,This whole tragedy is the face and body and ph...,religion,prejudice against muslims,52
2,Black on black killings. Stop this madness ma...,race,prejudice against blacks,33
3,The world would be at peace with out islam.,religion,prejudice against islam,52
4,Another Muslim commits a terrorist act and dem...,religion,prejudice against muslims,52


In [14]:
test_df.head()

Unnamed: 0,Sentence,Bias Detected,Explanation,bias_label
0,As someone pointed out earlier ISIS and radic...,"religion,race",prejudice against whites and muslims,60
1,URL bellamycharlottesvilletwitter Read up on...,none,No rationale provided,21
2,. . . she wants to give other women of color ...,"race,gender",prejudice against white men,37
3,. The whole world hates the white world yet th...,race,prejudice against whites,33
4,... what horrible human beings .... pathetic. ...,religion,prejudice against christians,52


In [15]:
# Define model and tokenizer (You can switch to another model)
MODEL_NAME = "facebook/bart-large-mnli"  # Change to "roberta-large-mnli" or "t5-base" if needed
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME, num_labels=len(unique_labels), ignore_mismatched_sizes=True
)

Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-large-mnli and are newly initialized because the shapes did not match:
- classification_head.out_proj.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([62]) in the model instantiated
- classification_head.out_proj.weight: found shape torch.Size([3, 1024]) in the checkpoint and torch.Size([62, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [16]:
# Function to preprocess data
def preprocess_function(examples):
    return tokenizer(examples["Sentence"], truncation=True, padding="max_length", max_length=256)

In [17]:
# Convert dataset into Hugging Face format
dataset = DatasetDict({
    "train": Dataset.from_pandas(train_df),
    "test": Dataset.from_pandas(test_df),
})

# Rename the 'encoded_labels' column to 'labels'
dataset = dataset.rename_column("Bias Detected", "labels")

In [18]:
# Apply preprocessing to the dataset
tokenized_dataset = dataset.map(preprocess_function, batched=True)
# Define model
MODEL_NAME = "facebook/bart-large-mnli"
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)


Map:   0%|          | 0/4327 [00:00<?, ? examples/s]

Map:   0%|          | 0/650 [00:00<?, ? examples/s]

In [19]:
# Get the unique labels and ensure they are consistent across train and test sets
unique_labels = sorted(list(set(train_df["Bias Detected"].unique()) | set(test_df["Bias Detected"].unique())))

# Create a label mapping dictionary, starting from 0 for consistency
train_df["bias_label"] = train_df["Bias Detected"].map(label_mapping)
test_df["bias_label"] = test_df["Bias Detected"].map(label_mapping)


# Update the 'bias_label' column based on the label mapping
train_df["bias_label"] = train_df["Bias Detected"].map(label_mapping)
test_df["bias_label"] = test_df["Bias Detected"].map(label_mapping)



# Add the `label2id` and `id2label` parameters to match the label mapping to the model output.
model = AutoModelForSequenceClassification.from_pretrained(
    MODEL_NAME,
    num_labels=len(unique_labels),
    ignore_mismatched_sizes=True,
    label2id=label_mapping,
    id2label={v: k for k, v in label_mapping.items()},
)

Some weights of BartForSequenceClassification were not initialized from the model checkpoint at facebook/bart-large-mnli and are newly initialized because the shapes did not match:
- classification_head.out_proj.bias: found shape torch.Size([3]) in the checkpoint and torch.Size([62]) in the model instantiated
- classification_head.out_proj.weight: found shape torch.Size([3, 1024]) in the checkpoint and torch.Size([62, 1024]) in the model instantiated
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [20]:
# Ensure small dataset for few-shot learning
train_df_fewshot = train_df.sample(n=10, random_state=42)  # Reduce to 10 examples for few-shot
test_df_fewshot = test_df.sample(n=10, random_state=42)  # Reduce to 10 examples for few-shot

# Convert small datasets to Hugging Face format for few-shot
train_dataset_fewshot = Dataset.from_pandas(train_df_fewshot)
test_dataset_fewshot = Dataset.from_pandas(test_df_fewshot)

In [21]:
# Apply preprocessing to the few-shot dataset
tokenized_train_fewshot = train_dataset_fewshot.map(preprocess_function, batched=True)
tokenized_test_fewshot = test_dataset_fewshot.map(preprocess_function, batched=True)

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

In [22]:
# Apply preprocessing to the few-shot dataset
tokenized_train_fewshot = train_dataset_fewshot.map(preprocess_function, batched=True)
tokenized_test_fewshot = test_dataset_fewshot.map(preprocess_function, batched=True)

# Make sure 'labels' are present and correctly formatted
def preprocess_function(examples):
    # Assuming 'bias_label' is the column containing your labels
    # (replace with the correct column name if it's different)
    return tokenizer(
        examples["Sentence"],
        truncation=True,
        padding="max_length",
        max_length=256,
    )
tokenized_train_fewshot = tokenized_train_fewshot.rename_column("bias_label", "labels")
tokenized_test_fewshot = tokenized_test_fewshot.rename_column("bias_label", "labels")

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

Map:   0%|          | 0/10 [00:00<?, ? examples/s]

In [23]:
from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./results",
    evaluation_strategy="epoch",
    save_strategy="epoch",  # Save checkpoints at the end of each epoch
    learning_rate=2e-5,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    num_train_epochs=3,
    weight_decay=0.01,
    logging_dir="./logs",
    logging_steps=50,
    save_total_limit=2,  # Keep only the last 2 checkpoints
    report_to="tensorboard",
)




In [24]:
# Define the Trainer for fine-tuning
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train_fewshot,
    eval_dataset=tokenized_test_fewshot,
)


In [25]:
# Start fine-tuning for few-shot learning
trainer.train()

Epoch,Training Loss,Validation Loss
1,No log,3.499311
2,No log,3.098791
3,No log,2.98174


TrainOutput(global_step=6, training_loss=2.7375926971435547, metrics={'train_runtime': 127.1487, 'train_samples_per_second': 0.236, 'train_steps_per_second': 0.047, 'total_flos': 16304578467840.0, 'train_loss': 2.7375926971435547, 'epoch': 3.0})

In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline

# Path to your fine-tuned model checkpoint (few-shot)
checkpoint_dir = './results'  # Update with the correct checkpoint path

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint_dir)

# Initialize a text classification pipeline with the fine-tuned model
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer, device=-1)

# Function to detect bias in a sentence or paragraph (similar to what was done previously)
def detect_bias_using_finetuned_model(text):
    # Apply prediction to detect the bias label for each input text
    results = classifier(text)

    # Process the results to extract bias labels and their confidences
    detected_biases = []
    for result in results:
        bias_label = result['label']
        confidence = result['score']
        detected_biases.append((bias_label, confidence))

    # Return detected biases with explanations
    bias_explanations = {
        "political bias": "This statement reflects a viewpoint favoring one political ideology or party.",
        "gender bias": "This statement generalizes or discriminates based on gender.",
        "racial bias": "This statement shows prejudice or discrimination based on race.",
        "religious bias": "This statement shows preference or prejudice based on religious beliefs.",
        "economic bias": "This statement reflects an unfair opinion about economic status or policies.",
        "cultural bias": "This statement makes assumptions about a particular culture or societal norms."
    }

    results_with_explanation = [
        {"bias": bias, "confidence": confidence, "explanation": bias_explanations.get(bias, "No explanation available")}
        for bias, confidence in detected_biases
    ]

    return results_with_explanation

# Example of how you can test the few-shot model (this is where you would call the function)
few_shot_paragraph = """
The leader claimed that immigrants are the primary reason for economic downfall. Women are less capable of handling technical roles.
Religious beliefs should dictate government policies.
"""

bias_results = detect_bias_using_finetuned_model(few_shot_paragraph)

# Output the results
for item in bias_results:
    print(f"Bias Detected: {item['bias']}")
    print(f"  Confidence: {item['confidence']:.2f}")
    print(f"  Explanation: {item['explanation']}")


You passed along `num_labels=3` with an incompatible id to label map: {'1': 'gender', '2': 'gender, race', '3': 'gender,lgbtq', '4': 'gender,political', '5': 'gender,race', '6': 'gender,race,religion', '7': 'gender,religion', '8': 'gender,religion,lgbtq', '9': 'lgbtq', '10': 'lgbtq,gender', '11': 'lgbtq,gender,political', '12': 'lgbtq,gender,race', '13': 'lgbtq,gender,religion', '14': 'lgbtq,political', '15': 'lgbtq,political,religion', '16': 'lgbtq,race', '17': 'lgbtq,race,political,gender', '18': 'lgbtq,race,religion', '19': 'lgbtq,religion', '20': 'lgbtq,religion,gender', '21': 'none', '22': 'political', '23': 'political, race', '24': 'political,gender', '25': 'political,gender,lgbtq', '26': 'political,lgbtq', '27': 'political,lgbtq,race', '28': 'political,lgbtq,religion', '29': 'political,race', '30': 'political,race,gender', '31': 'political,race,lgbtq', '32': 'political,religion', '33': 'race', '34': 'race, gender', '35': 'race, lgbtq', '36': 'race, political', '37': 'race,gender

Bias Detected: race
  Confidence: 0.10
  Explanation: No explanation available


In [None]:
from transformers import AutoModelForSequenceClassification, AutoTokenizer, pipeline
import nltk

# Ensure to download the sentence tokenizer if not already available
nltk.download('punkt')

# Path to your fine-tuned model checkpoint (update with the correct checkpoint path)
checkpoint_dir = './results'  # Update this path with your model's checkpoint

# Load the tokenizer and model
tokenizer = AutoTokenizer.from_pretrained(checkpoint_dir)
model = AutoModelForSequenceClassification.from_pretrained(checkpoint_dir)

# Initialize a text classification pipeline with the fine-tuned model
classifier = pipeline("text-classification", model=model, tokenizer=tokenizer, device=-1)

# Define the label mapping manually based on your model's label format (62 labels)
label_mapping = {
    '1': 'gender',
    '2': 'gender, race',
    '3': 'gender, lgbtq',
    '4': 'gender, political',
    '5': 'gender, race',
    '6': 'gender, race, religion',
    '7': 'gender, religion',
    '8': 'gender, religion, lgbtq',
    '9': 'lgbtq',
    '10': 'lgbtq, gender',
    '11': 'lgbtq, gender, political',
    '12': 'lgbtq, gender, race',
    '13': 'lgbtq, gender, religion',
    '14': 'lgbtq, political',
    '15': 'lgbtq, political, religion',
    '16': 'lgbtq, race',
    '17': 'lgbtq, race, political, gender',
    '18': 'lgbtq, race, religion',
    '19': 'lgbtq, religion',
    '20': 'lgbtq, religion, gender',
    '21': 'none',
    '22': 'political',
    '23': 'political, race',
    '24': 'political, gender',
    '25': 'political, gender, lgbtq',
    '26': 'political, lgbtq',
    '27': 'political, lgbtq, race',
    '28': 'political, lgbtq, religion',
    '29': 'political, race',
    '30': 'political, race, gender',
    '31': 'political, race, lgbtq',
    '32': 'political, religion',
    '33': 'race',
    '34': 'race, gender',
    '35': 'race, lgbtq',
    '36': 'race, political',
    '37': 'race, gender',
    '38': 'race, gender, lgbtq',
    '39': 'race, gender, political',
    '40': 'race, gender, religion',
    '41': 'race, lgbtq',
    '42': 'race, lgbtq, gender',
    '43': 'race, lgbtq, religion',
    '44': 'race, political',
    '45': 'race, political, gender',
    '46': 'race, political, lgbtq',
    '47': 'race, religion',
    '48': 'race, religion, gender',
    '49': 'race, religion, gender, political',
    '50': 'race, religion, lgbtq, political',
    '51': 'race, religion, political',
    '52': 'religion',
    '53': 'religion, lgbtq, race',
    '54': 'religion, gender',
    '55': 'religion, gender, lgbtq',
    '56': 'religion, lgbtq',
    '57': 'religion, lgbtq, gender',
    '58': 'religion, political',
    '59': 'religion, political, lgbtq',
    '60': 'religion, race',
    '61': 'religion, race, gender',
    '62': 'religion, race, political'
}

# Define the bias explanation mapping (this needs to be adapted as needed)
bias_explanations = {
    "gender": "This statement generalizes or discriminates based on gender.",
    "race": "This statement shows prejudice or discrimination based on race.",
    "lgbtq": "This statement shows prejudice or discrimination against LGBTQ+ individuals.",
    "political": "This statement reflects a viewpoint favoring one political ideology or party.",
    "religion": "This statement shows preference or prejudice based on religious beliefs.",
    "none": "This statement does not show any significant bias."
}

# Function to detect bias in a sentence or paragraph
def detect_bias_using_finetuned_model(text):
    # Split the paragraph into sentences
    sentences = nltk.sent_tokenize(text)

    # Prepare to store the results
    all_bias_results = []

    # Process each sentence
    for sentence in sentences:
        # Apply prediction to detect the bias label for each sentence
        results = classifier(sentence)

        # Initialize a list to hold biases and explanations for this sentence
        sentence_biases = []

        # Process the results to extract bias labels and their confidences
        for result in results:
            label_id = str(result['label'])  # Get label ID (like '1', '2', etc.)

            if label_id in label_mapping:
                bias_type = label_mapping[label_id]  # Readable label (gender, race, etc.)
                explanation = bias_explanations.get(bias_type, "No explanation available")  # Get explanation
                confidence = result['score']  # Confidence score from the model

                # Add the bias information to the list
                sentence_biases.append({
                    'bias': bias_type,
                    'confidence': confidence,
                    'explanation': explanation
                })

        # Store the results for the sentence
        all_bias_results.append({
            'sentence': sentence,
            'detected_biases': sentence_biases
        })

    return all_bias_results

# Example paragraph for testing
few_shot_paragraph = """
The leader claimed that immigrants are the primary reason for economic downfall. Women are less capable of handling technical roles.
Religious beliefs should dictate government policies.
"""

# Get the bias detection results
bias_results = detect_bias_using_finetuned_model(few_shot_paragraph)

# Print the results
for result in bias_results:
    print(f"Sentence: {result['sentence']}")
    for item in result['detected_biases']:
        print(f"  Bias Detected: {item['bias']}")
        print(f"  Confidence: {item['confidence']:.2f}")
        print(f"  Explanation: {item['explanation']}")
    print()  # Add a newline after each sentence output




[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
You passed along `num_labels=3` with an incompatible id to label map: {'1': 'gender', '2': 'gender, race', '3': 'gender,lgbtq', '4': 'gender,political', '5': 'gender,race', '6': 'gender,race,religion', '7': 'gender,religion', '8': 'gender,religion,lgbtq', '9': 'lgbtq', '10': 'lgbtq,gender', '11': 'lgbtq,gender,political', '12': 'lgbtq,gender,race', '13': 'lgbtq,gender,religion', '14': 'lgbtq,political', '15': 'lgbtq,political,religion', '16': 'lgbtq,race', '17': 'lgbtq,race,political,gender', '18': 'lgbtq,race,religion', '19': 'lgbtq,religion', '20': 'lgbtq,religion,gender', '21': 'none', '22': 'political', '23': 'political, race', '24': 'political,gender', '25': 'political,gender,lgbtq', '26': 'political,lgbtq', '27': 'political,lgbtq,race', '28': 'political,lgbtq,religion', '29': 'political,race', '30': 'political,race,gender', '31': 'political,race,lgbtq', '32': 'political,

Sentence: 
The leader claimed that immigrants are the primary reason for economic downfall.

Sentence: Women are less capable of handling technical roles.

Sentence: Religious beliefs should dictate government policies.



FEW SHOT LEARNING (AMRITHA)

In [1]:
import pandas as pd
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from sklearn.multiclass import OneVsRestClassifier

# Step 1: Load the few-shot dataset
df = pd.read_csv("/content/sample_data/few_shot_multilabel_bias_balanced_all.csv")  # Update path if needed
X = df["comment_text"]
y = df[["gender", "racial", "religious", "political", "economic", "cultural"]]
labels = y.columns

# Step 2: Train the multi-label classifier
model = Pipeline([
    ("tfidf", TfidfVectorizer(max_features=5000, stop_words="english")),
    ("clf", OneVsRestClassifier(LogisticRegression(solver='liblinear')))
])
model.fit(X, y)

# Step 3: Bias explanation dictionary
explanations = {
    "gender": "This statement generalizes or discriminates based on gender.",
    "racial": "This statement shows prejudice or discrimination based on race.",
    "religious": "This statement shows preference or prejudice based on religious beliefs.",
    "political": "This reflects a viewpoint favoring one political ideology or party.",
    "economic": "This reflects an unfair opinion about economic status or policies.",
    "cultural": "This makes assumptions about a particular culture or societal norms."
}

# Step 4: Input paragraph (raw text)
paragraph = "The leader claimed that immigrants are the primary reason for economic downfall. Women are less capable of handling technical roles. Religious beliefs should dictate government policies."

# Step 5: Use SpaCy to split paragraph into sentences
import spacy
nlp = spacy.load("en_core_web_sm")  # Run this once if not already: python -m spacy download en_core_web_sm
doc = nlp(paragraph)
sentences = [sent.text.strip() for sent in doc.sents if sent.text.strip()]

# Step 6: Predict biases per sentence
proba = model.predict_proba(sentences)

# Step 7: Print results
for i, sent in enumerate(sentences):
    print(f"\nðŸ”¹ Sentence: {sent}")
    found_bias = False
    for j, label in enumerate(labels):
        confidence = proba[i][j]
        if confidence > 0.5:
            found_bias = True
            print(f"  - Bias Detected: {label.capitalize()} (Confidence: {confidence:.2f})")
            print(f"    Explanation: {explanations[label]}")
    if not found_bias:
        print("  - No significant bias detected.")





ðŸ”¹ Sentence: The leader claimed that immigrants are the primary reason for economic downfall.
  - Bias Detected: Political (Confidence: 0.69)
    Explanation: This reflects a viewpoint favoring one political ideology or party.
  - Bias Detected: Cultural (Confidence: 1.00)
    Explanation: This makes assumptions about a particular culture or societal norms.

ðŸ”¹ Sentence: Women are less capable of handling technical roles.
  - Bias Detected: Cultural (Confidence: 1.00)
    Explanation: This makes assumptions about a particular culture or societal norms.

ðŸ”¹ Sentence: Religious beliefs should dictate government policies.
  - Bias Detected: Religious (Confidence: 0.59)
    Explanation: This statement shows preference or prejudice based on religious beliefs.
  - Bias Detected: Political (Confidence: 0.68)
    Explanation: This reflects a viewpoint favoring one political ideology or party.
  - Bias Detected: Cultural (Confidence: 1.00)
    Explanation: This makes assumptions about a 