In [82]:
import pandas as pd

# Keyword lexicon: one row per flagged word with its bias category and a
# suggested replacement. Rows missing any of those three fields are discarded.
df_keywords = pd.read_csv("final_bias_keywords_dataset.csv")
df_keywords = df_keywords.dropna(subset=["word", "category", "suggestion"])

# Labelled training data for the classifier: keep only the text/label pair
# and drop rows where either one is missing.
df_train = (
    pd.read_csv("bias_dataset_with_keywords.csv")
    [["text", "label"]]
    .dropna()
)


In [83]:
import re

def clean_text(text):
    """Normalise text for vectorisation and keyword matching.

    Strips every character that is not an ASCII letter, digit or whitespace,
    then lowercases the result. Punctuation is removed rather than replaced,
    so "non-biased" becomes "nonbiased".

    Parameters
    ----------
    text : str or any
        The raw text. ``None`` and float NaN (how pandas represents a missing
        cell) are treated as empty text; any other non-string value is
        converted with ``str()`` first.

    Returns
    -------
    str
        The cleaned, lowercased text.
    """
    # Missing values would otherwise make re.sub raise TypeError.
    if text is None or (isinstance(text, float) and text != text):
        return ''
    if not isinstance(text, str):
        text = str(text)
    return re.sub(r'[^a-zA-Z0-9\s]', '', text).lower()

# Normalise every training sentence with the same cleaner that is used at
# prediction time, so the vectoriser sees consistent input.
df_train['text'] = df_train['text'].map(clean_text)


In [85]:
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Fixed seed so the train/test split (and therefore the reported accuracy)
# is reproducible across "Restart Kernel -> Run All".
RANDOM_STATE = 42

X_train, X_test, y_train, y_test = train_test_split(
    df_train['text'],
    df_train['label'],
    test_size=0.2,
    random_state=RANDOM_STATE,
)

# Fit TF-IDF on the training split only; the test split is merely transformed
# so no vocabulary/IDF statistics leak from the held-out data.
vectorizer = TfidfVectorizer()
X_train_vec = vectorizer.fit_transform(X_train)
X_test_vec = vectorizer.transform(X_test)

model = LogisticRegression()
model.fit(X_train_vec, y_train)

# NOTE(review): a perfect held-out accuracy usually points at duplicated rows
# or label-revealing tokens in the dataset - verify before trusting the score.
print("Accuracy:", accuracy_score(y_test, model.predict(X_test_vec)))


Accuracy: 1.0


In [86]:
from fuzzywuzzy import fuzz

def match_fuzzy_keywords(text, bias_df, hide_categories=None, threshold=85):
    """Find bias keywords that fuzzily occur in ``text``.

    Parameters
    ----------
    text : str
        Raw input text; it is normalised with ``clean_text`` before matching.
    bias_df : pandas.DataFrame
        Keyword table with ``word``, ``category`` and ``suggestion`` columns.
    hide_categories : iterable of str, str or None, optional
        Categories whose keywords must not be reported (compared
        case-insensitively). A single string is treated as one category.
        ``None`` (the default) hides nothing.
    threshold : int, optional
        Minimum ``fuzz.partial_ratio`` score (0-100) for a keyword to count
        as a match. Defaults to 85.

    Returns
    -------
    list of dict
        One dict per distinct matched keyword, in table order, with keys
        ``matched_word`` (lowercased keyword), ``original_word``,
        ``category`` and ``suggestion``.
    """
    text_clean = clean_text(text)

    # A bare string would otherwise be iterated character by character.
    if isinstance(hide_categories, str):
        hide_categories = [hide_categories]
    # Build the lowercase lookup once instead of once per keyword row.
    hidden = {str(c).lower() for c in (hide_categories or ())}

    results = []
    seen = set()

    rows = zip(bias_df['word'], bias_df['category'], bias_df['suggestion'])
    for original_word, category, suggestion in rows:
        if str(category).lower() in hidden:
            continue

        word = str(original_word).lower()
        # Cheap duplicate check first so the fuzzy comparison is skipped
        # for keywords that were already reported.
        if word in seen:
            continue

        if fuzz.partial_ratio(word, text_clean) >= threshold:
            seen.add(word)
            results.append({
                "matched_word": word,
                "original_word": original_word,
                "category": category,
                "suggestion": suggestion
            })
    return results


In [87]:
def predict_bias_and_keywords(text, hide_categories=None):
    """Print the classifier's bias prediction and any bias keywords in ``text``.

    Parameters
    ----------
    text : str
        Raw text to analyse.
    hide_categories : list of str or None, optional
        Keyword categories to leave out of the keyword report. ``None``
        (the default) hides nothing.

    Returns
    -------
    None
        The report is written to stdout.
    """
    # Use a fresh list per call instead of a shared mutable default.
    hidden = [] if hide_categories is None else hide_categories

    cleaned = clean_text(text)
    vector = vectorizer.transform([cleaned])
    prediction = model.predict(vector)[0]

    keywords = match_fuzzy_keywords(text, df_keywords, hidden)

    # str() keeps this working when labels are not strings (e.g. 0/1 ints).
    print(f"🧠 Bias Prediction: {str(prediction).upper()}")
    if keywords:
        print("\n⚠️ Bias Keywords Detected:")
        for item in keywords:
            print(f"- {item['original_word']} ({item['category']}) → Suggested: {item['suggestion']}")
    else:
        print("✅ No bias keywords detected.")


In [93]:
# Demo run: the misspelled "aggressivly" is still picked up by the fuzzy
# matcher (reported as "aggressive" in the recorded output below), while
# keywords in the "gender" category are suppressed.
description = "i want to hide aggressivly candidate "
predict_bias_and_keywords(description, hide_categories=["gender"])


🧠 Bias Prediction: BIASED

⚠️ Bias Keywords Detected:
- aggressive (Tone) → Suggested: assertive
