### Finds gender-biased word stems

In [22]:
import re
from nltk.stem import PorterStemmer

# Shared Porter stemmer instance; stem_words() reads this module-level name.
stemmer = PorterStemmer()

def stem_words(words):
    """Lower-case every word and stem it with the module-level ``stemmer``.

    Args:
        words: Iterable of strings.

    Returns:
        A new list holding one stem per input word, in input order.
    """
    stems = []
    for word in words:
        stems.append(stemmer.stem(word.lower()))
    return stems

def detect_biased_words(text, biased_words):
    """Return the distinct words of ``text`` whose stem matches a biased word.

    The text is lower-cased and split into ``\\w+`` tokens. Tokens and the
    entries of ``biased_words`` are then stemmed with ``stem_words`` and
    compared stem-to-stem, so a token matches whenever it shares a stem with
    a listed word, not only when it is spelled identically.

    Args:
        text: Text to scan (for example a job description).
        biased_words: Iterable of words to look for. Each entry is stemmed
            as a single word.

    Returns:
        Alphabetically sorted list of the unique matching tokens, lower-cased
        but otherwise in the form in which they occur in ``text``.
    """
    # Tokenize and stem the job description
    text_tokens = re.findall(r'\b\w+\b', text.lower())
    text_stems = stem_words(text_tokens)

    # Stem the biased word list into a set: membership is tested once per
    # token, and a set makes each of those tests O(1) instead of a list scan.
    biased_stems = set(stem_words(biased_words))

    # Keep the surface form (not the stem) of every token whose stem matches;
    # the set also removes repeated occurrences.
    found_words = {
        token
        for token, stem in zip(text_tokens, text_stems)
        if stem in biased_stems
    }

    return sorted(found_words)


In [23]:
# Words to flag; detect_biased_words() compares stems, so inflected
# variants in the text are matched as well.
biased = ["competitive", "dominant", "nurturing", "supportive"]

text = """
We are seeking a competitive and dominating individual who thrives in a fast-paced environment. 
The person should be nurture supporting but able to assert dominance when needed.
"""

print(detect_biased_words(text, biased))
# Output: ['competitive', 'dominance', 'dominating', 'nurture', 'supporting']



['competitive', 'dominance', 'dominating', 'nurture', 'supporting']


### Finds gender-biased word stems and gives synonyms

In [19]:
# Placeholder entries only: replace these with the real biased words (and
# their suggested alternatives) once they have been identified.

# Maps each biased word to a list of suggested replacement terms.
biased_synonyms = {
    "dominant": ["confident", "strong leadership"],
    "competitive": ["ambitious", "goal-oriented"],
    "nurturing": ["supportive", "team-oriented"],
    "aggressive": ["proactive", "assertive"]
    # etc...
}

In [24]:
import re
from nltk.stem import PorterStemmer

# Porter stemmer instance used by stem_words() and
# detect_biased_words_synonyms() below.
stemmer = PorterStemmer()

def stem_words(words):
    """Return the stems of ``words``, lower-casing each word first.

    Stemming is delegated to the module-level ``stemmer``; the result is a
    list with one entry per input word, in the same order.
    """
    lowered = (word.lower() for word in words)
    return [stemmer.stem(token) for token in lowered]

def detect_biased_words_synonyms(text, biased_dict):
    """Find biased words in ``text`` and pair each with its suggestions.

    The text is lower-cased and split into ``\\w+`` tokens; every token is
    stemmed and looked up in a stem-keyed copy of ``biased_dict``.

    Args:
        text: Text to scan (for example a job description).
        biased_dict: Mapping of biased word -> suggested alternatives.
            Keys are stemmed as single words. If two keys share a stem,
            the one that comes later in the mapping replaces the earlier.

    Returns:
        A list with one dict per matching token, in order of appearance
        (repeated tokens are reported each time). Each dict has the keys
        ``"matched_word"`` (token as found in the lower-cased text),
        ``"biased_word"`` (the dictionary key it matched) and
        ``"suggestions"`` (that key's value).
    """
    # Break the job description into lower-case word tokens and stem them.
    tokens = re.findall(r'\b\w+\b', text.lower())
    token_stems = stem_words(tokens)

    # Re-key the dictionary by stem, remembering the original key.
    lookup = {}
    for term, alternatives in biased_dict.items():
        lookup[stemmer.stem(term.lower())] = (term, alternatives)

    matches = []
    for token, token_stem in zip(tokens, token_stems):
        if token_stem not in lookup:
            continue
        term, alternatives = lookup[token_stem]
        matches.append({
            "matched_word": token,
            "biased_word": term,
            "suggestions": alternatives
        })

    return matches

In [25]:
# Sample text that contains all four keys of `biased_synonyms`.
text = """
We are looking for a dominant and aggressive leader who is highly competitive and nurturing
to younger staff.
"""

# One result dict per matching token, in order of appearance in the text.
output = detect_biased_words_synonyms(text, biased_synonyms)

# Report each flagged word together with its suggested replacements.
for item in output:
    print(f"Found: '{item['matched_word']}' → Suggest: {item['suggestions']}")

Found: 'dominant' → Suggest: ['confident', 'strong leadership']
Found: 'aggressive' → Suggest: ['proactive', 'assertive']
Found: 'competitive' → Suggest: ['ambitious', 'goal-oriented']
Found: 'nurturing' → Suggest: ['supportive', 'team-oriented']
