In [2]:
# pip install nltk
# pip install transformers
# pip install lime

import sys
import os

import pandas as pd
import numpy as np

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.svm import LinearSVC
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier

from lime import lime_text
from lime.lime_text import LimeTextExplainer

import pickle, re, spacy
import xgboost as xgb
import tensorflow as tf
from transformers import BertTokenizer, TFBertForSequenceClassification
import spacy
from flask import Flask, request, jsonify
from lime import lime_text
from lime.lime_text import LimeTextExplainer
from spacy.lang.en.stop_words import STOP_WORDS
from nltk.stem import PorterStemmer
from sklearn.metrics.pairwise import cosine_similarity
from gensim.models import KeyedVectors

In [4]:
# preprocessing functions
# # Preprocessing functions
# add space before punctuations

app = Flask(__name__)
explainer = LimeTextExplainer(class_names=['female', 'male'])
stemmer = PorterStemmer()
def add_space_before(text):
    # regular expression to add space before punctuations
    processed_text = re.sub(r'([^\s\w])', r' \1', text)
    return processed_text

# remove gendered pronounds, names, stop words, and apply stemming
def removeUnnecessaryWords(text):
    nlp = spacy.load("en_core_web_sm")
    doc = nlp(text)

    result = " ".join([
        "" if (
            token.pos_ == "PRON" and token.lemma_ not in ["I", "you"]
        ) or (
            token.ent_type_ == "PERSON" or token.text.lower() in ["woman", "women", "man", "men", "he", "she", "him", "her"]
        ) or (
            token.text.lower() in STOP_WORDS
        ) else stemmer.stem(token.lemma_) for token in doc])

    return result.strip()

SVM using BUG dataset

In [21]:
# load data
dataset = pd.read_csv('../datasets/BUG/gold_BUG.csv')
train, test = train_test_split(dataset, test_size=0.2, random_state=42)
# get samples with only neutral or stereotype sentence
train = train[train['stereotype'].isin([0, 1])]
test = test[test['stereotype'].isin([0, 1])]

# preprocess data
print("PREPROCESSING DATA")
train['sentence_text'] = train['sentence_text'].apply(add_space_before).apply(removeUnnecessaryWords)
test['sentence_text'] = test['sentence_text'].apply(add_space_before).apply(removeUnnecessaryWords)
print("TRAIN", train['sentence_text'])
print("TEST", test['sentence_text'])

vectorizer = CountVectorizer()
trainTexts = vectorizer.fit_transform(train['sentence_text'])
testTexts = vectorizer.transform(test['sentence_text'])

xTrain = pd.DataFrame(trainTexts.toarray(), columns=vectorizer.get_feature_names_out())
xTest = pd.DataFrame(testTexts.toarray(), columns=vectorizer.get_feature_names_out())
yTrain = train['predicted gender']
yTest = test['predicted gender']

# train model
model = LinearSVC()
model.fit(xTrain, yTrain)

model = CalibratedClassifierCV(model, method='sigmoid', cv='prefit')
model.fit(xTrain, yTrain)

with open('../savedModels/svmModel.pkl', 'wb') as model_file:
    pickle.dump((model, vectorizer), model_file)

PREPROCESSING DATA
TRAIN 798     final  16 juli  chief   polic  klagenfurt anno...
1175    film   , produc  gowri product   ,  music scor...
1392    fist day  treatment   ,  patient  abl    limb ...
1118    captain   galley   " sã £ o    "   ,      , de...
1228    patient   # 28   develop fever  home quarantin...
                              ...                        
1130    follow  6 -week dietari lead -in phase   ,  pa...
1294    2011   ,     intern coach   time   ,   ¶w   , ...
860     patient  success treat   combin  plasmapheresi...
1459    goalscor debut   , barcelona manag   prais mun...
1126    initi  architect  matteo di cittã di castello ...
Name: sentence_text, Length: 1040, dtype: object
TEST 599     child hold  pomegran   hand   ,  symbol  passi...
1348    fire  kgo  novemb 2008   engin leav  microphon...
135     film   ,  young fighter pilot introduc  song  ...
694     irregular menstrual cycl   ,  patient  unclear...
240     person address  commend   pieti   ,   warn 



In [22]:
# accuracy
pred = model.predict(xTest)

accuracy = accuracy_score(yTest, pred)
print("Accuracy:", accuracy)

# confusion matrix
cm = confusion_matrix(yTest, pred)
print("Confusion matrix:\n", cm)

Accuracy: 0.8521400778210116
Confusion matrix:
 [[ 19  29]
 [  9 200]]


In [23]:
text = [
"""
Once upon a time in the vibrant city of Harmonyville , there lived a college student named Mia Rodriguez . Mia was a junior majoring in environmental science at Rivertide University . Her love for nature and a deep sense of responsibility towards the planet fueled her determination to make a positive impact . One sunny afternoon , Mia stumbled upon a notice about the annual Green Innovation Challenge —an event where students could pitch eco -friendly projects to a panel of environmental experts . Inspired and eager to contribute , Mia decided to develop a sustainable urban gardening initiative called  "GreenHaven . " With her hands in the soil and a heart full of passion , Mia transformed an unused corner of the campus into a thriving community garden . She envisioned GreenHaven as a place where students could come together , learn about sustainable agriculture , and cultivate their own fruits and vegetables . Mia believed that this initiative could not only promote environmental consciousness but also foster a sense of community among her peers . As the garden flourished , so did Mia 's connection with her fellow students . The project became a hub of creativity , where ideas for sustainable living blossomed alongside the vibrant array of fruits and vegetables . Mia 's dedication and leadership drew the attention of both students and faculty alike . When the day of the Green Innovation Challenge arrived , Mia nervously but proudly presented GreenHaven to the panel of judges . The vision , dedication , and positive impact of her project resonated deeply , earning her the first prize and a scholarship for further environmental studies . Word of Mia 's success spread , and GreenHaven became a symbol of sustainable living on campus . Mia 's journey didn 't end with the competition ; instead , it marked the beginning of a new chapter . With the scholarship in hand , Mia continued her studies , conducting research on innovative ways to create sustainable urban environments . As Mia graduated from Rivertide University , she left behind a legacy of green initiatives and a campus that had been transformed by the power of community and sustainability . GreenHaven continued to thrive , inspiring future generations of students to think creatively about environmental issues . Mia 's story became a beacon of hope , showing that even a single college student with a passion for change could make a lasting impact on the world . And so , as Mia embarked on her journey beyond college , she carried with her not just a degree but the knowledge that small , meaningful actions could ripple into waves of positive transformation for the planet and its people .
""",
"""
Once upon a time in the vibrant city of Harmonyville ,  lived a college student named   .  was a junior majoring in environmental science at Rivertide University .  love for nature and a deep sense of responsibility towards the planet fueled  determination to make a positive impact . One sunny afternoon ,  stumbled upon a notice about the annual Green Innovation Challenge — an event where students could pitch eco -friendly projects to a panel of environmental experts . Inspired and eager to contribute ,  decided to develop a sustainable urban gardening initiative called   " GreenHaven . " With  hands in the soil and a heart full of passion ,  transformed an unused corner of the campus into a thriving community garden .  envisioned GreenHaven as a place where students could come together , learn about sustainable agriculture , and cultivate  own fruits and vegetables .  believed that this initiative could not only promote environmental consciousness but also foster a sense of community among  peers . As the garden flourished , so did  's connection with  fellow students . The project became a hub of creativity , where ideas for sustainable living blossomed alongside the vibrant array of fruits and vegetables .  's dedication and leadership drew the attention of both students and faculty alike . When the day of the Green Innovation Challenge arrived ,  nervously but proudly presented GreenHaven to the panel of judges . The vision , dedication , and positive impact of  project resonated deeply , earning  the first prize and a scholarship for further environmental studies . Word of  's success spread , and  became a symbol of sustainable living on campus .  's    t end with the competition ; instead ,  marked the beginning of a new chapter . With the scholarship in hand ,  continued  studies , conducting research on innovative ways to create sustainable urban environments . As  graduated from Rivertide University ,  left behind a legacy of green initiatives and a campus  had been transformed by the power of community and sustainability . GreenHaven continued to thrive , inspiring future generations of students to think creatively about environmental issues .  's story became a beacon of hope , showing that even a single college student with a passion for change could make a lasting impact on the world . And so , as  embarked on  journey beyond college ,  carried with  not just a degree but the knowledge  small , meaningful actions could ripple into waves of positive transformation for the planet and  people .
""",
"""
Once upon a time in the bustling city of Arcadia , there lived a college student named Alex Reynolds . Alex was a junior majoring in computer science at the prestigious Arcadia University . He was a diligent student with a passion for coding and a penchant for exploring the world of technology . One day , as Alex was immersed in his studies at the campus library , he stumbled upon an intriguing flyer .
""",
"""
Once upon a time in the bustling city of Arcadia , there lived a college student . This student was a junior majoring in computer science at the prestigious Arcadia University . They were a diligent student with a passion for coding and a penchant for exploring the world of technology . One day , as this student was immersed in their studies at the campus library , they stumbled upon an intriguing flyer .
""",
"""
Once upon a time in the vibrant college town of Crestwood , there lived a spirited young student named Emily . She was a sophomore at Crestwood University , pursuing a degree in environmental science with dreams of making a positive impact on the planet . Emily was known for her boundless enthusiasm , infectious energy , and a love for exploring the world around her . One crisp autumn day , Emily stumbled upon a flyer for an environmental awareness club called  "Green Harmony " on the college bulletin board . Intrigued and passionate about environmental causes , she decided to attend their next meeting . As Emily walked into the meeting room , she was greeted by a diverse group of students who shared her passion for sustainability . The club was planning an ambitious project to transform an unused campus space into a thriving community garden . Emily was immediately captivated by the idea and eagerly joined the efforts . The days turned into weeks as the Green Harmony team worked tirelessly , planning , planting , and nurturing their garden . Emily found herself forming deep connections with her fellow club members as they faced challenges and celebrated victories together . The project not only brought life to the neglected space but also breathed new life into Emily 's college experience . Amidst the busy academic schedule , Emily discovered a sense of purpose beyond textbooks and exams . The garden project became a symbol of unity , showcasing the power of collaboration and the positive impact a group of dedicated individuals could make . As the garden flourished , so did Emily 's personal growth . She learned about sustainable practices , organic gardening , and the importance of community engagement . Her once mundane college routine transformed into a fulfilling journey , where every day brought new lessons and opportunities . Word of the Green Harmony garden spread throughout the campus , attracting attention from both students and faculty . The college recognized the club 's efforts and even awarded them a grant to expand their project . Emily and her friends found themselves at the forefront of a campus -wide movement toward sustainability . Through this journey , Emily discovered not only her passion for environmental science but also her ability to lead and inspire change . The once ordinary college student had become a beacon of hope and inspiration for those around her . As the seasons changed , so did Emily and her friends , leaving behind a legacy of a thriving community garden and a more environmentally conscious campus . The tale of the college student who discovered her purpose in the embrace of nature and community echoed through the years , inspiring future generations of students to dream big and make a difference in the world .
""",
"""
Once upon a time in the vibrant college town of Crestwood ,  lived a spirited young student named  .  was a sophomore at Crestwood University , pursuing a degree in environmental science with dreams of making a positive impact on the planet . Emily was known for  boundless enthusiasm , infectious energy , and a love for exploring the world around  . One crisp autumn day , Emily stumbled upon a flyer for an environmental awareness club called   " Green Harmony " on the college bulletin board . Intrigued and passionate about environmental causes ,  decided to attend  next meeting . As Emily walked into the meeting room ,  was greeted by a diverse group of students  shared  passion for sustainability . The club was planning an ambitious project to transform an unused campus space into a thriving community garden . Emily was immediately captivated by the idea and eagerly joined the efforts . The days turned into weeks as the   team worked tirelessly , planning , planting , and nurturing  garden . Emily found  forming deep connections with  fellow club members as  faced challenges and celebrated victories together . The project not only brought life to the neglected space but also breathed new life into Emily 's college experience . Amidst the busy academic schedule , Emily discovered a sense of purpose beyond textbooks and exams . The garden project became a symbol of unity , showcasing the power of collaboration and the positive impact a group of dedicated individuals could make . As the garden flourished , so did  's personal growth .  learned about sustainable practices , organic gardening , and the importance of community engagement .  once mundane college routine transformed into a fulfilling journey , where every day brought new lessons and opportunities . Word of the Green Harmony garden spread throughout the campus , attracting attention from both students and faculty . The college recognized the club 's efforts and even awarded  a grant to expand  project . Emily and  friends found  at the forefront of a campus -wide movement toward sustainability . Through this journey , Emily discovered not only  passion for environmental science but also  ability to lead and inspire change . The once ordinary college student had become a beacon of hope and inspiration for  around  . As the seasons changed , so did  and  friends , leaving behind a legacy of a thriving community garden and a more environmentally conscious campus . The tale of the college student  discovered  purpose in the embrace of nature and community echoed through the years , inspiring future generations of students to dream big and make a difference in the world .
""",
"""
In the vibrant city of Rivertown , there lived a college student named Jason Harris . Jason was a senior at Rivertown University , majoring in journalism and aspiring to be a storyteller . He had a keen interest in human experiences and a passion for shedding light on untold stories . One day , while perusing the local newspaper , Jason stumbled upon an article about an elderly woman named Mrs . Eleanor Bennett , who had spent decades working as a librarian in the city . Intrigued by the snippet of her life story , Jason felt compelled to dig deeper and share her tale with the world . After some research , Jason learned that Mrs . Bennett had not only been a dedicated librarian but had also been involved in community initiatives , fostering a love for reading among children and organizing events to bring people together . However , her own life story had largely remained in the shadows . Determined to uncover the full narrative , Jason reached out to Mrs . Bennett and asked for an interview . To his surprise , she welcomed the opportunity to share her story , and soon they sat down for a conversation that would unveil a lifetime of experiences . As Jason delved into Mrs . Bennett 's past , he discovered a woman who had overcome personal challenges , witnessed historical events , and touched the lives of many in Rivertown . Her journey was filled with moments of joy , heartbreak , and resilience , making for a compelling narrative that Jason couldn 't wait to share with his readers . With meticulous attention to detail , Jason crafted a feature article that not only highlighted Mrs . Bennett 's contributions to the community but also captured the essence of her character . The story resonated with readers , and the local newspaper received an overwhelming response . Mrs . Bennett became a beloved figure in Rivertown , celebrated for her dedication and the richness of her life story . The success of the article opened doors for Jason , who found himself on the path to becoming a respected journalist . Inspired by Mrs . Bennett 's story , he continued to seek out and share the untold narratives within his community , becoming a storyteller who bridged the gaps between generations . As graduation day approached , Jason reflected on his college journey . His time at Rivertown University had not only provided him with an education but had also instilled in him the power of storytelling to connect people and inspire change . With a heart full of gratitude and a portfolio filled with impactful stories , Jason ventured into the world , ready to make a difference , one narrative at a time .
""",
"""
In the vibrant city of Rivertown ,  lived a college student named   .  was a senior at Rivertown University , majoring in journalism and aspiring to be a storyteller .  had a keen interest in human experiences and a passion for shedding light on untold stories . One day , while perusing the local newspaper ,  stumbled upon an article about an elderly woman named  .   ,  had spent decades working as a librarian in the city . Intrigued by the snippet of  life story ,  felt compelled to dig deeper and share  tale with the world . After some research ,  learned that  .  had not only been a dedicated librarian but had also been involved in community initiatives , fostering a love for reading among children and organizing events to bring people together . However ,  own life story had largely remained in the shadows . Determined to uncover the full narrative ,  reached out to  .  and asked for an interview . To  surprise ,  welcomed the opportunity to share  story , and soon  sat down for a conversation  would unveil a lifetime of experiences . As  delved into  .  's past ,  discovered a woman  had overcome personal challenges , witnessed historical events , and touched the lives of many in Rivertown .  journey was filled with moments of joy , heartbreak , and resilience , making for a compelling narrative   couldn ' t wait to share with  readers . With meticulous attention to detail ,  crafted a feature article that not only highlighted  .  's contributions to the community but also captured the essence of  character . The story resonated with readers , and the local newspaper received an overwhelming response . Mrs .  became a beloved figure in Rivertown , celebrated for  dedication and the richness of  life story . The success of the article opened doors for  ,  found  on the path to becoming a respected journalist . Inspired by  .  's story ,  continued to seek out and share the untold narratives within  community , becoming a storyteller  bridged the gaps between generations . As graduation day approached ,  reflected on  college journey .  time at Rivertown University had not only provided  with an education but had also instilled in  the power of storytelling to connect people and inspire change . With a heart full of gratitude and a portfolio filled with impactful stories ,  ventured into the world , ready to make a difference , one narrative at a time .
""",
"""
In the bustling town of Summitville , there lived a college man named Lucas Turner . Lucas was a mechanical engineering major at Summitville Tech , known for his passion for building things from the ground up . His college life was a dynamic mix of late -night design sessions , hands -on projects , and a camaraderie with his fellow engineering students . One day , Lucas stumbled upon an old , abandoned workshop on the outskirts of town . The sight of forgotten tools and discarded materials sparked a fire in him . Inspired by the potential of the neglected space , he decided to breathe new life into it and create a collaborative hub for engineering students . Lucas rallied a group of like -minded friends , and together they transformed the workshop into the  "Innovation Forge . " It became a haven for students to work on personal projects , exchange ideas , and bring their engineering dreams to life . The space buzzed with the sounds of drills , the clanking of metal , and the hum of 3D printers as students collaborated on everything from robotics to sustainable energy solutions . As the Innovation Forge gained popularity , Lucas and his team decided to organize an annual engineering expo , showcasing the innovative projects that emerged from their workshop . The expo not only attracted students and faculty but also local businesses and industry professionals keen on discovering emerging talent . One year , Lucas and his team developed a prototype for a solar -powered irrigation system designed to help local farmers in Summitville . The project gained attention not only for its engineering prowess but also for its potential to make a real impact in the community . The Innovation Forge became a focal point for collaborative projects that aimed to address real -world challenges . Lucas 's journey through college was not just about earning a degree ; it was about leaving a lasting mark on the engineering community . As graduation approached , the Innovation Forge continued to thrive , and Lucas felt a sense of pride in the legacy he had created . The town of Summitville now had a vibrant hub of innovation , thanks to the hard work and dedication of Lucas Turner . His college experience became a testament to the transformative power of hands -on learning , collaboration , and the drive to make a positive impact on the world through engineering ingenuity .
""",
"""
In the bustling town of Summitville ,  lived a college man named   . Lucas was a mechanical engineering major at Summitville Tech , known for  passion for building things from the ground up .  college life was a dynamic mix of late -night design sessions , hands -on projects , and a camaraderie with  fellow engineering students . One day , Lucas stumbled upon an old , abandoned workshop on the outskirts of town . The sight of forgotten tools and discarded materials sparked a fire in  . Inspired by the potential of the neglected space ,  decided to breathe new life into  and create a collaborative hub for engineering students . Lucas rallied a group of like -minded friends , and together  transformed the workshop into the   " Innovation Forge . "  became a haven for students to work on personal projects , exchange ideas , and bring  engineering dreams to life . The space buzzed with the sounds of drills , the clanking of metal , and the hum of 3D printers as students collaborated on  from robotics to sustainable energy solutions . As the Innovation Forge gained popularity , Lucas and  team decided to organize an annual engineering expo , showcasing the innovative projects  emerged from  workshop . The expo not only attracted students and faculty but also local businesses and industry professionals keen on discovering emerging talent . One year , Lucas and  team developed a prototype for a solar -powered irrigation system designed to help local farmers in Summitville . The project gained attention not only for  engineering prowess but also for  potential to make a real impact in the community . The Innovation Forge became a focal point for collaborative projects  aimed to address real -world challenges . Lucas 's journey through college was not just about earning a degree ;  was about leaving a lasting mark on the engineering community . As graduation approached , the Innovation Forge continued to thrive , and Lucas felt a sense of pride in the legacy  had created . The town of Summitville now had a vibrant hub of innovation , thanks to the hard work and dedication of Lucas Turner .  college experience became a testament to the transformative power of hands -on learning , collaboration , and the drive to make a positive impact on the world through engineering ingenuity .
"""
]
vectorizedText = vectorizer.transform(text)
textsTransformed = pd.DataFrame(vectorizedText.toarray(), columns=vectorizer.get_feature_names_out())
predProb = model.predict_proba(textsTransformed)
pred = model.predict(textsTransformed)

yActual = [0, 0, 1, 1, 0, 0, 1, 1, 1, 1]
explainer = LimeTextExplainer(class_names=['female', 'male'])
for i in range(len(pred)):
    print(i + 1, ":")
    print("Male: ", predProb[i][1])
    print("Female: ", predProb[i][0])
    print("Predicted: ", pred[i])
    if(yActual[i] == 0):
        print("Actual: Female")
    else:
        print("Actual: Male")
    
    vectorized_text = vectorizer.transform([text[i]])
    predict_function = lambda x: model.predict_proba(vectorizer.transform(x))
    explanation = explainer.explain_instance(text[i], predict_function, num_features=20)
    top_words_lime = explanation.as_list()
    print(f"Top words for text response {i + 1}:")
    masculineWords = []
    feminineWords = []
    for word, score in top_words_lime:
        if score > 0:
            masculineWords.append((word, score))
        else:
            feminineWords.append((word, score))
    print("Masculine words: ", masculineWords)
    print("Feminine words: ", feminineWords)
    print("")





1 :
Male:  0.00010151018667889043
Female:  0.9998984898133211
Predicted:  Female
Actual: Female




Top words for text response 1:
Masculine words:  [('in', 0.16784448145898015), ('success', 0.1060485291667354), ('instead', 0.07485774435452584), ('time', 0.055928654847194975), ('develop', 0.037598707982474826), ('new', 0.03743953891859474), ('think', 0.037157703825689774), ('end', 0.035420828884542774), ('prize', 0.03352300466424995), ('dedication', 0.027878248466988137)]
Feminine words:  [('of', -0.49410816788661044), ('student', -0.31601551173255005), ('Mia', -0.24004766540140787), ('impact', -0.20806484155074614), ('love', -0.06232551456346695), ('power', -0.025861047539943435), ('small', -0.023043332950955567), ('day', -0.010089543046171387), ('Word', -0.009301476820896241), ('fruits', -0.004091221268628665)]

2 :
Male:  0.03578053637047803
Female:  0.9642194636295219
Predicted:  Female
Actual: Female




Top words for text response 2:
Masculine words:  [('in', 0.2208948409810855), ('success', 0.13924898460050372), ('time', 0.07786954033185205), ('instead', 0.07625541698581587), ('hope', 0.0553089179603407), ('come', 0.04434693113386587), ('hand', 0.04323522371084792), ('new', 0.042407701955249955), ('passion', 0.04006048738492008), ('learn', 0.03931408838963436), ('array', 0.03564656562083695)]
Feminine words:  [('of', -0.6003722599849535), ('student', -0.4885739244195459), ('impact', -0.2636778567663052), ('love', -0.07124049947039983), ('power', -0.03251009382464544), ('small', -0.02859588501794974), ('day', -0.02308320170671741), ('Word', -0.017991665775622515), ('University', -0.012772341010177057)]

3 :
Male:  0.4639135382306674
Female:  0.5360864617693326
Predicted:  Female
Actual: Male




Top words for text response 3:
Masculine words:  [('in', 0.26455823762922537), ('time', 0.16018876795893874), ('One', 0.028271033158327446), ('passion', 0.02362538525216148), ('junior', 0.01640690357112139), ('intriguing', 0.011420910831789196), ('as', 0.009506169249466368), ('bustling', 0.008589079527807015), ('computer', 0.007052240536983513), ('the', 0.005176410826334747), ('and', 0.005107208273047635)]
Feminine words:  [('student', -0.6968124946133076), ('of', -0.11319577423494766), ('day', -0.08178143954230029), ('an', -0.008383433206168403), ('world', -0.008268935558163118), ('campus', -0.007279211689168079), ('city', -0.005263896096640494), ('flyer', -0.004925962075769424), ('penchant', -0.004700055210008556)]

4 :
Male:  0.00041254865744741486
Female:  0.9995874513425526
Predicted:  Female
Actual: Male




Top words for text response 4:
Masculine words:  [('in', 0.010343632372091044), ('time', 0.006959535145720242), ('One', 0.0016969511295350944), ('passion', 0.0010290134024030497), ('coding', 0.0008287210747019788), ('there', 0.0006918309296950025), ('Arcadia', 0.0005720571922151267), ('stumbled', 0.0005462406224167309), ('library', 0.0005149226711141393), ('immersed', 0.0005057909271908547)]
Feminine words:  [('student', -0.9864636389656746), ('of', -0.004398544708439524), ('day', -0.003924767584974211), ('were', -0.0010770237261862657), ('flyer', -0.0007138767588632547), ('lived', -0.0007009848110524725), ('this', -0.0006644612134427812), ('bustling', -0.0006366509664858856), ('they', -0.0005938788944944701), ('at', -0.0005040579623090862)]

5 :
Male:  1.0293681843137464e-05
Female:  0.9999897063181569
Predicted:  Female
Actual: Female




Top words for text response 5:
Masculine words:  [('in', 0.14299915664607094), ('time', 0.06948792435562545), ('lead', 0.06301390485660277), ('new', 0.06130045325121815), ('team', 0.049829305039304404), ('conscious', 0.038822440667134264), ('expand', 0.034609783985968354), ('passion', 0.032263626621083195), ('movement', 0.02899887608797622)]
Feminine words:  [('student', -0.543998074481364), ('of', -0.23510398459416706), ('impact', -0.11819411297435543), ('life', -0.0802121407239637), ('room', -0.07223968916977144), ('power', -0.04663789443295397), ('love', -0.046133034515264335), ('club', -0.035713757655514315), ('day', -0.035446820822122097), ('board', -0.01874089821881861), ('named', -0.014334493043996756)]

6 :
Male:  1.0293681843137464e-05
Female:  0.9999897063181569
Predicted:  Female
Actual: Female




Top words for text response 6:
Masculine words:  [('in', 0.13256420500037727), ('new', 0.06886661675431491), ('time', 0.056462589214122845), ('team', 0.04818367682644263), ('lead', 0.04772139171525502), ('conscious', 0.04558278026242227), ('expand', 0.03750007084041019), ('passion', 0.029752075107540004), ('journey', 0.028573133618897992), ('young', 0.028336143040121064)]
Feminine words:  [('student', -0.553382699966538), ('of', -0.24701456700800045), ('impact', -0.1172333726827235), ('life', -0.07117813972889203), ('room', -0.06919252779148588), ('love', -0.06019243791599931), ('day', -0.051927705219641636), ('power', -0.041452991845982806), ('club', -0.029562666923855695), ('Word', -0.02398632425915734)]

7 :
Male:  0.9997201576219416
Female:  0.000279842378058448
Predicted:  Male
Actual: Male




Top words for text response 7:
Masculine words:  [('in', 0.42161475404564364), ('time', 0.12185671231861342), ('success', 0.1169770528542782), ('local', 0.06867410393366666), ('interview', 0.066986052943321), ('share', 0.05432474654897562), ('In', 0.043938690968905875), ('bring', 0.03710940800747659), ('keen', 0.035672479966010404), ('heart', 0.03493384052666579)]
Feminine words:  [('of', -0.16051180193215317), ('life', -0.10168914514672354), ('student', -0.1000744944943365), ('light', -0.0769260795918413), ('journalist', -0.06092682689792174), ('love', -0.053646097192507854), ('day', -0.046512184443728274), ('interest', -0.04032957043618411), ('power', -0.0370734534565606), ('shedding', -0.020248676131184123)]

8 :
Male:  0.9997201576219416
Female:  0.000279842378058448
Predicted:  Male
Actual: Male




Top words for text response 8:
Masculine words:  [('in', 0.4127605989671078), ('time', 0.12130716855094838), ('success', 0.10975568495167257), ('local', 0.06209393413465407), ('share', 0.05765952957182412), ('interview', 0.05398820811430485), ('In', 0.05074526945952506), ('keen', 0.04389417662423809), ('heart', 0.03328257689306341), ('bring', 0.03248936854191634), ('character', 0.02560803053380053)]
Feminine words:  [('of', -0.16751611231803626), ('student', -0.12648876976250728), ('life', -0.1092466320833714), ('light', -0.09100802713849462), ('journalist', -0.07003420302760817), ('day', -0.051504016499563335), ('love', -0.04536587555790603), ('interest', -0.0427930545889603), ('power', -0.02498147093269775)]

9 :
Male:  0.9519687149875187
Female:  0.04803128501248133
Predicted:  Male
Actual: Male




Top words for text response 9:
Masculine words:  [('in', 0.3319813333186945), ('team', 0.20530919738035633), ('point', 0.16082034728311498), ('local', 0.1494505145038244), ('keen', 0.10383849402836731), ('work', 0.09526347108209587), ('solar', 0.08809188592834459), ('design', 0.07938666240124265), ('new', 0.06989940405225974), ('town', 0.06884490064713782)]
Feminine words:  [('of', -0.3597467831639391), ('life', -0.1914702212737775), ('impact', -0.18397627453580961), ('up', -0.11389055648131191), ('mark', -0.07223430815030914), ('late', -0.06659482934167288), ('ground', -0.06609396252560543), ('focal', -0.06020482752647419), ('like', -0.05916354951107339), ('power', -0.05127104395100955)]

10 :
Male:  0.9519687149875187
Female:  0.04803128501248133
Predicted:  Male
Actual: Male
Top words for text response 10:
Masculine words:  [('in', 0.33952895451559983), ('team', 0.1974735173607874), ('point', 0.1679523042501242), ('local', 0.13525823035677462), ('work', 0.08890591641093085), ('keen'



Random Forest Using BUG Dataset

In [24]:
# load data
dataset = pd.read_csv('../datasets/BUG/gold_BUG.csv')
train, test = train_test_split(dataset, test_size=0.2, random_state=42)
# get samples with only neutral or stereotype sentence
train = train[train['stereotype'].isin([0, 1])]
test = test[test['stereotype'].isin([0, 1])]

# preprocess data
print("PREPROCESSING DATA")
train['sentence_text'] = train['sentence_text'].apply(add_space_before).apply(removeUnnecessaryWords)
test['sentence_text'] = test['sentence_text'].apply(add_space_before).apply(removeUnnecessaryWords)
print("TRAIN", train['sentence_text'])
print("TEST", test['sentence_text'])

# vectorize data
vectorizer = CountVectorizer()
trainTexts = vectorizer.fit_transform(train['sentence_text'])
testTexts = vectorizer.transform(test['sentence_text'])

xTrain = pd.DataFrame(trainTexts.toarray(), columns=vectorizer.get_feature_names_out())
xTest = pd.DataFrame(testTexts.toarray(), columns=vectorizer.get_feature_names_out())
yTrain = train['predicted gender']
yTest = test['predicted gender']

# train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(xTrain, yTrain)

with open('../savedModels/randomForestModel.pkl', 'wb') as model_file:
    pickle.dump((model, vectorizer), model_file)

PREPROCESSING DATA
TRAIN 798     final  16 juli  chief   polic  klagenfurt anno...
1175    film   , produc  gowri product   ,  music scor...
1392    fist day  treatment   ,  patient  abl    limb ...
1118    captain   galley   " sã £ o    "   ,      , de...
1228    patient   # 28   develop fever  home quarantin...
                              ...                        
1130    follow  6 -week dietari lead -in phase   ,  pa...
1294    2011   ,     intern coach   time   ,   ¶w   , ...
860     patient  success treat   combin  plasmapheresi...
1459    goalscor debut   , barcelona manag   prais mun...
1126    initi  architect  matteo di cittã di castello ...
Name: sentence_text, Length: 1040, dtype: object
TEST 599     child hold  pomegran   hand   ,  symbol  passi...
1348    fire  kgo  novemb 2008   engin leav  microphon...
135     film   ,  young fighter pilot introduc  song  ...
694     irregular menstrual cycl   ,  patient  unclear...
240     person address  commend   pieti   ,   warn 

In [25]:
# accuracy
pred = model.predict(xTest)

accuracy = accuracy_score(yTest, pred)
print("Accuracy:", accuracy)

# confusion matrix
cm = confusion_matrix(yTest, pred)
print("Confusion matrix:\n", cm)

Accuracy: 0.8404669260700389
Confusion matrix:
 [[  9  39]
 [  2 207]]


In [26]:
text = [
"""
Once upon a time in the vibrant city of Harmonyville , there lived a college student named Mia Rodriguez . Mia was a junior majoring in environmental science at Rivertide University . Her love for nature and a deep sense of responsibility towards the planet fueled her determination to make a positive impact . One sunny afternoon , Mia stumbled upon a notice about the annual Green Innovation Challenge —an event where students could pitch eco -friendly projects to a panel of environmental experts . Inspired and eager to contribute , Mia decided to develop a sustainable urban gardening initiative called  "GreenHaven . " With her hands in the soil and a heart full of passion , Mia transformed an unused corner of the campus into a thriving community garden . She envisioned GreenHaven as a place where students could come together , learn about sustainable agriculture , and cultivate their own fruits and vegetables . Mia believed that this initiative could not only promote environmental consciousness but also foster a sense of community among her peers . As the garden flourished , so did Mia 's connection with her fellow students . The project became a hub of creativity , where ideas for sustainable living blossomed alongside the vibrant array of fruits and vegetables . Mia 's dedication and leadership drew the attention of both students and faculty alike . When the day of the Green Innovation Challenge arrived , Mia nervously but proudly presented GreenHaven to the panel of judges . The vision , dedication , and positive impact of her project resonated deeply , earning her the first prize and a scholarship for further environmental studies . Word of Mia 's success spread , and GreenHaven became a symbol of sustainable living on campus . Mia 's journey didn 't end with the competition ; instead , it marked the beginning of a new chapter . With the scholarship in hand , Mia continued her studies , conducting research on innovative ways to create sustainable urban environments . As Mia graduated from Rivertide University , she left behind a legacy of green initiatives and a campus that had been transformed by the power of community and sustainability . GreenHaven continued to thrive , inspiring future generations of students to think creatively about environmental issues . Mia 's story became a beacon of hope , showing that even a single college student with a passion for change could make a lasting impact on the world . And so , as Mia embarked on her journey beyond college , she carried with her not just a degree but the knowledge that small , meaningful actions could ripple into waves of positive transformation for the planet and its people .
""",
"""
Once upon a time in the vibrant city of Harmonyville ,  lived a college student named   .  was a junior majoring in environmental science at Rivertide University .  love for nature and a deep sense of responsibility towards the planet fueled  determination to make a positive impact . One sunny afternoon ,  stumbled upon a notice about the annual Green Innovation Challenge — an event where students could pitch eco -friendly projects to a panel of environmental experts . Inspired and eager to contribute ,  decided to develop a sustainable urban gardening initiative called   " GreenHaven . " With  hands in the soil and a heart full of passion ,  transformed an unused corner of the campus into a thriving community garden .  envisioned GreenHaven as a place where students could come together , learn about sustainable agriculture , and cultivate  own fruits and vegetables .  believed that this initiative could not only promote environmental consciousness but also foster a sense of community among  peers . As the garden flourished , so did  's connection with  fellow students . The project became a hub of creativity , where ideas for sustainable living blossomed alongside the vibrant array of fruits and vegetables .  's dedication and leadership drew the attention of both students and faculty alike . When the day of the Green Innovation Challenge arrived ,  nervously but proudly presented GreenHaven to the panel of judges . The vision , dedication , and positive impact of  project resonated deeply , earning  the first prize and a scholarship for further environmental studies . Word of  's success spread , and  became a symbol of sustainable living on campus .  's    t end with the competition ; instead ,  marked the beginning of a new chapter . With the scholarship in hand ,  continued  studies , conducting research on innovative ways to create sustainable urban environments . As  graduated from Rivertide University ,  left behind a legacy of green initiatives and a campus  had been transformed by the power of community and sustainability . GreenHaven continued to thrive , inspiring future generations of students to think creatively about environmental issues .  's story became a beacon of hope , showing that even a single college student with a passion for change could make a lasting impact on the world . And so , as  embarked on  journey beyond college ,  carried with  not just a degree but the knowledge  small , meaningful actions could ripple into waves of positive transformation for the planet and  people .
""",
"""
Once upon a time in the bustling city of Arcadia , there lived a college student named Alex Reynolds . Alex was a junior majoring in computer science at the prestigious Arcadia University . He was a diligent student with a passion for coding and a penchant for exploring the world of technology . One day , as Alex was immersed in his studies at the campus library , he stumbled upon an intriguing flyer .
""",
"""
Once upon a time in the bustling city of Arcadia , there lived a college student . This student was a junior majoring in computer science at the prestigious Arcadia University . They were a diligent student with a passion for coding and a penchant for exploring the world of technology . One day , as this student was immersed in their studies at the campus library , they stumbled upon an intriguing flyer .
""",
"""
Once upon a time in the vibrant college town of Crestwood , there lived a spirited young student named Emily . She was a sophomore at Crestwood University , pursuing a degree in environmental science with dreams of making a positive impact on the planet . Emily was known for her boundless enthusiasm , infectious energy , and a love for exploring the world around her . One crisp autumn day , Emily stumbled upon a flyer for an environmental awareness club called  "Green Harmony " on the college bulletin board . Intrigued and passionate about environmental causes , she decided to attend their next meeting . As Emily walked into the meeting room , she was greeted by a diverse group of students who shared her passion for sustainability . The club was planning an ambitious project to transform an unused campus space into a thriving community garden . Emily was immediately captivated by the idea and eagerly joined the efforts . The days turned into weeks as the Green Harmony team worked tirelessly , planning , planting , and nurturing their garden . Emily found herself forming deep connections with her fellow club members as they faced challenges and celebrated victories together . The project not only brought life to the neglected space but also breathed new life into Emily 's college experience . Amidst the busy academic schedule , Emily discovered a sense of purpose beyond textbooks and exams . The garden project became a symbol of unity , showcasing the power of collaboration and the positive impact a group of dedicated individuals could make . As the garden flourished , so did Emily 's personal growth . She learned about sustainable practices , organic gardening , and the importance of community engagement . Her once mundane college routine transformed into a fulfilling journey , where every day brought new lessons and opportunities . Word of the Green Harmony garden spread throughout the campus , attracting attention from both students and faculty . The college recognized the club 's efforts and even awarded them a grant to expand their project . Emily and her friends found themselves at the forefront of a campus -wide movement toward sustainability . Through this journey , Emily discovered not only her passion for environmental science but also her ability to lead and inspire change . The once ordinary college student had become a beacon of hope and inspiration for those around her . As the seasons changed , so did Emily and her friends , leaving behind a legacy of a thriving community garden and a more environmentally conscious campus . The tale of the college student who discovered her purpose in the embrace of nature and community echoed through the years , inspiring future generations of students to dream big and make a difference in the world .
""",
"""
Once upon a time in the vibrant college town of Crestwood ,  lived a spirited young student named  .  was a sophomore at Crestwood University , pursuing a degree in environmental science with dreams of making a positive impact on the planet . Emily was known for  boundless enthusiasm , infectious energy , and a love for exploring the world around  . One crisp autumn day , Emily stumbled upon a flyer for an environmental awareness club called   " Green Harmony " on the college bulletin board . Intrigued and passionate about environmental causes ,  decided to attend  next meeting . As Emily walked into the meeting room ,  was greeted by a diverse group of students  shared  passion for sustainability . The club was planning an ambitious project to transform an unused campus space into a thriving community garden . Emily was immediately captivated by the idea and eagerly joined the efforts . The days turned into weeks as the   team worked tirelessly , planning , planting , and nurturing  garden . Emily found  forming deep connections with  fellow club members as  faced challenges and celebrated victories together . The project not only brought life to the neglected space but also breathed new life into Emily 's college experience . Amidst the busy academic schedule , Emily discovered a sense of purpose beyond textbooks and exams . The garden project became a symbol of unity , showcasing the power of collaboration and the positive impact a group of dedicated individuals could make . As the garden flourished , so did  's personal growth .  learned about sustainable practices , organic gardening , and the importance of community engagement .  once mundane college routine transformed into a fulfilling journey , where every day brought new lessons and opportunities . Word of the Green Harmony garden spread throughout the campus , attracting attention from both students and faculty . The college recognized the club 's efforts and even awarded  a grant to expand  project . Emily and  friends found  at the forefront of a campus -wide movement toward sustainability . Through this journey , Emily discovered not only  passion for environmental science but also  ability to lead and inspire change . The once ordinary college student had become a beacon of hope and inspiration for  around  . As the seasons changed , so did  and  friends , leaving behind a legacy of a thriving community garden and a more environmentally conscious campus . The tale of the college student  discovered  purpose in the embrace of nature and community echoed through the years , inspiring future generations of students to dream big and make a difference in the world .
""",
"""
In the vibrant city of Rivertown , there lived a college student named Jason Harris . Jason was a senior at Rivertown University , majoring in journalism and aspiring to be a storyteller . He had a keen interest in human experiences and a passion for shedding light on untold stories . One day , while perusing the local newspaper , Jason stumbled upon an article about an elderly woman named Mrs . Eleanor Bennett , who had spent decades working as a librarian in the city . Intrigued by the snippet of her life story , Jason felt compelled to dig deeper and share her tale with the world . After some research , Jason learned that Mrs . Bennett had not only been a dedicated librarian but had also been involved in community initiatives , fostering a love for reading among children and organizing events to bring people together . However , her own life story had largely remained in the shadows . Determined to uncover the full narrative , Jason reached out to Mrs . Bennett and asked for an interview . To his surprise , she welcomed the opportunity to share her story , and soon they sat down for a conversation that would unveil a lifetime of experiences . As Jason delved into Mrs . Bennett 's past , he discovered a woman who had overcome personal challenges , witnessed historical events , and touched the lives of many in Rivertown . Her journey was filled with moments of joy , heartbreak , and resilience , making for a compelling narrative that Jason couldn 't wait to share with his readers . With meticulous attention to detail , Jason crafted a feature article that not only highlighted Mrs . Bennett 's contributions to the community but also captured the essence of her character . The story resonated with readers , and the local newspaper received an overwhelming response . Mrs . Bennett became a beloved figure in Rivertown , celebrated for her dedication and the richness of her life story . The success of the article opened doors for Jason , who found himself on the path to becoming a respected journalist . Inspired by Mrs . Bennett 's story , he continued to seek out and share the untold narratives within his community , becoming a storyteller who bridged the gaps between generations . As graduation day approached , Jason reflected on his college journey . His time at Rivertown University had not only provided him with an education but had also instilled in him the power of storytelling to connect people and inspire change . With a heart full of gratitude and a portfolio filled with impactful stories , Jason ventured into the world , ready to make a difference , one narrative at a time .
""",
"""
In the vibrant city of Rivertown ,  lived a college student named   .  was a senior at Rivertown University , majoring in journalism and aspiring to be a storyteller .  had a keen interest in human experiences and a passion for shedding light on untold stories . One day , while perusing the local newspaper ,  stumbled upon an article about an elderly woman named  .   ,  had spent decades working as a librarian in the city . Intrigued by the snippet of  life story ,  felt compelled to dig deeper and share  tale with the world . After some research ,  learned that  .  had not only been a dedicated librarian but had also been involved in community initiatives , fostering a love for reading among children and organizing events to bring people together . However ,  own life story had largely remained in the shadows . Determined to uncover the full narrative ,  reached out to  .  and asked for an interview . To  surprise ,  welcomed the opportunity to share  story , and soon  sat down for a conversation  would unveil a lifetime of experiences . As  delved into  .  's past ,  discovered a woman  had overcome personal challenges , witnessed historical events , and touched the lives of many in Rivertown .  journey was filled with moments of joy , heartbreak , and resilience , making for a compelling narrative   couldn ' t wait to share with  readers . With meticulous attention to detail ,  crafted a feature article that not only highlighted  .  's contributions to the community but also captured the essence of  character . The story resonated with readers , and the local newspaper received an overwhelming response . Mrs .  became a beloved figure in Rivertown , celebrated for  dedication and the richness of  life story . The success of the article opened doors for  ,  found  on the path to becoming a respected journalist . Inspired by  .  's story ,  continued to seek out and share the untold narratives within  community , becoming a storyteller  bridged the gaps between generations . As graduation day approached ,  reflected on  college journey .  time at Rivertown University had not only provided  with an education but had also instilled in  the power of storytelling to connect people and inspire change . With a heart full of gratitude and a portfolio filled with impactful stories ,  ventured into the world , ready to make a difference , one narrative at a time .
""",
"""
In the bustling town of Summitville , there lived a college man named Lucas Turner . Lucas was a mechanical engineering major at Summitville Tech , known for his passion for building things from the ground up . His college life was a dynamic mix of late -night design sessions , hands -on projects , and a camaraderie with his fellow engineering students . One day , Lucas stumbled upon an old , abandoned workshop on the outskirts of town . The sight of forgotten tools and discarded materials sparked a fire in him . Inspired by the potential of the neglected space , he decided to breathe new life into it and create a collaborative hub for engineering students . Lucas rallied a group of like -minded friends , and together they transformed the workshop into the  "Innovation Forge . " It became a haven for students to work on personal projects , exchange ideas , and bring their engineering dreams to life . The space buzzed with the sounds of drills , the clanking of metal , and the hum of 3D printers as students collaborated on everything from robotics to sustainable energy solutions . As the Innovation Forge gained popularity , Lucas and his team decided to organize an annual engineering expo , showcasing the innovative projects that emerged from their workshop . The expo not only attracted students and faculty but also local businesses and industry professionals keen on discovering emerging talent . One year , Lucas and his team developed a prototype for a solar -powered irrigation system designed to help local farmers in Summitville . The project gained attention not only for its engineering prowess but also for its potential to make a real impact in the community . The Innovation Forge became a focal point for collaborative projects that aimed to address real -world challenges . Lucas 's journey through college was not just about earning a degree ; it was about leaving a lasting mark on the engineering community . As graduation approached , the Innovation Forge continued to thrive , and Lucas felt a sense of pride in the legacy he had created . The town of Summitville now had a vibrant hub of innovation , thanks to the hard work and dedication of Lucas Turner . His college experience became a testament to the transformative power of hands -on learning , collaboration , and the drive to make a positive impact on the world through engineering ingenuity .
""",
"""
In the bustling town of Summitville ,  lived a college man named   . Lucas was a mechanical engineering major at Summitville Tech , known for  passion for building things from the ground up .  college life was a dynamic mix of late -night design sessions , hands -on projects , and a camaraderie with  fellow engineering students . One day , Lucas stumbled upon an old , abandoned workshop on the outskirts of town . The sight of forgotten tools and discarded materials sparked a fire in  . Inspired by the potential of the neglected space ,  decided to breathe new life into  and create a collaborative hub for engineering students . Lucas rallied a group of like -minded friends , and together  transformed the workshop into the   " Innovation Forge . "  became a haven for students to work on personal projects , exchange ideas , and bring  engineering dreams to life . The space buzzed with the sounds of drills , the clanking of metal , and the hum of 3D printers as students collaborated on  from robotics to sustainable energy solutions . As the Innovation Forge gained popularity , Lucas and  team decided to organize an annual engineering expo , showcasing the innovative projects  emerged from  workshop . The expo not only attracted students and faculty but also local businesses and industry professionals keen on discovering emerging talent . One year , Lucas and  team developed a prototype for a solar -powered irrigation system designed to help local farmers in Summitville . The project gained attention not only for  engineering prowess but also for  potential to make a real impact in the community . The Innovation Forge became a focal point for collaborative projects  aimed to address real -world challenges . Lucas 's journey through college was not just about earning a degree ;  was about leaving a lasting mark on the engineering community . As graduation approached , the Innovation Forge continued to thrive , and Lucas felt a sense of pride in the legacy  had created . The town of Summitville now had a vibrant hub of innovation , thanks to the hard work and dedication of Lucas Turner .  college experience became a testament to the transformative power of hands -on learning , collaboration , and the drive to make a positive impact on the world through engineering ingenuity .
"""
]
vectorizedText = vectorizer.transform(text)
textsTransformed = pd.DataFrame(vectorizedText.toarray(), columns=vectorizer.get_feature_names_out())
predProb = model.predict_proba(textsTransformed)
pred = model.predict(textsTransformed)

yActual = [0, 0, 1, 1, 0, 0, 1, 1, 1, 1]
explainer = LimeTextExplainer(class_names=['female', 'male'])
for i in range(len(pred)):
    print(i + 1, ":")
    print("Male: ", predProb[i][1])
    print("Female: ", predProb[i][0])
    print("Predicted: ", pred[i])
    if(yActual[i] == 0):
        print("Actual: Female")
    else:
        print("Actual: Male")
        
    vectorized_text = vectorizer.transform([text[i]])
    predict_function = lambda x: model.predict_proba(vectorizer.transform(x))
    explanation = explainer.explain_instance(text[i], predict_function, num_features=20)
    top_words_lime = explanation.as_list()
    print(f"Top words for text response {i + 1}:")
    masculineWords = []
    feminineWords = []
    for word, score in top_words_lime:
        if score > 0:
            masculineWords.append((word, score))
        else:
            feminineWords.append((word, score))
    print("Masculine words: ", masculineWords)
    print("Feminine words: ", feminineWords)
    print("")





1 :
Male:  0.52
Female:  0.48
Predicted:  Male
Actual: Female




Top words for text response 1:
Masculine words:  [('time', 0.00927089278924466), ('develop', 0.007428537396026901), ('end', 0.007021843137478775), ('new', 0.006926128327087234), ('research', 0.006694003925181444), ('hand', 0.006204821990946373), ('left', 0.004139101336736421), ('array', 0.002289186586949232)]
Feminine words:  [('student', -0.21376884559377704), ('impact', -0.06951558281722608), ('small', -0.058355220513086954), ('power', -0.03220018244970204), ('Mia', -0.02719209727124276), ('Word', -0.02154162997852734), ('love', -0.01282924299116249), ('place', -0.006305989040283224), ('day', -0.004147303816270619), ('towards', -0.002086317198364583), ('spread', -0.002047637143273982), ('faculty', -0.002046249482503989)]

2 :
Male:  0.54
Female:  0.46
Predicted:  Male
Actual: Female




Top words for text response 2:
Masculine words:  [('time', 0.008709211916261), ('develop', 0.0073515294863326025), ('hand', 0.007171301907474633), ('end', 0.007098561525220502), ('new', 0.004773462498642545), ('left', 0.004679254737176837), ('heart', 0.003283001429299799), ('blossomed', 0.0029351455260106084), ('of', 0.002388547909575502)]
Feminine words:  [('student', -0.21221981413366292), ('impact', -0.07098389382712726), ('small', -0.0656962388742452), ('power', -0.031368434508664444), ('Word', -0.020229868878014843), ('love', -0.012185351987118261), ('place', -0.006949046036416054), ('day', -0.004344097924809001), ('Inspired', -0.0019651890258904634), ('also', -0.0019094703377027272), ('generations', -0.0013649981771274796)]

3 :
Male:  0.69
Female:  0.31
Predicted:  Male
Actual: Male




Top words for text response 3:
Masculine words:  [('time', 0.009980458741336427), ('exploring', 0.0003481429049362341), ('penchant', 0.00021138169888605684), ('upon', 0.0001547907086509868), ('immersed', 0.00010839431308189119), ('stumbled', 0.00010758634716467531), ('at', 0.0001064023437135106), ('his', 7.557237318744593e-05), ('of', 7.02415534484977e-05)]
Feminine words:  [('student', -0.28449943634371755), ('day', -0.018207610641003142), ('world', -0.0017643934788704665), ('diligent', -0.0002862061284626344), ('computer', -0.00022957546826660996), ('junior', -0.00017751713913217903), ('the', -0.00015427248362366756), ('coding', -0.00015326719642085008), ('and', -0.00014606398350918969), ('intriguing', -0.00014604042292088755), ('prestigious', -0.00012566503489686557)]

4 :
Male:  0.67
Female:  0.33
Predicted:  Male
Actual: Male




Top words for text response 4:
Masculine words:  [('time', 0.009954949900091123), ('they', 0.0002424929565764422), ('an', 0.00014957927965622266), ('stumbled', 0.00012789454990082963), ('for', 0.00010773042295127337), ('at', 8.67036995502618e-05), ('This', 8.303136965308226e-05)]
Feminine words:  [('student', -0.3045247676646713), ('day', -0.017822208619284265), ('world', -0.0014972983113197911), ('One', -0.0002673781571674218), ('of', -0.0002443480018049281), ('library', -0.0002332032155704052), ('campus', -0.00019937294021922804), ('passion', -0.0001572316905372197), ('was', -0.00015057004385472624), ('studies', -0.0001492352311188289), ('junior', -0.0001289359602041608), ('in', -0.00011405215702162537), ('city', -0.00010390654196305228)]

5 :
Male:  0.51
Female:  0.49
Predicted:  Male
Actual: Female




Top words for text response 5:
Masculine words:  [('time', 0.010896227947357475), ('had', 0.003543613471378731), ('world', 0.0033478233784690636), ('project', 0.002461200166370664), ('new', 0.0024342104211970135)]
Feminine words:  [('student', -0.17852615855022388), ('impact', -0.056017322328403736), ('day', -0.03951266571226547), ('room', -0.03757754736457982), ('Word', -0.036826657776227505), ('power', -0.0312412267119105), ('life', -0.010896826933247073), ('love', -0.005272151374702828), ('young', -0.003757984261138548), ('board', -0.0024633769811521263), ('expand', -0.002296876700568217), ('turned', -0.0018791087186872967), ('those', -0.001849861523108184), ('every', -0.0016297425068684803), ('learned', -0.0016106197087998185)]

6 :
Male:  0.51
Female:  0.49
Predicted:  Male
Actual: Female




Top words for text response 6:
Masculine words:  [('time', 0.01068692742805908), ('dreams', 0.003936071978282219), ('degree', 0.0028396348277235496), ('nurturing', 0.0027427963323182264), ('world', 0.002197701990122575), ('expand', 0.002121362342281108), ('connections', 0.002086569748129786), ('days', 0.0018673553860885641), ('this', 0.001804434523246125)]
Feminine words:  [('student', -0.17792643447826792), ('impact', -0.05480046236942316), ('room', -0.04255717216720842), ('day', -0.03882315469445327), ('Word', -0.03602825604390915), ('power', -0.031092164426108664), ('life', -0.009989378227169523), ('love', -0.006038399927817068), ('idea', -0.0055078872337332154), ('young', -0.005305932285572062), ('change', -0.0024747377567349463)]

7 :
Male:  0.5
Female:  0.5
Predicted:  Female
Actual: Male




Top words for text response 7:
Masculine words:  [('time', 0.010990950861302477), ('world', 0.004684324976226233), ('bring', 0.002893026211102834), ('spent', 0.001660331363726208), ('heart', 0.0015367250284355081), ('within', 0.0011516548598955669), ('seek', 0.0010572914339068394), ('ventured', 0.0010515743550113922)]
Feminine words:  [('student', -0.2273665622144489), ('day', -0.05965778185869973), ('journalist', -0.0532455437972501), ('interest', -0.04357034264559351), ('power', -0.027787357120919204), ('light', -0.025723060686296317), ('love', -0.015163699191066313), ('life', -0.011618905203521261), ('an', -0.0017877233073167662), ('vibrant', -0.0016727861729982335), ('past', -0.0015546002754864101), ('response', -0.0013062329299272672)]

8 :
Male:  0.5
Female:  0.5
Predicted:  Female
Actual: Male




Top words for text response 8:
Masculine words:  [('time', 0.010224458493069171), ('world', 0.00445155598208983), ('bring', 0.002510797072474786), ('seek', 0.0013294773255724138), ('city', 0.0011814126243902222), ('richness', 0.0011564878155308632), ('overcome', 0.0011141331684466685)]
Feminine words:  [('student', -0.2277096698008102), ('day', -0.0594294164692745), ('journalist', -0.052948113529874524), ('interest', -0.04398502523395989), ('power', -0.026703904322319335), ('light', -0.02650005597881668), ('love', -0.01481609442360043), ('life', -0.01121380702368885), ('past', -0.0013965394217949894), ('about', -0.0011590877957780657), ('also', -0.0010585496010671015), ('change', -0.0009652046978408562), ('contributions', -0.0008590006386957322)]

9 :
Male:  0.63
Female:  0.37
Predicted:  Male
Actual: Male




Top words for text response 9:
Masculine words:  [('old', 0.018051179262996854), ('year', 0.01713585601751771), ('mark', 0.006355681293945065), ('day', 0.0037787611943143717), ('new', 0.002699773676338348), ('like', 0.0025220731038304977), ('design', 0.0021028306339310843)]
Feminine words:  [('impact', -0.08884729917003174), ('up', -0.08335788767611445), ('sight', -0.04004663505214486), ('focal', -0.03237335892103803), ('power', -0.027394049267096737), ('major', -0.023752631653094828), ('ground', -0.023248360545464294), ('real', -0.013603410548758058), ('help', -0.006716002252940963), ('life', -0.0065124032431688565), ('system', -0.005041828792271771), ('late', -0.003277116030129005), ('work', -0.0017966632841367866)]

10 :
Male:  0.63
Female:  0.37
Predicted:  Male
Actual: Male
Top words for text response 10:
Masculine words:  [('old', 0.018650530042573398), ('year', 0.016242545493405286), ('mark', 0.005768809124855754), ('new', 0.003454635578900664), ('day', 0.0033139298896936852), (



XGBoost

In [35]:
# load data
dataset = pd.read_csv('../datasets/BUG/gold_BUG.csv')
dataset.replace("Male", 1, inplace=True)
dataset.replace("Female", 0, inplace=True)
print(dataset.head())
train, test = train_test_split(dataset, test_size=0.2, random_state=42)
# get samples with only neutral or stereotype sentence
train = train[train['stereotype'].isin([0, 1])]
test = test[test['stereotype'].isin([0, 1])]

# preprocess data
print("PREPROCESSING DATA")
train['sentence_text'] = train['sentence_text'].apply(add_space_before).apply(removeUnnecessaryWords)
test['sentence_text'] = test['sentence_text'].apply(add_space_before).apply(removeUnnecessaryWords)
print("TRAIN", train['sentence_text'])
print("TEST", test['sentence_text'])

vectorizer = CountVectorizer()
trainTexts = vectorizer.fit_transform(train['sentence_text'])
testTexts = vectorizer.transform(test['sentence_text'])

xTrain = pd.DataFrame(trainTexts.toarray(), columns=vectorizer.get_feature_names_out())
xTest = pd.DataFrame(testTexts.toarray(), columns=vectorizer.get_feature_names_out())
yTrain = train['predicted gender']
yTest = test['predicted gender']

# train model
model = xgb.XGBClassifier(tree_method="hist", early_stopping_rounds=2)
model.fit(xTrain, yTrain, eval_set=[(xTest, yTest)])

with open('../savedModels/XGBoostModel.pkl', 'wb') as model_file:
    pickle.dump((model, vectorizer), model_file)

   Unnamed: 0  uid                                      sentence_text  \
0           0    0  My friend , who grew up in Africa , explained ...   
1           1    1  â€¢ Lastly , for the patient that did not need...   
2           2    2  Her early years as a resident doctor in the No...   
3           3    3  Another participant stated , " Without network...   
4           4    4  The patient followed up in the nephrology clin...   

                                              tokens profession    g  \
0  ['My', 'friend', ',', 'who', 'grew', 'up', 'in...     friend   he   
1  ['â€¢', 'Lastly', ',', 'for', 'the', 'patient'...    patient  his   
2  ['Her', 'early', 'years', 'as', 'a', 'resident...     doctor  her   
3  ['Another', 'participant', 'stated', ',', '"',...    teacher  she   
4  ['The', 'patient', 'followed', 'up', 'in', 'th...    patient  his   

   profession_first_index  g_first_index predicted gender  stereotype  \
0                       1             16             Ma

ValueError: Invalid classes inferred from unique values of `y`.  Expected: [0 1], got ['Female' 'Male']

In [None]:
# accuracy
pred = model.predict(xTest)

accuracy = accuracy_score(yTest, pred)
print("Accuracy:", accuracy)

# confusion matrix
cm = confusion_matrix(yTest, pred)
print("Confusion matrix:\n", cm)

Accuracy: 0.8249027237354085
Confusion matrix:
 [[ 11  37]
 [  8 201]]


In [38]:
text = [
"""
Once upon a time in the vibrant city of Harmonyville , there lived a college student named Mia Rodriguez . Mia was a junior majoring in environmental science at Rivertide University . Her love for nature and a deep sense of responsibility towards the planet fueled her determination to make a positive impact . One sunny afternoon , Mia stumbled upon a notice about the annual Green Innovation Challenge —an event where students could pitch eco -friendly projects to a panel of environmental experts . Inspired and eager to contribute , Mia decided to develop a sustainable urban gardening initiative called  "GreenHaven . " With her hands in the soil and a heart full of passion , Mia transformed an unused corner of the campus into a thriving community garden . She envisioned GreenHaven as a place where students could come together , learn about sustainable agriculture , and cultivate their own fruits and vegetables . Mia believed that this initiative could not only promote environmental consciousness but also foster a sense of community among her peers . As the garden flourished , so did Mia 's connection with her fellow students . The project became a hub of creativity , where ideas for sustainable living blossomed alongside the vibrant array of fruits and vegetables . Mia 's dedication and leadership drew the attention of both students and faculty alike . When the day of the Green Innovation Challenge arrived , Mia nervously but proudly presented GreenHaven to the panel of judges . The vision , dedication , and positive impact of her project resonated deeply , earning her the first prize and a scholarship for further environmental studies . Word of Mia 's success spread , and GreenHaven became a symbol of sustainable living on campus . Mia 's journey didn 't end with the competition ; instead , it marked the beginning of a new chapter . With the scholarship in hand , Mia continued her studies , conducting research on innovative ways to create sustainable urban environments . As Mia graduated from Rivertide University , she left behind a legacy of green initiatives and a campus that had been transformed by the power of community and sustainability . GreenHaven continued to thrive , inspiring future generations of students to think creatively about environmental issues . Mia 's story became a beacon of hope , showing that even a single college student with a passion for change could make a lasting impact on the world . And so , as Mia embarked on her journey beyond college , she carried with her not just a degree but the knowledge that small , meaningful actions could ripple into waves of positive transformation for the planet and its people .
""",
"""
Once upon a time in the vibrant city of Harmonyville ,  lived a college student named   .  was a junior majoring in environmental science at Rivertide University .  love for nature and a deep sense of responsibility towards the planet fueled  determination to make a positive impact . One sunny afternoon ,  stumbled upon a notice about the annual Green Innovation Challenge — an event where students could pitch eco -friendly projects to a panel of environmental experts . Inspired and eager to contribute ,  decided to develop a sustainable urban gardening initiative called   " GreenHaven . " With  hands in the soil and a heart full of passion ,  transformed an unused corner of the campus into a thriving community garden .  envisioned GreenHaven as a place where students could come together , learn about sustainable agriculture , and cultivate  own fruits and vegetables .  believed that this initiative could not only promote environmental consciousness but also foster a sense of community among  peers . As the garden flourished , so did  's connection with  fellow students . The project became a hub of creativity , where ideas for sustainable living blossomed alongside the vibrant array of fruits and vegetables .  's dedication and leadership drew the attention of both students and faculty alike . When the day of the Green Innovation Challenge arrived ,  nervously but proudly presented GreenHaven to the panel of judges . The vision , dedication , and positive impact of  project resonated deeply , earning  the first prize and a scholarship for further environmental studies . Word of  's success spread , and  became a symbol of sustainable living on campus .  's    t end with the competition ; instead ,  marked the beginning of a new chapter . With the scholarship in hand ,  continued  studies , conducting research on innovative ways to create sustainable urban environments . As  graduated from Rivertide University ,  left behind a legacy of green initiatives and a campus  had been transformed by the power of community and sustainability . GreenHaven continued to thrive , inspiring future generations of students to think creatively about environmental issues .  's story became a beacon of hope , showing that even a single college student with a passion for change could make a lasting impact on the world . And so , as  embarked on  journey beyond college ,  carried with  not just a degree but the knowledge  small , meaningful actions could ripple into waves of positive transformation for the planet and  people .
""",
"""
Once upon a time in the bustling city of Arcadia , there lived a college student named Alex Reynolds . Alex was a junior majoring in computer science at the prestigious Arcadia University . He was a diligent student with a passion for coding and a penchant for exploring the world of technology . One day , as Alex was immersed in his studies at the campus library , he stumbled upon an intriguing flyer .
""",
"""
Once upon a time in the bustling city of Arcadia , there lived a college student . This student was a junior majoring in computer science at the prestigious Arcadia University . They were a diligent student with a passion for coding and a penchant for exploring the world of technology . One day , as this student was immersed in their studies at the campus library , they stumbled upon an intriguing flyer .
""",
"""
Once upon a time in the vibrant college town of Crestwood , there lived a spirited young student named Emily . She was a sophomore at Crestwood University , pursuing a degree in environmental science with dreams of making a positive impact on the planet . Emily was known for her boundless enthusiasm , infectious energy , and a love for exploring the world around her . One crisp autumn day , Emily stumbled upon a flyer for an environmental awareness club called  "Green Harmony " on the college bulletin board . Intrigued and passionate about environmental causes , she decided to attend their next meeting . As Emily walked into the meeting room , she was greeted by a diverse group of students who shared her passion for sustainability . The club was planning an ambitious project to transform an unused campus space into a thriving community garden . Emily was immediately captivated by the idea and eagerly joined the efforts . The days turned into weeks as the Green Harmony team worked tirelessly , planning , planting , and nurturing their garden . Emily found herself forming deep connections with her fellow club members as they faced challenges and celebrated victories together . The project not only brought life to the neglected space but also breathed new life into Emily 's college experience . Amidst the busy academic schedule , Emily discovered a sense of purpose beyond textbooks and exams . The garden project became a symbol of unity , showcasing the power of collaboration and the positive impact a group of dedicated individuals could make . As the garden flourished , so did Emily 's personal growth . She learned about sustainable practices , organic gardening , and the importance of community engagement . Her once mundane college routine transformed into a fulfilling journey , where every day brought new lessons and opportunities . Word of the Green Harmony garden spread throughout the campus , attracting attention from both students and faculty . The college recognized the club 's efforts and even awarded them a grant to expand their project . Emily and her friends found themselves at the forefront of a campus -wide movement toward sustainability . Through this journey , Emily discovered not only her passion for environmental science but also her ability to lead and inspire change . The once ordinary college student had become a beacon of hope and inspiration for those around her . As the seasons changed , so did Emily and her friends , leaving behind a legacy of a thriving community garden and a more environmentally conscious campus . The tale of the college student who discovered her purpose in the embrace of nature and community echoed through the years , inspiring future generations of students to dream big and make a difference in the world .
""",
"""
Once upon a time in the vibrant college town of Crestwood ,  lived a spirited young student named  .  was a sophomore at Crestwood University , pursuing a degree in environmental science with dreams of making a positive impact on the planet . Emily was known for  boundless enthusiasm , infectious energy , and a love for exploring the world around  . One crisp autumn day , Emily stumbled upon a flyer for an environmental awareness club called   " Green Harmony " on the college bulletin board . Intrigued and passionate about environmental causes ,  decided to attend  next meeting . As Emily walked into the meeting room ,  was greeted by a diverse group of students  shared  passion for sustainability . The club was planning an ambitious project to transform an unused campus space into a thriving community garden . Emily was immediately captivated by the idea and eagerly joined the efforts . The days turned into weeks as the   team worked tirelessly , planning , planting , and nurturing  garden . Emily found  forming deep connections with  fellow club members as  faced challenges and celebrated victories together . The project not only brought life to the neglected space but also breathed new life into Emily 's college experience . Amidst the busy academic schedule , Emily discovered a sense of purpose beyond textbooks and exams . The garden project became a symbol of unity , showcasing the power of collaboration and the positive impact a group of dedicated individuals could make . As the garden flourished , so did  's personal growth .  learned about sustainable practices , organic gardening , and the importance of community engagement .  once mundane college routine transformed into a fulfilling journey , where every day brought new lessons and opportunities . Word of the Green Harmony garden spread throughout the campus , attracting attention from both students and faculty . The college recognized the club 's efforts and even awarded  a grant to expand  project . Emily and  friends found  at the forefront of a campus -wide movement toward sustainability . Through this journey , Emily discovered not only  passion for environmental science but also  ability to lead and inspire change . The once ordinary college student had become a beacon of hope and inspiration for  around  . As the seasons changed , so did  and  friends , leaving behind a legacy of a thriving community garden and a more environmentally conscious campus . The tale of the college student  discovered  purpose in the embrace of nature and community echoed through the years , inspiring future generations of students to dream big and make a difference in the world .
""",
"""
In the vibrant city of Rivertown , there lived a college student named Jason Harris . Jason was a senior at Rivertown University , majoring in journalism and aspiring to be a storyteller . He had a keen interest in human experiences and a passion for shedding light on untold stories . One day , while perusing the local newspaper , Jason stumbled upon an article about an elderly woman named Mrs . Eleanor Bennett , who had spent decades working as a librarian in the city . Intrigued by the snippet of her life story , Jason felt compelled to dig deeper and share her tale with the world . After some research , Jason learned that Mrs . Bennett had not only been a dedicated librarian but had also been involved in community initiatives , fostering a love for reading among children and organizing events to bring people together . However , her own life story had largely remained in the shadows . Determined to uncover the full narrative , Jason reached out to Mrs . Bennett and asked for an interview . To his surprise , she welcomed the opportunity to share her story , and soon they sat down for a conversation that would unveil a lifetime of experiences . As Jason delved into Mrs . Bennett 's past , he discovered a woman who had overcome personal challenges , witnessed historical events , and touched the lives of many in Rivertown . Her journey was filled with moments of joy , heartbreak , and resilience , making for a compelling narrative that Jason couldn 't wait to share with his readers . With meticulous attention to detail , Jason crafted a feature article that not only highlighted Mrs . Bennett 's contributions to the community but also captured the essence of her character . The story resonated with readers , and the local newspaper received an overwhelming response . Mrs . Bennett became a beloved figure in Rivertown , celebrated for her dedication and the richness of her life story . The success of the article opened doors for Jason , who found himself on the path to becoming a respected journalist . Inspired by Mrs . Bennett 's story , he continued to seek out and share the untold narratives within his community , becoming a storyteller who bridged the gaps between generations . As graduation day approached , Jason reflected on his college journey . His time at Rivertown University had not only provided him with an education but had also instilled in him the power of storytelling to connect people and inspire change . With a heart full of gratitude and a portfolio filled with impactful stories , Jason ventured into the world , ready to make a difference , one narrative at a time .
""",
"""
In the vibrant city of Rivertown ,  lived a college student named   .  was a senior at Rivertown University , majoring in journalism and aspiring to be a storyteller .  had a keen interest in human experiences and a passion for shedding light on untold stories . One day , while perusing the local newspaper ,  stumbled upon an article about an elderly woman named  .   ,  had spent decades working as a librarian in the city . Intrigued by the snippet of  life story ,  felt compelled to dig deeper and share  tale with the world . After some research ,  learned that  .  had not only been a dedicated librarian but had also been involved in community initiatives , fostering a love for reading among children and organizing events to bring people together . However ,  own life story had largely remained in the shadows . Determined to uncover the full narrative ,  reached out to  .  and asked for an interview . To  surprise ,  welcomed the opportunity to share  story , and soon  sat down for a conversation  would unveil a lifetime of experiences . As  delved into  .  's past ,  discovered a woman  had overcome personal challenges , witnessed historical events , and touched the lives of many in Rivertown .  journey was filled with moments of joy , heartbreak , and resilience , making for a compelling narrative   couldn ' t wait to share with  readers . With meticulous attention to detail ,  crafted a feature article that not only highlighted  .  's contributions to the community but also captured the essence of  character . The story resonated with readers , and the local newspaper received an overwhelming response . Mrs .  became a beloved figure in Rivertown , celebrated for  dedication and the richness of  life story . The success of the article opened doors for  ,  found  on the path to becoming a respected journalist . Inspired by  .  's story ,  continued to seek out and share the untold narratives within  community , becoming a storyteller  bridged the gaps between generations . As graduation day approached ,  reflected on  college journey .  time at Rivertown University had not only provided  with an education but had also instilled in  the power of storytelling to connect people and inspire change . With a heart full of gratitude and a portfolio filled with impactful stories ,  ventured into the world , ready to make a difference , one narrative at a time .
""",
"""
In the bustling town of Summitville , there lived a college man named Lucas Turner . Lucas was a mechanical engineering major at Summitville Tech , known for his passion for building things from the ground up . His college life was a dynamic mix of late -night design sessions , hands -on projects , and a camaraderie with his fellow engineering students . One day , Lucas stumbled upon an old , abandoned workshop on the outskirts of town . The sight of forgotten tools and discarded materials sparked a fire in him . Inspired by the potential of the neglected space , he decided to breathe new life into it and create a collaborative hub for engineering students . Lucas rallied a group of like -minded friends , and together they transformed the workshop into the  "Innovation Forge . " It became a haven for students to work on personal projects , exchange ideas , and bring their engineering dreams to life . The space buzzed with the sounds of drills , the clanking of metal , and the hum of 3D printers as students collaborated on everything from robotics to sustainable energy solutions . As the Innovation Forge gained popularity , Lucas and his team decided to organize an annual engineering expo , showcasing the innovative projects that emerged from their workshop . The expo not only attracted students and faculty but also local businesses and industry professionals keen on discovering emerging talent . One year , Lucas and his team developed a prototype for a solar -powered irrigation system designed to help local farmers in Summitville . The project gained attention not only for its engineering prowess but also for its potential to make a real impact in the community . The Innovation Forge became a focal point for collaborative projects that aimed to address real -world challenges . Lucas 's journey through college was not just about earning a degree ; it was about leaving a lasting mark on the engineering community . As graduation approached , the Innovation Forge continued to thrive , and Lucas felt a sense of pride in the legacy he had created . The town of Summitville now had a vibrant hub of innovation , thanks to the hard work and dedication of Lucas Turner . His college experience became a testament to the transformative power of hands -on learning , collaboration , and the drive to make a positive impact on the world through engineering ingenuity .
""",
"""
In the bustling town of Summitville ,  lived a college man named   . Lucas was a mechanical engineering major at Summitville Tech , known for  passion for building things from the ground up .  college life was a dynamic mix of late -night design sessions , hands -on projects , and a camaraderie with  fellow engineering students . One day , Lucas stumbled upon an old , abandoned workshop on the outskirts of town . The sight of forgotten tools and discarded materials sparked a fire in  . Inspired by the potential of the neglected space ,  decided to breathe new life into  and create a collaborative hub for engineering students . Lucas rallied a group of like -minded friends , and together  transformed the workshop into the   " Innovation Forge . "  became a haven for students to work on personal projects , exchange ideas , and bring  engineering dreams to life . The space buzzed with the sounds of drills , the clanking of metal , and the hum of 3D printers as students collaborated on  from robotics to sustainable energy solutions . As the Innovation Forge gained popularity , Lucas and  team decided to organize an annual engineering expo , showcasing the innovative projects  emerged from  workshop . The expo not only attracted students and faculty but also local businesses and industry professionals keen on discovering emerging talent . One year , Lucas and  team developed a prototype for a solar -powered irrigation system designed to help local farmers in Summitville . The project gained attention not only for  engineering prowess but also for  potential to make a real impact in the community . The Innovation Forge became a focal point for collaborative projects  aimed to address real -world challenges . Lucas 's journey through college was not just about earning a degree ;  was about leaving a lasting mark on the engineering community . As graduation approached , the Innovation Forge continued to thrive , and Lucas felt a sense of pride in the legacy  had created . The town of Summitville now had a vibrant hub of innovation , thanks to the hard work and dedication of Lucas Turner .  college experience became a testament to the transformative power of hands -on learning , collaboration , and the drive to make a positive impact on the world through engineering ingenuity .
"""
]

vectorizedText = vectorizer.transform(text)
textsTransformed = pd.DataFrame(vectorizedText.toarray(), columns=vectorizer.get_feature_names_out())
predProb = model.predict_proba(textsTransformed)
pred = model.predict(textsTransformed)

yActual = [0, 0, 1, 1, 0, 0, 1, 1, 1, 1]
explainer = LimeTextExplainer(class_names=['Female', 'Male'])
for i in range(len(pred)):
    print(i + 1, ":")
    print("Male: ", predProb[i][1])
    print("Female: ", predProb[i][0])
    print("Predicted: ", pred[i])
    if(yActual[i] == 0):
        print("Actual: Female")
    else:
        print("Actual: Male")
        
    vectorized_text = vectorizer.transform([text[i]])
    predict_function = lambda x: model.predict_proba(vectorizer.transform(x))
    explanation = explainer.explain_instance(text[i], predict_function, num_features=20)
    top_words_lime = explanation.as_list()
    print(f"Top words for text response {i + 1}:")
    masculineWords = []
    feminineWords = []
    for word, score in top_words_lime:
        if score > 0:
            masculineWords.append((word, score))
        else:
            feminineWords.append((word, score))
    print("Masculine words: ", masculineWords)
    print("Feminine words: ", feminineWords)
    print("")

1 :
Male:  0.46639824
Female:  0.53360176
Predicted:  0
Actual: Female
Top words for text response 1:
Masculine words:  []
Feminine words:  [('Once', 0.0), ('upon', 0.0), ('a', 0.0), ('time', 0.0), ('in', 0.0), ('the', 0.0), ('vibrant', 0.0), ('city', 0.0), ('of', 0.0), ('Harmonyville', 0.0), ('there', 0.0), ('lived', 0.0), ('college', 0.0), ('student', 0.0), ('named', 0.0), ('Mia', 0.0), ('Rodriguez', 0.0), ('was', 0.0), ('junior', 0.0), ('majoring', 0.0)]

2 :
Male:  0.46639824
Female:  0.53360176
Predicted:  0
Actual: Female
Top words for text response 2:
Masculine words:  [('blossomed', 3.969483156802812e-32), ('proudly', 2.7338132267734537e-32), ('to', 2.584543509224092e-32), ('living', 1.522902858257441e-32), ('initiatives', 1.3218004826283423e-32), ('cultivate', 1.1799136570805102e-32), ('symbol', 1.1646976763743473e-32), ('both', 1.1635809657990455e-32), ('instead', 1.073609999430179e-32), ('nervously', 5.292648670055194e-33), ('panel', 4.74249912240328e-33)]
Feminine words:  [

BERT (very slow run on Google Colab or high performance machine)

In [None]:
# load data
dataset = pd.read_csv('gold_BUG.csv')
train, test = train_test_split(dataset, test_size=0.2, random_state=42)
# get samples with only neutral or stereotype sentence
train = train[train['stereotype'].isin([0, 1])]
test = test[test['stereotype'].isin([0, 1])]

# preprocess data
print("PREPROCESSING DATA")
train['sentence_text'] = train['sentence_text'].apply(add_space_before).apply(removeUnnecessaryWords)
test['sentence_text'] = test['sentence_text'].apply(add_space_before).apply(removeUnnecessaryWords)
print("TRAIN", train['sentence_text'])
print("TEST", test['sentence_text'])

tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

xTrain = tokenizer(list(train['sentence_text']), padding=True, truncation=True, return_tensors='pt')
xTest = tokenizer(list(test['sentence_text']), padding=True, truncation=True, return_tensors='pt')
xTrain = xTrain['input_ids'].numpy(), xTrain['attention_mask'].numpy(), xTrain['token_type_ids'].numpy()
xTest = xTest['input_ids'].numpy(), xTest['attention_mask'].numpy(), xTest['token_type_ids'].numpy()


yTrain = train['predicted gender']
yTest = test['predicted gender']

FileNotFoundError: [Errno 2] No such file or directory: 'gold_BUG.csv'

In [None]:
# train model
model = TFBertForSequenceClassification.from_pretrained('bert-base-uncased', num_labels=2)
# loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
# metric = tf.keras.metrics.SparseCategoricalAccuracy('accuracy')
# optimizer = tf.keras.optimizers.Adam(learning_rate=2e-5)

model.compile(loss='sparse_categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

yTrain = pd.get_dummies(yTrain)
yTest = pd.get_dummies(yTest)

model.fit(xTrain, yTrain, validation_data=(xTest, yTest), epochs=3)

model.save_pretrained('/content/drive/My Drive/mlModels/bertModel')
tokenizer.save_pretrained('/content/drive/My Drive/mlModels/bertTokenizer')

In [None]:
model = TFBertForSequenceClassification.from_pretrained('/content/drive/My Drive/mlModels/bertModel')
tokenizer = BertTokenizer.from_pretrained('/content/drive/My Drive/mlModels/bertTokenizer')

# accuracy
pred = model.predict(xTest).logits
pred = np.argmax(pred, axis=1)
yTest = np.argmax(yTest, axis=1)

pred = pred.tolist()
accuracy = accuracy_score(yTest, pred)
print("Accuracy:", accuracy)

# confusion matrix
cm = confusion_matrix(yTest, pred)
print("Confusion matrix:\n", cm)

In [None]:
# Load the BERT model and tokenizer
model = TFBertForSequenceClassification.from_pretrained('/content/drive/My Drive/mlModels/bertModel')
tokenizer = BertTokenizer.from_pretrained('/content/drive/My Drive/mlModels/bertTokenizer')

# List of texts to analyze
text = [
    """
    Once upon a time in the vibrant city of Harmonyville , there lived a college student named Mia Rodriguez . Mia was a junior majoring in environmental science at Rivertide University . Her love for nature and a deep sense of responsibility towards the planet fueled her determination to make a positive impact . One sunny afternoon , Mia stumbled upon a notice about the annual Green Innovation Challenge —an event where students could pitch eco -friendly projects to a panel of environmental experts . Inspired and eager to contribute , Mia decided to develop a sustainable urban gardening initiative called  "GreenHaven . " With her hands in the soil and a heart full of passion , Mia transformed an unused corner of the campus into a thriving community garden . She envisioned GreenHaven as a place where students could come together , learn about sustainable agriculture , and cultivate their own fruits and vegetables . Mia believed that this initiative could not only promote environmental consciousness but also foster a sense of community among her peers . As the garden flourished , so did Mia 's connection with her fellow students . The project became a hub of creativity , where ideas for sustainable living blossomed alongside the vibrant array of fruits and vegetables . Mia 's dedication and leadership drew the attention of both students and faculty alike . When the day of the Green Innovation Challenge arrived , Mia nervously but proudly presented GreenHaven to the panel of judges . The vision , dedication , and positive impact of her project resonated deeply , earning her the first prize and a scholarship for further environmental studies . Word of Mia 's success spread , and GreenHaven became a symbol of sustainable living on campus . Mia 's journey didn 't end with the competition ; instead , it marked the beginning of a new chapter . With the scholarship in hand , Mia continued her studies , conducting research on innovative ways to create sustainable urban environments . As Mia graduated from Rivertide University , she left behind a legacy of green initiatives and a campus that had been transformed by the power of community and sustainability . GreenHaven continued to thrive , inspiring future generations of students to think creatively about environmental issues . Mia 's story became a beacon of hope , showing that even a single college student with a passion for change could make a lasting impact on the world . And so , as Mia embarked on her journey beyond college , she carried with her not just a degree but the knowledge that small , meaningful actions could ripple into waves of positive transformation for the planet and its people .
    """,
    """
    Once upon a time in the vibrant city of Harmonyville ,  lived a college student named   .  was a junior majoring in environmental science at Rivertide University .  love for nature and a deep sense of responsibility towards the planet fueled  determination to make a positive impact . One sunny afternoon ,  stumbled upon a notice about the annual Green Innovation Challenge — an event where students could pitch eco -friendly projects to a panel of environmental experts . Inspired and eager to contribute ,  decided to develop a sustainable urban gardening initiative called   " GreenHaven . " With  hands in the soil and a heart full of passion ,  transformed an unused corner of the campus into a thriving community garden .  envisioned GreenHaven as a place where students could come together , learn about sustainable agriculture , and cultivate  own fruits and vegetables .  believed that this initiative could not only promote environmental consciousness but also foster a sense of community among  peers . As the garden flourished , so did  's connection with  fellow students . The project became a hub of creativity , where ideas for sustainable living blossomed alongside the vibrant array of fruits and vegetables .  's dedication and leadership drew the attention of both students and faculty alike . When the day of the Green Innovation Challenge arrived ,  nervously but proudly presented GreenHaven to the panel of judges . The vision , dedication , and positive impact of  project resonated deeply , earning  the first prize and a scholarship for further environmental studies . Word of  's success spread , and  became a symbol of sustainable living on campus .  's    t end with the competition ; instead ,  marked the beginning of a new chapter . With the scholarship in hand ,  continued  studies , conducting research on innovative ways to create sustainable urban environments . As  graduated from Rivertide University ,  left behind a legacy of green initiatives and a campus  had been transformed by the power of community and sustainability . GreenHaven continued to thrive , inspiring future generations of students to think creatively about environmental issues .  's story became a beacon of hope , showing that even a single college student with a passion for change could make a lasting impact on the world . And so , as  embarked on  journey beyond college ,  carried with  not just a degree but the knowledge  small , meaningful actions could ripple into waves of positive transformation for the planet and  people .
    """,
    """
    Once upon a time in the bustling city of Arcadia , there lived a college student named Alex Reynolds . Alex was a junior majoring in computer science at the prestigious Arcadia University . He was a diligent student with a passion for coding and a penchant for exploring the world of technology . One day , as Alex was immersed in his studies at the campus library , he stumbled upon an intriguing flyer .
    """,
    """
    Once upon a time in the bustling city of Arcadia , there lived a college student . This student was a junior majoring in computer science at the prestigious Arcadia University . They were a diligent student with a passion for coding and a penchant for exploring the world of technology . One day , as this student was immersed in their studies at the campus library , they stumbled upon an intriguing flyer .
    """,
    """
    Once upon a time in the vibrant college town of Crestwood , there lived a spirited young student named Emily . She was a sophomore at Crestwood University , pursuing a degree in environmental science with dreams of making a positive impact on the planet . Emily was known for her boundless enthusiasm , infectious energy , and a love for exploring the world around her . One crisp autumn day , Emily stumbled upon a flyer for an environmental awareness club called  "Green Harmony " on the college bulletin board . Intrigued and passionate about environmental causes , she decided to attend their next meeting . As Emily walked into the meeting room , she was greeted by a diverse group of students who shared her passion for sustainability . The club was planning an ambitious project to transform an unused campus space into a thriving community garden . Emily was immediately captivated by the idea and eagerly joined the efforts . The days turned into weeks as the Green Harmony team worked tirelessly , planning , planting , and nurturing their garden . Emily found herself forming deep connections with her fellow club members as they faced challenges and celebrated victories together . The project not only brought life to the neglected space but also breathed new life into Emily 's college experience . Amidst the busy academic schedule , Emily discovered a sense of purpose beyond textbooks and exams . The garden project became a symbol of unity , showcasing the power of collaboration and the positive impact a group of dedicated individuals could make . As the garden flourished , so did Emily 's personal growth . She learned about sustainable practices , organic gardening , and the importance of community engagement . Her once mundane college routine transformed into a fulfilling journey , where every day brought new lessons and opportunities . Word of the Green Harmony garden spread throughout the campus , attracting attention from both students and faculty . The college recognized the club 's efforts and even awarded them a grant to expand their project . Emily and her friends found themselves at the forefront of a campus -wide movement toward sustainability . Through this journey , Emily discovered not only her passion for environmental science but also her ability to lead and inspire change . The once ordinary college student had become a beacon of hope and inspiration for those around her . As the seasons changed , so did Emily and her friends , leaving behind a legacy of a thriving community garden and a more environmentally conscious campus . The tale of the college student who discovered her purpose in the embrace of nature and community echoed through the years , inspiring future generations of students to dream big and make a difference in the world .
    """,
]

# Define the actual labels (0 for Female, 1 for Male)
y_actual = [0, 1, 0, 1, 0, 1, 0, 1, 0, 1]  # Example labels, replace with actual labels

# Define a function for prediction
def predict_proba(texts):
    tokenized_texts = tokenizer(texts, padding=True, truncation=True, return_tensors="tf")
    tokenized_texts = tokenized_texts['input_ids'].numpy(), tokenized_texts['attention_mask'].numpy(), tokenized_texts['token_type_ids'].numpy()
    logits = model.predict(tokenized_texts).logits
    probabilities = np.exp(logits) / np.sum(np.exp(logits), axis=1, keepdims=True)
    return probabilities

# Use LIME to explain predictions
explainer = LimeTextExplainer(class_names=["Female", "Male"])
pred_proba = predict_proba(text[0])
print("PRED", pred_proba)
# Define a lambda function to predict probabilities for a single text instance
predict_proba_fn = lambda x: predict_proba(x)

# Explain the prediction using LIME
explanation = explainer.explain_instance(text[0], predict_proba_fn, num_features=10)
explanation.show_in_notebook(text=True)

Word Embeddings with training model through dataset
Requirements: python -m spacy download en_core_web_md

In [24]:
# load spacy pre trained word embeddings
nlp = spacy.load("en_core_web_md") 

def text_to_vectors(text):
    # doc = nlp(text)
    # return doc.vector
    return nlp(text).text

In [5]:
# load data
dataset = pd.read_csv('../datasets/BUG/gold_BUG.csv')
train, test = train_test_split(dataset, test_size=0.2, random_state=42)

# preprocess data
print("PREPROCESSING DATA")
train['sentence_text'] = train['sentence_text'].apply(add_space_before).apply(removeUnnecessaryWords)
test['sentence_text'] = test['sentence_text'].apply(add_space_before).apply(removeUnnecessaryWords)
print("TRAIN", train['sentence_text'])
print("TEST", test['sentence_text'])

# get samples with only neutral or stereotype sentence
train = train[train['stereotype'].isin([0, 1])]
test = test[test['stereotype'].isin([0, 1])]

# convert xTrain and xTest to vectors based on word embeddings
xTrain = np.array([text_to_vectors(text) for text in train['sentence_text']])
xTest = np.array([text_to_vectors(text) for text in test['sentence_text']])
yTrain = train['predicted gender']
yTest = test['predicted gender']

# train model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(xTrain, yTrain)

# save model
with open('../savedModels/randomForestModelSpacyEmbeddings.pkl', 'wb') as model_file:
    pickle.dump(model, model_file)

PREPROCESSING DATA


KeyboardInterrupt: 

In [6]:
# accuracy
pred = model.predict(xTest)

accuracy = accuracy_score(yTest, pred)
print("Accuracy:", accuracy)

# confusion matrix
cm = confusion_matrix(yTest, pred)
print("Confusion matrix:\n", cm)

Accuracy: 0.8132295719844358
Confusion matrix:
 [[  3  45]
 [  3 206]]


In [25]:
with open('../savedModels/randomForestModelSpacyEmbeddings.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

text = [
"""
Once upon a time in the vibrant city of Harmonyville , there lived a college student named Mia Rodriguez . Mia was a junior majoring in environmental science at Rivertide University . Her love for nature and a deep sense of responsibility towards the planet fueled her determination to make a positive impact . One sunny afternoon , Mia stumbled upon a notice about the annual Green Innovation Challenge —an event where students could pitch eco -friendly projects to a panel of environmental experts . Inspired and eager to contribute , Mia decided to develop a sustainable urban gardening initiative called  "GreenHaven . " With her hands in the soil and a heart full of passion , Mia transformed an unused corner of the campus into a thriving community garden . She envisioned GreenHaven as a place where students could come together , learn about sustainable agriculture , and cultivate their own fruits and vegetables . Mia believed that this initiative could not only promote environmental consciousness but also foster a sense of community among her peers . As the garden flourished , so did Mia 's connection with her fellow students . The project became a hub of creativity , where ideas for sustainable living blossomed alongside the vibrant array of fruits and vegetables . Mia 's dedication and leadership drew the attention of both students and faculty alike . When the day of the Green Innovation Challenge arrived , Mia nervously but proudly presented GreenHaven to the panel of judges . The vision , dedication , and positive impact of her project resonated deeply , earning her the first prize and a scholarship for further environmental studies . Word of Mia 's success spread , and GreenHaven became a symbol of sustainable living on campus . Mia 's journey didn 't end with the competition ; instead , it marked the beginning of a new chapter . With the scholarship in hand , Mia continued her studies , conducting research on innovative ways to create sustainable urban environments . As Mia graduated from Rivertide University , she left behind a legacy of green initiatives and a campus that had been transformed by the power of community and sustainability . GreenHaven continued to thrive , inspiring future generations of students to think creatively about environmental issues . Mia 's story became a beacon of hope , showing that even a single college student with a passion for change could make a lasting impact on the world . And so , as Mia embarked on her journey beyond college , she carried with her not just a degree but the knowledge that small , meaningful actions could ripple into waves of positive transformation for the planet and its people .
""",
"""
Once upon a time in the vibrant city of Harmonyville ,  lived a college student named   .  was a junior majoring in environmental science at Rivertide University .  love for nature and a deep sense of responsibility towards the planet fueled  determination to make a positive impact . One sunny afternoon ,  stumbled upon a notice about the annual Green Innovation Challenge — an event where students could pitch eco -friendly projects to a panel of environmental experts . Inspired and eager to contribute ,  decided to develop a sustainable urban gardening initiative called   " GreenHaven . " With  hands in the soil and a heart full of passion ,  transformed an unused corner of the campus into a thriving community garden .  envisioned GreenHaven as a place where students could come together , learn about sustainable agriculture , and cultivate  own fruits and vegetables .  believed that this initiative could not only promote environmental consciousness but also foster a sense of community among  peers . As the garden flourished , so did  's connection with  fellow students . The project became a hub of creativity , where ideas for sustainable living blossomed alongside the vibrant array of fruits and vegetables .  's dedication and leadership drew the attention of both students and faculty alike . When the day of the Green Innovation Challenge arrived ,  nervously but proudly presented GreenHaven to the panel of judges . The vision , dedication , and positive impact of  project resonated deeply , earning  the first prize and a scholarship for further environmental studies . Word of  's success spread , and  became a symbol of sustainable living on campus .  's    t end with the competition ; instead ,  marked the beginning of a new chapter . With the scholarship in hand ,  continued  studies , conducting research on innovative ways to create sustainable urban environments . As  graduated from Rivertide University ,  left behind a legacy of green initiatives and a campus  had been transformed by the power of community and sustainability . GreenHaven continued to thrive , inspiring future generations of students to think creatively about environmental issues .  's story became a beacon of hope , showing that even a single college student with a passion for change could make a lasting impact on the world . And so , as  embarked on  journey beyond college ,  carried with  not just a degree but the knowledge  small , meaningful actions could ripple into waves of positive transformation for the planet and  people .
""",
"""
Once upon a time in the bustling city of Arcadia , there lived a college student named Alex Reynolds . Alex was a junior majoring in computer science at the prestigious Arcadia University . He was a diligent student with a passion for coding and a penchant for exploring the world of technology . One day , as Alex was immersed in his studies at the campus library , he stumbled upon an intriguing flyer .
""",
"""
Once upon a time in the bustling city of Arcadia , there lived a college student . This student was a junior majoring in computer science at the prestigious Arcadia University . They were a diligent student with a passion for coding and a penchant for exploring the world of technology . One day , as this student was immersed in their studies at the campus library , they stumbled upon an intriguing flyer .
""",
"""
Once upon a time in the vibrant college town of Crestwood , there lived a spirited young student named Emily . She was a sophomore at Crestwood University , pursuing a degree in environmental science with dreams of making a positive impact on the planet . Emily was known for her boundless enthusiasm , infectious energy , and a love for exploring the world around her . One crisp autumn day , Emily stumbled upon a flyer for an environmental awareness club called  "Green Harmony " on the college bulletin board . Intrigued and passionate about environmental causes , she decided to attend their next meeting . As Emily walked into the meeting room , she was greeted by a diverse group of students who shared her passion for sustainability . The club was planning an ambitious project to transform an unused campus space into a thriving community garden . Emily was immediately captivated by the idea and eagerly joined the efforts . The days turned into weeks as the Green Harmony team worked tirelessly , planning , planting , and nurturing their garden . Emily found herself forming deep connections with her fellow club members as they faced challenges and celebrated victories together . The project not only brought life to the neglected space but also breathed new life into Emily 's college experience . Amidst the busy academic schedule , Emily discovered a sense of purpose beyond textbooks and exams . The garden project became a symbol of unity , showcasing the power of collaboration and the positive impact a group of dedicated individuals could make . As the garden flourished , so did Emily 's personal growth . She learned about sustainable practices , organic gardening , and the importance of community engagement . Her once mundane college routine transformed into a fulfilling journey , where every day brought new lessons and opportunities . Word of the Green Harmony garden spread throughout the campus , attracting attention from both students and faculty . The college recognized the club 's efforts and even awarded them a grant to expand their project . Emily and her friends found themselves at the forefront of a campus -wide movement toward sustainability . Through this journey , Emily discovered not only her passion for environmental science but also her ability to lead and inspire change . The once ordinary college student had become a beacon of hope and inspiration for those around her . As the seasons changed , so did Emily and her friends , leaving behind a legacy of a thriving community garden and a more environmentally conscious campus . The tale of the college student who discovered her purpose in the embrace of nature and community echoed through the years , inspiring future generations of students to dream big and make a difference in the world .
""",
"""
Once upon a time in the vibrant college town of Crestwood ,  lived a spirited young student named  .  was a sophomore at Crestwood University , pursuing a degree in environmental science with dreams of making a positive impact on the planet . Emily was known for  boundless enthusiasm , infectious energy , and a love for exploring the world around  . One crisp autumn day , Emily stumbled upon a flyer for an environmental awareness club called   " Green Harmony " on the college bulletin board . Intrigued and passionate about environmental causes ,  decided to attend  next meeting . As Emily walked into the meeting room ,  was greeted by a diverse group of students  shared  passion for sustainability . The club was planning an ambitious project to transform an unused campus space into a thriving community garden . Emily was immediately captivated by the idea and eagerly joined the efforts . The days turned into weeks as the   team worked tirelessly , planning , planting , and nurturing  garden . Emily found  forming deep connections with  fellow club members as  faced challenges and celebrated victories together . The project not only brought life to the neglected space but also breathed new life into Emily 's college experience . Amidst the busy academic schedule , Emily discovered a sense of purpose beyond textbooks and exams . The garden project became a symbol of unity , showcasing the power of collaboration and the positive impact a group of dedicated individuals could make . As the garden flourished , so did  's personal growth .  learned about sustainable practices , organic gardening , and the importance of community engagement .  once mundane college routine transformed into a fulfilling journey , where every day brought new lessons and opportunities . Word of the Green Harmony garden spread throughout the campus , attracting attention from both students and faculty . The college recognized the club 's efforts and even awarded  a grant to expand  project . Emily and  friends found  at the forefront of a campus -wide movement toward sustainability . Through this journey , Emily discovered not only  passion for environmental science but also  ability to lead and inspire change . The once ordinary college student had become a beacon of hope and inspiration for  around  . As the seasons changed , so did  and  friends , leaving behind a legacy of a thriving community garden and a more environmentally conscious campus . The tale of the college student  discovered  purpose in the embrace of nature and community echoed through the years , inspiring future generations of students to dream big and make a difference in the world .
""",
"""
In the vibrant city of Rivertown , there lived a college student named Jason Harris . Jason was a senior at Rivertown University , majoring in journalism and aspiring to be a storyteller . He had a keen interest in human experiences and a passion for shedding light on untold stories . One day , while perusing the local newspaper , Jason stumbled upon an article about an elderly woman named Mrs . Eleanor Bennett , who had spent decades working as a librarian in the city . Intrigued by the snippet of her life story , Jason felt compelled to dig deeper and share her tale with the world . After some research , Jason learned that Mrs . Bennett had not only been a dedicated librarian but had also been involved in community initiatives , fostering a love for reading among children and organizing events to bring people together . However , her own life story had largely remained in the shadows . Determined to uncover the full narrative , Jason reached out to Mrs . Bennett and asked for an interview . To his surprise , she welcomed the opportunity to share her story , and soon they sat down for a conversation that would unveil a lifetime of experiences . As Jason delved into Mrs . Bennett 's past , he discovered a woman who had overcome personal challenges , witnessed historical events , and touched the lives of many in Rivertown . Her journey was filled with moments of joy , heartbreak , and resilience , making for a compelling narrative that Jason couldn 't wait to share with his readers . With meticulous attention to detail , Jason crafted a feature article that not only highlighted Mrs . Bennett 's contributions to the community but also captured the essence of her character . The story resonated with readers , and the local newspaper received an overwhelming response . Mrs . Bennett became a beloved figure in Rivertown , celebrated for her dedication and the richness of her life story . The success of the article opened doors for Jason , who found himself on the path to becoming a respected journalist . Inspired by Mrs . Bennett 's story , he continued to seek out and share the untold narratives within his community , becoming a storyteller who bridged the gaps between generations . As graduation day approached , Jason reflected on his college journey . His time at Rivertown University had not only provided him with an education but had also instilled in him the power of storytelling to connect people and inspire change . With a heart full of gratitude and a portfolio filled with impactful stories , Jason ventured into the world , ready to make a difference , one narrative at a time .
""",
"""
In the vibrant city of Rivertown ,  lived a college student named   .  was a senior at Rivertown University , majoring in journalism and aspiring to be a storyteller .  had a keen interest in human experiences and a passion for shedding light on untold stories . One day , while perusing the local newspaper ,  stumbled upon an article about an elderly woman named  .   ,  had spent decades working as a librarian in the city . Intrigued by the snippet of  life story ,  felt compelled to dig deeper and share  tale with the world . After some research ,  learned that  .  had not only been a dedicated librarian but had also been involved in community initiatives , fostering a love for reading among children and organizing events to bring people together . However ,  own life story had largely remained in the shadows . Determined to uncover the full narrative ,  reached out to  .  and asked for an interview . To  surprise ,  welcomed the opportunity to share  story , and soon  sat down for a conversation  would unveil a lifetime of experiences . As  delved into  .  's past ,  discovered a woman  had overcome personal challenges , witnessed historical events , and touched the lives of many in Rivertown .  journey was filled with moments of joy , heartbreak , and resilience , making for a compelling narrative   couldn ' t wait to share with  readers . With meticulous attention to detail ,  crafted a feature article that not only highlighted  .  's contributions to the community but also captured the essence of  character . The story resonated with readers , and the local newspaper received an overwhelming response . Mrs .  became a beloved figure in Rivertown , celebrated for  dedication and the richness of  life story . The success of the article opened doors for  ,  found  on the path to becoming a respected journalist . Inspired by  .  's story ,  continued to seek out and share the untold narratives within  community , becoming a storyteller  bridged the gaps between generations . As graduation day approached ,  reflected on  college journey .  time at Rivertown University had not only provided  with an education but had also instilled in  the power of storytelling to connect people and inspire change . With a heart full of gratitude and a portfolio filled with impactful stories ,  ventured into the world , ready to make a difference , one narrative at a time .
""",
"""
In the bustling town of Summitville , there lived a college man named Lucas Turner . Lucas was a mechanical engineering major at Summitville Tech , known for his passion for building things from the ground up . His college life was a dynamic mix of late -night design sessions , hands -on projects , and a camaraderie with his fellow engineering students . One day , Lucas stumbled upon an old , abandoned workshop on the outskirts of town . The sight of forgotten tools and discarded materials sparked a fire in him . Inspired by the potential of the neglected space , he decided to breathe new life into it and create a collaborative hub for engineering students . Lucas rallied a group of like -minded friends , and together they transformed the workshop into the  "Innovation Forge . " It became a haven for students to work on personal projects , exchange ideas , and bring their engineering dreams to life . The space buzzed with the sounds of drills , the clanking of metal , and the hum of 3D printers as students collaborated on everything from robotics to sustainable energy solutions . As the Innovation Forge gained popularity , Lucas and his team decided to organize an annual engineering expo , showcasing the innovative projects that emerged from their workshop . The expo not only attracted students and faculty but also local businesses and industry professionals keen on discovering emerging talent . One year , Lucas and his team developed a prototype for a solar -powered irrigation system designed to help local farmers in Summitville . The project gained attention not only for its engineering prowess but also for its potential to make a real impact in the community . The Innovation Forge became a focal point for collaborative projects that aimed to address real -world challenges . Lucas 's journey through college was not just about earning a degree ; it was about leaving a lasting mark on the engineering community . As graduation approached , the Innovation Forge continued to thrive , and Lucas felt a sense of pride in the legacy he had created . The town of Summitville now had a vibrant hub of innovation , thanks to the hard work and dedication of Lucas Turner . His college experience became a testament to the transformative power of hands -on learning , collaboration , and the drive to make a positive impact on the world through engineering ingenuity .
""",
"""
In the bustling town of Summitville ,  lived a college man named   . Lucas was a mechanical engineering major at Summitville Tech , known for  passion for building things from the ground up .  college life was a dynamic mix of late -night design sessions , hands -on projects , and a camaraderie with  fellow engineering students . One day , Lucas stumbled upon an old , abandoned workshop on the outskirts of town . The sight of forgotten tools and discarded materials sparked a fire in  . Inspired by the potential of the neglected space ,  decided to breathe new life into  and create a collaborative hub for engineering students . Lucas rallied a group of like -minded friends , and together  transformed the workshop into the   " Innovation Forge . "  became a haven for students to work on personal projects , exchange ideas , and bring  engineering dreams to life . The space buzzed with the sounds of drills , the clanking of metal , and the hum of 3D printers as students collaborated on  from robotics to sustainable energy solutions . As the Innovation Forge gained popularity , Lucas and  team decided to organize an annual engineering expo , showcasing the innovative projects  emerged from  workshop . The expo not only attracted students and faculty but also local businesses and industry professionals keen on discovering emerging talent . One year , Lucas and  team developed a prototype for a solar -powered irrigation system designed to help local farmers in Summitville . The project gained attention not only for  engineering prowess but also for  potential to make a real impact in the community . The Innovation Forge became a focal point for collaborative projects  aimed to address real -world challenges . Lucas 's journey through college was not just about earning a degree ;  was about leaving a lasting mark on the engineering community . As graduation approached , the Innovation Forge continued to thrive , and Lucas felt a sense of pride in the legacy  had created . The town of Summitville now had a vibrant hub of innovation , thanks to the hard work and dedication of Lucas Turner .  college experience became a testament to the transformative power of hands -on learning , collaboration , and the drive to make a positive impact on the world through engineering ingenuity .
""",
"""
Once upon a time, in the bustling city of Academia, there lived an ambitious scientist named Alex. He was a dedicated soul with a passion to unravel the mysteries of science. Alex led an insatiable curiosity that led him to the hallowed halls of the prestigious Kepler University. From a humble background, Alex worked tirelessly to secure a scholarship that would pave the way to his academic journey. The moment he stepped onto the university grounds, he felt a surge of determination to make the most of the golden opportunity. Alex's days were filled with challenging lectures, fascinating experiments, countless hours in the library. His favorite haunt was the physics lab, where he immersed himself in the world of particles, equations, the boundless wonders of the universe. The spark in his eyes ignited with each breakthrough, no matter the size. Outside the realm of academia, Alex was known to be kind to lend a helping hand. Tutoring a struggling classmate or participating in community service projects, he believed in the power of knowledge to uplift not just himself but those around him. As the semesters passed, Alex's reputation as a brilliant, compassionate scientist grew. Professors admired his commitment to learning; fellow scientists sought his guidance. Alex remained humble, always acknowledging the contributions of others, fostering a sense of camaraderie.
""",
"""
bustl town  summitvil ,  live  colleg  name   . luca   mechan engin major  summitvil tech , know   passion  build thing   ground  .  colleg life   dynam mix  late -night design session , hand -on project ,   camaraderi   fellow engin student .  day , luca stumbl   old , abandon workshop   outskirt  town .  sight  forget tool  discard materi spark  fire   . inspir   potenti   neglect space ,  decid  breath new life    creat  collabor hub  engin student . luca ralli  group  like -mind friend ,    transform  workshop     " innov forg . "    haven  student  work  person project , exchang idea ,  bring  engin dream  life .  space buzz   sound  drill ,  clank  metal ,   hum  3d printer  student collabor    robot  sustain energi solut .   innov forg gain popular , luca   team decid  organ  annual engin expo , showcas  innov project  emerg   workshop .  expo   attract student  faculti   local busi  industri profession keen  discov emerg talent .  year , luca   team develop  prototyp   solar -power irrig system design  help local farmer  summitvil .  project gain attent     engin prowess     potenti    real impact   commun .  innov forg   focal point  collabor project  aim  address real -world challeng . luca  journey  colleg     earn  degre ;    leav  last mark   engin commun .  graduat approach ,  innov forg continu  thrive ,  luca feel  sens  pride   legaci   creat .  town  summitvil    vibrant hub  innov , thank   hard work  dedic  luca turner .  colleg experi   testament   transform power  hand -on learn , collabor ,   drive    posit impact   world  engin ingenu .
""",
"""
Once upon a time in the bustling city of Arcadia, there lived a spirited and ambitious young woman named Emma Turner. Emma was a sophomore at Arcadia University, majoring in environmental science with a deep passion for sustainability and conservation. Her days were filled with classes, environmental club meetings, and the occasional part-time job at a local bookstore. One sunny afternoon, Emma received an invitation to participate in an environmental research project that would take her deep into the heart of the nearby Redwood Forest. The project, led by renowned ecologist Dr. Victoria Hayes, aimed to study the flora and fauna of the forest to better understand its ecosystem and potential threats. Excitement and anticipation bubbled within Emma as she packed her bags and set off on the adventure of a lifetime. Alongside a diverse group of fellow students, Emma embarked on a journey into the towering trees and lush undergrowth of the Redwood Forest. As the days unfolded, Emma found herself enchanted by the beauty and complexity of the ecosystem. She meticulously documented the various plant species, observed elusive wildlife, and collaborated with her peers on data collection. Amidst the research, Emma formed deep connections with her teammates, each of them bringing a unique perspective to the project. However, the tranquility of the forest was disrupted when the team stumbled upon signs of illegal logging. Deeply troubled by the discovery, Emma and her friends decided to take action. They used their findings to raise awareness both on campus and within the local community, organizing protests and initiating a campaign to protect the Redwood Forest. Emma's dedication caught the attention of local environmental organizations, and soon she found herself at the forefront of a movement to preserve not only the Redwood Forest but also other endangered ecosystems. Her passion and leadership skills shone brightly as she advocated for sustainable practices and conservation efforts. Through it all, Emma continued to balance her academic responsibilities, never losing sight of her dream to make a positive impact on the environment. As the campaign gained momentum, Emma's story became an inspiration to fellow students and activists alike. The once-unassuming college student had transformed into a force for change, proving that passion, determination, and a strong sense of purpose could make a significant difference in the world. And so, as the sun set over the Redwood Forest, casting a warm glow on the towering trees, Emma reflected on her journey. The adventure had not only shaped her college experience but had set her on a path to contribute meaningfully to the planet's well-being. In the heart of nature, surrounded by friends and a shared sense of purpose, Emma's story continued to unfold, leaving an indelible mark on both her and the world she sought to protect.
""",
"""
time   bustl citi  arcadia ,  live  spirit  ambiti young  name   .    sophomor  arcadia univers , major  environment scienc   deep passion  sustain  conserv .  day  fill  class , environment club meet ,   occasion  -time job   local bookstor .  sunni afternoon ,  receiv  invit  particip   environment research project     deep   heart   nearbi redwood forest .  project , lead  renown ecologist dr .   , aim  studi  flora  fauna   forest  well understand  ecosystem  potenti threat . excit  anticip bubbl     pack  bag  set    adventur   lifetim . alongsid  divers group  fellow student ,  embark   journey   tower tree  lush undergrowth   redwood forest .   day unfold ,  find  enchant   beauti  complex   ecosystem .  meticul document   plant speci , observ elus wildlif ,  collabor   peer  datum collect . amidst  research ,  form deep connect   teammat ,    bring  uniqu perspect   project .  ,  tranquil   forest  disrupt   team stumbl  sign  illeg log . deepli troubl   discoveri ,    friend decid   action .    find  rais awar   campu    local commun , organ protest  initi  campaign  protect  redwood forest .   dedic catch  attent  local environment organ ,  soon  find    forefront   movement  preserv    redwood forest    endang ecosystem .  passion  leadership skill shine brightli   advoc  sustain practic  conserv effort .    ,  continu  balanc  academ respons ,  lose sight   dream    posit impact   environ .   campaign gain momentum ,   stori   inspir  fellow student  activist alik .   -unassum colleg student  transform   forc  chang , prove  passion , determin ,   strong sens  purpos    signific differ   world .   ,   sun set   redwood forest , cast  warm glow   tower tree ,  reflect   journey .  adventur    shape  colleg experi   set    path  contribut meaning   planet   -be .   heart  natur , surround  friend   share sens  purpos ,   stori continu  unfold , leav  indel mark      world  seek  protect .
"""
]
transformedText = np.array([text_to_vectors(t) for t in text])
explainer = LimeTextExplainer(class_names=['female', 'male'])
predProb = model.predict_proba(transformedText)
pred = model.predict(transformedText)
predict_function = lambda x: model.predict_proba(np.array([text_to_vectors(x)]))
print("PRED", pred)
print("PRED PROB", predProb)

# Test with a single string
single_text = text[0]
print("Type of single_text:", type(single_text))
print("Single text:", single_text)
explanation = explainer.explain_instance(single_text, predict_function, num_features=20)
top_words_lime = explanation.as_list()
print("Top words for single text:")
print(top_words_lime)

yActual = [0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0]
for i in range(len(pred)):
    print(i + 1, ":")
    print("Male: ", predProb[i][1])
    print("Female: ", predProb[i][0])
    print("Predicted: ", pred[i])
    if(yActual[i] == 0):
        print("Actual: Female")
    else:
        print("Actual: Male")
    
    
    explanation = explainer.explain_instance(text[i], predict_function, num_features=20)
    top_words_lime = explanation.as_list()
    print(f"Top words for text response {i + 1}:")
    masculineWords = []
    feminineWords = []
    for word, score in top_words_lime:
        if score > 0:
            masculineWords.append((word, score))
        else:
            feminineWords.append((word, score))
    print("Masculine words: ", masculineWords)
    print("Feminine words: ", feminineWords)
    print("")

ValueError: could not convert string to float: '\nOnce upon a time in the vibrant city of Harmonyville , there lived a college student named Mia Rodriguez . Mia was a junior majoring in environmental science at Rivertide University . Her love for nature and a deep sense of responsibility towards the planet fueled her determination to make a positive impact . One sunny afternoon , Mia stumbled upon a notice about the annual Green Innovation Challenge —an event where students could pitch eco -friendly projects to a panel of environmental experts . Inspired and eager to contribute , Mia decided to develop a sustainable urban gardening initiative called  "GreenHaven . " With her hands in the soil and a heart full of passion , Mia transformed an unused corner of the campus into a thriving community garden . She envisioned GreenHaven as a place where students could come together , learn about sustainable agriculture , and cultivate their own fruits and vegetables . Mia believed that this initiative could not only promote environmental consciousness but also foster a sense of community among her peers . As the garden flourished , so did Mia \'s connection with her fellow students . The project became a hub of creativity , where ideas for sustainable living blossomed alongside the vibrant array of fruits and vegetables . Mia \'s dedication and leadership drew the attention of both students and faculty alike . When the day of the Green Innovation Challenge arrived , Mia nervously but proudly presented GreenHaven to the panel of judges . The vision , dedication , and positive impact of her project resonated deeply , earning her the first prize and a scholarship for further environmental studies . Word of Mia \'s success spread , and GreenHaven became a symbol of sustainable living on campus . Mia \'s journey didn \'t end with the competition ; instead , it marked the beginning of a new chapter . With the scholarship in hand , Mia continued her studies , conducting research on innovative ways to create sustainable urban environments . As Mia graduated from Rivertide University , she left behind a legacy of green initiatives and a campus that had been transformed by the power of community and sustainability . GreenHaven continued to thrive , inspiring future generations of students to think creatively about environmental issues . Mia \'s story became a beacon of hope , showing that even a single college student with a passion for change could make a lasting impact on the world . And so , as Mia embarked on her journey beyond college , she carried with her not just a degree but the knowledge that small , meaningful actions could ripple into waves of positive transformation for the planet and its people .\n'

Word Embeddings using no ML model and only cosine similarity with "man" and "woman"

In [30]:
# load word vectors
wordVectors = KeyedVectors.load_word2vec_format('../wordEmbeddings/GoogleNews-vectors-negative300.bin', binary=True)
manVector = wordVectors['man']
womanVector = wordVectors['woman']

# get average vector
def text_to_average_vector(text):
    if text.strip() == "":
        return None
    words = text.split()
    vectors = [wordVectors[word] for word in words if word in wordVectors.vocab]
    if len(vectors) == 0:
        return None
    return np.mean(vectors, axis=0)

# use cosine similarity to predict bais
def predict_bias(inputText):
    similaritiesToMan = []
    similaritiesToWoman = []
    words = inputText.split()
    
    # go through each word and calculate cosine similarity
    for word in words:
        if word in wordVectors.key_to_index:
            similarityToMan = cosine_similarity(wordVectors[word].reshape(1, -1), manVector.reshape(1, -1))
            similarityToWoman = cosine_similarity(wordVectors[word].reshape(1, -1), womanVector.reshape(1, -1))
            similaritiesToMan.append(similarityToMan)
            similaritiesToWoman.append(similarityToWoman)
    
    # calculate average
    if len(similaritiesToMan) == 0 or len(similaritiesToWoman) == 0:
        return None, None
    avgSimilarityToMan = np.mean(similaritiesToMan)
    avgSimilarityToWoman = np.mean(similaritiesToWoman)
    
    # normalize
    probMaleBias = (avgSimilarityToMan / (avgSimilarityToMan + avgSimilarityToWoman)) * 100
    probFemaleBias = (avgSimilarityToWoman / (avgSimilarityToMan + avgSimilarityToWoman)) * 100
    
    return probMaleBias, probFemaleBias

# testing
inputText = """
Once upon a time, in the bustling city of Academia, there lived an ambitious scientist named Alex. He was a dedicated soul with a passion to unravel the mysteries of science. Alex led an insatiable curiosity that led him to the hallowed halls of the prestigious Kepler University. From a humble background, Alex worked tirelessly to secure a scholarship that would pave the way to his academic journey. The moment he stepped onto the university grounds, he felt a surge of determination to make the most of the golden opportunity. Alex's days were filled with challenging lectures, fascinating experiments, countless hours in the library. His favorite haunt was the physics lab, where he immersed himself in the world of particles, equations, the boundless wonders of the universe. The spark in his eyes ignited with each breakthrough, no matter the size. Outside the realm of academia, Alex was known to be kind to lend a helping hand. Tutoring a struggling classmate or participating in community service projects, he believed in the power of knowledge to uplift not just himself but those around him. As the semesters passed, Alex's reputation as a brilliant, compassionate scientist grew. Professors admired his commitment to learning; fellow scientists sought his guidance. Alex remained humble, always acknowledging the contributions of others, fostering a sense of camaraderie.
"""
addSpaceBeforeText = add_space_before(inputText)
inputText = removeUnnecessaryWords(addSpaceBeforeText)

probMaleBias, probFemaleBias = predict_bias(inputText)

print("Male Percentage:", probMaleBias)
print("Female Percentage:", probFemaleBias)

if probMaleBias > probFemaleBias:
    bias = "male" 
else:
    bias = "female"
print("Predicted bias:", bias)

# bias scores for each word
biasScores = {}
words = inputText.split()
for word in words:
    if word in wordVectors.key_to_index:
        similarityToMan = cosine_similarity(wordVectors[word].reshape(1, -1), manVector.reshape(1, -1))
        similarityToWoman = cosine_similarity(wordVectors[word].reshape(1, -1), womanVector.reshape(1, -1))
        biasScores[word] = (similarityToMan - similarityToWoman)[0][0]
sortedBiasScores = sorted(biasScores.items(), key=lambda item: item[1], reverse=True)

masculineWords = [(word, score) for word, score in sortedBiasScores if score > 0][:10]
feminineWords = [(word, score) for word, score in sortedBiasScores if score < 0][:10]

print("Masculine words:", masculineWords)
print("Feminine words:", feminineWords)


Male Percentage: 56.885313987731934
Female Percentage: 43.11468303203583
Predicted bias: male
Masculine words: [('brilliant', 0.11809299), ('favorit', 0.11309473), ('experi', 0.08384444), ('commit', 0.07793382), ('physic', 0.07535678), ('fellow', 0.072152555), ('boundless', 0.070789225), ('pass', 0.06676336), ('hallow', 0.062647), ('univers', 0.058935393)]
Feminine words: [('realm', -0.0017680153), ('opportun', -0.005584955), ('lend', -0.006147187), ('hour', -0.010244541), ('background', -0.010786898), ('curios', -0.015917696), ('compassion', -0.016747624), ('feel', -0.01801464), ('journey', -0.018164247), ('hall', -0.01831232)]
