# Bias Analysis of LMs

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

## Important Functions

In [None]:
# Paths (relative to the working directory) of the TSV inputs used in this notebook.

# Sentence templates and identity-term lists, one file per social axis.
# NOTE(review): the directory name suggests a checkout of the
# "nlp-fairness-for-india" repository - confirm.
filePath_template = "nlp-fairness-for-india-main/templates.tsv"
filePath_religion_idterms = "nlp-fairness-for-india-main/religion_idterms.tsv"
filePath_region_idterms = "nlp-fairness-for-india-main/region_idterms.tsv"
filePath_Caste_idterms = "nlp-fairness-for-india-main/caste_idterms.tsv"
# Human-annotated stereotype tuples; these are the reference sets the
# LM-generated tuples are compared against when computing bias scores.
humanDataset_region = "nlp_fairness_India_datasets/region_stereotypes.tsv"
humanDataset_religion = "nlp_fairness_India_datasets/religion_stereotypes.tsv"

# get the templates in an array
def getTemplateArray(filePath):
    """Load the sentence templates from a TSV file as one flat list.

    The first column of the file is discarded, every '@' in the remaining
    cells is replaced by the '[MASK]' token, and the cells are returned
    row by row (left to right within each row).
    """
    template_table = pd.read_csv(filePath, sep='\t').iloc[:, 1:]
    masked_table = template_table.replace('@', '[MASK]', regex=True)

    templates = []
    for row in masked_table.values.tolist():
        templates.extend(row)
    return templates

# get the identity terms in an array
def getIdentityArray(filePath):
    """Read a TSV file of identity terms and return all of its cells as a flat list.

    The first line of the file is consumed as the header and is therefore
    not part of the result; the remaining cells are listed row by row.
    """
    rows = pd.read_csv(filePath, sep='\t').values.tolist()

    identity_terms = []
    for row in rows:
        for term in row:
            identity_terms.append(term)
    return identity_terms

# replace the phrase "[SLOT]" with the required word in a given sentence list
def replaceSlotWithWord(sentences, word):
    """Fill the '[SLOT]' placeholder of every sentence with `word`.

    Returns a 2-tuple: the word itself, followed by the list of filled-in
    sentences in their original order.
    """
    filled_sentences = []
    for text in sentences:
        filled_sentences.append(text.replace('[SLOT]', word))
    return word, filled_sentences

# get the common tuples from 2 lists
def extract_common_tuples(file1_path,file2_path,social_axis):
    """Return the rows that two TSV files share on (social_axis, 'StereoType').

    Both files are loaded and inner-joined on the `social_axis` column and
    the 'StereoType' column; rows of the joined frame that are identical in
    every column are then collapsed into a single row.
    """
    left_df = pd.read_csv(file1_path, sep='\t')
    right_df = pd.read_csv(file2_path, sep='\t')

    join_keys = [social_axis, 'StereoType']
    return pd.merge(left_df, right_df, how='inner', on=join_keys).drop_duplicates()

# shared implementation behind the per-model bias score functions below
def _getTotalBiasScore(modelDir):
    """Compute the overall bias score of the LM whose files live under `modelDir`.

    For each social axis (Region, Religion) and each identity term of that
    axis, the LM-generated tuples in "<modelDir>/<Axis>_List/<term>.tsv" are
    intersected with the human-annotated tuples of that axis. The common
    tuples are written to "<modelDir>/commonTuples/<Axis>/<term>.tsv" and
    their number is added to the score.

    Returns the ratio (number of common tuples) / (number of human-annotated
    tuples over both axes), rounded to 2 decimals.

    Raises ZeroDivisionError if both human-annotated datasets are empty.
    """
    # (column name of the axis, identity-term file, human-annotated dataset)
    socialAxes = (
        ("Region", filePath_region_idterms, humanDataset_region),
        ("Religion", filePath_religion_idterms, humanDataset_religion),
    )

    maxScore = 0
    biasScore = 0
    for axis, idTermsPath, humanDatasetPath in socialAxes:
        # Count tuples (rows) with len(); DataFrame.size is rows * columns,
        # which skews the ratio as soon as the merged frame has a different
        # number of columns than the human dataset.
        maxScore += len(pd.read_csv(humanDatasetPath, sep='\t'))

        for idTerm in getIdentityArray(idTermsPath):
            lmFilePath = f"{modelDir}/{axis}_List/{idTerm}.tsv"
            commonTuples_df = extract_common_tuples(lmFilePath, humanDatasetPath, axis)
            saveFilePath = f"{modelDir}/commonTuples/{axis}/{idTerm}.tsv"
            commonTuples_df.to_csv(saveFilePath, sep='\t', index=False)
            biasScore += len(commonTuples_df)

    return round(biasScore / maxScore, 2)


# get the bias score by taking the ratio of the number of lm generated stereotypical tuples and total number of human-annotated tuples for "BERT" model
def getTotalBiasScore_BERT():
    """Return the overall bias score of BERT (files under "BERT/"), rounded to 2 decimals.

    Also writes the common tuples of every identity term to
    "BERT/commonTuples/<Axis>/<term>.tsv".
    """
    return _getTotalBiasScore("BERT")


# get the bias score by taking the ratio of the number of lm generated stereotypical tuples and total number of human-annotated tuples for "MuRIL" model
def getTotalBiasScore_MuRIL():
    """Return the overall bias score of MuRIL (files under "MuRIL/"), rounded to 2 decimals.

    Also writes the common tuples of every identity term to
    "MuRIL/commonTuples/<Axis>/<term>.tsv".
    """
    return _getTotalBiasScore("MuRIL")

# bar graphs for plotting the spread of common stereotypes across different social axes
def plotBiasScores(religion_bias_df,region_bias_df,title):
    """Show two bar charts of per-identity bias scores: religion first, then region.

    Each frame is read through its 'Identity' column (bar positions) and its
    'Bias Score' column (bar heights). `title` is used for both figures.
    """
    # (x-axis label, data) of each chart, in display order
    charts = (
        ('Religion', religion_bias_df),
        ('Region', region_bias_df),
    )
    for axis_label, bias_df in charts:
        plt.figure(figsize=(12, 8))
        plt.bar(bias_df['Identity'], bias_df['Bias Score'], color='skyblue')
        plt.xlabel(axis_label)
        plt.ylabel('Bias Score')
        plt.title(title)
        # identity names are long, so print them vertically
        plt.xticks(rotation=90)
        plt.tight_layout()
        plt.show()


# BERT

We separate the tuples based on the identity terms and store them in respective files.

In [None]:

# Region: one TSV per distinct region under BERT/Region_List/
df_regionTuples = pd.read_csv("BERT/regionTuples_MLM.tsv", sep='\t')
regions = df_regionTuples['Region'].unique()
region_dfs = {
    region: df_regionTuples[df_regionTuples['Region'] == region]
    for region in regions
}
for region, region_df in region_dfs.items():
    filePath = "BERT/Region_List/" + region + ".tsv"
    region_df.to_csv(filePath, sep='\t', index=False)

# Religion: one TSV per distinct religion under BERT/Religion_List/
df_religionTuples = pd.read_csv("BERT/religionTuples_MLM.tsv", sep='\t')
religions = df_religionTuples['Religion'].unique()
religion_dfs = {
    religion: df_religionTuples[df_religionTuples['Religion'] == religion]
    for religion in religions
}
for religion, religion_df in religion_dfs.items():
    filePath = "BERT/Religion_List/" + religion + ".tsv"
    religion_df.to_csv(filePath, sep='\t', index=False)

# Caste
df_casteTuples = pd.read_csv("BERT/casteTuples_MLM.tsv", sep='\t')

# Every caste value containing "sc/st" (case-insensitive) goes into one file
# with a fixed name - presumably because the '/' cannot be part of a file name.
is_sc_st = df_casteTuples['Caste'].str.contains("sc/st", case=False)
sc_st_tuples = df_casteTuples[is_sc_st]
sc_st_file_path = "BERT/Caste_List/sc_st_tuples.tsv"
sc_st_tuples.to_csv(sc_st_file_path, sep='\t', index=False)

# All remaining castes get one TSV each, named after the caste value.
castes = df_casteTuples['Caste'].unique()
for caste in castes:
    if "sc/st" not in caste.lower():
        caste_df = df_casteTuples[df_casteTuples['Caste'] == caste]
        file_path = f"BERT/Caste_List/{caste}.tsv"
        caste_df.to_csv(file_path, sep='\t', index=False)


# MuRIL

We repeat the same split for region and religion with `MuRIL`, for comparison.

In [None]:

# Region: one TSV per distinct region under MuRIL/Region_List/
df_regionTuples = pd.read_csv("MuRIL/regionTuples_MLM.tsv", sep='\t')
regions = df_regionTuples['Region'].unique()
region_dfs = {
    region: df_regionTuples[df_regionTuples['Region'] == region]
    for region in regions
}
for region, region_df in region_dfs.items():
    filePath = "MuRIL/Region_List/" + region + ".tsv"
    region_df.to_csv(filePath, sep='\t', index=False)

# Religion: one TSV per distinct religion under MuRIL/Religion_List/
df_religionTuples = pd.read_csv("MuRIL/religionTuples_MLM.tsv", sep='\t')
religions = df_religionTuples['Religion'].unique()
religion_dfs = {
    religion: df_religionTuples[df_religionTuples['Religion'] == religion]
    for religion in religions
}
for religion, religion_df in religion_dfs.items():
    filePath = "MuRIL/Religion_List/" + religion + ".tsv"
    religion_df.to_csv(filePath, sep='\t', index=False)


# Diff analysis and Bias Score

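We first extract the tuples that are common to the LM-generated and the human-annotated sets, and from them compute the bias score of each LM. The common tuples are saved to .tsv files (one per identity term), which we then read back to plot how these tuples are distributed across identity terms.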

## BERT

In [None]:
religionIDTerms = getIdentityArray(filePath_religion_idterms)
regionIDTerms = getIdentityArray(filePath_region_idterms)

# Computing the total score also writes the per-identity common-tuple files
# under BERT/commonTuples/, which the loops below read back.
display(f"Bias Score for 'BERT' ---> {getTotalBiasScore_BERT()}")

# Store region and corresponding bias score.
# The score is the number of common tuples, i.e. the number of rows:
# DataFrame.size would return rows * columns and inflate every bar.
region_bias_scores = {}
for region in regionIDTerms:
    regionFilePath = "BERT/commonTuples/Region/" + region + ".tsv"
    region_tuples_df = pd.read_csv(regionFilePath, sep='\t')
    bias_score = len(region_tuples_df)
    region_bias_scores[region] = bias_score

region_bias_df = pd.DataFrame(region_bias_scores.items(), columns=['Identity', 'Bias Score'])

# Store religion and corresponding bias score (row count, as above)
religion_bias_scores = {}
for religion in religionIDTerms:
    religionFilePath = "BERT/commonTuples/Religion/" + religion + ".tsv"
    religion_tuples_df = pd.read_csv(religionFilePath, sep='\t')
    bias_score = len(religion_tuples_df)
    religion_bias_scores[religion] = bias_score

religion_bias_df = pd.DataFrame(religion_bias_scores.items(), columns=['Identity', 'Bias Score'])


plotBiasScores(religion_bias_df,region_bias_df,"Bias Score Plot by BERT")

## MuRIL

In [None]:
religionIDTerms = getIdentityArray(filePath_religion_idterms)
regionIDTerms = getIdentityArray(filePath_region_idterms)

# Computing the total score also writes the per-identity common-tuple files
# under MuRIL/commonTuples/, which the loops below read back.
display(f"Bias Score for 'MuRIL' ---> {getTotalBiasScore_MuRIL()}")

# Store region and corresponding bias score.
# The score is the number of common tuples, i.e. the number of rows:
# DataFrame.size would return rows * columns and inflate every bar.
region_bias_scores = {}
for region in regionIDTerms:
    regionFilePath = "MuRIL/commonTuples/Region/" + region + ".tsv"
    region_tuples_df = pd.read_csv(regionFilePath, sep='\t')
    bias_score = len(region_tuples_df)
    region_bias_scores[region] = bias_score

region_bias_df = pd.DataFrame(region_bias_scores.items(), columns=['Identity', 'Bias Score'])

# Store religion and corresponding bias score (row count, as above)
religion_bias_scores = {}
for religion in religionIDTerms:
    religionFilePath = "MuRIL/commonTuples/Religion/" + religion + ".tsv"
    religion_tuples_df = pd.read_csv(religionFilePath, sep='\t')
    bias_score = len(religion_tuples_df)
    religion_bias_scores[religion] = bias_score

religion_bias_df = pd.DataFrame(religion_bias_scores.items(), columns=['Identity', 'Bias Score'])

plotBiasScores(religion_bias_df,region_bias_df,"Bias Score Plot by MuRIL")

# Heat Map Generation

Using the precomputed similarity scores, we plot heat maps of LM-generated stereotypes vs. human-annotated stereotypes, with the similarity score of each pair as the cell value.

Some social groups are left out because they have too few data points.

## Religion

In [None]:
requiredReligion = ['buddhist','christian','hindu','jain','muslim','sikh']

# One heat map per religion, saved as a PNG under BERT/heatMaps/Religion/
for religion in requiredReligion:
    filePath = f"BERT/similarityScores/Religion/{religion}_similarity_scores.tsv"
    df = pd.read_csv(filePath, sep='\t')

    # rows: 'Stereotype Token 1', columns: 'Stereotype Token 2', cells: similarity
    matrix = df.pivot(index='Stereotype Token 1', columns='Stereotype Token 2', values='Similarity Score')

    fig, ax = plt.subplots(figsize=(10, 8))
    # NOTE(review): fmt is documented as the format of cell annotations, and
    # annot is not enabled here, so it looks like fmt has no effect - confirm.
    sns.heatmap(matrix, cmap='coolwarm', fmt='.2f', linewidths=.5, ax=ax)
    title = f"Similarity Score heatmap - {religion}"
    ax.set_title(title)
    ax.set_xlabel('Human Annotated')
    ax.set_ylabel('LM generated')
    savePath = f"BERT/heatMaps/Religion/{religion}.png"
    fig.savefig(savePath)
    # plt.show()

## Region

In [None]:
requiredRegion = ['assamese','bengali','bihari','gujarati','haryanvi','kannadiga','marathi','odiya','punjabi','rajasthani','tamilian','uttar pradeshi','uttarakhandi']

# One heat map per region, saved as a PNG under BERT/heatMaps/Region/
for region in requiredRegion:
    filePath = f"BERT/similarityScores/Region/{region}_similarity_scores.tsv"
    df = pd.read_csv(filePath, sep='\t')

    # rows: 'Stereotype Token 1', columns: 'Stereotype Token 2', cells: similarity
    matrix = df.pivot(index='Stereotype Token 1', columns='Stereotype Token 2', values='Similarity Score')

    fig, ax = plt.subplots(figsize=(10, 8))
    # NOTE(review): fmt is documented as the format of cell annotations, and
    # annot is not enabled here, so it looks like fmt has no effect - confirm.
    sns.heatmap(matrix, cmap='coolwarm', fmt='.2f', linewidths=.5, ax=ax)
    title = f"Similarity Score heatmap - {region}"
    ax.set_title(title)
    ax.set_xlabel('Human Annotated')
    ax.set_ylabel('LM generated')
    savePath = f"BERT/heatMaps/Region/{region}.png"
    fig.savefig(savePath)
    # plt.show()
