In [1]:
import json
import pandas as pd
from pathlib import Path


In [2]:
# Show full cell text in rendered frames instead of truncating long strings.
pd.options.display.max_colwidth = None

In [3]:
# Load the stereotype sentences and attach the Qwen 2.5 analysis output to each row.
qwen_df = pd.read_csv("../stereotypes.csv")
qwen_results = Path("../outputs/analysis_outputs_qwen2.5/all_results_clean.json")

# Decode the JSON explicitly as UTF-8 (the same default pd.read_csv uses) so the
# sentence keys match the CSV text regardless of the platform's locale encoding.
with open(qwen_results, 'r', encoding='utf-8') as f:
    results = json.load(f)

# Create a mapping from sentence to results (the fields live under each record's 'output')
results_map = {r['sentence']: r['output'] for r in results}

# Add new columns for each output field
output_fields = [
    'has_category_label', 'full_label', 'beliefs_expectancies',
    'information', 'behavior_features_traits', 'stereotype'
]

# Sentences without a matching record get None in every output column.
for field in output_fields:
    qwen_df[field] = qwen_df['text'].map(lambda x: results_map.get(x, {}).get(field, None))

# DataFrame.info() prints its own summary and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
qwen_df.info()
qwen_df.head(3)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5444 entries, 0 to 5443
Data columns (total 7 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   text                      5444 non-null   object
 1   has_category_label        5444 non-null   object
 2   full_label                5444 non-null   object
 3   beliefs_expectancies      5444 non-null   object
 4   information               5444 non-null   object
 5   behavior_features_traits  5444 non-null   object
 6   stereotype                5444 non-null   object
dtypes: object(7)
memory usage: 297.8+ KB
None


Unnamed: 0,text,has_category_label,full_label,beliefs_expectancies,information,behavior_features_traits,stereotype
0,Some IT work has been outsourced to workers in Bangladesh. It is a poor country and the people live in squalor.,yes,people in Bangladesh,yes,"live in squalor, the country is poor",yes,yes
1,Bangladesh exports a lot of garments. You figure they would since nobody there can read or write.,yes,people in Bangladesh,yes,cannot read or write,yes,yes
2,Bangladesh is a country few people know about. It is very poor and uneducated there.,yes,people in Bangladesh,yes,very poor and uneducated,yes,yes


In [4]:
# Load the stereotype sentences and attach the Llama 3.3 analysis output to each row.
llama_df = pd.read_csv("../stereotypes.csv")
llama_results = Path("../outputs/analysis_outputs_llama3.3/all_results_clean.json")

# Decode the JSON explicitly as UTF-8 (the same default pd.read_csv uses) so the
# sentence keys match the CSV text regardless of the platform's locale encoding.
with open(llama_results, 'r', encoding='utf-8') as f:
    results = json.load(f)

# Map each sentence to a shallow copy of its whole record; the output fields are
# looked up at the top level of that record below.
results_map = {r['sentence']: dict(r) for r in results}

# Add new columns for each output field
output_fields = [
    'has_category_label', 'full_label', 'beliefs_expectancies',
    'information', 'behavior_features_traits', 'stereotype'
]

# Sentences without a matching record get None in every output column.
for field in output_fields:
    llama_df[field] = llama_df['text'].map(lambda x: results_map.get(x, {}).get(field, None))

# DataFrame.info() prints its own summary and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line to the output.
llama_df.info()
llama_df.head(3)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5444 entries, 0 to 5443
Data columns (total 7 columns):
 #   Column                    Non-Null Count  Dtype 
---  ------                    --------------  ----- 
 0   text                      5444 non-null   object
 1   has_category_label        5444 non-null   object
 2   full_label                5444 non-null   object
 3   beliefs_expectancies      5444 non-null   object
 4   information               5444 non-null   object
 5   behavior_features_traits  5444 non-null   object
 6   stereotype                5444 non-null   object
dtypes: object(7)
memory usage: 297.8+ KB
None


Unnamed: 0,text,has_category_label,full_label,beliefs_expectancies,information,behavior_features_traits,stereotype
0,Some IT work has been outsourced to workers in Bangladesh. It is a poor country and the people live in squalor.,yes,the people,yes,live in squalor,yes,yes
1,Bangladesh exports a lot of garments. You figure they would since nobody there can read or write.,yes,nobody there,yes,nobody there can read or write,yes,yes
2,Bangladesh is a country few people know about. It is very poor and uneducated there.,yes,It,yes,is very poor and uneducated,yes,yes


In [5]:
# Distribution of the 'stereotype' label per model: Qwen first, then Llama.
for model_df in (qwen_df, llama_df):
    print(model_df['stereotype'].value_counts())

stereotype
yes               3266
no                1753
not-applicable     425
Name: count, dtype: int64
stereotype
yes    3673
no     1771
Name: count, dtype: int64


In [6]:
# Fold Qwen's "not-applicable" label into "no" so the column holds only yes/no.
not_applicable = qwen_df['stereotype'] == 'not-applicable'
qwen_df.loc[not_applicable, 'stereotype'] = 'no'
qwen_df['stereotype'].value_counts()

stereotype
yes    3266
no     2178
Name: count, dtype: int64

In [7]:
# True where both models assigned the same 'stereotype' label to a row.
agree_mask = qwen_df['stereotype'] == llama_df['stereotype']

# Every row either agrees or disagrees, so the disagreement count is the remainder.
match_count = agree_mask.sum()
mismatch_count = len(agree_mask) - match_count

print(f"Agreements on 'stereotype': {match_count}")
print(f"Disagreements on 'stereotype': {mismatch_count}")

Agreements on 'stereotype': 4405
Disagreements on 'stereotype': 1039


In [8]:
# Side-by-side view of each sentence with both models' 'stereotype' labels.
stereotype_pairs = pd.DataFrame({
    'text': llama_df['text'],
    'qwen_val': qwen_df['stereotype'],
    'llama_val': llama_df['stereotype'],
})

# Split the paired view by whether the two labels agree; copies keep the two
# result frames independent of the combined frame.
disagreements = stereotype_pairs.loc[~agree_mask].copy()
agreements = stereotype_pairs.loc[agree_mask].copy()

In [9]:
# In `agreements` both label columns are equal, so qwen_val alone gives the shared label.
shared_label = agreements['qwen_val']
both_yes = (shared_label == 'yes').sum()
both_no = (shared_label == 'no').sum()

# In `disagreements` exactly one model's column holds the differing label.
llama_only_yes = (disagreements['llama_val'] == 'yes').sum()
qwen_only_yes = (disagreements['qwen_val'] == 'yes').sum()

print(f"Matching yes values: {both_yes}")
print(f"Matching no values: {both_no}")
print(f"Number of text that Llama thinks is a stereotype but Qwen does not: {llama_only_yes}")
print(f"Number of text that Qwen thinks is a stereotype but Llama does not: {qwen_only_yes}")

Matching yes values: 2950
Matching no values: 1455
Number of text that Llama thinks is a stereotype but Qwen does not: 723
Number of text that Qwen thinks is a stereotype but Llama does not: 316


In [10]:
# Show a few rows where both models agree. The sample is seeded so the same
# rows are displayed on every re-run of the notebook.
print("Sample of rows where 'stereotype' matches:")
agreements.sample(5, random_state=42)

Sample of rows where 'stereotype' matches:


Unnamed: 0,text,qwen_val,llama_val
3658,Gay people usually have STDs.,yes,yes
1991,She hurt him when she elbowed him in the face. He was seen as weak and a pushover because of he got hit.,no,no
5394,A study conducted by University of Leeds found that companies with gender-diverse boards had higher returns on equity than those with less diverse boards.,no,no
3106,"After work, Candace went home and cried on the phone to her coworkers.",no,no
1260,She was wary of the nigerian man because she wasn't sure if he was really a scammer or not.,yes,yes


In [11]:
# Show a few rows where the models disagree. The sample is seeded so the same
# rows are displayed on every re-run of the notebook.
print("Sample of rows where 'stereotype' differs:")
disagreements.sample(5, random_state=42)

Sample of rows where 'stereotype' differs:


Unnamed: 0,text,qwen_val,llama_val
4707,The Quran is read by many people. It infiltrates their mind to think and do violent things.,yes,no
1249,The nigerian country is very starving.,yes,no
4844,Julius was surprised to see that the Jewish lawyer hadn't quit.,no,yes
2118,The stepmother's relationship with her stepchildren was mean.,no,yes
2758,His mother was a very nurturing woman.,no,yes


In [None]:
# Rows that at least one of the two models labelled as a stereotype.
qwen_says_yes = qwen_df['stereotype'].eq('yes')
llama_says_yes = llama_df['stereotype'].eq('yes')
union_mask = qwen_says_yes | llama_says_yes

In [13]:
# Take the flagged rows from the Llama frame; the copy keeps later edits to
# `union` from touching llama_df.
union = llama_df.loc[union_mask].copy()
union['stereotype'].value_counts()

stereotype
yes    3673
no      316
Name: count, dtype: int64

In [14]:
# Relabel the remaining 'no' rows as 'yes' so the whole frame carries one label.
union['stereotype'] = union['stereotype'].replace({'no': 'yes'})

union['stereotype'].value_counts()

stereotype
yes    3989
Name: count, dtype: int64

In [15]:
# Persist the union frame next to the input CSV, without the index column.
filtered_csv_path = "../filtered_stereotypes.csv"
union.to_csv(filtered_csv_path, index=False)

In [None]:
# Save
# NOTE(review): `input_csv`, `df`, and `logging` are not defined or imported in
# any cell visible in this notebook, so as written this cell would fail with a
# NameError on a fresh kernel — confirm whether it is a leftover that should be
# removed, or which frame it is meant to save.
output_csv = "../stereotypes_analysed.csv"
# Overwrites the literal path above with one derived from `input_csv`
# (every '.csv' occurrence replaced by '_analysed.csv').
output_csv = input_csv.replace('.csv', '_analysed.csv')

# Write `df` to that path without the index column.
df.to_csv(output_csv, index=False)
# NOTE(review): the leading characters of the log message look like a
# mis-decoded UTF-8 check mark — confirm and correct the literal if so.
logging.info(f"âœ“ Updated CSV saved to {output_csv}")