In [58]:
from pathlib import Path

import joblib
import numpy as np
import pandas as pd
from sklearn.pipeline import Pipeline

# Do not truncate wide text cells when frames are rendered.
pd.set_option('display.max_colwidth', None)

# Load the persisted encoder and model, in that order. joblib.load unpickles
# its input, so these paths should only ever point at trusted artifacts.
encoder, model = (
    joblib.load(artifact_path)
    for artifact_path in (
        'model/lr_scsc_encoder.joblib',
        'model/lr_scsc_model.joblib',
    )
)

# Chain the encoder and the model so a single predict() call runs both steps.
model_pipeline = Pipeline(steps=[('encoder', encoder), ('model', model)])

# Print the column names the encoder was fitted on, when it exposes them.
if hasattr(encoder, 'feature_names_in_'):
    print("Expected Features:", encoder.feature_names_in_)

Expected Features: ['connotation' 'gram_form' 'situation_evaluation'
 'generalisation_category_label' 'generalisation_situation']


In [59]:
# Read the annotated sentences; index_col=False stops pandas from using the
# first column as the index.
input_df = pd.read_csv(filepath_or_buffer='stereotype_predict.csv', index_col=False)
input_df.columns

Index(['has_category_label', 'full_label', 'beliefs_expectancies',
       'information', 'behavior_features_traits', 'stereotype', 'target_type',
       'connotation', 'gram_form', 'ling_form', 'situation',
       'situation_evaluation', 'generalization', 'sentence'],
      dtype='object')

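Expected features and how each is derived from the input columns:
- `generalisation_category_label` = [`ling_form`]_[`target_type`]
- `connotation` = `connotation`
- `gram_form` = `gram_form`
- `generalisation_situation` = [`situation`]_[`generalization`]
- `situation_evaluation` = `situation_evaluation`
- `explanation` (not among the encoder's expected features printed above and not a column of the input file, so it is not used here)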

In [60]:
def _joined(frame, left, right):
    """Return columns `left` and `right` of `frame`, cast to str and joined with '_'."""
    return frame[left].astype(str) + '_' + frame[right].astype(str)


# Feature columns used for prediction (order is reused when building result_df).
cat_cols = [
    'generalisation_category_label',
    'connotation',
    'gram_form',
    'generalisation_situation',
    'situation_evaluation',
]

# Build the two composite features; they are appended after the existing columns.
input_df = input_df.assign(
    generalisation_category_label=_joined(input_df, 'ling_form', 'target_type'),
    generalisation_situation=_joined(input_df, 'situation', 'generalization'),
)

# Keep only the feature columns plus the sentence text; this also removes the
# four source columns consumed by the composite features above.
keep_cols = cat_cols + ['sentence']
input_df = input_df.drop(columns=[name for name in input_df.columns if name not in keep_cols])

# Persist the processed frame so the sentence text can be re-read later.
input_df.to_csv('stereotype_predict_processed.csv', index=False)

In [61]:
# The sentence text is not one of the encoder's expected features, so remove it
# before prediction. Reassigning with errors='ignore' (rather than dropping in
# place) lets this cell be re-run without a KeyError once the column is gone.
input_df = input_df.drop(columns=['sentence'], errors='ignore')
input_df.head(2)

Unnamed: 0,connotation,gram_form,situation_evaluation,generalisation_category_label,generalisation_situation
0,neutral,noun,negative,generic_generic target,enduring characteristics_concrete
1,neutral,noun,negative,generic_generic target,enduring characteristics_abstract


In [62]:
# Show the rows whose combined situation/generalisation value is 'other_abstract'.
# A boolean mask selects exactly the matching rows; unlike where(...).dropna(),
# it keeps matching rows that have a missing value in some other column.
input_df[input_df['generalisation_situation'] == 'other_abstract']

Unnamed: 0,connotation,gram_form,situation_evaluation,generalisation_category_label,generalisation_situation


In [63]:
# Score every row: the pipeline runs the encoder step, then the model step.
predictions = model_pipeline.predict(X=input_df)

In [64]:
# Flatten the predictions into a 1-D array of scores.
all_res = np.asarray(predictions).ravel()

# Re-read the processed file to get the feature columns together with the
# sentence text, then put the feature columns first.
result_df = pd.read_csv('stereotype_predict_processed.csv', index_col=False, usecols=['sentence'] + cat_cols)
result_df = result_df.reindex(columns=cat_cols + ['sentence'])

# Scores are attached by position, so fail with a clear message if the file on
# disk and the predictions do not have the same number of rows.
if len(all_res) != len(result_df):
    raise ValueError(
        f"Got {len(all_res)} predictions for {len(result_df)} rows; "
        "re-run the preprocessing cells so both come from the same data."
    )
result_df['scsc_score'] = all_res

# A fixed seed keeps the displayed sample stable across re-runs, and the size
# cap avoids an error when the frame has fewer than 10 rows.
result_df.sample(n=min(10, len(result_df)), random_state=42)

Unnamed: 0,generalisation_category_label,connotation,gram_form,generalisation_situation,situation_evaluation,sentence,scsc_score
318,subset_specific target,neutral,noun,situational behaviour_concrete,negative,I saw a group of Lebanese people. They all wore black burkas and had suicide vests on.,0.200992
2955,generic_generic target,neutral,noun,other_not-applicable,not-applicable,Muslims comprise the third largest religion in the world. Most think these people only reside in the middle east.,0.226846
3024,generic_generic target,positive,noun,enduring characteristics_abstract,positive,Hindus are usually peaceful.,-0.168693
2314,generic_generic target,neutral,noun,enduring characteristics_abstract,negative,most people who are Pansexual are not manly enough.,0.272065
1465,individual_specific target,neutral,noun,enduring characteristics_concrete,neutral,My grandfather is in the living room. He is always cold and keeps the house hot even in the summer.,-0.059904
1084,generic_generic target,neutral,noun,situational behaviour_concrete,negative,While planning the wedding the groom was very uninvolved. He was very demanding about what he wanted but didn't want to help do the work.,0.264118
1898,individual_specific target,negative,noun,situational behaviour_concrete,negative,Her cheating boyfriend showed up for work yesterday.,0.145264
1911,subset_specific target,neutral,noun,not-applicable_not-applicable,not-applicable,The butler referred to the senior-aged man was sir.,0.133997
3014,generic_generic target,negative,noun,enduring characteristics_abstract,negative,Islam is a backwards religion.,0.309197
1958,generic_generic target,positive,noun,enduring characteristics_abstract,positive,I am a Boyfriend He is tall strong and brave,-0.168693


In [65]:
# Summary statistics of the numeric columns (at this point, the raw scsc_score).
result_df.describe()

Unnamed: 0,scsc_score
count,3394.0
mean,0.144949
std,0.153071
min,-0.332626
25%,0.032956
50%,0.215794
75%,0.272065
max,0.476176


In [74]:
# Min-max scale the scores into [0, 1]; the bounds are computed once up front.
# NOTE: if every score is identical the range is zero and the result is NaN.
score_min = result_df['scsc_score'].min()
score_max = result_df['scsc_score'].max()
result_df['scsc_score'] = (result_df['scsc_score'] - score_min) / (score_max - score_min)

# Show the rows whose scaled score lies strictly between 0.60 and 0.65, lowest
# first. A boolean mask keeps every matching row and leaves column dtypes
# untouched, whereas where(...).dropna() drops matching rows that have a
# missing value in any other column.
in_band = (result_df['scsc_score'] > 0.60) & (result_df['scsc_score'] < 0.65)
result_df[in_band].sort_values('scsc_score', ascending=True)

Unnamed: 0,generalisation_category_label,connotation,gram_form,generalisation_situation,situation_evaluation,sentence,scsc_score,stereotype
1268,individual_specific target,negative,noun,enduring characteristics_abstract,negative,My stepmother is evil,0.600688,False
1904,individual_specific target,negative,noun,enduring characteristics_abstract,negative,Many people view their stepmother as a evil woman in their life.,0.600688,False
1308,individual_specific target,negative,noun,enduring characteristics_abstract,negative,That girl is so dumb.,0.600688,False
2924,individual_specific target,negative,noun,enduring characteristics_abstract,negative,Mohammed is a terrorist type of guy.,0.600688,False
1427,generic_specific target,neutral,noun,situational behaviour_concrete,negative,The bride was acting like a total bridezilla the day before her wedding.,0.608616,False
1596,generic_specific target,neutral,noun,situational behaviour_concrete,negative,The father was very overbearing about his daughters dating life,0.608616,False
1824,generic_specific target,neutral,noun,situational behaviour_concrete,negative,The man looked very angry He may try to hurt someone,0.608616,False
753,generic_specific target,neutral,noun,situational behaviour_concrete,negative,I received a letter from a Nigerian. He scams people for money over the internet.,0.608616,False
1441,generic_specific target,neutral,noun,situational behaviour_concrete,negative,The male was wearing a cotton t shirt. He was walking around the store eyeing the merchandise suspiciously.,0.608616,False
1681,generic_specific target,neutral,noun,situational behaviour_concrete,negative,I had to interview with a woman for a new job position. She seemed crazy and hormonal.,0.608616,False


In [75]:
# Scaled scores at or above this cut-off are labelled as stereotypes.
STEREOTYPE_THRESHOLD = 0.608

# A vectorised comparison yields the boolean column directly, without calling
# a Python lambda once per row.
result_df['stereotype'] = result_df['scsc_score'] >= STEREOTYPE_THRESHOLD
print(result_df.describe())

# Ten highest-scoring rows.
result_df.sort_values('scsc_score', ascending=False).head(10)

        scsc_score
count  3394.000000
mean      0.590472
std       0.189256
min       0.000000
25%       0.452005
50%       0.678066
75%       0.747638
max       1.000000


Unnamed: 0,generalisation_category_label,connotation,gram_form,generalisation_situation,situation_evaluation,sentence,scsc_score,stereotype
3030,generic_generic target,negative,pronoun,enduring characteristics_abstract,negative,"A church is a beautiful place to get married, if you want. They are unaccepting of gay marriages.",1.0,True
194,generic_generic target,neutral,other,situational behaviour_abstract,negative,Pakistani stubbornness hindered progress in negotiations,0.990246,True
542,generic_generic target,neutral,pronoun,enduring characteristics_abstract,negative,"Africa is home to many elephants, something that has been close to being endangered you must be black to be from here",0.95409,True
494,generic_generic target,neutral,pronoun,enduring characteristics_abstract,negative,Cape Verde is in Africa. They are less civilized than Brits.,0.95409,True
978,generic_generic target,neutral,pronoun,enduring characteristics_abstract,negative,Somalia is a beautiful country. They have a lot of uneducated people.,0.95409,True
2909,generic_generic target,neutral,pronoun,enduring characteristics_abstract,negative,The church was handing out their weekly donations bucket They of course are known for being greedy while preaching to give to the poor.,0.95409,True
2893,generic_generic target,neutral,pronoun,enduring characteristics_abstract,negative,Islam is the 2nd largest religion in the world. Most of them hate Americans.,0.95409,True
270,generic_generic target,neutral,pronoun,enduring characteristics_concrete,negative,Russia is a country in Northern Asia They are often drunk on vodka,0.950875,True
2867,generic_generic target,negative,other,enduring characteristics_abstract,negative,Islam teaches everyone to be terrorists.,0.945504,True
763,generic_generic target,negative,other,enduring characteristics_abstract,negative,I received another Nigerian e-mail today. Everyone there must be in on an internet scam.,0.945504,True


In [76]:
# Ten lowest-scoring rows (ascending sort on scsc_score, then take the head).
result_df.sort_values('scsc_score', ascending=True).head(10)

Unnamed: 0,generalisation_category_label,connotation,gram_form,generalisation_situation,situation_evaluation,sentence,scsc_score,stereotype
1627,individual_specific target,positive,noun,situational behaviour_concrete,positive,Walking up the aisle the beautiful bride caught everyone's eye.,0.0,False
1643,individual_specific target,positive,noun,situational behaviour_concrete,positive,The handsome groom walked down the aisle.,0.0,False
1744,individual_specific target,positive,noun,situational behaviour_concrete,positive,The gentleman was kid to everyone he met. He offered the lady his coat because she was cold.,0.0,False
1206,individual_specific target,positive,noun,situational behaviour_concrete,positive,The gentleman was very polite when dealing with the waiter.,0.0,False
1738,individual_specific target,positive,noun,situational behaviour_concrete,positive,The gentleman was polite to his date.,0.0,False
1601,individual_specific target,positive,noun,situational behaviour_concrete,positive,The beautiful bride walked down the aisle.,0.0,False
1618,individual_specific target,positive,noun,situational behaviour_concrete,positive,The beautiful bride walked down the isle with a huge smile on her face.,0.0,False
1864,individual_specific target,positive,noun,situational behaviour_concrete,positive,The gentleman was nice and showed his date a good time.,0.0,False
1605,individual_specific target,positive,noun,situational behaviour_concrete,positive,The gentleman at table three was quite polite when i mentioned the problem.,0.0,False
1298,individual_specific target,positive,noun,situational behaviour_concrete,positive,The bride looked beautiful on her wedding day.,0.0,False


In [77]:
# Count how many rows carry each value of the 'stereotype' column.
result_df['stereotype'].value_counts()

stereotype
True     1961
False    1433
Name: count, dtype: int64

In [79]:
# Append this batch to the scored file. The header row is written only when the
# file is missing or empty, so a fresh file still gets column names while
# appends to an existing file do not repeat them.
# NOTE: re-running this cell appends the same rows again.
# NOTE(review): assumes the existing file has the same column order as result_df - confirm.
scored_path = Path("../stereotype_scored.csv")
needs_header = not scored_path.exists() or scored_path.stat().st_size == 0
result_df.to_csv(scored_path, index=False, mode='a', header=needs_header)

In [80]:
# Reload the scored file from disk and summarise its numeric columns.
scored_df = pd.read_csv("../stereotype_scored.csv")
scored_df.describe()

Unnamed: 0,scsc_score
count,3953.0
mean,0.592901
std,0.193902
min,0.0
25%,0.452005
50%,0.680905
75%,0.747638
max,1.0
