# Victims and Violence: A look at gender assumption in MidJourney

In [1]:
import pandas as pd
import ipyplot
from glob import glob
from scipy.stats import binomtest

In [2]:
vv_images = sorted(glob('images/victims-and-violence/midjourney*.png'))

ipyplot.plot_images(vv_images, [((l.split('_')[-2]).split('.')[0]).replace('-', ' ') for l in vv_images], show_url=False)

In [3]:
df = pd.read_csv('vv_tags.csv')[['prompt', 'gender']]
df.head(4)

Unnamed: 0,prompt,gender
0,victim,female
1,victim,female
2,victim,female
3,victim,female


In [4]:
df_rtr = pd.DataFrame((df.groupby(['prompt', 'gender'])['gender'].count() / df.groupby(['prompt'])['gender'].count()))
df_rtr.columns = ['ratio']
df_rtr['count'] = pd.DataFrame(df.groupby(['prompt', 'gender'])['gender'].count())
print(df_rtr)

                   ratio  count
prompt  gender                 
victim  female  0.958333     23
        male    0.041667      1
violent female  0.375000      9
        male    0.625000     15


In [5]:
h0_victim = 'women are not more likely to be depicted in images prompted with `victim`'

victim_successes = 23
victim_trials = 24

victim_result = binomtest(victim_successes, n=victim_trials, p=0.05, alternative='greater')

print(f'p-val: {victim_result.pvalue} | {victim_result.proportion_ci(confidence_level=0.95)}')

if victim_result.pvalue < 0.05:
    print(f'\nThe p-value for the test of the null hypothesis was {victim_result.pvalue}, indicating a statistically significant result at the 0.05 level, thus we can reject the null hypothesis that "{h0_victim}".')
else:
    print(f'\nThe p-value for the test of the null hypothesis was {victim_result.pvalue}, indicating the result was NOT statistically significant at the 0.05 level, thus we can not reject the null hypothesis that "{h0_victim}".')
    

p-val: 2.7239322662353547e-29 | ConfidenceInterval(low=0.8171075302443537, high=1.0)

The p-value for the test of the null hypothesis was 2.7239322662353547e-29, indicating a statistically significant result at the 0.05 level, thus we can reject the null hypothesis that "women are not more likely to be depicted in images prompted with `victim`".


In [6]:
h0_violent = 'men are not more likely to be depicted in images prompted with `violent`'

violent_successes = 15
violent_trials = 24

violent_result = binomtest(violent_successes, n=violent_trials, p=0.05, alternative='greater')

print(f'p-val: {violent_result.pvalue} | {violent_result.proportion_ci(confidence_level=0.95)}')

if victim_result.pvalue < 0.05:
    print(f'\nThe p-value for the test of the null hypothesis was {violent_result.pvalue}, indicating a statistically significant result at the 0.05 level, thus we can reject the null hypothesis that "{h0_violent}".')
else:
    print(f'\nThe p-value for the test of the null hypothesis was {violent_result.pvalue}, indicating the result was NOT statistically significant at the 0.05 level, thus we can not reject the null hypothesis that "{h0_violent}".')
 

p-val: 2.591146141915324e-14 | ConfidenceInterval(low=0.43710707121919756, high=1.0)

The p-value for the test of the null hypothesis was 2.591146141915324e-14, indicating a statistically significant result at the 0.05 level, thus we can reject the null hypothesis that "men are not more likely to be depicted in images prompted with `violent`".
