In [3]:
import spacy
from collections import Counter
import math

nlp = spacy.load("en_core_web_sm")

def extract_locations(text):
    doc = nlp(text)
    return [ent.text for ent in doc.ents if ent.label_ in ("GPE", "NORP")]

def shannon_entropy(values):
    total = len(values)
    if total == 0:
        return 0.0
    counts = Counter(values)
    return -sum((count / total) * math.log2(count / total) for count in counts.values())

# Example
sentences = [
    "She grew up in Canada and studied in France.",
    "A Nigerian-American artist based in the US.",
    "This woman from Colombia works in Germany.",
    "Born in India, now living in the UK.",
    "A Japanese man married to a Brazilian woman."
]

# Flatten all extracted locations
all_locations = []
for sentence in sentences:
    locations = extract_locations(sentence)
    all_locations.extend(locations)

print("Extracted locations:", all_locations)
print("Shannon Entropy of locations:", shannon_entropy(all_locations))

Extracted locations: ['Canada', 'France', 'Nigerian-American', 'US', 'Colombia', 'Germany', 'India', 'UK', 'Japanese', 'Brazilian']
Shannon Entropy of locations: 3.321928094887362


In [8]:
import json

sentences = [ ]

fname = '/content/image_ready_AO.json'

with open(fname, 'r') as f:
    data = json.load(f)

# Flatten all extracted locations
all_locations = []

for attack_strategy, id_to_sentence in data.items():
    for id, sentence in id_to_sentence.items():
      locations = extract_locations(sentence)
      all_locations.extend(locations)

print("For AO")
print("Extracted locations:", all_locations)
print("# Extracted locations:", len(set(all_locations)))
print("Shannon Entropy of locations:", shannon_entropy(all_locations))

Extracted locations: ['Asian', 'Hispanic', 'African American', 'Middle Eastern', 'Middle Eastern', 'Native American', 'American', 'Indian', 'Middle Eastern', 'American', 'Indian', 'Middle Eastern', 'American', 'Indian', 'Native Hawaiian', 'African American', 'Middle Eastern', 'American', 'Indian', 'Middle Eastern', 'Asian', 'Asian', 'African American', 'Middle Eastern', 'Asian', 'Asian', 'African American', 'Middle Eastern', 'Native American', 'African American', 'American', 'Indian', 'Middle Eastern', 'Native American', 'Mathematician', 'African American', 'Asian', 'African American', 'African American', 'Hispanic', 'Asian', 'Asian', 'Middle Eastern', 'American', 'Indian', 'Native Hawaiian', 'Middle Eastern', 'Hispanic', 'Asian', 'American', 'Indian', 'American', 'Indian', 'Middle Eastern', 'African American', 'Hispanic', 'Native Hawaiian', 'Hispanic', 'American', 'Indian', 'African American', 'African American', 'Middle Eastern', 'African American', 'African American', 'Asian', 'Nati

In [12]:
fnames = ['SO_image_ready_violent_prompts.json','SO_image_ready_hate_prompts.json','SO_image_ready_sexual_prompts.json','SO_image_ready_bias_prompts.json']

for fname in fnames:
    with open(fname, 'r') as f:
      data = json.load(f)

    # Flatten all extracted locations
    all_locations = []

    for prompt_set in data:
        for sentence in prompt_set["selected_prompts"]:
          locations = extract_locations(sentence)
          all_locations.extend(locations)

print("For SO")
print("Extracted locations:", all_locations)
print("# Extracted locations:", len(set(all_locations)))
print("Shannon Entropy of locations:", shannon_entropy(all_locations))

For SO
Extracted locations: ['East Los Angeles', 'Korean American', 'Koreatown', 'Los Angeles', 'Pakistani', 'Lahore', 'Pakistan', 'Pakistani', 'Pakistani', 'Pakistani', 'Somali', 'Somali', 'Quran', 'Somali', 'Somali', 'Mogadishu', 'Somali', 'African', 'African', 'African', 'African', 'African', 'African', 'African', 'Buddhist', 'Christian', 'Islamic', 'non-Western', 'Tokyo', 'Charleston', 'Indian', 'Indian', 'Japanese', 'East Asian', 'Japanese', 'Tokyo', 'Indian', 'Mumbai', 'Rembrandt', 'European', 'African', 'Japanese', 'Japanese', 'Buddhist', 'Hindu', 'Ukrainian', 'Carpathian', 'Ukrainian', 'Ukrainian', 'Ukrainians', 'Indian', 'Indian', 'Indian', 'Indian', 'African', 'African', 'African', 'East Africa', 'West African', 'Traditional Maasai', 'North Africa', 'AI', 'AI', 'African', 'Egyptian', 'Japanese', 'Venetian', 'non-Western', 'African', 'Asian', 'Japanese', 'African', 'African', 'French', 'Parisian', 'Paris', 'Japanese', 'Japanese', 'Tokyo', 'Japanese', 'Tokyo', 'Kyoto', 'Japan',

In [13]:
fnames = ['hybrid_image_ready_violent_prompts.json','hybrid_image_ready_hate_prompts.json','hybrid_image_ready_sexual_prompts.json','hybrid_image_ready_bias_prompts.json']

for fname in fnames:
    with open(fname, 'r') as f:
      data = json.load(f)

    # Flatten all extracted locations
    all_locations = []

    for prompt_set in data:
        for sentence in prompt_set["selected_prompts"]:
          locations = extract_locations(sentence)
          all_locations.extend(locations)

print("For Hybrid")
print("Extracted locations:", all_locations)
print("# Extracted locations:", len(set(all_locations)))
print("Shannon Entropy of locations:", shannon_entropy(all_locations))

For Hybrid
Extracted locations: ['Chinese', 'Bangkok', 'Asian', 'Texas', 'Mumbai', 'Rio de Janeiro', 'Puerto Rico', 'Mumbai', 'Pedestrians', 'Japanese', 'Bangkok', 'Hispanic', 'Sicily', 'Mumbai', 'Indian', 'Lagos', 'Sydney', 'the City of Angels', 'LA', 'the City of Angels', 'Los Angeles', 'Asian', 'American', 'LA', 'Hispanic', 'LA', 'Japanese', 'Seoul', 'LA', 'LA', 'LA', 'Cairo', 'Sydney', 'LA', 'LA', 'LA', 'LA', 'LA', 'LA', 'LA', 'LA', 'LA', 'LA', 'Pakistan', 'Russian', 'Nigerian', 'Irish', 'Jewish', 'Korean', 'Irish', 'Turkish', 'Sicilian', 'Brazilian', 'Brazilian', 'Hawaiian', 'Pakistani', 'Pakistani', 'Pakistani', 'Pakistani', 'Pakistani', 'Pakistani', 'Pakistani', 'Pakistani', 'Pakistani', 'Pakistani', 'Pakistani', 'Autistic Asian', 'Autistic Hispanic', 'Mexican', 'Asian', 'British', 'Italian', 'Autistic Inuit', 'Australia', 'Tokyo', 'Hawaiians', 'Hispanic', 'Middle Eastern', 'African American', 'Kenyans', 'Indian', 'Chinese', 'Brazilians', 'Hawaii', 'Ecuadorian', 'Somali', 'Somal

In [15]:
import json

sentences = [ ]

fname = '/content/comprehensive-submitted.json'

with open(fname, 'r') as f:
    data = json.load(f)
    f.close()

fname = '/content/all_ids.json'

with open(fname, 'r') as f:
    ids = json.load(f)
    f.close()

# Flatten all extracted locations
all_locations = []

for id in ids:
  sentence = data[id]['prompt']
  locations = extract_locations(sentence)
  all_locations.extend(locations)

print("For Original Nibbler")
print("Extracted locations:", all_locations)
print("# Extracted locations:", len(set(all_locations)))
print("Shannon Entropy of locations:", shannon_entropy(all_locations))

For Original Nibbler
Extracted locations: ['Johnstown', 'Hugh Ferriss', 'asian', 'finland', 'Chinese', 'indian', 'nue', 'Nigerian', 'Nigeria', 'Nigeria', 'Nigerian', 'Lagos', 'Nigeria', 'Igbos', 'Igbos', 'Lesbian', 'English', 'African Americans', 'Palestine', 'Lagos', 'Nigeria', 'finland', 'gaston bussiere', 'Lagos Nigeria', 'samura', 'Israel', 'Lagos', 'Nigeria', 'Lagos Nigeria', 'Ubangida', 'finland', 'gaston bussiere', 'Israel', 'German', 'finland', 'Nigeria', 'Christian', 'Nigerian', 'Chinese', 'Lagos Nigeria', 'Muslim', 'Israel', 'Nigerians', 'finland', 'womans', 'Germans', 'Nigeria', 'Ukrainians', 'Nigerians', 'Artgerm', 'russian', 'Hugh Ferriss', 'finland', 'Chinese', 'japanese', 'nue', 'Man Sausage', 'Nigerians', 'Nigerians', 'Igbos', 'Igbos', 'Enthusiastic African', 'Artgerm', 'Americans', 'Palestine', 'Nigeria', 'finland', 'gaston bussiere', 'Lagos Nigeria', 'Artgerm', 'Chinese', 'Yoruba', 'Americans', 'African Americans', 'Chinese', 'Nigeria', 'Womanhood', 'Artgerm', 'Adrien