## Setup

In [1]:
import pandas as pd 
import numpy as np

## Analysis by Study

In [2]:
def extract_acc(df, cond, key):
  '''
  Parameters
  ----------
  df: answer dataframe or sub-dataframe
  cond: condition name, like "bee_telo_changed"
  key: expected answer key word, like "bee", "spider", or "yes"

  Returns
  ----------
  accuracy aggregated over all binary data points
  '''
  acc = []
  acc_list = []
  mae = []

  cond_subset = df[df["condition"] == cond].reset_index()
  for i in range(len(cond_subset)):
    if cond_subset['categorization_rating'][i] == key:
      acc.append(1)
    else:
      acc.append(0)

  return mean(acc)

def mean(arr):
  '''
  Manual mean function
  '''
  return sum(arr) / len(arr)

### Rose and Nichols, 2019 study 1

In [5]:
a = pd.read_csv("bloom_rose_nichols_2019_study1_answer_retrieved_curie.csv")
condition1 = ['bee_telo_changed','spider']
condition2 = ['bee_telo_preserved', 'bee']

acc = []
for cond1, cond2 in condition1, condition2:
  acc.append(extract_acc(a, cond1, cond2))

print(acc)
print(mean(acc))

[0.38, 0.48]
0.43


### Rose and Nichols, 2019 study 2

In [7]:
a = pd.read_csv("bloom_rose_nichols_2019_study2_answer_retrieved_curie.csv")
condition1 = ['bee_telo_changed','spider']
condition2 = ['bee_telo_preserved', 'bee']

acc = []
for cond1, cond2 in condition1, condition2:
  acc.append(extract_acc(a, cond1, cond2))

print(acc)
print(mean(acc))

[0.5, 0.54]
0.52


### Rose and Nichols, 2019 study 3


In [8]:
a = pd.read_csv("bloom_rose_nichols_2019_study3_answer_retrieved_curie.csv")
condition1 = ['bee_telo_changed','spider']
condition2 = ['bee_telo_preserved', 'bee']

acc = []
for cond1, cond2 in condition1, condition2:
  acc.append(extract_acc(a, cond1, cond2))

print(acc)
print(mean(acc))

[0.66, 0.58]
0.62


### Rose and Nichols, 2019 study 4

In [9]:
def extract_acc_column_version2(df, cond, key):
  '''
  Idential to extract_acc(), hard-coded for answer files
  with different response columns
  '''
  acc = []
  acc_list = []
  mae = []

  cond_subset = df[df["condition"] == cond].reset_index()
  for i in range(len(cond_subset)):
    if cond_subset['categorization_rating_question_2_1'][i] == key:
      acc.append(1)
    else:
      acc.append(0)

  return mean(acc)

In [3]:
a = pd.read_csv("rose_nichols_2019_study4_new_answer_retrieved.csv")

condition1 = ['bee_telo_changed_fertilized_by_bee','spider']
condition2 = ['bee_telo_changed_fertilized_by_spider', 'spider']
condition3 = ['bee_telo_preserved_fertilized_by_bee','bee']
condition4 = ['bee_telo_preserved_fertilized_by_spider', 'bee']

acc = []
for cond1, cond2 in condition1, condition2:
  acc.append(extract_acc(a, cond1, cond2))
  
for cond1, cond2 in condition3, condition4:
  acc.append(extract_acc(a, cond1, cond2))
print(acc)
print(mean(acc))

[0.72, 0.6, 0.04, 0.02]
0.345


### Rose and Nichols, 2020 study 1

In [14]:
a = pd.read_csv("rose_nichols_2020_study1_new_answer_retrieved.csv")

condition1 = ['hotplate_telo_changed','clock']
condition2 = ['hotplate_telo_preserved', 'hotplate']
condition3 = ['magnetite_telo_changed','coal']
condition4 = ['magnetite_telo_preserved', 'magnetite']
condition5 = ['vulture_telo_changed','hummingbird']
condition6 = ['vulture_telo_preserved', 'vulture']

acc = []
for cond1, cond2 in condition1, condition2:
  acc.append(extract_acc(a, cond1, cond2))

for cond1, cond2 in condition3, condition4:
  acc.append(extract_acc(a, cond1, cond2))
  
for cond3, cond4 in condition3, condition4:
  acc.append(extract_acc(a, cond3, cond4))

print(acc)
print(mean(acc))

[0.84, 0.76, 0.0, 1.0, 0.0, 1.0]
0.6


### Rose and Nichols, 2020 study 2

In [15]:
a = pd.read_csv("rose_nichols_2020_study2_new_answer_retrieved.csv")

condition1 = ['clock_telo_changed','hotplate']
condition2 = ['clock_telo_preserved', 'clock']
condition3 = ['coal_telo_changed','magnetite']
condition4 = ['coal_telo_preserved', 'coal']
condition5 = ['hummingbird_telo_changed','vulture']
condition6 = ['hummingbird_telo_preserved', 'hummingbird']

acc = []
for cond1, cond2 in condition1, condition2:
  acc.append(extract_acc(a, cond1, cond2))

for cond1, cond2 in condition3, condition4:
  acc.append(extract_acc(a, cond1, cond2))
  
for cond3, cond4 in condition3, condition4:
  acc.append(extract_acc(a, cond3, cond4))

print(acc)
print(mean(acc))

[0.7, 0.78, 1.0, 0.0, 1.0, 0.0]
0.58


### Rose and Nichols, 2020 study 3

In [16]:
a = pd.read_csv("rose_nichols_2020_study3_new_answer_retrieved.csv")

condition1 = ['hotplate_telo_changed_fertilized_by_clock','clock']
condition2 = ['hotplate_telo_changed_fertilized_by_hotplate', 'clock']
condition3 = ['hotplate_telo_preserved_fertilized_by_clock','hotplate']
condition4 = ['hotplate_telo_preserved_fertilized_by_hotplate', 'hotplate']

condition5 = ['magnetite_telo_changed_fertilized_by_coal','coal']
condition6 = ['magnetite_telo_changed_fertilized_by_magnetite', 'coal']
condition7 = ['magnetite_telo_preserved_fertilized_by_coal','magnetite']
condition8 = ['magnetite_telo_preserved_fertilized_by_magnetite', 'magnetite']

condition9 = ['vulture_telo_changed_fertilized_by_hummingbird','hummingbird']
condition10 = ['vulture_telo_changed_fertilized_by_vulture', 'hummingbird']
condition11 = ['vulture_telo_preserved_fertilized_by_hummingbird','vulture']
condition12 = ['vulture_telo_preserved_fertilized_by_vulture', 'vulture']

acc = []
for cond1, cond2 in condition1, condition2:
  acc.append(extract_acc(a, cond1, cond2))

for cond1, cond2 in condition3, condition4:
  acc.append(extract_acc(a, cond1, cond2))
  
for cond1, cond2 in condition5, condition6:
  acc.append(extract_acc(a, cond1, cond2))

for cond1, cond2 in condition7, condition8:
  acc.append(extract_acc(a, cond1, cond2))

for cond1, cond2 in condition9, condition10:
  acc.append(extract_acc(a, cond1, cond2))
  
for cond3, cond4 in condition11, condition12:
  acc.append(extract_acc(a, cond3, cond4))

print(acc)
print(mean(acc))

[1.0, 1.0, 0.54, 0.54, 0.0, 0.0, 1.0, 1.0, 0.72, 0.72, 0.64, 0.64]
0.6499999999999999


### Gelman and Wellman, 1991 study 2

In [17]:
# Hard-coded because results are in yes/nos

def gelman_extract(df, correct_condition):
  data = []
  for i in range(len(df)):
    if a['categorization_rating'][i] == correct_condition:
      data.append(1)
    else:
      data.append(0)
  return mean(data)

In [18]:
a = pd.read_csv("gelman_wellman_1991_study2_new_answer_retrieved.csv")

inside_removal_natural_kind = a[(a['type_category'] == "inside_removal") & (a['is_natural_kind'] == True)]
inside_removal_artifact = a[(a['type_category'] == "inside_removal") & (a['is_natural_kind'] == False)]

outside_removal_natural_kind = a[(a['type_category'] == "outside_removal") & (a['is_natural_kind'] == True)]
outside_removal_artifact = a[(a['type_category'] == "outside_removal") & (a['is_natural_kind'] == False)]

acc = []
acc.append(gelman_extract(inside_removal_natural_kind, 'no')) #nn: if remove inside, no longer category
acc.append(gelman_extract(outside_removal_natural_kind, 'yes')) #nn: if remove outside, still category
acc.append(gelman_extract(inside_removal_artifact, 'yes')) #artif: if remove inside, still category
acc.append(gelman_extract(outside_removal_artifact, 'yes')) #artif: if remove outside, still category

print(acc)
print(mean(acc))

[0.0, 1.0, 1.0, 1.0]
0.75


### Gelman and Wellman, 1991 study 3

In [19]:
a = pd.read_csv("gelman_wellman_1991_study3_new_answer_retrieved.csv")

condition1 = ['cow_raised_by_pig','curly']
condition2 = ['kangaroo_raised_by_goat', 'yes'] #answer extracted as yes/no here
condition3 = ['mouse_raised_by_dog','round']
condition4 = ['rabbit_raised_by_monkey', 'long']
condition5 = ['tiger_raised_by_horse','striped']

acc = []
for cond1, cond2 in condition1, condition2:
  acc.append(extract_acc(a, cond1, cond2))

for cond1, cond2 in condition3, condition4:
  acc.append(extract_acc(a, cond1, cond2))
  
acc.append(extract_acc(a, condition5[0], condition5[1]))

print(acc)
print(mean(acc))

[0.78, 0.54, 0.62, 0.56, 0.7]
0.64


### Gelman and Wellman, 1991 study 5

In [20]:
a = pd.read_csv("gelman_wellman_1991_study5_new_answer_retrieved.csv")

condition1 = ['appleseed_planted_in_flowerpot','apple']
condition2 = ['flowerseed_planted_with_strawberry', 'flower'] 
condition3 = ['grapeseed_planted_with_coconuttree','grape']
condition4 = ['lemonseed_planted_with_orangetree', 'lemon']
condition5 = ['peachseed_planted_with_plumtree','peach']
condition6 = ['roseseed_planted_with_dandelion','rose']
condition7 = ['watermelonseed_planted_in_cornfield','watermelon']

acc = []
for cond1, cond2 in condition1, condition2:
  acc.append(extract_acc(a, cond1, cond2))

for cond1, cond2 in condition3, condition4:
  acc.append(extract_acc(a, cond1, cond2))

for cond1, cond2 in condition5, condition6:
  acc.append(extract_acc(a, cond1, cond2))
  
acc.append(extract_acc(a, condition7[0], condition7[1]))

print(acc)
print(mean(acc))

[0.0, 0.16, 0.0, 0.0, 0.0, 0.24, 0.7]
0.15714285714285717


### Keil, 1992

In [21]:
a = pd.read_csv("keil_1992_study1_new_answer_retrieved.csv")

natural_kind = a[a['is_natural_kind'] == True]
artifact = a[a['is_natural_kind'] == False]

#nn - category stays
condition1 = ['chicken_transformed_to_turkey','chicken']
condition2 = ['grapefruit_transformed_to_orange', 'grapefruit']
condition3 = ['horse_transformed_to_zebra','horse']
condition4 = ['lead_transformed_to_gold', 'lead']
condition5 = ['raccoon_transformed_to_skunk','raccoon']
condition6 = ['sheep_transformed_to_goat', 'sheep']
condition7 = ['tiger_transformed_to_lion','tiger']

#artif - category changes
condition8 = ['bobbypin_transformed_to_needle', 'needle']
condition9 = ['card_transformed_to_toiletpaper','toiletpaper']
condition10 = ['coffeepot_transformed_to_birdfeeder', 'birdfeeder']
condition11 = ['garbagecan_transformed_to_chair','chair']
condition12 = ['kitchenpipe_transformed_to_flute', 'flute']
condition13 = ['necktie_transformed_to_shoelace', 'shoelace']
condition14 = ['plasticbottle_transformed_to_flipflop', 'flipflop']
condition15 = ['tire_transformed_to_boot', 'boot']

acc = []
for cond1, cond2 in condition1, condition2:
  acc.append(extract_acc(a, cond1, cond2))

for cond1, cond2 in condition3, condition4:
  acc.append(extract_acc(a, cond1, cond2))
  
for cond1, cond2 in condition5, condition6:
  acc.append(extract_acc(a, cond1, cond2))

for cond1, cond2 in condition7, condition8:
  acc.append(extract_acc(a, cond1, cond2))

for cond1, cond2 in condition9, condition10:
  acc.append(extract_acc(a, cond1, cond2))
  
for cond3, cond4 in condition11, condition12:
  acc.append(extract_acc(a, cond3, cond4))

for cond3, cond4 in condition13, condition14:
  acc.append(extract_acc(a, cond3, cond4))

acc.append(extract_acc(a, condition15[0], condition15[1]))

print(acc)
print(mean(acc))

[0.16, 0.26, 0.1, 0.26, 0.74, 0.54, 0.62, 0.0, 0.0, 1.0, 0.82, 1.0, 0.0, 0.0, 0.82]
0.42133333333333334


### Waxman, 2007 study 1

In [22]:
a = pd.read_csv("waxman_2007_study1_new_answer_retrieved.csv")

condition1 = ['cow_raised_by_pig','cow']
condition2 = ['pigeon_raised_by_turkey', 'pigeon']
condition3 = ['turtle_raised_by_toad','turtle']

acc = []
for cond1, cond2 in condition1, condition2:
  acc.append(extract_acc(a, cond1, cond2))

acc.append(extract_acc(a, condition3[0], condition3[1]))

print(acc)
print(mean(acc))

[0.38, 0.36, 0.46]
0.39999999999999997


### Waxman, 2007 study 2

In [23]:
a = pd.read_csv("waxman_2007_study2_new_answer_retrieved.csv")

condition1 = ['cardinal_raised_by_seagull','cardinal']
condition2 = ['cow_raised_by_pig', 'cow'] 
condition3 = ['turtle_raised_by_toad','turtle']

acc = []
for cond1, cond2 in condition1, condition2:
  acc.append(extract_acc(a, cond1, cond2))

acc.append(extract_acc(a, condition3[0], condition3[1]))

print(acc)
print(mean(acc))

[0.18, 0.38, 0.46]
0.34


### Waxman, 2007 study 3

In [24]:
a = pd.read_csv("waxman_2007_study3_new_answer_retrieved.csv")

condition1 = ['deer_raised_by_sheep','deer']
condition2 = ['deer_raised_by_sheep_blood', 'deer'] 
condition3 = ['rabbit_raised_by_raccoons','rabbit']
condition4 = ['rabbit_raised_by_raccoons_blood','rabbit']

acc = []
for cond1, cond2 in condition1, condition2:
  acc.append(extract_acc(a, cond1, cond2))

for cond1, cond2 in condition3, condition4:
  acc.append(extract_acc(a, cond1, cond2))
print(acc)
print(mean(acc))

[0.16, 0.08, 0.0, 0.0]
0.06


### Barton & K, 1989

In [25]:
a = pd.read_csv('barton_and_komatsu_1997_study1.csv')

condition1 = ['TV_changed_in_functional','no']
condition2 = ['gold_changed_in_molecule', 'no'] 
condition3 = ['mirror_changed_in_functional','no']
condition4 = ['pencil_changed_in_functional','no']
condition5 = ['record_changed_in_functional','no']
condition6 = ['tire_changed_in_functional','no']
condition7 = ['tree_changed_in_molecule','no']
condition8 = ['water_changed_in_molecule','no']

acc = []
for cond1, cond2 in condition1, condition2:
  acc.append(extract_acc(a, cond1, cond2))

for cond1, cond2 in condition3, condition4:
  acc.append(extract_acc(a, cond1, cond2))

for cond1, cond2 in condition5, condition6:
  acc.append(extract_acc(a, cond1, cond2))

for cond1, cond2 in condition7, condition8:
  acc.append(extract_acc(a, cond1, cond2))
  
print(acc)
print(mean(acc))

[0.0, 0.16, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
0.02


### Hampton 2007 - typicality

In [29]:
def extract_acc_column_version3(df, cond, key):
  '''
  Idential to extract_acc(), hard-coded for answer files
  with different response columns
  '''
  acc = []
  acc_list = []
  mae = []

  cond_subset = df[df["condition"] == cond].reset_index()
  for i in range(len(cond_subset)):
    if cond_subset['bloom_response_question_1_3'][i] == key:
      acc.append(1)
    else:
      acc.append(0)

  return mean(acc)

In [30]:
# >50% effect only present in question 1.3 (typicality with both mutation and maturation) 
# and 1.4 (categorization with just maturation)

a = pd.read_csv('hampton_2007_study3_new_question1_3.csv')

condition1 = ['horse_matured_to_zebra','zebra']
condition2 = ['horse_mutated_to_zebra', 'zebra'] 
condition3 = ['hummingbird_matured_to_bee','bee']
condition4 = ['hummingbird_mutated_to_bee','bee']
condition5 = ['snake_matured_to_lizard','lizard']
condition6 = ['snake_mutated_to_lizard','lizard']
condition7 = ['worm_matured_to_snail','snail']
condition8 = ['worm_mutated_to_snail','snail']

acc = []
for cond1, cond2 in condition1, condition2:
  acc.append(extract_acc_column_version3(a, cond1, cond2))

for cond1, cond2 in condition3, condition4:
  acc.append(extract_acc_column_version3(a, cond1, cond2))

for cond1, cond2 in condition5, condition6:
  acc.append(extract_acc_column_version3(a, cond1, cond2))

for cond1, cond2 in condition7, condition8:
  acc.append(extract_acc_column_version3(a, cond1, cond2))
  
print(acc)
print(mean(acc))

[0.94, 0.68, 0.0, 0.0, 0.9, 0.68, 0.98, 0.9]
0.635


### Hampton 2007 - categorization

In [31]:
def extract_acc_column_version4(df, cond, key):
  '''
  Idential to extract_acc(), hard-coded for answer files
  with different response columns
  '''
  acc = []
  acc_list = []
  mae = []

  cond_subset = df[df["condition"] == cond].reset_index()
  for i in range(len(cond_subset)):
    if cond_subset['bloom_response_question_1_4'][i] == key:
      acc.append(1)
    else:
      acc.append(0)

  return mean(acc)

In [33]:
a = pd.read_csv('hampton_2007_study3_new_question1_4.csv')

condition1 = ['horse_matured_to_zebra','zebra']
condition2 = ['hummingbird_matured_to_bee', 'bee'] 
condition3 = ['snake_matured_to_lizard','lizard']
condition4 = ['worm_matured_to_snail','snail']

acc = []
for cond1, cond2 in condition1, condition2:
  acc.append(extract_acc_column_version4(a, cond1, cond2))

for cond1, cond2 in condition3, condition4:
  acc.append(extract_acc_column_version4(a, cond1, cond2))

print(acc)
print(mean(acc))

[0.88, 0.0, 1.0, 0.86]
0.6849999999999999


Hampton, 2007 average

In [34]:
print(mean([0.6849999999999999, 0.635]))

0.6599999999999999
