In [1]:
# Analytical Tools
import numpy as np
import pandas as pd
import scipy
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# General Utilities
import math
import json
import pprint
import itertools
import importlib

# Some settings
from IPython.display import Markdown, display
log = pprint.pprint
space = lambda: print()
printmd = lambda text: display(Markdown(text))
%matplotlib inline

def set_pandas_display_options() -> None:
    """Set pandas display options."""
    display = pd.options.display

    display.max_columns = 1000
    display.max_rows = 1000
    display.max_colwidth = 199
    display.width = 1000
    # display.precision = 2  # set as needed

set_pandas_display_options()

In [7]:
non_insight = ['Mary won\'t eat fish or spinach, Sally won\'t eat fish or green beans, Steve won\'t eat shrimp or potatoes, Alice won\'t eat beef or tomatoes, and Jim won\'t eat fish or tomatoes. If you are willing to host all these people for a dinner party, which items from the following list can you serve: green beans, creamed codfish, roast beef, roast chicken, celery, and lettuce?','Three spies, suspected as double agents, speak as follows when questioned: Albert: "Bertie is a mole." Bertie: "Cedric is a mole." Cedric: "Bertie is lying." Assuming that moles lie, other agents tell the truth, and there is just one mole among the three, who is the mole? Albert, Bertie, or Cedric?',
	'What day follows the day before yesterday if two days from now will be Sunday?',
	'A group of soldiers were standing facing west. Their sergeant shouted at them: Right turn! U-turn! Left turn! U-turn! Right turn! U-turn! Left turn! Right turn! In which direction are they now facing?',
	'There are three playing cards lying face up, side by side. A five is just to the right of a two. A five is just to the left of a two. A spade is just to the left of a club, and a spade is just to the right of a spade. What are the three cards?',
	'The following verse spells out a word. I am a food for you and me. My first letter is in pat but not in tar. My second letter is in stream but not in smart. My third letter is in giraffe but not in fridge. My fourth letter is in treat but not in absent. What is the word that this verse described?',
	'Next week I am going to have lunch with my friend, visit the new art gallery, go to the Social Security office, and have my teeth checked at the dentist. My friend cannot meet me on Wednesday; the Social Security office is closed weekends; the dentist has office hours only on Tuesday, Friday, and Saturday; the art gallery is closed Tuesday, Thursday, and weekends. On what single day can I do everything I have planned?',
	'Paula is trying to get in shape. She wants to do this by climbing stairs. She starts on the fourth floor, climbs up five stories, down seven, up six, down three, and up four again. What floor is she on now?',
	'Ben spent $42 for shoes. This was $14 less than twice what he spent for a shirt. The shirt was $20 less expensive than the jeans. How much was the jeans?',
	'Three people play a game in which one person loses and two people win each round. The one who loses must double the amount of money that each of the other two players has at that time. The three players agree to play three games. At the end of the three games, each player has lost one game and each person has $8. What was the original stake of each player?',
	'Solve for a and b: 3a+6b = 5, and 2a-b=1. In the answer box, provide the values of a and b.',
	'Solve for x, y, and z: x+2y-z = 13, 2x+y+z = 8, and 3x-y-2z = 1. In the answer box, provide the values of x, y, and z.',
	 'Given the numbers 100, 3, 15, 11, and 14, how can you obtain exactly 101. You can use multiplication, division, addition, and subtraction but the only constraint is that you have to use all of these numbers.',
	 'Andrew is half the age of Brian, Brian is three times older than Charles and the sum of their ages is 44 years. How old is Charles?',
	 'Calculate the age of Rob given that Rob\'s father is 45 and he is 15 years older than twice Rob\'s age.'];

insight = ['Joe Fan has no psychic powers but he can tell you the score of any football game before it starts. How?',
'A man in a small town married 20 different women of the same town. All are still living and he never divorced. Polygamy is unlawful but he has broken no law. How can this be?',
'A man bought a horse for $60 and sold it for $70. Then he bought it back for $80 and sold it for $90. How much did he make or lose in the horse trading business?',
'A man is reading a book when the lights go off but even although the room is pitch dark the man goes on reading. How?',
'Someone walked for 20 minutes on the surface of a lake without sinking but without any form of flotation aid. How?',
'A man and his son are in a serious car accident. The father is killed and the son is rushed to the emergency room. Upon arrival, the attending doctor looks at the child and screams: \'This child is my son!\' Who is the doctor?',
'Anthony and Cleopatra are lying dead on the floor in an Egyptian villa. Nearby is a broken bowl. There are no marks on their bodies and they were not poisoned. Not a person was in the villa when they died. How did they die?',
'Two sisters along with a large group of people watched as the queen attacked the king. No one said anything. Why?',
'A woman gave natural birth to two sons who were born on the same hour of the same day of the same month of the same year. But they were not twins and she had no access to a time machine. How could this be?',
'Bob\'s driver\'s license was recently revoked, following a string of severe traffic violations. Just a few days later, a cop spotted the unlicensed Bob yet again, entering a one-way street against the direction of the traffic. This was the same cop who had cited Bob before. However, the cop did not stop him, and just gave him a smile. Why?',
'Mr. Hardy slipped and fell off a sixty-foot ladder onto the concrete floor below. However, he did not injure himself in any way. How is this possible?',
'A murderer is condemned to death. He has to choose among three rooms. The first is full of raging fires, the second is full of assassins with loaded guns, and the third is full of lions that haven\'t eaten in 3 years. Which room is safest for him?',
'Alex and Casey are blood relatives of Bobbie. However, Alex and Casey are not blood relatives at all. How is this possible?',
'Sid Shady was working for a large construction company that was very concerned about employee theft. Someone tipped company security that Shady was the man to watch. Each night, he passed through security with a wheelbarrow full of scrap lumber, discarded electrical wires, and chunks of concrete. The security guards checked the contents daily, but could find nothing of value. What was Shady stealing?',
'Our basketball team won 72-49, and yet not one man scored as much as a single point. How is that possible?'];
 
anagram = ['Solve this anagram: AXPLINE', 
						'Solve this anagram: LUBMEJD', 
						'Solve this anagram: AALRYS',
						'Solve this anagram: EURADBL',
						'Solve this anagram: NAYUGHT',
						'Solve this anagram: HSOADW',
						'Solve this anagram: DONRO',
						'Solve this anagram: MARNOD',
						'Solve this anagram: HAKMOCM',
						'Solve this anagram: RDFUNE',
						'Solve this anagram: EVANG',
						'Solve this anagram: SAOSI',
						'Solve this anagram: IEGWHT',
						'Solve this anagram: AYKAWLJ',
						'Solve this anagram: NCRBOA']

solutions2 = 'explain|jumbled|salary|durable|naughty|shadow|donor|random|hammock|vegan|refund|oasis|weight|jaywalk|carbon'

In [8]:
with open('pilot1.json') as json_file:
    data = json.load(json_file)


In [9]:
questions_non_insight = []
answers_non_insight = []
confidence_non_insight = []
difficulty_non_insight = []
aha_non_insight = []

questions_insight = []
answers_insight = []
confidence_insight = []
difficulty_insight = []
aha_insight = []

questions_anagram = []
answers_anagram = []
confidence_anagram = []
difficulty_anagram = []
aha_anagram = []

for i in range(198): #go thru the subjects
    if len(data[i]['data']) > 3: #only look at subjects that finished the task (and not just started without finishing)
        if int(data[i]['data'][3]['responses'][7]) == 6: #passed attention/quiz check, hooray!
            
            for j in range(6): #go through the 6 problems shown to people
                question = data[i]['data'][0]['problems_seen'][j] #store the question seen by the participant
                answer = data[i]['data'][j*3+5]['responses'] #store people's answers to the questions here
                response = data[i]['data'][j*3+6]['responses'] #store people's responses to the questions here
                
                if question in non_insight:   
                    questions_non_insight.append(question)
                    answers_non_insight.append(answer)
                    confidence_non_insight.append(int(response[6])+1)
                    difficulty_non_insight.append(int(response[13])+1)
                    aha_non_insight.append(int(response[20])+1)
                    
                elif question in insight:
                    questions_insight.append(question)
                    answers_insight.append(answer)
                    confidence_insight.append(int(response[6])+1)
                    difficulty_insight.append(int(response[13])+1)
                    aha_insight.append(int(response[20])+1)
                
                elif question in anagram:
                    questions_anagram.append(question)
                    answers_anagram.append(answer)
                    confidence_anagram.append(int(response[6])+1)
                    difficulty_anagram.append(int(response[13])+1)
                    aha_anagram.append(int(response[20])+1)
        else:     
            print('bad subject!')

bad subject!
bad subject!
bad subject!
bad subject!


Convert data into df

In [10]:
d = {'questions':questions_non_insight,
     'answers':answers_non_insight, 
     'confidence': confidence_non_insight, 
     'difficulty': difficulty_non_insight, 
     'aha': aha_non_insight}

d2 = {'questions':questions_insight,
     'answers':answers_insight, 
     'confidence': confidence_insight, 
     'difficulty': difficulty_insight, 
     'aha': aha_insight}

d3 = {'questions':questions_anagram,
     'answers':answers_anagram, 
     'confidence': confidence_anagram, 
     'difficulty': difficulty_anagram, 
     'aha': aha_anagram}

df_non_insight = pd.DataFrame(d)
df_insight = pd.DataFrame(d2)
df_anagram = pd.DataFrame(d3)

#Analyze only those responses for which people's confidence is greater than 5
df_non_insight_filtered = df_non_insight.loc[(df_non_insight['confidence'] >= 5)]
df_insight_filtered = df_insight.loc[(df_insight['confidence'] >= 5)]
df_anagram_filtered = df_anagram.loc[(df_anagram['confidence'] >= 5)]

print(len(df_non_insight.index), len(df_non_insight_filtered.index))
print(len(df_insight.index), len(df_insight_filtered.index))
print(len(df_anagram.index), len(df_anagram_filtered.index))

170 90
170 77
170 122


In [18]:
df_anagram_filtered = df_anagram.loc[(df_anagram['answers'].str.contains(solutions2, case = False)==True)]
df3 = df_anagram.groupby(['questions']).agg(['mean', 'count'])
df4 = df_anagram_filtered.groupby(['questions']).agg(['mean', 'count'])
print('% Solved')
print(df4['difficulty']['count']/df3['difficulty']['count'])
print('Counts')
print(df4['difficulty']['count'])
print('Mean difficulty')
print(df4['difficulty']['mean'])

% Solved
questions
Solve this anagram: AALRYS     0.800000
Solve this anagram: AXPLINE    0.538462
Solve this anagram: AYKAWLJ    0.545455
Solve this anagram: DONRO      0.750000
Solve this anagram: EURADBL    0.666667
Solve this anagram: EVANG      0.444444
Solve this anagram: HAKMOCM    0.866667
Solve this anagram: HSOADW     0.818182
Solve this anagram: IEGWHT     0.769231
Solve this anagram: LUBMEJD    0.818182
Solve this anagram: MARNOD     0.857143
Solve this anagram: NAYUGHT    1.000000
Solve this anagram: NCRBOA     0.555556
Solve this anagram: RDFUNE     0.714286
Solve this anagram: SAOSI      0.700000
Name: count, dtype: float64
Counts
questions
Solve this anagram: AALRYS      8
Solve this anagram: AXPLINE     7
Solve this anagram: AYKAWLJ     6
Solve this anagram: DONRO       9
Solve this anagram: EURADBL     6
Solve this anagram: EVANG       8
Solve this anagram: HAKMOCM    13
Solve this anagram: HSOADW      9
Solve this anagram: IEGWHT     10
Solve this anagram: LUBMEJD   

In [6]:
df1 = df_non_insight.groupby(['questions', 'answers'], as_index=False).agg(['mean', 'count'])
df2 = df_insight.groupby(['questions', 'answers']).agg(['mean', 'count'])
df3 = df_anagram.groupby(['questions', 'answers']).agg(['mean', 'count'])

In [20]:
df1 = df_non_insight_filtered.groupby(['questions'], as_index=False).agg(['mean', 'count'])
df2 = df_insight_filtered.groupby(['questions'], as_index=False).agg(['mean', 'count'])
df3 = df_anagram_filtered.groupby(['questions'], as_index=False).agg(['mean', 'count'])
df3

Unnamed: 0_level_0,confidence,confidence,difficulty,difficulty,aha,aha
Unnamed: 0_level_1,mean,count,mean,count,mean,count
questions,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Solve this anagram: AALRYS,6.875,8,2.625,8,5.125,8
Solve this anagram: AXPLINE,6.714286,7,2.285714,7,3.142857,7
Solve this anagram: AYKAWLJ,6.0,6,3.5,6,4.333333,6
Solve this anagram: DONRO,6.9,10,1.8,10,4.7,10
Solve this anagram: EURADBL,7.0,6,2.333333,6,4.666667,6
Solve this anagram: EVANG,6.625,8,2.875,8,3.875,8
Solve this anagram: HAKMOCM,6.846154,13,2.538462,13,5.076923,13
Solve this anagram: HSOADW,6.555556,9,2.444444,9,3.888889,9
Solve this anagram: IEGWHT,6.8,10,1.4,10,4.8,10
Solve this anagram: LUBMEJD,6.625,8,3.0,8,5.25,8


In [75]:
df2

Unnamed: 0_level_0,confidence,confidence,difficulty,difficulty,aha,aha
Unnamed: 0_level_1,mean,count,mean,count,mean,count
questions,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
"A man and his son are in a serious car accident. The father is killed and the son is rushed to the emergency room. Upon arrival, the attending doctor looks at the child and screams: 'This child is my son!' Who is the doctor?",6.2,5,2.8,5,4.4,5
A man bought a horse for $60 and sold it for $70. Then he bought it back for $80 and sold it for $90. How much did he make or lose in the horse trading business?,6.111111,9,2.777778,9,1.777778,9
A man in a small town married 20 different women of the same town. All are still living and he never divorced. Polygamy is unlawful but he has broken no law. How can this be?,6.666667,3,2.0,3,6.0,3
A man is reading a book when the lights go off but even although the room is pitch dark the man goes on reading. How?,6.0,5,2.8,5,3.6,5
"A murderer is condemned to death. He has to choose among three rooms. The first is full of raging fires, the second is full of assassins with loaded guns, and the third is full of lions that haven't eaten in 3 years. Which room is safest for him?",6.666667,9,2.666667,9,3.777778,9
A woman gave natural birth to two sons who were born on the same hour of the same day of the same month of the same year. But they were not twins and she had no access to a time machine. How could this be?,6.5,2,3.0,2,6.5,2
"Alex and Casey are blood relatives of Bobbie. However, Alex and Casey are not blood relatives at all. How is this possible?",6.0,3,3.0,3,4.333333,3
Joe Fan has no psychic powers but he can tell you the score of any football game before it starts. How?,6.333333,6,2.666667,6,5.333333,6
"Mr. Hardy slipped and fell off a sixty-foot ladder onto the concrete floor below. However, he did not injure himself in any way. How is this possible?",6.2,10,2.5,10,2.6,10
"Our basketball team won 72-49, and yet not one man scored as much as a single point. How is that possible?",6.6,5,1.4,5,5.2,5


In [76]:
df3

Unnamed: 0_level_0,confidence,confidence,difficulty,difficulty,aha,aha
Unnamed: 0_level_1,mean,count,mean,count,mean,count
questions,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
Solve this anagram: AALRYS,6.875,8,2.625,8,5.125,8
Solve this anagram: AXPLINE,6.714286,7,2.285714,7,3.142857,7
Solve this anagram: AYKAWLJ,6.0,6,3.5,6,4.333333,6
Solve this anagram: DONRO,6.9,10,1.8,10,4.7,10
Solve this anagram: EURADBL,7.0,6,2.333333,6,4.666667,6
Solve this anagram: EVANG,6.625,8,2.875,8,3.875,8
Solve this anagram: HAKMOCM,6.846154,13,2.538462,13,5.076923,13
Solve this anagram: HSOADW,6.555556,9,2.444444,9,3.888889,9
Solve this anagram: IEGWHT,6.8,10,1.4,10,4.8,10
Solve this anagram: LUBMEJD,6.625,8,3.0,8,5.25,8


In [66]:
np.corrcoef(df3['difficulty'], df3['aha'])

  c = cov(x, y, rowvar, dtype=dtype)
  c *= np.true_divide(1, fact)
  c *= np.true_divide(1, fact)


array([[nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan],
       [nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan, nan,
        nan, nan, nan, nan, nan, nan, nan, nan, nan,

Analyze only those responses for which people's confidence is greater than 5

In [67]:
print(df1["aha"].mean(),df2["aha"].mean(), df3["aha"].mean())
print(df1["difficulty"].mean(),df2["difficulty"].mean(), df3["difficulty"].mean())
print(df1["confidence"].mean(),df2["confidence"].mean(), df3["confidence"].mean())


mean    2.737908
dtype: float64 mean    4.225486
dtype: float64 mean    4.44368
dtype: float64
mean    2.811825
dtype: float64 mean    2.495726
dtype: float64 mean    2.603969
dtype: float64
mean    6.11589
dtype: float64 mean    6.214841
dtype: float64 mean    6.682358
dtype: float64


In [69]:
print(df_non_insight["aha"].mean(),df_insight["aha"].mean(), df_anagram["aha"].mean())
print(df_non_insight["difficulty"].mean(),df_insight["difficulty"].mean(), df_anagram["difficulty"].mean())
print(df_non_insight["confidence"].mean(),df_insight["confidence"].mean(), df_anagram["confidence"].mean())

2.1294117647058823 2.6882352941176473 3.5647058823529414
3.976470588235294 3.8529411764705883 3.4529411764705884
4.235294117647059 3.9176470588235293 5.170588235294118


In [71]:
print(df_non_insight_filtered["aha"].mean(),df_insight_filtered["aha"].mean(), df_anagram_filtered["aha"].mean())
print(df_non_insight_filtered["difficulty"].mean(),df_insight_filtered["difficulty"].mean(), df_anagram_filtered["difficulty"].mean())
print(df_non_insight_filtered["confidence"].mean(),df_insight_filtered["confidence"].mean(), df_anagram_filtered["confidence"].mean())

2.7777777777777777 3.6493506493506493 4.450819672131147
2.6333333333333333 2.4805194805194803 2.4836065573770494
6.177777777777778 6.207792207792208 6.704918032786885


Plot distribution of curiosity ratings below