In [1]:
# Analytical Tools
import numpy as np
import pandas as pd
import scipy
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# General Utilities
import math
import json
import pprint
import itertools
import importlib

# Some settings
from IPython.display import Markdown, display
# Short aliases used for quick inspection in later cells.
log = pprint.pprint


def space():
    """Print a single empty line."""
    print()


def printmd(text):
    """Render `text` as Markdown in the cell output."""
    return display(Markdown(text))
%matplotlib inline

def set_pandas_display_options() -> None:
    """Raise pandas' display limits (columns, rows, column width, total width)."""
    limits = {
        "display.max_columns": 1000,
        "display.max_rows": 1000,
        "display.max_colwidth": 199,
        "display.width": 1000,
        # "display.precision": 2,  # set as needed
    }
    for option_name, option_value in limits.items():
        pd.set_option(option_name, option_value)

set_pandas_display_options()

In [2]:
# Problem texts (qN) and their answer keys (sN).
#
# * Each qN is compared against the problem text stored with every trial
#   (`question in qN` in the collection loop), so these literals must stay exactly
#   as they are -- including odd spacing and typos -- or trials will stop matching.
# * Each sN is passed to `Series.str.contains(sN, case=False)`: it is a
#   case-insensitive regular expression in which `|` separates accepted alternatives,
#   and it matches anywhere inside the answer (it is not anchored).
#
# NOTE(review): because matching is unanchored, short keys are very permissive --
# e.g. 'B' also matches "Albert", and '2' also matches "12". Confirm this is intended.
q1 = 'Three spies, suspected as double agents, speak as follows when questioned: Albert: "Bertie is a mole." Bertie: "Cedric is a mole." Cedric: "Bertie is lying." Assuming that moles lie, other agents tell the truth, and there is just one mole among the three, who is the mole? Albert, Bertie, or Cedric?'
s1 = 'Bertie|B'

q2 = 'What day follows the day before yesterday if two days from now will be Sunday?'
s2 = 'Thursday'

# NOTE(review): working the puzzle through as written, the only assignment with exactly
# one true statement is "A did it" (then only B's statement is true); if B did it, the
# statements of A, C and D are all true. The key below is 'B' -- confirm which is intended.
q3 = 'The police were convinced that either A, B, C, or D had committed a crime. Each of the suspects, in turn, made a statement.  However, only one of the four statements was true. A said, \"I didn\'t do it.\" B said, \"A is lying.\", C said, \"B is lying.\", D said, \"B did it\". Who committed the crime?'
s3 = 'B'

q4 = 'A farmer has 19 sheep on his land. One day, a big storm hits, and seven sheep run away. The next day, four sheep return and find their way home. The next day, another big storm hits, and six sheep run away. How many sheep does the farmer have left?'
s4 = '10|ten'

q5 = 'Next week I am going to have lunch with my friend, visit the new art gallery, go to the Social Security office, and have my teeth checked at the dentist. My friend cannot meet me on Wednesday; the Social Security office is closed weekends; the dentist has office hours only on Tuesday, Friday, and Saturday; the art gallery is closed Tuesday, Thursday, and weekends. On what single day can I do everything I have planned?'
s5 = 'Friday'

q6 = 'Lana has 2 bags with 2 marbles in each bag. Markus has 2 bags with 3 marbles in each bag. How many more marbles does Markus have?'
s6 = '2|two'

# The leading space in q7 is part of the literal; presumably the stored problem text
# either has it too or is a substring of this one -- verify before "fixing" it.
q7 = ' Mary won\'t eat fish or spinach, Sally won\'t eat fish or green beans, Steve won\'t eat shrimp or potatoes, Alice won\'t eat beef or tomatoes, and Jim won\'t eat fish or tomatoes. If you are willing to host them for a small party, which item from the following list can you serve: green beans, creamed codfish, tomatoes, celery, and roast beef?'
s7 = 'celery'

q8 = 'Ben spent $42 for shoes. This was $14 less than what he spent for a shirt and twice more expensive than the tie. The tie was $20 cheaper than the jeans. How much was the jeans?'
s8 = '41'

q9 ='Eric is trying to get in shape. He wants to do this by climbing stairs. He starts on the fourth floor, climbs up five stories, down seven, up six, down three, up four, and then up two again. What floor is he on now?'
s9 = '11|eleven|11th|eleventh'

q10 = 'Andrew is half the age of Brian, Brian is three times older than Charles and the sum of their ages is 44 years. How old is Charles?'
s10 = '8|eight'

q11 = 'Before Gary injured his arm, he was able to type 9 words per minute on his phone. After he injured his arm, he had to use his left arm for a while, and he could only type 6 words per minute on his phone. What is the difference between the number of words he could type in 5 minutes before and after he broke his arm?'
s11 = '15|fifteen'

q12 = 'Lauren\'s chicken laid an average of six eggs per week. Lauren sold those eggs for $3 per dozen. How much money did she collect in four weeks if she sold all her eggs?'
# In a regular expression `$` is the end-of-string anchor, so the `$6` alternative can
# never match; this is harmless only because the plain `6` alternative already covers it.
s12 = '6|six|$6|6 dollars'

q13 = 'Carrie grew three inches taller last year, and five inches taller this year. How many inches taller did she grow in the last two years?'
s13 = 'eight inches|8|eight|8 inches'

q14 = 'The book store is very busy today. There are 25 children listening to a story. 35 people are shopping for books. 18 people are at the checkout counter. How many people are at the bookstore?'
# NOTE(review): 25 + 35 + 18 = 78; the key also accepts 80 -- confirm that is deliberate.
s14 = '78|80'

q15 = 'Given a source of unlimited water and four containers of different capacities -- 14, 20, 24, and 12 litres -- how would you obtain exactly 50 litres of water?'
# NOTE(review): this key accepts any answer that mentions at least one of 14, 24 or 12
# (14 + 24 + 12 = 50), not necessarily all three.
s15 = '14|24|12'

q16 ='Smith is a butcher and president of the street storekeepers\' committee, which also includes the grocer, the baker, and the pharmacist. They all sit around a table. \n\n Smith sits on Jones\' left. Molly sits at the grocer\'s right. Bailey, who faces Jones, is not the pharmacist. \n\n Who is Molly? The butcher, grocer, baker, or the pharmacist?'
s16 = 'pharmacist'
q17 = 'Lebrun, Lenoir, and Leblanc are, not necessarily in that order, the accountant, warehouseman, and traveling salesman of a firm. The salesman is unmarried. Both Lebrun and Lenoir are married. Lebrun is not the accountant. What job does Lenoir have? Accountant, warehouseman, or salesman?'
s17 = 'accountant'

q18 = 'Lilah\'s band had practiced 24 songs. At a performance, they played 7 songs in their first set. In their second set, they played 8 songs. How many songs did they play for their third and final set?'
s18 = '9'

q19 = 'Ricky has a magic money box. Every day the box doubles the number of coins placed inside of it. Ricky put in 3 pennies on Monday. He looked inside his box on Friday night. How many pennies did Ricky see?'
s19 = '48'

q20 = 'Erin had 24 marbles and Evan had 3 marbles. Erin gave some of her marbles to Evan. Now Erin has exactly double the number of marbles that Evan has. How many marbles did Erin give to Evan?'
s20 = 'six|6'

q21 = 'Gary had 100 dollars. He went to the grocery store and bought three pints of ice cream for 8 dollars each. Then, he went to the farmer\'s market and bought four dozen eggs for 5 dollars per dozen. Finally, he went to a book store and bought three books for 3 dollars each. How much money does he have left?'
s21 = '47|forty seven'

q22 = 'Ben spent $40 for shoes. This was $14 more than what he spent for a shirt. The tie was $10 cheaper than the shirt. How much was the tie?'
s22 = '16|sixteen'

q23 = 'Eric is trying to get in shape. He wants to do this by climbing stairs. He starts on the third floor, climbs up five stories, down seven, up six, down three, up four, down four, up five, down three, and then up two again. What floor is he on now?'
s23 = 'eighth|8|8th|eight'

q24 = 'Andrew is twice the age of Brian, Charles is three times older than Brian, and the sum of their ages is 60 years. How old is Charles?'
s24 = '30|thirty'

q25 = 'Before Gary injured his arm, he was able to type 9 words per minute on his phone. After he injured his arm, he had to use his left arm for a while, and he could only type 6 words per minute on his phone. What is the difference between the number of words he could type in 8 minutes before and after he broke his arm?'
s25 = '24|twenty four'

q26 = 'Lauren had nine chickens. Each chicken laid an average of six eggs per week. Lauren sold those eggs for $3 per dozen. How much money did she collect in four weeks if she sold all her eggs?'
s26 = '54|fifty four'

q27 = 'Carrie grew three inches taller last year, and five inches taller this year. Lana was four inches taller than Carrie two years ago. Lana then grew five inches taller last year, and seven inches taller this year. How many inches taller is Lana compared to Carrie now?'
s27 = '8|eight'

# "Ledoi" (vs. "Ledroi") is spelled this way in the literal; it is runtime text used for
# matching, so do not correct it here without checking the stored problem text.
q28 = 'Lebrun, Lenoir, Ledroi, and Leblanc are, not necessarily in that order, the accountant, cashier, warehouseman, and  salesman of a firm. The salesman has never been married. Lebrun, Lenoir, and Ledoi are married. Ledroi is the cashier. Lebrun is not the accountant. What job does Lenoir have? Accountant, cashier, warehouseman, or salesman?'
s28 = 'accountant'

q29 = 'Erin had 40 marbles, Ben had 8 marbles, and Chris had 3 marbles. Erin gave 16 marbles to Ben. Ben then gave some of his marbles to Chris. Now Ben has exactly double the number of marbles that Chris has. How many marbles did Ben give to Chris?'
s29 = 'six|6'

q30 = 'The book store is very busy today. There are 25 children listening to a story narrated by 1 person. 35 people are shopping for books. 20 people are studying. 18 people are at the checkout counter. There are 5 cashiers at the counter. How many people are at the bookstore in total?'
s30 = '104'

# Anagram prompts: a fixed lead-in followed by the scrambled letters. The collection
# loop tests `question in anagram`, so every prompt must match the stored text verbatim.
_ANAGRAM_LEAD_IN = 'Solve this anagram: '

# The first 20 scrambles are distinct words; the last 10 are alternative scrambles of
# words that already appear among the first 20.
_SCRAMBLED_LETTERS = (
    'AXPLINE', 'LUBMEJD', 'AALRYS', 'EURADBL', 'NAYTUGH',
    'HSOAWD', 'DONRO', 'PMUOI', 'HAKMOCM', 'RDUNEF',
    'NITGA', 'SAOSI', 'IEGWHT', 'AYKAWLJ', 'NCRBOA',
    'MARNOD', 'EGANV', 'CRLAIGE', 'MATCILE', 'ERLKC',
    'HKAMOMC', 'ALIPNXE', 'EGNAV', 'UERADBL', 'NCBROA',
    'DRUNEF', 'ANYTUGH', 'AHSDOW', 'AYKWALJ', 'TINAG',
)

anagram = [_ANAGRAM_LEAD_IN + letters for letters in _SCRAMBLED_LETTERS]

# Accepted anagram solutions as one '|'-separated string.
solutions2 = 'explain|jumbled|salary|durable|naughty|shadow|donor|opium|hammock|refund|funder|giant|oasis|weight|jaywalk|carbon|random|vegan|glacier|climate|clerk'

In [3]:
# Load the recorded sessions from disk. JSON text is UTF-8 by specification, so the
# encoding is stated explicitly instead of depending on the platform's default text
# encoding (which is not UTF-8 on every system).
with open('main3.json', encoding='utf-8') as json_file:
    data = json.load(json_file)


Collect the data into separate structures: one per non-insight problem (so that answers can be checked problem by problem) and a single combined one for all anagrams.

In [4]:
def _fresh_lists(count=6):
    """Return `count` distinct, empty lists (one per measure recorded for a problem)."""
    return tuple([] for _ in range(count))


# Six parallel accumulators per problem: the question text, the typed answer, the
# confidence / difficulty / aha ratings, and the response time.
questions_q1, answers_q1, confidence_q1, difficulty_q1, aha_q1, rts_q1 = _fresh_lists()
questions_q2, answers_q2, confidence_q2, difficulty_q2, aha_q2, rts_q2 = _fresh_lists()
questions_q3, answers_q3, confidence_q3, difficulty_q3, aha_q3, rts_q3 = _fresh_lists()
questions_q4, answers_q4, confidence_q4, difficulty_q4, aha_q4, rts_q4 = _fresh_lists()
questions_q5, answers_q5, confidence_q5, difficulty_q5, aha_q5, rts_q5 = _fresh_lists()
questions_q6, answers_q6, confidence_q6, difficulty_q6, aha_q6, rts_q6 = _fresh_lists()
questions_q7, answers_q7, confidence_q7, difficulty_q7, aha_q7, rts_q7 = _fresh_lists()
questions_q8, answers_q8, confidence_q8, difficulty_q8, aha_q8, rts_q8 = _fresh_lists()
questions_q9, answers_q9, confidence_q9, difficulty_q9, aha_q9, rts_q9 = _fresh_lists()
questions_q10, answers_q10, confidence_q10, difficulty_q10, aha_q10, rts_q10 = _fresh_lists()
questions_q11, answers_q11, confidence_q11, difficulty_q11, aha_q11, rts_q11 = _fresh_lists()
questions_q12, answers_q12, confidence_q12, difficulty_q12, aha_q12, rts_q12 = _fresh_lists()
questions_q13, answers_q13, confidence_q13, difficulty_q13, aha_q13, rts_q13 = _fresh_lists()
questions_q14, answers_q14, confidence_q14, difficulty_q14, aha_q14, rts_q14 = _fresh_lists()
questions_q15, answers_q15, confidence_q15, difficulty_q15, aha_q15, rts_q15 = _fresh_lists()
questions_q16, answers_q16, confidence_q16, difficulty_q16, aha_q16, rts_q16 = _fresh_lists()
questions_q17, answers_q17, confidence_q17, difficulty_q17, aha_q17, rts_q17 = _fresh_lists()
questions_q18, answers_q18, confidence_q18, difficulty_q18, aha_q18, rts_q18 = _fresh_lists()
questions_q19, answers_q19, confidence_q19, difficulty_q19, aha_q19, rts_q19 = _fresh_lists()
questions_q20, answers_q20, confidence_q20, difficulty_q20, aha_q20, rts_q20 = _fresh_lists()
questions_q21, answers_q21, confidence_q21, difficulty_q21, aha_q21, rts_q21 = _fresh_lists()
questions_q22, answers_q22, confidence_q22, difficulty_q22, aha_q22, rts_q22 = _fresh_lists()
questions_q23, answers_q23, confidence_q23, difficulty_q23, aha_q23, rts_q23 = _fresh_lists()
questions_q24, answers_q24, confidence_q24, difficulty_q24, aha_q24, rts_q24 = _fresh_lists()
questions_q25, answers_q25, confidence_q25, difficulty_q25, aha_q25, rts_q25 = _fresh_lists()
questions_q26, answers_q26, confidence_q26, difficulty_q26, aha_q26, rts_q26 = _fresh_lists()
questions_q27, answers_q27, confidence_q27, difficulty_q27, aha_q27, rts_q27 = _fresh_lists()
questions_q28, answers_q28, confidence_q28, difficulty_q28, aha_q28, rts_q28 = _fresh_lists()
questions_q29, answers_q29, confidence_q29, difficulty_q29, aha_q29, rts_q29 = _fresh_lists()
questions_q30, answers_q30, confidence_q30, difficulty_q30, aha_q30, rts_q30 = _fresh_lists()

# All anagram prompts share one set of accumulators.
(questions_anagram, answers_anagram, confidence_anagram,
 difficulty_anagram, aha_anagram, rts_anagram) = _fresh_lists()

# Counts subjects whose session holds more than three recorded entries.
num_subs = 0

# Routing table, tried top to bottom; the first row whose first element contains the
# presented question wins. For q1..q30 the first element is a string, so `in` is a
# substring test; for `anagram` it is a list, so `in` is an exact-membership test.
# Remaining elements: the lists that receive the question, answer, response time,
# confidence, difficulty and aha values for that problem.
_PROBLEM_ROUTES = [
    (q1, questions_q1, answers_q1, rts_q1, confidence_q1, difficulty_q1, aha_q1),
    (q2, questions_q2, answers_q2, rts_q2, confidence_q2, difficulty_q2, aha_q2),
    (q3, questions_q3, answers_q3, rts_q3, confidence_q3, difficulty_q3, aha_q3),
    (q4, questions_q4, answers_q4, rts_q4, confidence_q4, difficulty_q4, aha_q4),
    (q5, questions_q5, answers_q5, rts_q5, confidence_q5, difficulty_q5, aha_q5),
    (q6, questions_q6, answers_q6, rts_q6, confidence_q6, difficulty_q6, aha_q6),
    (q7, questions_q7, answers_q7, rts_q7, confidence_q7, difficulty_q7, aha_q7),
    (q8, questions_q8, answers_q8, rts_q8, confidence_q8, difficulty_q8, aha_q8),
    (q9, questions_q9, answers_q9, rts_q9, confidence_q9, difficulty_q9, aha_q9),
    (q10, questions_q10, answers_q10, rts_q10, confidence_q10, difficulty_q10, aha_q10),
    (q11, questions_q11, answers_q11, rts_q11, confidence_q11, difficulty_q11, aha_q11),
    (q12, questions_q12, answers_q12, rts_q12, confidence_q12, difficulty_q12, aha_q12),
    (q13, questions_q13, answers_q13, rts_q13, confidence_q13, difficulty_q13, aha_q13),
    (q14, questions_q14, answers_q14, rts_q14, confidence_q14, difficulty_q14, aha_q14),
    (q15, questions_q15, answers_q15, rts_q15, confidence_q15, difficulty_q15, aha_q15),
    (q16, questions_q16, answers_q16, rts_q16, confidence_q16, difficulty_q16, aha_q16),
    (q17, questions_q17, answers_q17, rts_q17, confidence_q17, difficulty_q17, aha_q17),
    (q18, questions_q18, answers_q18, rts_q18, confidence_q18, difficulty_q18, aha_q18),
    (q19, questions_q19, answers_q19, rts_q19, confidence_q19, difficulty_q19, aha_q19),
    (q20, questions_q20, answers_q20, rts_q20, confidence_q20, difficulty_q20, aha_q20),
    (q21, questions_q21, answers_q21, rts_q21, confidence_q21, difficulty_q21, aha_q21),
    (q22, questions_q22, answers_q22, rts_q22, confidence_q22, difficulty_q22, aha_q22),
    (q23, questions_q23, answers_q23, rts_q23, confidence_q23, difficulty_q23, aha_q23),
    (q24, questions_q24, answers_q24, rts_q24, confidence_q24, difficulty_q24, aha_q24),
    (q25, questions_q25, answers_q25, rts_q25, confidence_q25, difficulty_q25, aha_q25),
    (q26, questions_q26, answers_q26, rts_q26, confidence_q26, difficulty_q26, aha_q26),
    (q27, questions_q27, answers_q27, rts_q27, confidence_q27, difficulty_q27, aha_q27),
    (q28, questions_q28, answers_q28, rts_q28, confidence_q28, difficulty_q28, aha_q28),
    (q29, questions_q29, answers_q29, rts_q29, confidence_q29, difficulty_q29, aha_q29),
    (q30, questions_q30, answers_q30, rts_q30, confidence_q30, difficulty_q30, aha_q30),
    (anagram, questions_anagram, answers_anagram, rts_anagram,
     confidence_anagram, difficulty_anagram, aha_anagram),
]


def _store_trial(question, answer, rt, response):
    """Append one trial to the lists of the first matching problem.

    A trial whose question matches no row of the routing table is dropped silently.

    The three ratings are read as single characters at positions 6, 13 and 20 of
    `response` and shifted by +1. Presumably `response` is a serialized string such as
    '{"Q0":3,"Q1":2,"Q2":4}' holding 0-based single-digit ratings -- TODO confirm; a
    multi-digit rating would shift these positions.
    """
    for matcher, seen, typed, times, conf, diff, aha in _PROBLEM_ROUTES:
        if question in matcher:
            seen.append(question)
            typed.append(answer)
            times.append(rt)
            conf.append(int(response[6]) + 1)
            diff.append(int(response[13]) + 1)
            aha.append(int(response[20]) + 1)
            return


for subject in data:  # go through the subjects
    entries = subject['data']
    if len(entries) <= 3:  # only look at subjects that finished the task
        continue
    # NOTE(review): entries up to index 15 are read below, so a session with more than
    # 3 but fewer than 16 entries would raise IndexError.
    num_subs = num_subs + 1

    # An attention/quiz check -- int(entries[3]['responses'][7]) == 4 -- was written
    # for this point but is disabled, so every counted subject is processed.
    for j in range(4):  # go through the 4 problems shown to people
        question = entries[0]['problems_seen'][j]  # the question seen by the participant
        answer = entries[j*3 + 5]['responses']  # the answer typed for that question
        rt = entries[j*3 + 5]['rt'] / 1000  # response time (presumably ms -> s)
        response = entries[j*3 + 6]['responses']  # the ratings given after answering

        if rt > 500:  # overly slow trials are reported and skipped
            print('bad')
            continue

        _store_trial(question, answer, rt, response)

print('we recruited ', num_subs, ' subjects')

bad
bad
bad
bad
bad
bad
bad
bad
bad
bad
bad
bad
bad
bad
bad
we recruited  850  subjects


Convert the collected data into DataFrames and keep only the trials with a correct response.

In [5]:
def _as_columns(questions, answers, rts, confidence, difficulty, aha):
    """Bundle one problem's six parallel lists into a column-name -> list mapping."""
    return {
        'questions': questions,
        'answers': answers,
        'rts': rts,
        'confidence': confidence,
        'difficulty': difficulty,
        'aha': aha,
    }


# One mapping per problem, in the column order the DataFrames will have.
d1 = _as_columns(questions_q1, answers_q1, rts_q1, confidence_q1, difficulty_q1, aha_q1)
d2 = _as_columns(questions_q2, answers_q2, rts_q2, confidence_q2, difficulty_q2, aha_q2)
d3 = _as_columns(questions_q3, answers_q3, rts_q3, confidence_q3, difficulty_q3, aha_q3)
d4 = _as_columns(questions_q4, answers_q4, rts_q4, confidence_q4, difficulty_q4, aha_q4)
d5 = _as_columns(questions_q5, answers_q5, rts_q5, confidence_q5, difficulty_q5, aha_q5)
d6 = _as_columns(questions_q6, answers_q6, rts_q6, confidence_q6, difficulty_q6, aha_q6)
d7 = _as_columns(questions_q7, answers_q7, rts_q7, confidence_q7, difficulty_q7, aha_q7)
d8 = _as_columns(questions_q8, answers_q8, rts_q8, confidence_q8, difficulty_q8, aha_q8)
d9 = _as_columns(questions_q9, answers_q9, rts_q9, confidence_q9, difficulty_q9, aha_q9)
d10 = _as_columns(questions_q10, answers_q10, rts_q10, confidence_q10, difficulty_q10, aha_q10)
d11 = _as_columns(questions_q11, answers_q11, rts_q11, confidence_q11, difficulty_q11, aha_q11)
d12 = _as_columns(questions_q12, answers_q12, rts_q12, confidence_q12, difficulty_q12, aha_q12)
d13 = _as_columns(questions_q13, answers_q13, rts_q13, confidence_q13, difficulty_q13, aha_q13)
d14 = _as_columns(questions_q14, answers_q14, rts_q14, confidence_q14, difficulty_q14, aha_q14)
d15 = _as_columns(questions_q15, answers_q15, rts_q15, confidence_q15, difficulty_q15, aha_q15)
d16 = _as_columns(questions_q16, answers_q16, rts_q16, confidence_q16, difficulty_q16, aha_q16)
d17 = _as_columns(questions_q17, answers_q17, rts_q17, confidence_q17, difficulty_q17, aha_q17)
d18 = _as_columns(questions_q18, answers_q18, rts_q18, confidence_q18, difficulty_q18, aha_q18)
d19 = _as_columns(questions_q19, answers_q19, rts_q19, confidence_q19, difficulty_q19, aha_q19)
d20 = _as_columns(questions_q20, answers_q20, rts_q20, confidence_q20, difficulty_q20, aha_q20)
d21 = _as_columns(questions_q21, answers_q21, rts_q21, confidence_q21, difficulty_q21, aha_q21)
d22 = _as_columns(questions_q22, answers_q22, rts_q22, confidence_q22, difficulty_q22, aha_q22)
d23 = _as_columns(questions_q23, answers_q23, rts_q23, confidence_q23, difficulty_q23, aha_q23)
d24 = _as_columns(questions_q24, answers_q24, rts_q24, confidence_q24, difficulty_q24, aha_q24)
d25 = _as_columns(questions_q25, answers_q25, rts_q25, confidence_q25, difficulty_q25, aha_q25)
d26 = _as_columns(questions_q26, answers_q26, rts_q26, confidence_q26, difficulty_q26, aha_q26)
d27 = _as_columns(questions_q27, answers_q27, rts_q27, confidence_q27, difficulty_q27, aha_q27)
d28 = _as_columns(questions_q28, answers_q28, rts_q28, confidence_q28, difficulty_q28, aha_q28)
d29 = _as_columns(questions_q29, answers_q29, rts_q29, confidence_q29, difficulty_q29, aha_q29)
d30 = _as_columns(questions_q30, answers_q30, rts_q30, confidence_q30, difficulty_q30, aha_q30)

# The combined anagram data keeps its historical name d51.
d51 = _as_columns(questions_anagram, answers_anagram, rts_anagram,
                  confidence_anagram, difficulty_anagram, aha_anagram)

# One DataFrame per problem (one row per recorded trial), plus one for all anagrams.
df1 = pd.DataFrame(d1)
df2 = pd.DataFrame(d2)
df3 = pd.DataFrame(d3)
df4 = pd.DataFrame(d4)
df5 = pd.DataFrame(d5)
df6 = pd.DataFrame(d6)
df7 = pd.DataFrame(d7)
df8 = pd.DataFrame(d8)
df9 = pd.DataFrame(d9)
df10 = pd.DataFrame(d10)
df11 = pd.DataFrame(d11)
df12 = pd.DataFrame(d12)
df13 = pd.DataFrame(d13)
df14 = pd.DataFrame(d14)
df15 = pd.DataFrame(d15)
df16 = pd.DataFrame(d16)
df17 = pd.DataFrame(d17)
df18 = pd.DataFrame(d18)
df19 = pd.DataFrame(d19)
df20 = pd.DataFrame(d20)
df21 = pd.DataFrame(d21)
df22 = pd.DataFrame(d22)
df23 = pd.DataFrame(d23)
df24 = pd.DataFrame(d24)
df25 = pd.DataFrame(d25)
df26 = pd.DataFrame(d26)
df27 = pd.DataFrame(d27)
df28 = pd.DataFrame(d28)
df29 = pd.DataFrame(d29)
df30 = pd.DataFrame(d30)
df_anagram = pd.DataFrame(d51)

def _correct_only(responses, pattern):
    """Return the rows of ``responses`` whose 'answers' entry matches ``pattern``.

    ``pattern`` is passed to ``Series.str.contains`` (case-insensitive), so it
    is treated as a regular expression; alternatives such as 'Bertie|B' work.
    """
    hits = responses['answers'].str.contains(pattern, case=False)
    # .eq(True) turns missing results (NaN for non-string answers) into False.
    return responses.loc[hits.eq(True)]


# Keep only the correct responses for each question (df<N> is matched against s<N>).
# NOTE(review): matching is by substring, so a short key such as s3 = 'B' also
# matches any answer that merely contains that letter -- confirm this is intended.
df1_filtered = _correct_only(df1, s1)
df2_filtered = _correct_only(df2, s2)
df3_filtered = _correct_only(df3, s3)
df4_filtered = _correct_only(df4, s4)
df5_filtered = _correct_only(df5, s5)
df6_filtered = _correct_only(df6, s6)
df7_filtered = _correct_only(df7, s7)
df8_filtered = _correct_only(df8, s8)
df9_filtered = _correct_only(df9, s9)
df10_filtered = _correct_only(df10, s10)
df11_filtered = _correct_only(df11, s11)
df12_filtered = _correct_only(df12, s12)
df13_filtered = _correct_only(df13, s13)
df14_filtered = _correct_only(df14, s14)
df15_filtered = _correct_only(df15, s15)
df16_filtered = _correct_only(df16, s16)
df17_filtered = _correct_only(df17, s17)
df18_filtered = _correct_only(df18, s18)
df19_filtered = _correct_only(df19, s19)
df20_filtered = _correct_only(df20, s20)
df21_filtered = _correct_only(df21, s21)
df22_filtered = _correct_only(df22, s22)
df23_filtered = _correct_only(df23, s23)
df24_filtered = _correct_only(df24, s24)
df25_filtered = _correct_only(df25, s25)
df26_filtered = _correct_only(df26, s26)
df27_filtered = _correct_only(df27, s27)
df28_filtered = _correct_only(df28, s28)
df29_filtered = _correct_only(df29, s29)
df30_filtered = _correct_only(df30, s30)
df_anagram_filtered = _correct_only(df_anagram, solutions2)

Analyze the anagrams first

In [6]:
# Per-anagram summary: share of responses that were correct, plus the mean RT,
# Aha rating and difficulty rating among the correct responses.

# Aggregate numeric columns only: the string 'answers' column cannot be
# averaged, and recent pandas raises instead of silently dropping it.
numeric_cols = list(df_anagram.select_dtypes(include='number').columns)
dft3 = df_anagram.groupby('questions')[numeric_cols].agg(['mean', 'std', 'count'])
dft4 = df_anagram_filtered.groupby('questions')[numeric_cols].agg(['mean', 'std', 'count'])

# Align the "all responses" counts to the anagrams that have at least one
# correct response, so the ratio is computed per anagram by label rather than
# by row position (an anagram nobody solved is absent from dft4).
solved_count = dft4['rts']['count']
total_count = dft3['rts']['count'].reindex(dft4.index)

d0 = {'anagrams': dft4.index,
      # stored as a fraction in [0, 1] despite the column label
      'percent solved': (solved_count / total_count).values,
      'mean RT': dft4['rts']['mean'].values,
      'Aha': dft4['aha']['mean'].values,
      'mean difficulty': dft4['difficulty']['mean'].values}

dft5 = pd.DataFrame(d0)
dft5.sort_values('mean difficulty')

Unnamed: 0,anagrams,percent solved,mean RT,Aha,mean difficulty
1,Solve this anagram: AHSDOW,0.984615,11.745766,3.640625,1.5625
17,Solve this anagram: IEGWHT,0.95082,14.796379,3.482759,1.586207
12,Solve this anagram: ERLKC,0.962963,17.734442,3.75,1.692308
0,Solve this anagram: AALRYS,0.826087,26.005684,4.289474,1.947368
27,Solve this anagram: SAOSI,0.938462,21.82882,4.57377,2.180328
28,Solve this anagram: TINAG,0.77193,30.335818,3.909091,2.25
8,Solve this anagram: DONRO,0.724638,32.3981,3.84,2.32
21,Solve this anagram: NAYTUGH,0.825397,44.319481,5.0,2.461538
14,Solve this anagram: HAKMOCM,0.895833,30.881651,4.395349,2.488372
6,Solve this anagram: AYKWALJ,0.882353,22.812883,4.466667,2.5


In [34]:
# Stack the 30 non-insight questions: all responses, and correct responses only.
df_non_insight = pd.concat([
    df1, df2, df3, df4, df5, df6, df7, df8, df9, df10,
    df11, df12, df13, df14, df15, df16, df17, df18, df19, df20,
    df21, df22, df23, df24, df25, df26, df27, df28, df29, df30,
])

df_non_insight_filtered = pd.concat([
    df1_filtered, df2_filtered, df3_filtered, df4_filtered, df5_filtered,
    df6_filtered, df7_filtered, df8_filtered, df9_filtered, df10_filtered,
    df11_filtered, df12_filtered, df13_filtered, df14_filtered, df15_filtered,
    df16_filtered, df17_filtered, df18_filtered, df19_filtered, df20_filtered,
    df21_filtered, df22_filtered, df23_filtered, df24_filtered, df25_filtered,
    df26_filtered, df27_filtered, df28_filtered, df29_filtered, df30_filtered,
])

# Aggregate numeric columns only: the string 'answers' column cannot be
# averaged, and recent pandas raises instead of silently dropping it.
numeric_cols = list(df_non_insight.select_dtypes(include='number').columns)
dft3 = df_non_insight.groupby('questions')[numeric_cols].agg(['mean', 'std', 'count'])
dft4 = df_non_insight_filtered.groupby('questions')[numeric_cols].agg(['mean', 'std', 'count'])

# Align the "all responses" counts to the problems that have at least one
# correct response, so the ratio is computed per problem by label rather than
# by row position (a problem nobody solved is absent from dft4).
solved_count = dft4['rts']['count']
total_count = dft3['rts']['count'].reindex(dft4.index)

d0 = {'problem': dft4.index,
      'mean RT': dft4['rts']['mean'].values,
      # stored as a fraction in [0, 1] despite the column label
      'percent solved': (solved_count / total_count).values,
      'Aha': dft4['aha']['mean'].values,
      'difficulty': dft4['difficulty']['mean'].values}

dft5 = pd.DataFrame(d0)
dft5.sort_values('difficulty')

Unnamed: 0,problem,mean RT,percent solved,Aha,difficulty
2,"Andrew is half the age of Brian, Brian is three times older than Charles and the sum of their ages is 44 years. How old is Charles?",176.675292,0.48,4.208333,4.416667
9,"Carrie grew three inches taller last year, and five inches taller this year. Lana was four inches taller than Carrie two years ago. Lana then grew five inches taller last year, and seven inches t...",104.915182,0.532258,2.363636,3.575758
17,Lauren had nine chickens. Each chicken laid an average of six eggs per week. Lauren sold those eggs for $3 per dozen. How much money did she collect in four weeks if she sold all her eggs?,88.307659,0.554054,2.073171,3.097561
29,What day follows the day before yesterday if two days from now will be Sunday?,61.746588,0.557377,2.117647,3.823529
7,Ben spent $42 for shoes. This was $14 less than what he spent for a shirt and twice more expensive than the tie. The tie was $20 cheaper than the jeans. How much was the jeans?,74.848556,0.590164,2.916667,2.861111
24,"Smith is a butcher and president of the street storekeepers' committee, which also includes the grocer, the baker, and the pharmacist. They all sit around a table. \n\n Smith sits on Jones' left....",167.929192,0.619048,3.384615,4.769231
13,"Erin had 40 marbles, Ben had 8 marbles, and Chris had 3 marbles. Erin gave 16 marbles to Ben. Ben then gave some of his marbles to Chris. Now Ben has exactly double the number of marbles that Chr...",94.638559,0.653846,2.764706,3.5
12,Erin had 24 marbles and Evan had 3 marbles. Erin gave some of her marbles to Evan. Now Erin has exactly double the number of marbles that Evan has. How many marbles did Erin give to Evan?,82.501861,0.654545,3.083333,3.055556
27,"The police were convinced that either A, B, C, or D had committed a crime. Each of the suspects, in turn, made a statement. However, only one of the four statements was true. A said, ""I didn't d...",92.897,0.686275,2.142857,5.285714
19,"Lebrun, Lenoir, Ledroi, and Leblanc are, not necessarily in that order, the accountant, cashier, warehouseman, and salesman of a firm. The salesman has never been married. Lebrun, Lenoir, and Le...",135.625905,0.724138,2.857143,3.880952


In [8]:
def _column_means(frames, column):
    """Return the mean of ``column`` for each frame, in the order given."""
    return [np.mean(frame[column]) for frame in frames]


# Two hand-picked sets of five non-insight problems (correct responses only).
easier_frames = [df11_filtered, df30_filtered, df21_filtered, df22_filtered, df9_filtered]
harder_frames = [df23_filtered, df8_filtered, df24_filtered, df20_filtered, df17_filtered]

non_insight_easier_rt = _column_means(easier_frames, 'rts')
non_insight_easier_diff = _column_means(easier_frames, 'difficulty')

non_insight_harder_rt = _column_means(harder_frames, 'rts')
non_insight_harder_diff = _column_means(harder_frames, 'difficulty')

# Hard-coded per-anagram means for five anagrams.
# NOTE(review): presumably copied by hand from the anagram summary table --
# confirm which anagrams these rows correspond to.
anagram1_rt = [41.10223, 66.349359, 54.66748, 52.435821, 61.704657]
anagram1_diff = [3.026316, 3.282051, 3.387097, 3.461538, 3.17]

Compare the selected anagrams with the easier and harder non-insight problem sets

In [40]:
def _set_summary(rt_means, diff_means):
    """Return (mean RT, mean difficulty, SD of difficulty) for one problem set."""
    return np.mean(rt_means), np.mean(diff_means), np.std(diff_means)


# One line per set; the last two lines also report the number of problems in the set.
print(*_set_summary(non_insight_easier_rt, non_insight_easier_diff))
print(*_set_summary(non_insight_harder_rt, non_insight_harder_diff), len(non_insight_harder_diff))
print(*_set_summary(anagram1_rt, anagram1_diff), len(anagram1_diff))

# Independent-samples t-test on the per-problem mean difficulties:
# selected anagrams vs. the harder non-insight set.
stats.ttest_ind(anagram1_diff, non_insight_harder_diff)

53.5685168429567 2.4671208727393394 0.15590462590213483
73.56521037585603 3.1227982162764767 0.21096849614478017 5
55.2519094 3.2654004 0.15480300365897295 5


Ttest_indResult(statistic=1.0899352327307978, pvalue=0.3074800607539428)

In [47]:
# Response-level difficulty ratings (correct responses only) for the five
# harder problems and for five selected anagrams.
harder_frames = [df23_filtered, df8_filtered, df24_filtered, df20_filtered, df17_filtered]
selected_anagrams = ['EURADBL', 'MATCILE', 'AYKAWLJ', 'LUBMEJD', 'NITGA']

# NOTE(review): despite the name, these are the same five frames used for
# non_insight_harder_rt / non_insight_harder_diff in the earlier cell.
insight_harder_all = np.concatenate([frame['difficulty'].values for frame in harder_frames])

anagram_difficulty = [
    df_anagram_filtered.loc[df_anagram_filtered['questions'].str.contains(code), 'difficulty']
    for code in selected_anagrams
]

# Pooled ratings: the five problems above followed by the five anagrams.
# This used to be stored in `df2`, which overwrote the question-2 DataFrame
# and broke any later (or re-run) cell that still needs that frame.
selected_difficulty_all = np.concatenate([insight_harder_all, *anagram_difficulty])

print(np.mean(insight_harder_all), np.std(insight_harder_all), len(insight_harder_all),
      np.mean(selected_difficulty_all), np.std(selected_difficulty_all), len(selected_difficulty_all))

# NOTE(review): the second sample contains every value of the first, so the two
# samples are not independent -- confirm this comparison is the intended one.
stats.ttest_ind(insight_harder_all, selected_difficulty_all)

3.1333333333333333 1.577621275493231 225 3.191646191646192 1.683166838731794 407


Ttest_indResult(statistic=-0.42567613913119473, pvalue=0.670489227894661)

One-way ANOVA of difficulty ratings across the questions we selected

In [38]:
# One-way ANOVA on response-level difficulty ratings (correct responses only)
# across ten groups: five non-insight problems and five selected anagrams.
harder_frames = [df23_filtered, df8_filtered, df24_filtered, df20_filtered, df17_filtered]
selected_anagrams = ['EURADBL', 'MATCILE', 'AYKAWLJ', 'LUBMEJD', 'NITGA']

# Build the ten groups once and reuse them for the pooled stats and the test.
difficulty_groups = [frame['difficulty'] for frame in harder_frames] + [
    df_anagram_filtered.loc[df_anagram_filtered['questions'].str.contains(code), 'difficulty']
    for code in selected_anagrams
]

# Pooled ratings across all ten groups. This used to be stored in `df2`, which
# overwrote the question-2 DataFrame and broke any later (or re-run) cell that
# still needs that frame.
selected_difficulty_all = np.concatenate(difficulty_groups)
print(np.mean(selected_difficulty_all), np.std(selected_difficulty_all))

# `stats` is scipy.stats, imported in the notebook's first cell.
stats.f_oneway(*difficulty_groups)


3.191646191646192 1.683166838731794


F_onewayResult(statistic=0.5850506695899853, pvalue=0.8095419156486302)

In [32]:
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Long-format table for the same ten groups: one row per correct response,
# with its difficulty rating ('val') and a letter identifying its group ('data').
group_labels = ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'J', 'K']
difficulty_groups = [
    df23_filtered['difficulty'],
    df8_filtered['difficulty'],
    df24_filtered['difficulty'],
    df20_filtered['difficulty'],
    df17_filtered['difficulty'],
    df_anagram_filtered[df_anagram_filtered['questions'].str.contains('EURADBL')]['difficulty'],
    df_anagram_filtered[df_anagram_filtered['questions'].str.contains('MATCILE')]['difficulty'],
    df_anagram_filtered[df_anagram_filtered['questions'].str.contains('AYKAWLJ')]['difficulty'],
    df_anagram_filtered[df_anagram_filtered['questions'].str.contains('LUBMEJD')]['difficulty'],
    df_anagram_filtered[df_anagram_filtered['questions'].str.contains('NITGA')]['difficulty'],
]
group_sizes = [len(group) for group in difficulty_groups]

df = pd.DataFrame({
    'val': np.concatenate(difficulty_groups),
    # each label is repeated once per response in its group
    'data': np.repeat(group_labels, group_sizes),
})

# Fit difficulty ~ group and show the sequential (type 1) ANOVA table.
mod = ols('val ~ data', data=df).fit()

sm.stats.anova_lm(mod, typ=1)

Unnamed: 0,df,sum_sq,mean_sq,F,PR(>F)
data,9.0,15.092876,1.676986,0.585051,0.809542
Residual,397.0,1137.958721,2.866395,,


Analyze only those responses for which people's confidence is greater than 5

Plot distribution of curiosity ratings below