In [107]:
# Analytical Tools
import numpy as np
import pandas as pd
import scipy
import matplotlib.pyplot as plt
import seaborn as sns
from scipy import stats

# General Utilities
import math
import json
import pprint
import itertools
import importlib

# Some settings
from IPython.display import Markdown, display
log = pprint.pprint  # short alias for pretty-printing nested structures


def space():
    """Print a single empty line."""
    print()


def printmd(text):
    """Render ``text`` as Markdown in the cell output."""
    return display(Markdown(text))
%matplotlib inline

def set_pandas_display_options() -> None:
    """Raise pandas' column, row, column-width and line-width display limits."""
    wanted = {
        "max_columns": 1000,
        "max_rows": 1000,
        "max_colwidth": 199,
        "width": 1000,
        # "precision": 2,  # set as needed
    }
    display_options = pd.options.display
    for option_name, option_value in wanted.items():
        setattr(display_options, option_name, option_value)


set_pandas_display_options()

In [108]:
# Scrambled letter strings used as anagram problems.
anagram = [
    'YUOEJRN',
    'RODLLA',
    'HSOADW',
    'ECELT',
    'RETLET',
    'LYLE',
]

# Accepted answers joined into one regex alternation (matched later with str.contains).
solutions = '|'.join((
    'journey',
    'dollar',
    'shadow',
    'elect',
    'letter',
    'yell',
    'shad',
    'let',
))

In [109]:
# Load the raw pilot export. The encoding is pinned to UTF-8 so that parsing does not
# depend on the platform's locale default.
with open('pilot1.json', encoding='utf-8') as json_file:
    data = json.load(json_file)

In [131]:
# Peek at one trial record (subject index 266, trial index 12) to see which fields a trial carries.
data[266]['data'][12]

{'rt': 3567,
 'stimulus': "<center><p>Now the <b>black</b> anagram will be shown to you.</p> <p>If you are not able to solve the anagram in <b>1 minute</b>, you will be taken to the next screen.</p> <p>Press 'v' to view the anagram. </p></center>",
 'key_press': 86,
 'trial_type': 'html-keyboard-response',
 'trial_index': 12,
 'time_elapsed': 218546,
 'internal_node_id': '0.0-3.1-0.1',
 'responseType': 'finish',
 'participantID': 'srpzsfwo4h1wao66pw1j9vh107r87pub',
 'black_problems': ['ECELT'],
 'blue_problems': ['YUOEJRN']}

Collect the data into separate structures for the non-insight problems (so the answers can be checked) and into a single structure for the anagrams.

In [115]:
def _parse_rating(raw_response):
    """Return the integer rating stored in a survey response string.

    The response is assumed to be a JSON object string with the rating as its
    first value, e.g. '{"Q0":5}' (TODO confirm against the export). Parsing the
    JSON keeps multi-digit ratings intact; if the payload is not such an object,
    the character at index 6 is used instead.
    """
    try:
        return int(next(iter(json.loads(raw_response).values())))
    except (TypeError, ValueError, AttributeError, StopIteration):
        return int(raw_response[6])


# Per-colour accumulators: problem shown, typed answer, aha rating, response time.
black_anagram = []
black_anagram_ans = []
black_anagram_aha = []
black_anagram_rt = []

blue_anagram = []
blue_anagram_ans = []
blue_anagram_aha = []
blue_anagram_rt = []

num_subs = 0

# For each colour: the lists to fill, in the order (problem, rt, answer, rating).
_store = {
    'blue': (blue_anagram, blue_anagram_rt, blue_anagram_ans, blue_anagram_aha),
    'black': (black_anagram, black_anagram_rt, black_anagram_ans, black_anagram_aha),
}
# Distance from index j to the rating trial; j also advances by this much per round.
_rating_offset = {'blue': 4, 'black': 3}

for subject in data:  # go through the subjects
    trials = subject['data']
    if len(trials) <= 4:  # skip subjects with too few recorded trials
        continue
    num_subs += 1

    # Find the quiz trial answered with '1 minute': index 3, else 5, else assume 7.
    if '1 minute' in trials[3]['responses']:
        j = 3
    elif '1 minute' in trials[5]['responses']:
        j = 5
    else:
        j = 7

    for _ in range(2):  # two anagram rounds per subject
        stimulus = trials[j + 1]['stimulus']  # announcement naming the colour shown
        # 'blue' takes precedence if both colour words appear in the announcement.
        color = next((name for name in ('blue', 'black') if name in stimulus), None)
        if color is None:
            continue  # unrecognized announcement: nothing recorded, j unchanged

        problems, rts, answers, ratings = _store[color]
        solve_trial = trials[j + 2]
        problems.append(solve_trial[color + '_problems'][0])
        rts.append(solve_trial['rt'])
        answers.append(solve_trial['responses'])

        step = _rating_offset[color]
        ratings.append(_parse_rating(trials[j + step]['responses']))
        j += step

print('we recruited ', num_subs, ' subjects')

we recruited  147  subjects


Convert the data into DataFrames and filter them based on correct responses

In [136]:
# Column-wise records for the black (d1) and blue (d2) anagram trials.
d1 = dict(
    anagrams=black_anagram,
    answers=black_anagram_ans,
    rts=black_anagram_rt,
    aha=black_anagram_aha,
)
d2 = dict(
    anagrams=blue_anagram,
    answers=blue_anagram_ans,
    rts=blue_anagram_rt,
    aha=blue_anagram_aha,
)

df1 = pd.DataFrame(d1)
df2 = pd.DataFrame(d2)

# Case-insensitive check of whether each answer contains any accepted solution.
# The pattern is one alternation, so it is not tied to the anagram shown on that row.
black_hit = df1['answers'].str.contains(solutions, case=False)
blue_hit = df2['answers'].str.contains(solutions, case=False)

# Explicit comparison against True / False leaves missing matches out of both frames.
df1_filtered = df1.loc[black_hit.eq(True)]
# NOTE(review): the blue frame keeps rows whose answer did NOT match — confirm this is intended.
df2_filtered = df2.loc[blue_hit.eq(False)]
# Alternative blue filter on response time:
#df2_filtered = df2[df2['rts'] > 15000]

In [None]:
# Row counts after filtering: black, blue.
n_black, n_blue = len(df1_filtered), len(df2_filtered)
print(n_black, n_blue)

# Mean aha rating: black, blue.
print(*(frame['aha'].mean() for frame in (df1_filtered, df2_filtered)))

# Response times: black mean, black std, blue mean.
black_rts, blue_rts = df1_filtered['rts'], df2_filtered['rts']
print(black_rts.mean(), black_rts.std(), blue_rts.mean())

In [142]:
# Per-anagram mean / std / count of response time and aha rating.
# The numeric columns are selected explicitly: pandas >= 2.0 raises on the text
# 'answers' column instead of silently dropping it from the aggregation.
summary_stats = ['mean', 'std', 'count']
dft3 = df1_filtered.groupby('anagrams')[['rts', 'aha']].agg(summary_stats)
dft4 = df2_filtered.groupby('anagrams')[['rts', 'aha']].agg(summary_stats)
dft3


Unnamed: 0_level_0,rts,rts,rts,aha,aha,aha
Unnamed: 0_level_1,mean,std,count,mean,std,count
anagrams,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
ECELT,15918.125,10520.330469,16,3.9375,1.913766,16
HSOADW,11561.25,9262.988752,20,4.5,1.572795,20
LYLE,12287.722222,5797.089592,18,4.111111,1.875191,18
RETLET,13692.029412,11306.870892,34,3.382353,2.000223,34
RODLLA,11012.6,7426.477832,20,3.85,1.631112,20
YUOEJRN,11982.294118,9537.652494,17,4.176471,1.944071,17


Analyze the anagrams first

Unnamed: 0,anagrams,percent solved,mean RT,Aha,mean difficulty
1,Solve this anagram: AHSDOW,0.984615,11.745766,3.640625,1.5625
17,Solve this anagram: IEGWHT,0.95082,14.796379,3.482759,1.586207
12,Solve this anagram: ERLKC,0.962963,17.734442,3.75,1.692308
0,Solve this anagram: AALRYS,0.826087,26.005684,4.289474,1.947368
27,Solve this anagram: SAOSI,0.938462,21.82882,4.57377,2.180328
28,Solve this anagram: TINAG,0.77193,30.335818,3.909091,2.25
8,Solve this anagram: DONRO,0.724638,32.3981,3.84,2.32
21,Solve this anagram: NAYTUGH,0.825397,44.319481,5.0,2.461538
14,Solve this anagram: HAKMOCM,0.895833,30.881651,4.395349,2.488372
6,Solve this anagram: AYKWALJ,0.882353,22.812883,4.466667,2.5


In [8]:
# Stack the per-problem frames into one table of all responses and one of the
# filtered responses. df1 … df30 and their *_filtered counterparts must already
# exist when this cell runs (presumably one frame per problem — TODO confirm).
df_non_insight = pd.concat([
    df1, df2, df3, df4, df5, df6, df7, df8, df9, df10,
    df11, df12, df13, df14, df15, df16, df17, df18, df19, df20,
    df21, df22, df23, df24, df25, df26, df27, df28, df29, df30,
])

df_non_insight_filtered = pd.concat([
    df1_filtered, df2_filtered, df3_filtered, df4_filtered, df5_filtered,
    df6_filtered, df7_filtered, df8_filtered, df9_filtered, df10_filtered,
    df11_filtered, df12_filtered, df13_filtered, df14_filtered, df15_filtered,
    df16_filtered, df17_filtered, df18_filtered, df19_filtered, df20_filtered,
    df21_filtered, df22_filtered, df23_filtered, df24_filtered, df25_filtered,
    df26_filtered, df27_filtered, df28_filtered, df29_filtered, df30_filtered,
])

# Aggregate only the numeric measures used below; pandas >= 2.0 raises on text
# columns instead of silently dropping them.
measures = ['rts', 'aha', 'difficulty']
summary_stats = ['mean', 'std', 'count']
dft3 = df_non_insight.groupby('questions')[measures].agg(summary_stats)
dft4 = df_non_insight_filtered.groupby('questions')[measures].agg(summary_stats)

# Align the two count series on the question label. A positional division would
# pair the wrong rows (or fail on length) if a question had no filtered responses.
solved_counts = dft4['rts']['count']
total_counts = dft3['rts']['count'].reindex(dft4.index)

d0 = {'problem': dft4.index,
      'mean RT': dft4['rts']['mean'].values,
      'percent solved': (solved_counts / total_counts).values,
      #'solved count': solved_counts.values,
      #'total count': total_counts.values,
      'Aha': dft4['aha']['mean'].values,
      'difficulty': dft4['difficulty']['mean'].values}

dft5 = pd.DataFrame(d0)
dft5.sort_values('difficulty')

Unnamed: 0,problem,mean RT,percent solved,Aha,difficulty
16,Lana has 2 bags with 2 marbles in each bag. Markus has 2 bags with 3 marbles in each bag. How many more marbles does Markus have?,24.164377,0.929825,1.867925,1.320755
21,"Lilah's band had practiced 24 songs. At a performance, they played 7 songs in their first set. In their second set, they played 8 songs. How many songs did they play for their third and final set?",30.759604,0.981481,2.377358,1.773585
8,"Carrie grew three inches taller last year, and five inches taller this year. How many inches taller did she grow in the last two years?",25.5891,0.909091,1.52,1.8
0,"Mary won't eat fish or spinach, Sally won't eat fish or green beans, Steve won't eat shrimp or potatoes, Alice won't eat beef or tomatoes, and Jim won't eat fish or tomatoes. If you are willing ...",54.916632,0.934426,2.385965,1.912281
1,"A farmer has 19 sheep on his land. One day, a big storm hits, and seven sheep run away. The next day, four sheep return and find their way home. The next day, another big storm hits, and six shee...",37.644831,0.951613,2.067797,1.966102
18,Lauren's chicken laid an average of six eggs per week. Lauren sold those eggs for $3 per dozen. How much money did she collect in four weeks if she sold all her eggs?,43.841268,0.931818,2.560976,2.073171
23,Ricky has a magic money box. Every day the box doubles the number of coins placed inside of it. Ricky put in 3 pennies on Monday. He looked inside his box on Friday night. How many pennies did Ri...,44.710511,0.833333,2.111111,2.266667
26,The book store is very busy today. There are 25 children listening to a story. 35 people are shopping for books. 18 people are at the checkout counter. How many people are at the bookstore?,35.12376,0.833333,1.96,2.3
4,"Before Gary injured his arm, he was able to type 9 words per minute on his phone. After he injured his arm, he had to use his left arm for a while, and he could only type 6 words per minute on hi...",46.419163,0.844828,2.469388,2.326531
25,The book store is very busy today. There are 25 children listening to a story narrated by 1 person. 35 people are shopping for books. 20 people are studying. 18 people are at the checkout counter...,65.758095,0.777778,1.880952,2.357143


In [26]:
# Filtered frames making up the "easier" and "harder" non-insight sets (order matters).
easier_frames = (df11_filtered, df30_filtered, df21_filtered, df22_filtered, df9_filtered)
harder_frames = (df23_filtered, df8_filtered, df24_filtered, df20_filtered, df17_filtered)

# Per-problem mean response time and mean difficulty for each set.
non_insight_easier_rt = [np.mean(frame['rts']) for frame in easier_frames]
non_insight_easier_diff = [np.mean(frame['difficulty']) for frame in easier_frames]

non_insight_harder_rt = [np.mean(frame['rts']) for frame in harder_frames]
non_insight_harder_diff = [np.mean(frame['difficulty']) for frame in harder_frames]

# Hard-coded values for the first anagram set.
anagram1_rt = [41.10223, 66.349359, 54.66748, 52.435821, 58.969918]
anagram1_diff = [3.026316, 3.282051, 3.387097, 3.461538, 3.081633]

# Placeholder, not filled in yet:
#anagram2_rt =



Anagram analysis

In [28]:
# One output line per problem set: mean RT followed by mean difficulty.
for rt_values, difficulty_values in (
    (non_insight_easier_rt, non_insight_easier_diff),
    (non_insight_harder_rt, non_insight_harder_diff),
    (anagram1_rt, anagram1_diff),
):
    print(np.mean(rt_values), np.mean(difficulty_values))



53.5685168429567 2.4671208727393394
73.56521037585603 3.1227982162764767
54.704961600000004 3.2477270000000003


Analyze only those responses for which people's confidence is greater than 5

Plot distribution of curiosity ratings below