In [1]:
import os
import sys
import urllib, io

import numpy as np
import scipy.stats as stats
import pandas as pd

import pymongo as pm
from collections import Counter
import json
import re
import ast

from PIL import Image, ImageOps, ImageDraw, ImageFont 

from io import BytesIO
import base64

import  matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42

import seaborn as sns
sns.set_context('talk')
sns.set_style('darkgrid')

from IPython.display import clear_output

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

In [None]:
## directory & file hierarchy
# Every location is derived from the parent of the current working directory.
proj_dir = os.path.abspath('..')
analysis_dir = os.path.abspath(os.path.join(os.getcwd(),'..'))

# inputs
datavol_dir = os.path.join(proj_dir,'data')
png_dir = os.path.abspath(os.path.join(datavol_dir,'png'))
exp_dir = os.path.abspath(os.path.join(proj_dir,'experiments'))

# outputs
results_dir = os.path.join(proj_dir,'results')
plot_dir = os.path.join(results_dir,'plots')
csv_dir = os.path.join(results_dir,'csv')
json_dir = os.path.join(results_dir,'json')

# per-analyst folders
jefan_dir = os.path.join(analysis_dir,'jefan')
will_dir = os.path.join(analysis_dir,'will')

## add helpers to python path
stimuli_dir = os.path.join(proj_dir,'stimuli')
if stimuli_dir not in sys.path:
    sys.path.append(stimuli_dir)

## create the output folders if they are missing (json_dir is not created here)
for needed_dir in (results_dir, plot_dir, csv_dir):
    if not os.path.exists(needed_dir):
        os.makedirs(needed_dir)

## Load dataframe

In [None]:
# Trial-level data for the iteration being analysed.
iterationName = 'pilot4'
csv_name = 'block_silhouette_{}.csv'.format(iterationName)
read_path = os.path.join(csv_dir, csv_name)
df = pd.read_csv(read_path)

### Exclude Outliers

In [None]:
# in progress: pilot3 is not merged in yet
#iterationName = 'pilot3'
#read_path = os.path.join(csv_dir,'block_silhouette_{}.csv'.format(iterationName))
#df3 = pd.read_csv(read_path)

# Re-read the raw file so this cell gives the same result every time it is run.
iterationName = 'pilot4'
read_path = os.path.join(csv_dir,'block_silhouette_{}.csv'.format(iterationName))
df = pd.read_csv(read_path)

#df = pd.merge(df3 , df4 , how='outer')

# Games whose score on trial 15 is 0 are excluded entirely.
low_scores = df[(df.trialNum == 15) & (df.score == 0)]['gameID']
# .copy() makes df an independent frame: later cells add columns to it, and
# assigning into a filtered slice triggers SettingWithCopyWarning.
df = df[~df.gameID.isin(low_scores)].copy()

## Summary Statistics

In [None]:
# Descriptive statistics of score, block count and build time for each condition
by_condition_cols = ['condition','normedScore','numBlocks','buildTime']
df.loc[:, by_condition_cols].groupby(['condition']).describe()

In [None]:
# Descriptive statistics of the same measures for each trial number
by_trial_cols = ['condition','normedScore','numBlocks','buildTime','trialNum']
df.loc[:, by_trial_cols].groupby('trialNum').describe()

In [None]:
# Per-participant mean of every numeric measure within each condition.
# numeric_only=True is required because df also holds non-numeric columns
# (e.g. targetName); recent pandas raises TypeError instead of dropping them.
subject_means_by_condition = df.groupby(['gameID','condition']).mean(numeric_only=True)
# Mean across participants of those per-participant means.
condition_means = subject_means_by_condition.groupby('condition')['normedScore'].mean()

subject_scores_by_condition = subject_means_by_condition['normedScore']

# NOTE(review): catplot is figure-level and creates its own figure, so the
# figure opened here is probably left blank — confirm and drop if so.
fig = plt.figure(figsize=(8,6))
sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})
# One line per participant (hue='gameID') connecting their condition means.
c = sns.catplot(x='condition', 
                y='normedScore', 
                hue='gameID', 
                kind='point', 
                legend=False,
                data=df);
plt.ylabel('Normed F1 score')
plt.xlabel('Condition')
plt.yticks(np.linspace(0.4,1,5))
plt.setp(c.ax.lines,linewidth=2)
plt.tight_layout()



In [None]:
# Only the two columns the condition bar plot needs
score_condition = df.loc[:, ['condition','normedScore']]

In [None]:
# Mean normed score per condition
fig = plt.figure(figsize=(8,6))
sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})
condition_bar_kwargs = dict(x='condition', y='normedScore', hue='condition', palette='husl')
b = sns.barplot(data=score_condition, **condition_bar_kwargs)
plt.legend(bbox_to_anchor=(1.0,1))
plt.xlabel('Condition')
plt.ylabel('Normed F1 score')
plt.yticks(np.linspace(0,1,6))
plt.tight_layout()


In [None]:
# Split off physical-condition trials in which no blocks were placed while exploring.
df['extendedCondition'] = df['condition']
idle_physical = (df.condition == 'physical') & (df.numBlocksExplore == 0)
df.loc[idle_physical,'extendedCondition'] = 'no_action_physical'

fig = plt.figure(figsize=(4,4))
sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})
extended_bar_kwargs = dict(x='condition', y='normedScore', hue='extendedCondition', palette='husl')
b = sns.barplot(data = df, **extended_bar_kwargs)
plt.legend(bbox_to_anchor=(1.0,1))
plt.xlabel('Condition')
plt.ylabel('Normed F1 score')
plt.yticks(np.linspace(0,1,6))
plt.tight_layout()




In [None]:
# Keep trials with more than two blocks placed and (again) drop the excluded games.
# .copy() gives an independent frame, so the column assignments below do not
# write into a slice of the previous df (SettingWithCopyWarning).
df = df[(df.numBlocks > 2) & ~df.gameID.isin(low_scores)].copy()

# Split the physical condition by how many blocks were placed while exploring.
# Order matters: the == 0 rule runs last and overrides the <= 4 rule.
df['extendedCondition'] = df['condition']
is_physical = df.condition == 'physical'
df.loc[is_physical & (df.numBlocksExplore > 4),'extendedCondition'] = 'very_physical'
df.loc[is_physical & (df.numBlocksExplore <= 4),'extendedCondition'] = 'not_very_physical'
df.loc[is_physical & (df.numBlocksExplore == 0),'extendedCondition'] = 'no_action_physical'

fig = plt.figure(figsize=(4,4))
sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})
b = sns.barplot(data = df,
                palette='husl',
                hue='extendedCondition',
                hue_order=['mental','no_action_physical','not_very_physical','physical','very_physical'],
                x='condition',
                y='normedScore')
plt.legend(bbox_to_anchor=(1.0,1))
plt.ylabel('Normed F1 score')
plt.xlabel('Condition')
plt.yticks(np.linspace(0,1,6))
plt.tight_layout()




In [None]:
# Mean number of blocks placed, by condition and extended condition
fig = plt.figure(figsize=(4,4))
sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})
numblocks_bar_kwargs = dict(x='condition', y='numBlocks', hue='extendedCondition', palette='husl')
b = sns.barplot(data=df, **numblocks_bar_kwargs)
plt.legend(bbox_to_anchor=(1.0,1))
plt.xlabel('condition')
plt.ylabel('n blocks')
plt.yticks(np.linspace(0,12,6))
plt.tight_layout()

In [None]:
# Normed score as a function of trial number
score_trials = df.loc[:, ['trialNum','normedScore']]

fig = plt.figure(figsize=(8,4))
sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})
trial_line_kwargs = dict(x='trialNum', y='normedScore', palette='husl')
b = sns.lineplot(data=score_trials, **trial_line_kwargs)
plt.xlabel('Trial')
plt.ylabel('Normed F1 score')
plt.xticks(np.linspace(0,15,16))
plt.yticks(np.linspace(0.4,1,6))
plt.tight_layout()

## Load settled dataframe

In [None]:
## load the settled-block dataframe for this iteration
iterationName = 'pilot4'

settled_csv_name = 'block_silhouette_settled_{}.csv'.format(iterationName)
data_path = os.path.join(csv_dir, settled_csv_name)
dfs = pd.read_csv(data_path)

### Exclude outliers

In [None]:
# Reuse the low_scores list computed when df was cleaned. Recomputing it from df
# here would find nothing, because those games have already been removed from df,
# and dfs would silently be left unfiltered.
# .copy() because the following cells add columns to dfs.
dfs = dfs[~dfs.gameID.isin(low_scores)].copy()

In [None]:
# Prepare the extended condition label (starts as a copy of condition) and
# bring the per-trial explore block count over from the trial-level frame.
dfs = dfs.assign(extendedCondition=dfs['condition'])

trialInfo = df.loc[:, ['gameID','trialNum','numBlocksExplore']]

dfs = pd.merge(dfs, trialInfo, on=['gameID', 'trialNum'], how='left')

In [None]:
# Physical-condition rows whose trial had no blocks placed during exploration.
# Both halves of the mask must come from dfs itself: a mask built from df has a
# different index and does not line up with the rows being assigned.
no_action_rows = (dfs.condition == 'physical') & (dfs.numBlocksExplore == 0)
dfs.loc[no_action_rows,'extendedCondition'] = 'no_action_physical'

In [None]:
# Add useful variables to dataframe

# One identifier per participant x trial. The conversion has to be element-wise:
# str(series) would paste the printed repr of the whole column onto every row,
# so all trials of a game would share one id.
dfs['subject_trial'] = dfs['gameID'] + '_' + dfs['trialNum'].map(str)
# Numeric code combining condition (mental=100, otherwise 2) and trial number.
dfs['condition_number'] = np.where(dfs['condition']=='mental', 100, 2)
dfs['condition_code'] = dfs['condition_number'] + dfs['trialNum']
# timePlaced rounded to the nearest 10000 (presumably milliseconds — TODO confirm units).
# np.round replaces np.round_, which was removed in NumPy 2.0.
dfs['time_bin'] = np.round(dfs['timePlaced']/10000)*10000

dfs_build = dfs[dfs.phase == 'build']
dfs_explore = dfs[dfs.phase == 'explore']


In [None]:
# Identifier per participant x trial, used as the plotting unit below.
# Converted element-wise; str(series) would give every row of a game the same id
# and the per-trial lines would be merged.
dfs['subject_trial'] = dfs['gameID'] + '_' + dfs['trialNum'].map(str)
# Hue code: condition offset (mental=100, otherwise 2) plus half the trial number,
# so the two conditions land at opposite ends of the colour map.
dfs['condition_number'] = np.where(dfs['condition']=='mental', 100, 2)
dfs['condition_code'] = dfs['condition_number'] + dfs['trialNum']/2

fig = plt.figure(figsize=(12,6))
sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})
# estimator=None with units= draws one raw line per subject_trial.
ax = sns.lineplot(x="timePlaced", 
                  y="normedIncrementalScore", 
                  hue='condition_code', 
                  units="subject_trial",
                  palette='coolwarm',
                  estimator=None, lw=0.7,
                  data=dfs[dfs['phase']=='build'])

#red is mental, blue is physical

In [None]:
# 'Scores over time across participant and structure, by condition'
# Mean incremental score at each block position, build phase only.

build_rows = dfs[dfs.phase == 'build'] #and numblocks=0

fig = plt.figure(figsize=(12,6))
sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})
numblocks_line_kwargs = dict(x="numBlocks",
                             y="normedIncrementalScore",
                             hue='extendedCondition',
                             estimator=np.mean,
                             palette='coolwarm',
                             lw=0.7)
ax = sns.lineplot(data=build_rows, **numblocks_line_kwargs)

In [None]:
# Mean incremental score per time bin, build phase only.
build_rows = dfs[dfs.phase == 'build']

fig = plt.figure(figsize=(12,6))
sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})
timebin_line_kwargs = dict(x="time_bin",
                           y="normedIncrementalScore",
                           hue='extendedCondition',
                           estimator=np.mean,
                           palette='coolwarm',
                           lw=0.7)
ax = sns.lineplot(data=build_rows, **timebin_line_kwargs)

In [None]:
# 'Mean scores for each structure over time (binned), by condition and phase'
# One row of panels per target structure, one column per phase.

g = sns.FacetGrid(data=dfs, row="targetName", col="phase", height=4, aspect = 2)

sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})

facet_line_kwargs = dict(x="time_bin",
                         y="normedIncrementalScore",
                         hue='condition',
                         estimator=np.mean,
                         palette='coolwarm',
                         lw=0.7)
g = g.map_dataframe(sns.lineplot, **facet_line_kwargs)
g = g.set_titles("{row_name}")
g = g.add_legend()


In [None]:
# Incremental score by block position, per target structure (rows) and phase (columns).
g = sns.FacetGrid(data=dfs, row="targetName", col="phase", height=4, aspect = 2)
sns.set_context('poster')
sns.set_style('whitegrid')
facet_line_kwargs = dict(x="numBlocks",
                         y="normedIncrementalScore",
                         hue='condition',
                         palette='coolwarm',
                         lw=0.7)
g = g.map_dataframe(sns.lineplot, **facet_line_kwargs)
g = g.set_titles("{row_name}")
g = g.add_legend()


In [None]:
# 'Individual scores for each attempt at each structure over time, by condition and phase'
# estimator=None together with units= draws one raw line per subject_trial.

g = sns.FacetGrid(data=dfs, row="targetName", col="phase", height=4, aspect = 2, legend_out = True)

sns.set_context('poster')
sns.set_style('whitegrid')
facet_line_kwargs = dict(x="timePlaced",
                         y="normedIncrementalScore",
                         hue='condition',
                         label='condition',
                         units="subject_trial",
                         estimator=None,
                         palette='coolwarm',
                         lw=0.7)
g = g.map_dataframe(sns.lineplot, **facet_line_kwargs)
g = g.set_titles("{row_name}")
g = g.add_legend()


In [None]:
# 'Mean scores for each participant over time, by condition and phase'
# One row of panels per gameID; every panel is titled with the literal text 'subject'.

g = sns.FacetGrid(data=dfs, row="gameID", col="phase", height=4, aspect = 2, legend_out = True)

sns.set_context('poster')
sns.set_style('whitegrid')
facet_line_kwargs = dict(x="time_bin",
                         y="normedIncrementalScore",
                         hue='condition',
                         estimator=np.mean,
                         palette='coolwarm',
                         lw=0.7)
g = g.map_dataframe(sns.lineplot, **facet_line_kwargs)
g = g.set_titles('subject')
g = g.add_legend()


In [None]:
# difference?
# more similar at start, at end?

In [None]:
def iou(x, y):
    """Intersection over union of two block-sequence matrices.

    Both matrices are truncated to the length of ``x`` before comparison,
    where the length is the index of the first column of ``x`` whose sum is
    not positive (i.e. the first empty position in the sequence).

    Parameters
    ----------
    x, y : 2-D numpy arrays of identical shape (rows x sequence positions).

    Returns
    -------
    float
        sum(x * y) / sum(min(x + y, 1)) over the truncated matrices, or NaN
        when ``x`` has no leading non-empty column (nothing to compare).

    Raises
    ------
    AssertionError
        If the two matrices differ in shape.
    """
    assert(x.shape == y.shape)
    ncols = x.shape[1]

    # Length of x: first column with nothing in it. The bound check matters
    # when every column of x is occupied; without it the scan runs off the
    # end of the array.
    sum_cols = np.sum(x,axis=0)
    last_block = 0
    while last_block < ncols and sum_cols[last_block] > 0:
        last_block += 1

    xt = x[:,0:last_block]
    yt = y[:,0:last_block]

    intersection = np.sum(xt * yt, axis=None)
    # element-wise cap at 1 so overlapping entries count once in the union
    union = np.sum(np.fmin(xt + yt, 1), axis=None)

    # Empty x: return NaN explicitly (callers filter NaNs) instead of relying
    # on a 0/0 division and its runtime warning.
    if union == 0:
        return np.nan

    return intersection / union

In [None]:
# For every target structure and condition, show which block kind was placed at
# each position of the build sequence (columns normalised to proportions).
max_n_blocks = int(dfs['numBlocks'].max())
n_block_kinds = dfs['blockKind'].nunique()
blockKinds = np.sort(dfs['blockKind'].unique())
targets = np.sort(dfs['targetName'].unique())

# block kind -> row index of the matrix
b_map = dict(zip(blockKinds, range(0,n_block_kinds)))
print(b_map)
fig, axs = plt.subplots(len(targets)+1,2, figsize=(12,20))

for ci, condition in enumerate(['mental','physical']):
    # panels start at row 1; row 0 of the grid is left empty, as before
    for p, target in enumerate(targets, start=1):
        # fresh matrix per panel, otherwise counts leak from one target /
        # condition into the next
        blockKind_blockNum = np.zeros([n_block_kinds,max_n_blocks])
        target_settled = dfs[(dfs.targetName == target) & (dfs.condition == condition) & (dfs.phase == 'build')]

        # numBlocks is 1-based, matrix columns are 0-based; the range is
        # inclusive so the longest sequences keep their final block
        for i in range(1, max_n_blocks+1):
            ith_blocks = target_settled[target_settled.numBlocks == i]['blockKind']
            for b in ith_blocks:
                blockKind_blockNum[b_map[b],i-1] += 1

        #normalize each column by its total (empty columns are divided by 1)
        blockKind_blockNum = blockKind_blockNum/np.maximum(np.sum(blockKind_blockNum, axis=0), 1)

        axs[p,ci].matshow(blockKind_blockNum)        
        axs[p,ci].set_title(target, fontsize=10)

for ax in axs.flat:
    ax.axis('off')

plt.get_cmap().set_bad(color='red')
plt.set_cmap('magma')

In [None]:
# NOTE(review): dfs_saved is not assigned in any cell visible above this one;
# on a fresh kernel this raises NameError unless it is defined elsewhere —
# confirm, or remove this cell.
dfs_saved

In [None]:
# For every target structure, show which block kind was placed at each sequence
# position in the explore (left) and build (right) phase of the physical condition.
max_n_blocks = int(dfs['numBlocks'].max())
n_block_kinds = dfs['blockKind'].nunique()
blockKinds = np.sort(dfs['blockKind'].unique())
targets = np.sort(dfs['targetName'].unique())
gameIDs = np.sort(dfs['gameID'].unique())

# block kind -> row index of the matrix
b_map = dict(zip(blockKinds, range(0,n_block_kinds)))
print(b_map)
fig, axs = plt.subplots(len(targets)+1,2, figsize=(12,20))

for ph_i, phase in enumerate(['explore','build']):
    # panels start at row 1; row 0 of the grid is left empty, as before
    for p, target in enumerate(targets, start=1):
        blockKind_blockNum = np.zeros([n_block_kinds,max_n_blocks])
        target_settled = dfs[(dfs.targetName == target) &
                             (dfs.phase == phase) &
                             (dfs.condition == 'physical')]
        # optionally restrict to one participant: & (dfs.gameID == gameIDs[1])

        # numBlocks is 1-based, matrix columns are 0-based; the range is
        # inclusive so the longest sequences keep their final block
        for i in range(1, max_n_blocks+1):
            ith_blocks = target_settled[target_settled.numBlocks == i]['blockKind']
            for b in ith_blocks:
                blockKind_blockNum[b_map[b],i-1] += 1

        #normalize each column by its total (empty columns are divided by 1)
        blockKind_blockNum = blockKind_blockNum/np.maximum(np.sum(blockKind_blockNum, axis=0), 1)

        axs[p,ph_i].matshow(blockKind_blockNum)        
        axs[p,ph_i].set_title(target, fontsize=10)

for ax in axs.flat:
    ax.axis('off')

plt.get_cmap().set_bad(color='red')
plt.set_cmap('magma')

In [None]:
# Same block-kind x sequence-position matrices, restricted to the first
# participant (gameIDs[0]); explore phase on the left, build phase on the right.
max_n_blocks = int(dfs['numBlocks'].max())
n_block_kinds = dfs['blockKind'].nunique()
blockKinds = np.sort(dfs['blockKind'].unique())
targets = np.sort(dfs['targetName'].unique())
gameIDs = np.sort(dfs['gameID'].unique())

# block kind -> row index of the matrix
b_map = dict(zip(blockKinds, range(0,n_block_kinds)))
print(b_map)
fig, axs = plt.subplots(len(targets)+1,2, figsize=(12,20))

for ph_i, phase in enumerate(['explore','build']):
    # panels start at row 1; row 0 of the grid is left empty, as before
    for p, target in enumerate(targets, start=1):
        blockKind_blockNum = np.zeros([n_block_kinds,max_n_blocks])
        target_settled = dfs[(dfs.targetName == target) &
                             (dfs.phase == phase) &
                             (dfs.gameID == gameIDs[0])]

        # inclusive upper bound: a sequence of max_n_blocks blocks keeps its
        # final block (column max_n_blocks-1)
        for i in range(1, max_n_blocks+1):
            ith_blocks = target_settled[target_settled.numBlocks == i]['blockKind']
            for b in ith_blocks:
                blockKind_blockNum[b_map[b],i-1] += 1

        #normalize each column by its total (empty columns are divided by 1)
        blockKind_blockNum = blockKind_blockNum/np.maximum(np.sum(blockKind_blockNum, axis=0), 1)

        axs[p,ph_i].matshow(blockKind_blockNum)        
        axs[p,ph_i].set_title(target, fontsize=10)

for ax in axs.flat:
    ax.axis('off')

plt.get_cmap().set_bad(color='red')
plt.set_cmap('magma')

In [None]:
# 1
#     within-physical, within-structure,  within-participant: final prototype vs. final construction. 
# vs. within-physical, within-structure, between-participant: final prototype vs. final construction. 

# SKIPPED Filter- only final prototypes
# Should also do only final reconstructions

# Make all matrices
# for within participant

max_n_blocks = int(dfs['numBlocks'].max())
n_block_kinds = dfs['blockKind'].nunique()
blockKinds = np.sort(dfs['blockKind'].unique())
targets = np.sort(dfs['targetName'].unique())
n_targets = dfs['targetName'].nunique()
gameIDs = np.sort(dfs['gameID'].unique())
n_gameIDs = dfs['gameID'].nunique()

# block kind -> row index
b_map = dict(zip(blockKinds, range(0,n_block_kinds)))

# seq_mat[participant, phase (0=explore, 1=build), target, block kind, position]
# Column i holds the block placed at numBlocks == i+1. One spare column is
# allocated and never filled, so even a sequence of max_n_blocks blocks is
# followed by an empty column; iou() looks for the first empty column to find
# the sequence length.
seq_mat = np.zeros([n_gameIDs,2,n_targets,n_block_kinds, max_n_blocks+1])

for gameID_i, gameID in enumerate(gameIDs):
    for ph_i, phase in enumerate(['explore','build']):
        for target_i, target in enumerate(targets):
            # only physical-condition rows are entered; other trials stay all-zero
            target_settled = dfs[(dfs.targetName == target) &
                                 (dfs.condition == 'physical') &
                                 (dfs.phase == phase) &
                                 (dfs.gameID == gameID)]

            # every position 1..max_n_blocks is recorded (the previous upper
            # bound dropped the final block of the longest sequences)
            for i in range(max_n_blocks):
                ith_blocks = target_settled[target_settled.numBlocks == i+1]['blockKind']
                for b in ith_blocks:
                    seq_mat[gameID_i,ph_i,target_i,b_map[b],i] += 1

            #normalize each column by its total (empty columns are divided by 1)
            col_totals = np.sum(seq_mat[gameID_i,ph_i,target_i,:,:], axis=0)
            seq_mat[gameID_i,ph_i,target_i,:,:] = seq_mat[gameID_i,ph_i,target_i,:,:]/np.maximum(col_totals, 1)


# Intersection over union
# If all 1s and 0s: 
# crop to shorter length (or always explore? What if length is 0?)
# (M * N)/(M + N)

# for between participant
# bootstrap- select 1000 random (although we only have a few?)
# Calculating distribution of edit distances
# And find our CI

In [None]:
# Calculate edit distances between participants with bootstrapping
#
# For every structure, compare each participant's explore sequence with
#   (a) their own build sequence                     -> observed similarity
#   (b) another participant's build sequence (shuffled) -> permuted similarity
# and record the mean difference. Repeating the shuffle n_bootstrap times gives
# a distribution of (observed - permuted) differences.
import random

n_bootstrap = 1000
# private, seeded generator so the histogram is reproducible and the global
# random state is left alone
shuffle_rng = random.Random(0)

n_structures = seq_mat.shape[2]

iter_diff = []

for b in range(n_bootstrap):
    
    structures_diff = []  
    
    for structure in range(n_structures):
        
        # participants whose build sequence for this structure has a second block
        built_structure = np.sum(seq_mat[:,1,structure,:,1],axis = 1) > 0
        struct_explore_seqs = seq_mat[built_structure,0,structure,:,:]
        struct_build_seqs = seq_mat[built_structure,1,structure,:,:]

        num_phys = struct_build_seqs.shape[0]
        if num_phys < 2:
            # nothing to permute with fewer than two participants
            continue
        
        # random re-pairing of explore sequences with build sequences
        shuff_inds = list(range(num_phys))
        shuffle_rng.shuffle(shuff_inds)
        
        # one slot per included participant; NaN marks "no explore sequence"
        observed_sim = np.full(num_phys, np.nan)
        permuted_sim = np.full(num_phys, np.nan)
        
        for participant in range(num_phys):
            explore_seq = struct_explore_seqs[participant,:,:]
            observed_sim[participant] = iou(explore_seq, struct_build_seqs[participant,:,:])
            permuted_sim[participant] = iou(explore_seq, struct_build_seqs[shuff_inds[participant],:,:])
        
        # drop participants jointly, so observed and permuted stay paired
        paired = ~(np.isnan(observed_sim) | np.isnan(permuted_sim))
        if paired.any():
            structures_diff.append(np.mean(observed_sim[paired] - permuted_sim[paired]))
    
    if structures_diff:
        iter_diff.append(np.mean(structures_diff))
        
plt.hist(iter_diff)

In [None]:
# Average observed-minus-permuted similarity difference across iterations
np.asarray(iter_diff).mean()

In [None]:
# NOTE(review): `accuracies` is not assigned in any cell visible above this one;
# on a fresh kernel this raises NameError unless it is defined elsewhere —
# confirm, or remove this cell.
accuracies['gameID'] = gameIDs[0]

In [None]:
# Calculate edit distance within participants
# iou_scores2[participant, structure] = similarity between that participant's
# explore sequence and build sequence for that structure (NaN when there is no
# explore sequence).

targets = np.sort(dfs['targetName'].unique())
n_targets = dfs['targetName'].nunique()
gameIDs = np.sort(dfs['gameID'].unique())
n_gameIDs = dfs['gameID'].nunique()

iou_scores2 = np.zeros([n_gameIDs,n_targets])

for participant in range(n_gameIDs):

    # loop over the actual number of targets rather than a hard-coded 16, so
    # every column of iou_scores2 is filled and no index runs out of range
    for structure in range(n_targets):
        explore_seq = seq_mat[participant,0,structure,:,:]
        build_seq = seq_mat[participant,1,structure,:,:]
        iou_scores2[participant,structure] = iou(explore_seq,build_seq)


In [None]:
# column index -> target name
{col_idx: target_name for col_idx, target_name in enumerate(targets)}

In [None]:
# Reshape the participant x target similarity matrix into long format:
# one row per (gameID, targetName) with its explore/build similarity.
target_by_column = dict(enumerate(targets))
similarities = (
    pd.DataFrame(iou_scores2[:,:])
    .rename(columns = target_by_column)
    .assign(gameID = np.sort(dfs['gameID'].unique()))
    .melt(id_vars = 'gameID')
    .rename(columns = {'variable': 'targetName',
                       'value': 'explore_build_similarity'})
)
similarities

In [None]:
# Attach explore/build similarity to the trial-level frame.
# Any similarity column left by a previous run of this cell is dropped first;
# otherwise the merge would produce explore_build_similarity_x / _y and the
# cells below would no longer find the column.
df = (df.drop(columns=['explore_build_similarity'], errors='ignore')
        .merge(similarities, on=['gameID','targetName'], how='left'))

In [None]:
# Snapshot of df after the similarity merge
df_saved = df.copy(deep=True)

In [None]:
# Bucket trials by explore/build similarity: 'total' (exactly 1), 'low' (< 1),
# 'None' (missing, or anything else).
trial_similarity = df['explore_build_similarity']
df['sim_group'] = 'None'
df.loc[trial_similarity == 1, 'sim_group'] = 'total'
df.loc[trial_similarity < 1, 'sim_group'] = 'low'
df.loc[trial_similarity.isna(), 'sim_group'] = 'None'

In [None]:
# Bucket trials by explore/build similarity: 'total' (exactly 1), 'low' (< 1),
# 'None' (missing, or anything else).
trial_similarity = df['explore_build_similarity']
df['sim_group'] = 'None'
df.loc[trial_similarity == 1, 'sim_group'] = 'total'
df.loc[trial_similarity < 1, 'sim_group'] = 'low'
df.loc[trial_similarity.isna(), 'sim_group'] = 'None'

# Score by similarity bucket, physical-condition trials with a similarity value only.
physical_with_similarity = df[(df.condition=='physical') & (df.sim_group!='None')]

fig = plt.figure(figsize=(4,4))
sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})
sim_bar_kwargs = dict(x='sim_group', y='normedScore', hue='sim_group', palette='husl')
b = sns.barplot(data = physical_with_similarity, **sim_bar_kwargs)
plt.legend(bbox_to_anchor=(1.0,1))
plt.ylabel('Normed F1 score')
plt.xlabel('Similarity between build and explore phase block sequences')


In [None]:
# Column names currently in dfs
dfs.columns.tolist()

In [None]:
# currently keeping all build attempts that are of same length as maximum explore bout

# rows recorded during exploration in the physical condition
is_physical_explore = (dfs.condition=='physical') & (dfs.phase=='explore')
df1 = dfs[is_physical_explore]
# number of such rows per (trialNum, gameID), stored as maxExploreBlocks
df2 = (df1.groupby(by=['trialNum', 'gameID'])['numBlocks']
          .count()
          .reset_index()
          .rename(columns = {'numBlocks':'maxExploreBlocks'}))
dfs = pd.merge(dfs, df2, on=['gameID','trialNum'], how = 'left')
#dfs[dfs.numBlocks == dfs.maxExploreBlocks]

In [None]:
# Attach explore/build similarity to the settled-block frame and bucket it:
# 'total' (exactly 1), 'low' (< 1), 'None' (missing, or anything else).
dfs = pd.merge(dfs, similarities, on=['gameID','targetName'], how='left')
block_similarity = dfs['explore_build_similarity']
dfs['sim_group'] = 'None'
dfs.loc[block_similarity == 1, 'sim_group'] = 'total'
dfs.loc[block_similarity < 1, 'sim_group'] = 'low'
dfs.loc[block_similarity.isna(), 'sim_group'] = 'None'

In [None]:
# Incremental score by phase, split by similarity bucket.
# NOTE(review): X3 is first assigned in a later cell of this notebook, so on a
# fresh top-to-bottom run this cell raises NameError — it probably belongs
# after the cell that builds X3; confirm and move.
fig = plt.figure(figsize=(4,4))
sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})
b = sns.barplot(data = X3,
                palette='husl',
                hue='sim_group',
                x='phase',
                y='normedIncrementalScore')
plt.ylabel('Normed F1 score')
plt.xlabel('Diff accuracy in build and explore phase up to length of final explore block placed')
#plt.ylim(0.4,0.7)
#plt.yticks(np.linspace(0.4,0.7,5))
plt.legend(bbox_to_anchor=(1.0,1))

In [None]:
# numBlocks = index in the block sequence
# maxExploreBlocks = number of blocks that were placed in the explore phase
X = dfs

## subset data: physical-condition rows at the position of the last explore block
at_last_explore_block = (X['condition']=='physical') & (X['maxExploreBlocks']==X['numBlocks'])
X2 = X[at_last_explore_block].reset_index(drop=True)
#X2.drop(labels=['Unnamed: 0', 'Unnamed: 0.1'],axis=1,inplace=True)
X2['trialID'] = X2['gameID'] + '_' + X2['trialNum'].map(str) ## add trialID column

## get list of valid trials for which we have exactly two rows
trial_counts = Counter(X2['trialID'])
Y = pd.DataFrame.from_dict(trial_counts,orient='index').reset_index()
Y.columns = ['trialID','numAppearances']
has_two_rows = Y['numAppearances']==2
valid_num_appearances = Y[has_two_rows]['trialID'].values

## get list of trialIDs that have at least one explore and one build phase
Y = X2.groupby('trialID')['phase'].nunique().reset_index()
has_two_phases = Y['phase']==2
valid_num_phases = Y[has_two_phases]['trialID'].values

## trials passing both checks
valid_trials = np.intersect1d(valid_num_appearances,valid_num_phases)

## further subset to get valid data only
X3 = X2[X2['trialID'].isin(valid_trials)].reset_index(drop=True)
assert np.sum(X3['numBlocks']==0)==0 ## dont consider trials for which no blocks were placed in the explore phase

## pivot to get build & explore scores for each trial
X4 = X3.pivot(index='trialID',columns='phase',values='normedIncrementalScore').reset_index()

In [None]:
# Display the paired explore/build rows kept for valid trials.
#dfs['trialID'] = dfs['gameID'] + dfs['trialNum'].map(str)
X3

In [None]:
# Y becomes another name for the same dfs object (no copy is made).
# NOTE(review): the next cell reassigns Y immediately, so this alias looks unused — confirm.
Y = dfs

In [None]:
## get list of valid trials for which we have exactly two rows
trial_counts = Counter(X2['trialID'])
Y = pd.DataFrame.from_dict(trial_counts,orient='index').reset_index()
Y.columns = ['trialID','numAppearances']
has_two_rows = Y['numAppearances']==2
valid_num_appearances = Y[has_two_rows]['trialID'].values

## get list of trialIDs that have at least one explore and one build phase
Y = X2.groupby('trialID')['phase'].nunique().reset_index()
has_two_phases = Y['phase']==2
valid_num_phases = Y[has_two_phases]['trialID'].values

## trials passing both checks
valid_trials = np.intersect1d(valid_num_appearances,valid_num_phases)

## further subset to get valid data only
X3 = X2[X2['trialID'].isin(valid_trials)].reset_index(drop=True)
assert np.sum(X3['numBlocks']==0)==0 ## dont consider trials for which no blocks were placed in the explore phase

## pivot to get build & explore scores for each trial
X4 = X3.pivot(index='trialID',columns='phase',values='normedIncrementalScore').reset_index()

In [None]:

## save out to csv dir, where all the csv's go to live
out_name = 'similarity_settled_block_silhouette_{}.csv'.format(iterationName)
out_path = os.path.join(csv_dir, out_name)
dfs.to_csv(out_path)

In [None]:
# Rows of dfs belonging to the physical condition
dfs.loc[dfs['condition'] == 'physical']

In [None]:

# Mean explore/build similarity per participant.
# `similarities` is in long format (gameID, targetName, explore_build_similarity),
# so the score column is averaged within each gameID. A row-wise mean over the
# frame would mix in the string columns (a TypeError on recent pandas) and
# would not aggregate by participant.
mean_similarities = similarities.groupby('gameID')['explore_build_similarity'].mean()
# participants with no similarity value at all come out as NaN; leave them out
plt.hist(mean_similarities[~pd.isna(mean_similarities)])

In [None]:
# Distribution of every participant x structure similarity that is defined
all_sim = iou_scores2.reshape(-1)
defined_sim = all_sim[~pd.isna(all_sim)]
plt.hist(defined_sim)

In [None]:
# demo
# Explore (left) and build (right) sequence matrices of one participant for
# every structure, plus the similarity score of each pair.

participant = 0

fig, axs = plt.subplots(len(targets)+1,2, figsize=(12,20))

iou_scores = []

# iterate over the targets actually present instead of a hard-coded 16
for structure in range(len(targets)):
    explore_seq = seq_mat[participant,0,structure,:,:]
    build_seq = seq_mat[participant,1,structure,:,:]
    axs[structure,0].matshow(explore_seq)
    axs[structure,1].matshow(build_seq)
    iou_scores.append(iou(explore_seq,build_seq))

for ax in axs.flat:
    ax.axis('off')

plt.get_cmap().set_bad(color='red')
plt.set_cmap('magma')

print(iou_scores)

In [None]:
# Explore (column 0) and build (column 1) sequence matrices of one participant
# for every structure.
participant = 0

fig, axs = plt.subplots(len(targets)+1,2, figsize=(12,20))

for phase in [0,1]:
    # iterate over the targets actually present instead of a hard-coded 16
    for structure in range(len(targets)):
        axs[structure,phase].matshow(seq_mat[participant,phase,structure,:,:])        

for ax in axs.flat:
    ax.axis('off')

plt.get_cmap().set_bad(color='red')
plt.set_cmap('magma')

In [None]:
# 2
# within-structure, between-conditions, between-participants : final physical constructions vs. final mental constructions


In [None]:
#code that turned out not to be useful but keeping for reference if I want to do similar analyses

# Proportion of each block kind at each sequence position, per condition and target:
# count of rows per (condition, target, position, kind) divided by the count of
# rows per (condition, target, position).
dfsmat = dfs[['gameID','blockKind','numBlocks','targetName','condition']]
dfsmatCounts = dfsmat.groupby(['condition','targetName','numBlocks','blockKind']).count()['gameID']
dfsmatTotals = dfsmat.groupby(['condition','targetName','numBlocks']).count()['blockKind']
dfsmatNormed = dfsmatCounts/dfsmatTotals
dfsmatNormed = dfsmatNormed.reset_index()

# NOTE(review): after reset_index() the frame has a plain integer index, so
# unstack(level=0) presumably does not yield the per-condition table this grid
# expects — the cell is kept for reference only and likely does not run as is.
g = sns.FacetGrid(data=dfsmatNormed.unstack(level=0), 
                           row="targetName", col="condition", height=4, aspect = 2, legend_out = True)

sns.set_context('poster')
sns.set_style('whitegrid')
# NOTE(review): `palette` does not look like a heatmap argument (cmap?) — confirm.
g = g.map_dataframe(sns.heatmap, 
                        vmin = 0,
                        vmax = 1,
                        palette='coolwarm').set_titles("{targetName}")


In [None]:
# Proportion of each block kind at each sequence position, per condition and target.
dfsmat = dfs.loc[:, ['gameID','blockKind','numBlocks','targetName','condition']]
kind_keys = ['condition','targetName','numBlocks','blockKind']
position_keys = ['condition','targetName','numBlocks']
# rows per (condition, target, position, kind)
dfsmatCounts = dfsmat.groupby(kind_keys)['gameID'].count()
# rows per (condition, target, position)
dfsmatTotals = dfsmat.groupby(position_keys)['blockKind'].count()
dfsmatNormed = dfsmatCounts/dfsmatTotals