In [None]:
import os
import sys
import urllib, io

import numpy as np
import scipy.stats as stats
import pandas as pd

import pymongo as pm
from collections import Counter
import json
import re
import ast

from PIL import Image, ImageOps, ImageDraw, ImageFont 

from io import BytesIO
import base64

import  matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42

import seaborn as sns
sns.set_context('talk')
sns.set_style('darkgrid')

from IPython.display import clear_output

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

In [None]:
## directory & file hierarchy
# All locations are derived from the parent of the current working directory.
proj_dir = os.path.abspath('..')
analysis_dir = os.path.abspath(os.path.join(os.getcwd(),'..'))

# inputs: data volume, rendered pngs and experiment code
datavol_dir = os.path.join(proj_dir,'data')
png_dir = os.path.abspath(os.path.join(datavol_dir,'png'))
exp_dir = os.path.abspath(os.path.join(proj_dir,'experiments'))

# outputs: everything lives under results/
results_dir = os.path.join(proj_dir,'results')
plot_dir, csv_dir, json_dir = (os.path.join(results_dir, leaf) for leaf in ('plots','csv','json'))

# per-analyst folders
jefan_dir, will_dir = (os.path.join(analysis_dir, who) for who in ('jefan','will'))

## add helpers to python path
stimuli_dir = os.path.join(proj_dir,'stimuli')
if stimuli_dir not in sys.path:
    sys.path.append(stimuli_dir)

# create whichever output folders are still missing (json_dir is not created here)
for out_dir in (results_dir, plot_dir, csv_dir):
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)

## Load dataframe

In [None]:
# Read the trial-level CSV for the chosen iteration from csv_dir.
iterationName = 'pilot4'
read_path = os.path.join(
    csv_dir,
    'block_silhouette_{}.csv'.format(iterationName),
)
df = pd.read_csv(filepath_or_buffer=read_path)

### Exclude Outliers

In [None]:
#in progress
# Pool the trial-level data of pilot3 and pilot4, then drop every gameID
# that has a row with trialNum == 15 and score == 0.
pilot_frames = {}
for iterationName in ['pilot3', 'pilot4']:
    read_path = os.path.join(csv_dir,'block_silhouette_{}.csv'.format(iterationName))
    pilot_frames[iterationName] = pd.read_csv(read_path)
# (iterationName and read_path end up pointing at pilot4, as before)

df3 = pilot_frames['pilot3']
df4 = pilot_frames['pilot4']

# Stack the two pilots row-wise. An outer merge without explicit keys joins on
# every shared column, which re-sorts the rows and can collapse or multiply
# rows that happen to match; concat simply appends one frame to the other.
df = pd.concat([df3, df4], ignore_index=True, sort=False)

low_scores = df[(df.trialNum == 15) & (df.score == 0)]['gameID']
# .copy() gives an independent frame, so later cells can add columns to df
# without writing into a slice of the pooled data.
df = df[~df.gameID.isin(low_scores)].copy()

## Summary Statistics

In [None]:
# Accuracy etc. by condition
# (descriptive statistics of the selected measures, one row per condition)
(
    df
    .loc[:, ['condition','normedScore','numBlocks','buildTime']]
    .groupby(by=['condition'])
    .describe()
)

In [None]:
# Accuracy etc. by trial number
# (descriptive statistics of the selected measures, one row per trialNum)
(
    df
    .loc[:, ['condition','normedScore','numBlocks','buildTime','trialNum']]
    .groupby(by='trialNum')
    .describe()
)

In [None]:
# Per-participant means within each condition.
# numeric_only=True restricts the mean to numeric columns; without it,
# pandas >= 2.0 raises if df holds any non-numeric column besides the keys.
subject_means_by_condition = df.groupby(['gameID','condition']).mean(numeric_only=True)
condition_means = subject_means_by_condition.groupby('condition')['normedScore'].mean()

subject_scores_by_condition = subject_means_by_condition['normedScore']

sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})
# catplot is a figure-level function that creates its own figure, so the size
# is requested through height/aspect (8 x 6 inches) instead of opening a
# separate plt.figure() that would be left empty.
c = sns.catplot(x='condition', 
                y='normedScore', 
                hue='gameID', 
                kind='point', 
                legend=False,
                height=6,
                aspect=8/6,
                data=df);
fig = plt.gcf()  # the figure catplot just created
plt.ylabel('Normed F1 score')
plt.xlabel('Condition')
plt.yticks(np.linspace(0.4,1,5))
plt.setp(c.ax.lines,linewidth=2)
plt.tight_layout()



In [None]:
# Two-column view used by the per-condition bar plot: condition and normedScore.
score_condition = df.loc[:, ['condition','normedScore']]

In [None]:
# Bar plot of normedScore for each condition, coloured by condition.
fig = plt.figure(figsize=(8,6))
sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})
bar_kwargs = dict(x='condition', y='normedScore', hue='condition', palette='husl')
b = sns.barplot(data=score_condition, **bar_kwargs)
# barplot returns the axes it drew on, so decorate that axes directly
b.legend(bbox_to_anchor=(1.0,1))
b.set_ylabel('Normed F1 score')
b.set_xlabel('Condition')
b.set_yticks(np.linspace(0,1,6))
fig.tight_layout()


In [None]:
# extendedCondition copies condition, except that 'physical' rows with
# numBlocksExplore == 0 are labelled 'no_action_physical'.
idle_physical = (df.condition == 'physical') & (df.numBlocksExplore == 0)
df['extendedCondition'] = df['condition'].mask(idle_physical, 'no_action_physical')

# normedScore per condition, bars split by extendedCondition
fig = plt.figure(figsize=(4,4))
sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})
bar_kwargs = dict(x='condition', y='normedScore', hue='extendedCondition', palette='husl')
b = sns.barplot(data=df, **bar_kwargs)
b.legend(bbox_to_anchor=(1.0,1))
b.set_ylabel('Normed F1 score')
b.set_xlabel('Condition')
b.set_yticks(np.linspace(0,1,6))
fig.tight_layout()




In [None]:
# Add a condition for physical with 0 actions
# Keep the original condition label everywhere except on 'physical' rows
# whose numBlocksExplore is 0.
is_physical = df.condition == 'physical'
explored_nothing = df.numBlocksExplore == 0
df['extendedCondition'] = df['condition'].where(~(is_physical & explored_nothing), 'no_action_physical')


In [None]:
# numBlocks per condition, bars split by extendedCondition.
fig = plt.figure(figsize=(4,4))
sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})
bar_kwargs = dict(x='condition', y='numBlocks', hue='extendedCondition', palette='husl')
b = sns.barplot(data=df, **bar_kwargs)
# decorate the axes returned by barplot
b.legend(bbox_to_anchor=(1.0,1))
b.set_ylabel('n blocks')
b.set_xlabel('condition')
b.set_yticks(np.linspace(0,12,6))
fig.tight_layout()

In [None]:
# normedScore as a function of trialNum, pooled over all rows of df.
score_trials = df.loc[:, ['trialNum','normedScore']]

fig = plt.figure(figsize=(8,4))
sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})
line_kwargs = dict(x='trialNum', y='normedScore', palette='husl')
b = sns.lineplot(data=score_trials, **line_kwargs)
# lineplot returns the axes it drew on; label and tick it directly
b.set_ylabel('Normed F1 score')
b.set_xlabel('Trial')
b.set_yticks(np.linspace(0.4,1,6))
b.set_xticks(np.linspace(0,15,16))
fig.tight_layout()

## Load settled dataframe

In [None]:
## load in dataframe
# The "settled" CSV of the chosen iteration is read into dfs.
iterationName = 'pilot4'

settled_csv_name = 'block_silhouette_settled_{}.csv'.format(iterationName)
data_path = os.path.join(csv_dir, settled_csv_name)
dfs = pd.read_csv(filepath_or_buffer=data_path)

### Exclude outliers

In [None]:
# Drop from dfs the participants that are excluded from the trial-level frame df.
#
# df may already have had its low scorers removed by the exclusion cell above.
# In that case no (trialNum == 15, score == 0) rows are left in df, low_scores
# is empty, and filtering on it alone would silently keep everyone in dfs.
# So a game is excluded if it is flagged by low_scores OR is no longer in df.
# NOTE(review): this also drops any game that is in dfs but was never in df —
# confirm that is acceptable for this dataset.
low_scores = df[(df.trialNum == 15) & (df.score == 0)]['gameID']
excluded_games = dfs.gameID.isin(low_scores) | ~dfs.gameID.isin(df.gameID)
dfs = dfs[~excluded_games]

In [None]:
# Add a condition for physical with 0 actions
# Step 1: start extendedCondition as a copy of condition and attach each
# trial's numBlocksExplore from df (matched on gameID and trialNum).
trialInfo = df.loc[:, ['gameID','trialNum','numBlocksExplore']]

dfs = pd.merge(
    dfs.assign(extendedCondition=dfs['condition']),
    trialInfo,
    how='left',
    on=['gameID', 'trialNum'],
)

In [None]:
# Relabel 'physical' rows of dfs whose numBlocksExplore is 0.
# Both halves of the mask must come from dfs: df is a different frame with a
# different index, so mixing df.condition in would misalign the mask.
no_action_physical = (dfs.condition == 'physical') & (dfs.numBlocksExplore == 0)
dfs.loc[no_action_physical, 'extendedCondition'] = 'no_action_physical'

In [None]:
# Add useful variables to dataframe

# subject_trial: one identifier per gameID x trialNum. The trial number is
# converted element-wise with astype(str); str() on the whole column would
# append the printed Series to every row instead of that row's trial number.
dfs['subject_trial'] = dfs['gameID'] + '_' + dfs['trialNum'].astype(str)
# condition_number: 100 for 'mental', 2 for any other condition
dfs['condition_number'] = np.where(dfs['condition']=='mental', 100, 2)
dfs['condition_code'] = dfs['condition_number'] + dfs['trialNum']
# time_bin: timePlaced rounded to the nearest multiple of 10000
# (np.round replaces the np.round_ alias, which NumPy 2.0 removed)
dfs['time_bin'] = np.round(dfs['timePlaced']/10000)*10000

# per-phase subsets
dfs_build = dfs[dfs.phase == 'build']
dfs_explore = dfs[dfs.phase == 'explore']


In [None]:
# One line per participant x trial (build phase only), coloured by a numeric
# code that separates the two conditions.

# astype(str) converts trialNum element-wise; str(dfs['trialNum']) would give
# every row the same text, so units="subject_trial" would group by gameID only.
dfs['subject_trial'] = dfs['gameID'] + '_' + dfs['trialNum'].astype(str)
dfs['condition_number'] = np.where(dfs['condition']=='mental', 100, 2)
# trialNum/2 spreads trials of one condition over a narrow range of hue values
dfs['condition_code'] = dfs['condition_number'] + dfs['trialNum']/2

fig = plt.figure(figsize=(12,6))
sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})
ax = sns.lineplot(x="timePlaced", 
                  y="normedIncrementalScore", 
                  hue='condition_code', 
                  units="subject_trial",
                  palette='coolwarm',
                  estimator=None, lw=0.7,
                  data=dfs[dfs['phase']=='build'])

#red is mental, blue is physical

In [None]:
# 'Scores over time across participant and structure, by condition'
# Mean normedIncrementalScore against numBlocks, one line per extendedCondition.

fig = plt.figure(figsize=(12,6))
sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})
build_rows = dfs.loc[dfs['phase'] == 'build'] #and numblocks=0
line_kwargs = dict(x="numBlocks",
                   y="normedIncrementalScore",
                   hue='extendedCondition')
ax = sns.lineplot(data=build_rows,
                  estimator=np.mean,
                  palette='coolwarm',
                  lw=0.7,
                  **line_kwargs)

In [None]:
# Mean normedIncrementalScore against time_bin (build phase only),
# one line per extendedCondition.
fig = plt.figure(figsize=(12,6))
sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})
build_rows = dfs.loc[dfs['phase'] == 'build']
line_kwargs = dict(x="time_bin",
                   y="normedIncrementalScore",
                   hue='extendedCondition')
ax = sns.lineplot(data=build_rows,
                  estimator=np.mean,
                  palette='coolwarm',
                  lw=0.7,
                  **line_kwargs)

In [None]:
# 'Mean scores for each structure over time (binned), by condition and phase'
# Grid layout: one row per targetName, one column per phase.

g = sns.FacetGrid(data=dfs, row="targetName", col="phase", height=4, aspect = 2)

sns.set_context('poster')
sns.set_style('whitegrid', {'legend.frameon':False})

# each step below returns the same grid, so they are applied one after another
g.map_dataframe(sns.lineplot,
                x="time_bin",
                y="normedIncrementalScore",
                hue='condition',
                estimator= np.mean,
                palette='coolwarm',
                lw=0.7)
g.set_titles("{row_name}")
g.add_legend()


In [None]:
# normedIncrementalScore against numBlocks by condition;
# one row per targetName, one column per phase.
g = sns.FacetGrid(data=dfs, row="targetName", col="phase", height=4, aspect = 2)
sns.set_context('poster')
sns.set_style('whitegrid')
line_kwargs = dict(x="numBlocks",
                   y="normedIncrementalScore",
                   hue='condition',
                   palette='coolwarm',
                   lw=0.7)
g.map_dataframe(sns.lineplot, **line_kwargs)
g.set_titles("{row_name}")
g.add_legend()


In [None]:
# 'Individual scores for each attempt at each structure over time, by condition and phase'
# No aggregation (estimator=None): one line is drawn per subject_trial value.

g = sns.FacetGrid(data=dfs, row="targetName", col="phase", height=4, aspect = 2, legend_out = True)

sns.set_context('poster')
sns.set_style('whitegrid')
line_kwargs = dict(x="timePlaced",
                   y="normedIncrementalScore",
                   hue='condition',
                   label='condition',
                   units="subject_trial",
                   estimator=None,
                   palette='coolwarm',
                   lw=0.7)
g.map_dataframe(sns.lineplot, **line_kwargs)
g.set_titles("{row_name}")
g.add_legend()


In [None]:
# 'Mean scores for each participant over time, by condition and phase'
# Grid layout: one row per gameID, one column per phase.

g = sns.FacetGrid(data=dfs, row="gameID", col="phase", height=4, aspect = 2, legend_out = True)

sns.set_context('poster')
sns.set_style('whitegrid')
line_kwargs = dict(x="time_bin",
                   y="normedIncrementalScore",
                   hue='condition',
                   estimator=np.mean,
                   palette='coolwarm',
                   lw=0.7)
g.map_dataframe(sns.lineplot, **line_kwargs)
# every facet receives the same fixed title text
g.set_titles('subject')
g.add_legend()


In [None]:
# For every condition x target, draw a matrix whose rows are block kinds and
# whose columns are values of numBlocks; each column shows the proportion of
# rows with that numBlocks that used each block kind.
max_n_blocks = max(dfs['numBlocks'])
n_block_kinds = dfs['blockKind'].nunique()
blockKinds = np.sort(dfs['blockKind'].unique())
targets = np.sort(dfs['targetName'].unique())

# block kind -> row index of the matrix
b_map = dict(zip(blockKinds, range(0,n_block_kinds)))
print(b_map)

# Column i holds the rows with numBlocks == i, for i = 0 .. max_n_blocks, so
# the largest numBlocks value is counted too.
n_columns = int(max_n_blocks) + 1

# Row 0 of the axes grid is left empty; targets occupy rows 1..len(targets).
fig, axs = plt.subplots(len(targets)+1,2, figsize=(12,20))

for ci, condition in enumerate(['mental','physical']):
    for p, target in enumerate(targets, start=1):
        target_settled = dfs[(dfs.targetName == target) & (dfs.condition == condition)]

        # Start every panel from zero; reusing one matrix would carry the
        # previous panels' (already normalised) values into this one.
        blockKind_blockNum = np.zeros([n_block_kinds, n_columns])
        for i in range(n_columns):
            ith_blocks = target_settled[target_settled.numBlocks == i]['blockKind']
            for b in ith_blocks:
                blockKind_blockNum[b_map[b],i] += 1

        #normalize
        # divide each column by its total; empty columns are divided by 1 and stay 0
        column_totals = np.sum(blockKind_blockNum, axis=0)
        blockKind_blockNum = blockKind_blockNum/np.maximum(column_totals, 1)

        # the colormap is set per panel so that all panels share it
        axs[p,ci].matshow(blockKind_blockNum, cmap='magma')

        axs[p,ci].set_title(target, fontsize=10)

for ax in axs.flat:
    ax.axis('off')

# Kept so that 'magma' remains the default image colormap for later cells.
# The matrices contain no NaN (empty columns are 0), so no "bad" colour is set.
plt.set_cmap('magma')

In [None]:
# TODO (open questions): is there a difference (presumably between conditions)?
# Are the patterns more similar at the start or at the end?

In [None]:
# NOTE(review): bare expression that only evaluates to an empty list; looks like leftover scratch work.
list()

In [None]:
#code that turned out not to be useful but keeping for reference if I want to do similar analyses
# Builds, per condition / targetName / numBlocks, the fraction of rows that
# used each blockKind, then attempts to draw one heatmap per target x condition.
# NOTE(review): this cell probably does not run as written (see notes below) —
# verify before reusing it.

dfsmat = dfs[['gameID','blockKind','numBlocks','targetName','condition']]
# non-null gameID count per (condition, targetName, numBlocks, blockKind)
dfsmatCounts = dfsmat.groupby(['condition','targetName','numBlocks','blockKind']).count()['gameID']
# non-null blockKind count per (condition, targetName, numBlocks)
dfsmatTotals = dfsmat.groupby(['condition','targetName','numBlocks']).count()['blockKind']
dfsmatNormed = dfsmatCounts/dfsmatTotals
dfsmatNormed = dfsmatNormed.reset_index()

# NOTE(review): after reset_index() the frame has a default index, so
# unstack(level=0) presumably does not produce the long-form frame that
# FacetGrid expects — confirm.
g = sns.FacetGrid(data=dfsmatNormed.unstack(level=0), 
                           row="targetName", col="condition", height=4, aspect = 2, legend_out = True)

sns.set_context('poster')
sns.set_style('whitegrid')
# NOTE(review): 'palette' is passed through to sns.heatmap, whose colour
# argument is normally 'cmap'; the title template uses {targetName} while the
# other grids in this notebook use {row_name} — verify both.
g = g.map_dataframe(sns.heatmap, 
                        vmin = 0,
                        vmax = 1,
                        palette='coolwarm').set_titles("{targetName}")


In [None]:
# Fraction of rows of each blockKind among all rows that share the same
# condition, targetName and numBlocks (counts are of non-null values).
dfsmat = dfs.loc[:, ['gameID','blockKind','numBlocks','targetName','condition']]
dfsmatCounts = dfsmat.groupby(['condition','targetName','numBlocks','blockKind'])['gameID'].count()
dfsmatTotals = dfsmat.groupby(['condition','targetName','numBlocks'])['blockKind'].count()
# division aligns the two Series on their shared index levels
dfsmatNormed = dfsmatCounts.div(dfsmatTotals)