In [1]:
import os
import sys
import urllib, io
## Make the project's helper modules importable from this notebook.
## Every entry is guarded, so re-running this cell does not keep appending
## duplicate entries to sys.path.
proj_dir = os.path.abspath('../..')

## add helpers to python path
for _helper_dir in ("..", "../utils", os.path.join(proj_dir,'stimuli')):
    if _helper_dir not in sys.path:
        sys.path.append(_helper_dir)

import numpy as np
import scipy.stats as stats
import pandas as pd
from scipy.spatial import distance
from scipy import ndimage
from scipy.stats import entropy
from random import random
from sklearn.cluster import SpectralBiclustering
import itertools

import pymongo as pm
from collections import Counter
import json
import re
import ast

from PIL import Image, ImageOps, ImageDraw, ImageFont 

from io import BytesIO
import base64

import  matplotlib
from matplotlib import pylab, mlab, pyplot
%matplotlib inline
from IPython.core.pylabtools import figsize, getfigs
plt = pyplot
import matplotlib as mpl
mpl.rcParams['pdf.fonttype'] = 42
from matplotlib import colors

import seaborn as sns
sns.set_context('talk')
sns.set_style('darkgrid')

from IPython.display import clear_output

import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", message="numpy.dtype size changed")
warnings.filterwarnings("ignore", message="numpy.ufunc size changed")

import plotly
import plotly.graph_objects as go
import plotly.io as pio
pio.orca.config.use_xvfb = True
plotly.io.orca.config.save()

import importlib
import graph as g

In [2]:
## directory & file hierarchy
## (all locations are derived from the notebook's working directory)
proj_dir = os.path.abspath('../..')
datavol_dir = os.path.join(proj_dir,'data')
analysis_dir = os.path.abspath(os.path.join(os.getcwd(),'..'))
results_dir = os.path.join(analysis_dir,'results')
stim_dir = os.path.join(proj_dir,'stimuli')
plot_dir = os.path.join(results_dir,'plots')
csv_dir = os.path.join(results_dir,'csv')
json_dir = os.path.join(results_dir,'json')
exp_dir = os.path.abspath(os.path.join(proj_dir,'experiments'))
png_dir = os.path.abspath(os.path.join(datavol_dir,'png'))
jefan_dir = os.path.join(analysis_dir,'jefan')
will_dir = os.path.join(analysis_dir,'will')

## Create the output folders if they are missing. exist_ok=True replaces the
## separate os.path.exists() check, so there is no window between "check" and
## "create", and a path that exists but is not a directory raises an error
## instead of being silently accepted.
for _needed_dir in (results_dir, plot_dir, csv_dir):
    os.makedirs(_needed_dir, exist_ok=True)

### load in data

In [3]:
# Name of the experiment iteration to analyse; it is interpolated into the
# CSV file names that the next cell reads.
iteration_name = 'Exp2Pilot3'
# Expected number of trials, kept for sanity checks.
# NOTE(review): not referenced in the cells visible here -- confirm where it is used.
num_trials = 24 #for sanity checks

In [4]:
# The tables read here were compiled ahead of time by the
# "CogSci 2020 Dataframe Generator"; this cell only loads them back in.

# file locations: trial_end data, initial_block data, settled_block data
trial_path = os.path.join(csv_dir,'block_silhouette_{}_good.csv'.format(iteration_name))
initial_path = os.path.join(csv_dir,'block_silhouette_initial_{}_good.csv'.format(iteration_name))
settled_path = os.path.join(csv_dir,'block_silhouette_settled_{}_good.csv'.format(iteration_name))

# read the three tables in the same order as the paths above
df, dfi, dfs = (pd.read_csv(csv_path) for csv_path in (trial_path, initial_path, settled_path))

# Sanity check: the trial-level and settled-block tables must contain exactly
# the same set of games.
# NOTE(review): dfi (initial_block data) is not part of this comparison.
df_participants = df.gameID.unique()
dfs_participants = dfs.gameID.unique()
assert set(df_participants) == set(dfs_participants)

# number of participants before any outlier handling
n_before_outliers = len(df_participants)
print(str(n_before_outliers) + ' participants total')

49 participants total


### apply preprocessing

In [5]:
# sorted unique values used to index the analyses below
targets = np.sort(df['targetName'].unique())
ppts = np.sort(df['gameID'].unique())
reps = np.sort(df['repetition'].unique())

# Parse each stringified world exactly once. The arithmetic maps every cell
# value x to 1 - x. The flattened version is then derived from the parsed
# array instead of running ast.literal_eval on the same string a second time.
dfi['usableDiscreteWorld'] = dfi['discreteWorld'].apply(lambda a: 1+(-1)*np.array(ast.literal_eval(a)))
dfi['flatDiscreteWorld'] = dfi['usableDiscreteWorld'].apply(lambda world: world.flatten())

# Keep only the columns needed downstream. Taking the subset first and copying
# it afterwards means the column assignments below act on an independent frame.
#dfic = dfi[dfi.condition=='repeated']
dfic = dfi[['targetName','gameID','blockNum','repetition','phase_extended','flatDiscreteWorld','usableDiscreteWorld','rawF1DiscreteScore']].copy()
dfic['discreteWorld'] = dfic['usableDiscreteWorld']
dfic['flatDiscreteWorldStr'] = dfic['flatDiscreteWorld'].apply(g.convert_to_str)

# largest blockNum observed in the data
max_actions = dfic['blockNum'].max()

### make trajectory graph

In [61]:
importlib.reload(g) ## pick up any edits made to the graph utils since they were imported
make_plot = False  ## plotting is skipped unless this is switched to True
if make_plot:
    phases = ['pre', 'post']
    ## keyword arguments that are identical for every plotting call
    shared_plot_kwargs = dict(data = dfic,
                              save = True,
                              out_dir = plot_dir,
                              extension = 'test',
                              x_lower_bound = 4,
                              x_upper_bound = 13,
                              edge_width_scale_factor = 0.8,
                              node_size_scale_factor = 0.8)
    ## only the first target is plotted, once per phase
    for this_target, this_phase in itertools.product(targets[:1], phases):
        g.plot_trajectory_graph(target = this_target,
                                phase = this_phase,
                                **shared_plot_kwargs)

### analyze entropy over world states

In [50]:
importlib.reload(g)
phases = ['pre', 'post']
# H[target][phase] and P[target][phase] receive the two values returned by
# g.get_entropy_over_states for that target/phase combination.
H = {target_name: dict() for target_name in targets}
P = {target_name: dict() for target_name in targets}
# `target` and `phase` are deliberately ordinary loop variables: later cells
# read the values they hold once this loop has finished.
for target, phase in itertools.product(targets, phases):
    print('Calculating entropy for {} {}'.format(target, phase))
    h,p = g.get_entropy_over_states(data=dfic, target=target, phase=phase)
    H[target][phase], P[target][phase] = h, p
    clear_output(wait=True)
print('Done!')

Done!


In [73]:
import scipy
# reshape the nested H dict into a table: one row per target, one column per phase
H2 = pd.DataFrame(H).T.reset_index()
# per-target change in entropy from 'pre' to 'post'
H2['diff'] = H2['post'].sub(H2['pre'])
# one-sample t-test of those differences against zero
scipy.stats.ttest_1samp(H2['diff'].values,0)

Ttest_1sampResult(statistic=-1.9262744242865506, pvalue=0.09544431106740073)

In [79]:
# For every target, count the distinct keys seen in each phase and how many of
# them appear in both.
# NOTE(review): this reads W as W[target][phase], but no cell visible here
# builds W in that nested form -- confirm where it comes from before relying
# on a fresh Restart & Run All.
for target in targets:
    PRE, POST = (set(W[target][phase_label].keys()) for phase_label in ('pre', 'post'))
    xsect = PRE & POST
    print('Target: {} | Pre: {} | Post: {} | Intersection: {}'.format(target, *map(len, (PRE, POST, xsect))))

Target: hand_selected_004 | Pre: 282 | Post: 253 | Intersection: 74
Target: hand_selected_005 | Pre: 156 | Post: 176 | Intersection: 48
Target: hand_selected_006 | Pre: 214 | Post: 249 | Intersection: 52
Target: hand_selected_008 | Pre: 214 | Post: 214 | Intersection: 57
Target: hand_selected_009 | Pre: 252 | Post: 193 | Intersection: 68
Target: hand_selected_011 | Pre: 182 | Post: 226 | Intersection: 62
Target: hand_selected_012 | Pre: 264 | Post: 261 | Intersection: 80
Target: hand_selected_016 | Pre: 299 | Post: 281 | Intersection: 83


In [83]:
# `xsect` is reassigned on every pass of the per-target loop, so at this point
# it holds the pre/post intersection for the LAST target only.
overlap = list(xsect)

In [88]:
# Sum of the 'post' entries of P[target] over the states in `overlap`
# (presumably probability mass -- verify what g.get_entropy_over_states returns).
# `target` is whatever value the most recent loop over `targets` left behind.
np.sum([P[target]['post'][o] for o in overlap])

0.5876685934489403

In [89]:
phase

'post'

In [90]:
# Sum of the 'pre' entries of P[target] over the states in `overlap`
# (presumably probability mass -- verify what g.get_entropy_over_states returns).
# `target` is whatever value the most recent loop over `targets` left behind.
np.sum([P[target]['pre'][o] for o in overlap])

0.46495327102803735

In [None]:
# Rows of `data` for the current `target` and `phase`.
# NOTE(review): `data` is first assigned in a later cell, and `target` / `phase`
# are leftovers from earlier loops, so this cell fails on a fresh kernel when
# the notebook is run top to bottom.
a = data[(data.targetName==target) & (data.phase_extended==phase)]

49

In [152]:
# Import only the two graph helpers this cell uses, instead of
# `from graph import *`: a wildcard import hides where names come from and can
# silently overwrite notebook globals.
# NOTE(review): if cells further down rely on other names that the wildcard
# used to provide, import those explicitly as well.
from graph import GenericBuildGraph, convert_to_prob_dist
random_seed = 2  # seed for the bootstrap resampling of game IDs
data = dfic

# this is what an empty world looks like (does not count towards entropy)
empty_world = '[0 0 0 0 0 0 0 0 0 0 0 0 0][0 0 0 0 0 0 0 0 0 0 0 0 0][0 0 0 0 0 0 0 0 0 0 0 0 0][0 0 0 0 0 0 0 0 0 0 0 0 0][0 0 0 0 0 0 0 0 0 0 0 0 0][0 0 0 0 0 0 0 0 0 0 0 0 0][0 0 0 0 0 0 0 0 0 0 0 0 0][0 0 0 0 0 0 0 0 0 0 0 0 0][0 0 0 0 0 0 0 0 0 0 0 0 0][0 0 0 0 0 0 0 0 0 0 0 0 0][0 0 0 0 0 0 0 0 0 0 0 0 0][0 0 0 0 0 0 0 0 0 0 0 0 0][0 0 0 0 0 0 0 0 0 0 0 0 0][0 0 0 0 0 0 0 0 0 0 0 0 0][0 0 0 0 0 0 0 0 0 0 0 0 0][0 0 0 0 0 0 0 0 0 0 0 0 0][0 0 0 0 0 0 0 0 0 0 0 0 0][0 0 0 0 0 0 0 0 0 0 0 0 0]' 

# NOTE(review): `target` and `phase` are not set in this cell; they carry over
# from whichever loop ran last in an earlier cell -- confirm that is intended.

# Create graph
t = GenericBuildGraph() # make new tree

## get list of bootstrap resampled game ID's
## (drawn with replacement, as many draws as there are distinct games)
game_ids = data['gameID'].unique()
boot_games = np.random.RandomState(random_seed).choice(game_ids,
                                              size=len(game_ids),replace=True)

## get bootstrapped version of dataframe with current bootstrap sample of gameIDs
## The per-draw slices are collected first and concatenated once, rather than
## growing B with pd.concat inside the loop. The loop no longer uses `g` as its
## counter, because that rebinds the `graph` module alias and breaks every later
## `importlib.reload(g)` / `g.<function>` call.
in_condition = (data.targetName==target) & (data.phase_extended==phase)
B = pd.concat([data[(data['gameID']==boot_game) & in_condition] for boot_game in boot_games],
              axis=0)

# NOTE(review): grouping is by gameID, so a game that was drawn k times arrives
# as ONE group containing k copies of its rows -- confirm add_build_path treats
# that the way the bootstrap intends.
a = B.groupby('gameID')
# Iterate over the groups explicitly: add_build_path is called for its side
# effect on `t`, and older pandas releases evaluated the function passed to
# groupby.apply twice on the first group.
for _, game_rows in a:
    t.add_build_path(game_rows)

# create dictionary of world strings to num visits
# NOTE(review): this rebinds W and (below) P, which earlier cells index as
# nested per-target / per-phase dicts.
W = dict()
for layer in t.world_layers.values():
    for world_str, node in layer.nodes.items():
        if world_str != empty_world:
            W[world_str] = node.visits

# convert counts to probabilities
P = convert_to_prob_dist(W)

# calculate entropy over probability dist
h = entropy(list(P.values()))

print(h)

4.775621871881723


In [153]:
H[target][phase]

5.158970737611774

In [120]:
# from collections import Counter
# x = _a['gameID'].unique().sample(n=len(_a), random_state=0, replace=True)

In [135]:
target

'hand_selected_016'

In [136]:
phase

'post'

5.158970737611774

32

In [132]:
B

Unnamed: 0,targetName,gameID,blockNum,repetition,phase_extended,flatDiscreteWorld,usableDiscreteWorld,rawF1DiscreteScore,discreteWorld,flatDiscreteWorldStr
9731,hand_selected_016,9415-40177ff4-0b4d-494c-96b4-3409ff007cdb,1,3,post,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",0.307692,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",0000000000000000000000000000000000000000000000...
9732,hand_selected_016,9415-40177ff4-0b4d-494c-96b4-3409ff007cdb,2,3,post,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",0.400000,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",0000000000000000000000000000000000000000000000...
9733,hand_selected_016,9415-40177ff4-0b4d-494c-96b4-3409ff007cdb,3,3,post,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",0.558824,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",0000000000000000000000000000000000000000000000...
9734,hand_selected_016,9415-40177ff4-0b4d-494c-96b4-3409ff007cdb,4,3,post,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",0.600000,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",0000000000000000000000000000000000000000000000...
9735,hand_selected_016,9415-40177ff4-0b4d-494c-96b4-3409ff007cdb,5,3,post,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",0.611111,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",0000000000000000000000000000000000000000000000...
9736,hand_selected_016,9415-40177ff4-0b4d-494c-96b4-3409ff007cdb,6,3,post,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",0.648649,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",0000000000000000000000000000000000000000000000...
9737,hand_selected_016,9415-40177ff4-0b4d-494c-96b4-3409ff007cdb,7,3,post,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",0.780488,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",0000000000000000000000000000000000000000000000...
9738,hand_selected_016,9415-40177ff4-0b4d-494c-96b4-3409ff007cdb,8,3,post,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",0.790698,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",0000000000000000000000000000000000000000000000...
10447,hand_selected_016,9972-0695b95b-896a-4ade-9999-92ab0fe4584a,1,3,post,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",0.166667,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",0000000000000000000000000000000000000000000000...
10448,hand_selected_016,9972-0695b95b-896a-4ade-9999-92ab0fe4584a,2,3,post,"[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...","[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",0.240000,"[[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, ...",0000000000000000000000000000000000000000000000...
