The analyses in this notebook attempt to use representational similarity analysis (RSA) to examine the similarity of scanpaths as a function of scene context, location, object set, etc. They didn't yield anything conclusive on a first pass, but we are keeping them here for the record, in case we want to revisit this line of inquiry.

In [2]:
import pickle
import random
import sys
from itertools import combinations
from time import perf_counter

import imageio
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import cm
from matplotlib.ticker import FormatStrFormatter
from mpl_toolkits.axes_grid1 import make_axes_locatable
from scipy import stats
from scipy.spatial import distance
from sklearn.metrics.pairwise import cosine_similarity

import visual_search_rsa
from hpd import hpd_grid

# Make the distance_FC helper importable by putting its directory (given
# relative to the working directory) at the front of the module search path.
UTILS_DIR = 'FC_geodesic/utils/distance_FC'
sys.path.insert(0, UTILS_DIR)
from distance_FC import distance_FC

# Turn off pandas' chained-assignment (SettingWithCopy) warning for the whole notebook.
pd.options.mode.chained_assignment = None

### Prelims.

In [3]:
## Set directories.
# NOTE(review): these are absolute, machine-specific paths; point them at your
# own copy of the data before running.
data_direc = '/Users/angelaradulescu/Dropbox (Facebook)/VisualSearch/VisualSearchGazeData/'
equirect_direc = '/Users/angelaradulescu/Dropbox (Facebook)/VisualSearch/VisualSearchSpheres/'

## Load object locations.
locations = pd.read_csv(data_direc + 'location_all_objects.csv')

## Load data (~1min for gaze).
# Set to 1 to read the fixations file instead of the raw gaze samples.
load_fixations = 0
if load_fixations:
    # NOTE(review): this branch applies no condition filter and does not
    # pre-create the room / location / trial_vp columns -- presumably the
    # fixations file needs neither; confirm before relying on it.
    gaze = pd.read_csv(data_direc + 'gaze_fixations.csv', index_col=False)
else:
    gaze = pd.read_csv(data_direc + 'gaze_raw.csv', index_col=False)
    # Restrict to non-assisted trials.
    gaze = gaze[gaze['condition'] == 'No'].reset_index(drop=True)
    # Placeholder columns, filled in from the scene string further down.
    # 'room' will hold strings, so it is created with object dtype: writing
    # strings into a float64 NaN column is a deprecated incompatible-dtype
    # assignment in recent pandas versions.
    gaze.insert(7, column='room', value=pd.Series(np.nan, index=gaze.index, dtype=object))
    gaze.insert(8, column='location', value=np.nan)
    gaze.insert(9, column='trial_vp', value=np.nan)
    
## Mark scene room.
# Each list holds the row positions whose scene string mentions that room.
# The positions are then used as .loc labels, which relies on gaze carrying
# a default 0..n-1 index.
scene_names = list(gaze['scene'].values)

kitchen = [row for row, name in enumerate(scene_names) if 'Kitchen' in name]
bathroom = [row for row, name in enumerate(scene_names) if 'Bathroom' in name]
studio = [row for row, name in enumerate(scene_names) if 'Studio' in name]
guest_bedroom = [row for row, name in enumerate(scene_names) if 'Guest Bedroom' in name]
# 'Bedroom' is a substring of 'Guest Bedroom', so the guest-bedroom rows are
# removed from the plain-bedroom match.
bedroom = [row for row, name in enumerate(scene_names) if 'Bedroom' in name]
bedroom = np.setdiff1d(np.array(bedroom), np.array(guest_bedroom))
living_room = [row for row, name in enumerate(scene_names) if 'Living Room' in name]

room_assignments = (
    (kitchen, 'Kitchen'),
    (bathroom, 'Bathroom'),
    (studio, 'Studio'),
    (guest_bedroom, 'Guest bedroom'),
    (bedroom, 'Bedroom'),
    (living_room, 'Living room'),
)
for row_ids, room_name in room_assignments:
    gaze.loc[row_ids, 'room'] = room_name

## Mark location.
# Row positions whose scene string contains 'Location k', for k = 1..5.
scene_names = list(gaze['scene'].values)

location_1, location_2, location_3, location_4, location_5 = (
    [row for row, name in enumerate(scene_names) if f'Location {k}' in name]
    for k in range(1, 6)
)

location_rows = (location_1, location_2, location_3, location_4, location_5)
for location_number, row_ids in enumerate(location_rows, start=1):
    gaze.loc[row_ids, 'location'] = location_number

## Mark trial within viewpoint (this defines the object set, and is useful for RSA).
# Row positions whose scene string contains 'Trial k', for k = 1..10.
scene_names = list(gaze['scene'].values)

(trial_1, trial_2, trial_3, trial_4, trial_5,
 trial_6, trial_7, trial_8, trial_9, trial_10) = (
    [row for row, name in enumerate(scene_names) if f'Trial {k}' in name]
    for k in range(1, 11)
)

# Order matters: 'Trial 1' is a substring of 'Trial 10', so trial-10 rows are
# first labelled 1 and only get their correct label because 10 is written last.
trial_rows = (trial_1, trial_2, trial_3, trial_4, trial_5,
              trial_6, trial_7, trial_8, trial_9, trial_10)
for trial_number, row_ids in enumerate(trial_rows, start=1):
    gaze.loc[row_ids, 'trial_vp'] = trial_number

## Load similarity.
shape_similarity_df = pd.read_csv(data_direc + 'shape_similarity_transformed.csv', index_col=False)
color_similarity_df = pd.read_csv(data_direc + 'color_similarity_transformed.csv', index_col=False)

## Get useful metadata.
# Unique participants and scenes, kept in order of first appearance in gaze
# (np.unique alone would return them sorted).
participant_values = gaze['participant'].values
indexes = np.unique(participant_values, return_index=True)[1]
all_participants = [participant_values[first_row] for first_row in sorted(indexes)]
n_participants = len(all_participants)

scene_values = gaze['scene'].values
indexes = np.unique(scene_values, return_index=True)[1]
all_scenes = [scene_values[first_row] for first_row in sorted(indexes)]
n_scenes = len(all_scenes)

# Object names are taken from the column headers of the shape-similarity table.
unique_objects = shape_similarity_df.columns.values

### RSA for gaze scanpaths by scene context. 

In [4]:
# Parallel lists: entry k of each list describes the k-th scanpath.
scanpaths = []
room_label = []
location_label = []
trial_label = []
n_timesteps = []

rooms = np.unique(gaze['room'])
gaze_xy_columns = ['gaze_x_pos_pixel_360', 'gaze_y_pos_pixel_360']

## Gather scanpaths by room, viewpoint, scene, and trial.
# One scanpath per (room, location, trial, participant), visited in sorted
# order at every level.
for this_room in rooms:

    gaze_room = gaze.loc[gaze['room'] == this_room].reset_index(drop=True)

    for this_location in np.unique(gaze_room['location']):

        gaze_location = gaze_room.loc[gaze_room['location'] == this_location].reset_index(drop=True)

        for this_trial in np.unique(gaze_location['trial_vp']):

            gaze_trial = gaze_location.loc[gaze_location['trial_vp'] == this_trial].reset_index(drop=True)

            for this_participant in np.unique(gaze_trial['participant'].values):

                # Rows with a missing value in any column are discarded.
                gaze_episode = (
                    gaze_trial.loc[gaze_trial['participant'] == this_participant]
                    .reset_index(drop=True)
                    .dropna()
                )

                # Remove times when participant was looking at the target.
                # See Karl's solution for not removing samples when they may have moved on from the target.
                gaze_before_found = gaze_episode.loc[gaze_episode['object'] != gaze_episode['target']]
                gaze_this_trial = gaze_before_found[gaze_xy_columns].to_numpy()

                scanpaths.append(gaze_this_trial)
                room_label.append(this_room)
                location_label.append(this_location)
                trial_label.append(this_trial)
                n_timesteps.append(len(gaze_this_trial))

## Convert label lists to arrays.
room_label = np.array(room_label)
location_label = np.array(location_label)
trial_label = np.array(trial_label)

## Bin episodes by length.
# bin_idx[k] is the index of the 100-sample-wide bin that scanpath k falls in.
n_timesteps = np.array(n_timesteps)
bins = np.arange(0, 1100, 100)
bin_idx = np.digitize(n_timesteps, bins=bins)
print(bins)

[   0  100  200  300  400  500  600  700  800  900 1000]


In [None]:
## Collect similarity matrices.
# One matrix per scanpath: pairwise cosine similarity between the rows
# (time points) of that scanpath's two-column gaze-position array.
# The loop variable is deliberately NOT called `gaze`: reusing that name would
# replace the gaze DataFrame with a numpy array and break any re-run of the
# cells above.
# NOTE(review): a scanpath with zero samples would presumably make
# cosine_similarity raise -- confirm that none are empty.
sim_matrices = [cosine_similarity(scanpath, dense_output=True) for scanpath in scanpaths]

In [None]:
## Caution: takes ~ 2 days to run!
# Time the call that turns the per-scanpath similarity matrices into `rdm`.
t_start = perf_counter()
p, rdm = visual_search_rsa.compute_average(sim_matrices, 'pearson')
t_stop = perf_counter()

fig, ax = plt.subplots(figsize=(10, 10))
ax.imshow(rdm, cmap='YlGnBu')

elapsed_seconds = t_stop - t_start
print("Elapsed time: ", elapsed_seconds)

In [None]:
# All unordered pairs (i, j), i < j, of scanpath indices, one pair per row.
# `combinations` comes from itertools (imported in the top import cell).
matrix_pairs = np.vstack(list(combinations(np.arange(len(sim_matrices)), 2)))

# Bundle the RDM with the labels needed to slice it by room / location / trial.
gaze_similarity = {"rdm": rdm,
                   "matrix_pairs": matrix_pairs,
                   "room_label": room_label,
                   "location_label": location_label,
                   "trial_label": trial_label}

# The context manager closes the file even if pickling fails part-way.
with open("data/gaze_similarity.pickle", "wb") as pickle_out:
    pickle.dump(gaze_similarity, pickle_out)

In [None]:
# Reload the cached result so the expensive computation does not have to be re-run.
# NOTE: unpickling can execute arbitrary code -- only load a file you produced yourself.
with open("data/gaze_similarity.pickle", "rb") as pickle_in:
    gaze_similarity = pickle.load(pickle_in)

In [None]:
bathroom_idx = np.where((gaze_similarity["room_label"] == 'Bathroom'))[0]
bedroom_idx = np.where((gaze_similarity["room_label"] == 'Bedroom'))[0]

# Not used further down in this cell; kept in case another cell reads it.
bathroom_loc_idx = np.where((gaze_similarity["room_label"] == 'Bathroom') &
                            (gaze_similarity["location_label"] == 5))[0]

# Masks are 1 on the (room x room) block of the RDM and 0 elsewhere.
# np.ix_ selects exactly the rows/columns listed in the index array. The
# previous slice idx[0]:idx[-1] left out the last scanpath of each room and
# silently assumed that each room's indices are contiguous.
bathroom_mask = np.zeros(gaze_similarity['rdm'].shape)
bathroom_mask[np.ix_(bathroom_idx, bathroom_idx)] = 1

bedroom_mask = np.zeros(gaze_similarity['rdm'].shape)
bedroom_mask[np.ix_(bedroom_idx, bedroom_idx)] = 1

# Left: full RDM with both room blocks shaded. Right: the two masks alone.
fig, ax = plt.subplots(1,2, figsize=(20,10))
ax[0].imshow(gaze_similarity["rdm"], cmap='YlGnBu', vmin=0, vmax=0.5)
ax[0].imshow(bathroom_mask, cmap='binary', alpha=0.2)
ax[0].imshow(bedroom_mask, cmap='binary', alpha=0.2)
ax[0].set_xticks([]);
ax[0].set_yticks([]);
ax[1].imshow(bathroom_mask, cmap='Reds', alpha=0.5)
ax[1].imshow(bedroom_mask, cmap='Blues', alpha=0.5);
ax[1].set_xticks([]);
ax[1].set_yticks([]);

In [None]:
# One row per scanpath: its position in the RDM plus its room / location / trial labels.
n_scanpaths = gaze_similarity['rdm'].shape[0]
d = dict(
    scanpath_idx=np.arange(n_scanpaths),
    room=gaze_similarity["room_label"],
    location=gaze_similarity["location_label"],
    trial=gaze_similarity["trial_label"],
)
df = pd.DataFrame(d)
df.head(10)

### Gaze similarity by location.

In [None]:
room = 'Kitchen'
n_locations = 5

rdm_all = gaze_similarity['rdm']
in_room = gaze_similarity["room_label"] == room
idx_room = np.where(in_room)[0]

# Top row of panels: RDM restricted to the scanpaths of each location in `room`.
fig, ax = plt.subplots(1, n_locations, figsize=(18,4))
for i in np.arange(n_locations):
    idx = np.where(in_room & (gaze_similarity["location_label"] == i+1))[0]
    rdm_partial = rdm_all[np.ix_(idx, idx)]
    ax[i].imshow(rdm_partial, cmap='YlGnBu', vmin=0, vmax=0.5)

# Bottom row: same-sized RDMs built from scanpaths drawn at random from the
# whole room, as a visual baseline.
fig, ax = plt.subplots(1, n_locations, figsize=(18,4))
for i in np.arange(n_locations):
    idx = np.where(in_room & (gaze_similarity["location_label"] == i+1))[0]

    n_scrambles = idx.shape[0]
    # Draw WITHOUT replacement: with replacement the same scanpath can be
    # picked twice, which puts self-comparisons off the diagonal.
    idx_scrambled = np.random.choice(idx_room, n_scrambles, replace=False)
    rdm_scrambled = rdm_all[np.ix_(idx_scrambled, idx_scrambled)]

    ax[i].imshow(rdm_scrambled, cmap='YlGnBu', vmin=0, vmax=0.5)

In [None]:
# For each location in `room` (set in the previous cell): compare the mean
# pairwise RDM value among that location's scanpaths (red line) with a null
# distribution built from equally many scanpaths drawn at random from the room.
n_locations = 5
n_boot = 1000

rdm_all = gaze_similarity['rdm']
in_room = gaze_similarity["room_label"] == room
idx_room = np.where(in_room)[0]

fig, ax = plt.subplots(n_locations, 1, figsize = (4,9))

for loc_i in np.arange(n_locations):

    idx = np.where(in_room & (gaze_similarity["location_label"] == loc_i+1))[0]
    n_scrambles = idx.shape[0]

    if n_scrambles < 2:
        # No scanpath pairs to average for this location; leave the panel empty.
        ax[loc_i].set_xticks([]);
        ax[loc_i].set_yticks([]);
        continue

    # Average over distinct pairs only (strict upper triangle). np.mean(np.triu(.))
    # would also average in the zeroed lower triangle and the diagonal.
    upper = np.triu_indices(n_scrambles, k=1)
    rdm_partial = rdm_all[np.ix_(idx, idx)]
    mean_partial = rdm_partial[upper].mean()

    mean_null = np.empty(n_boot)
    for n in np.arange(n_boot):
        # Without replacement, so no scanpath is paired with itself.
        idx_scrambled = np.random.choice(idx_room, n_scrambles, replace=False)
        rdm_scrambled = rdm_all[np.ix_(idx_scrambled, idx_scrambled)]
        mean_null[n] = rdm_scrambled[upper].mean()

    ax[loc_i].hist(mean_null, color='silver')
    ax[loc_i].axvline(x=mean_partial, color='red', linewidth=3)

    ## high density intervals
    hpd_mu, x_mu, y_mu, modes_mu = hpd_grid(mean_null)

    for (x0, x1) in hpd_mu:
        ax[loc_i].hlines(y=0, xmin=x0, xmax=x1, linewidth=5)
        ax[loc_i].axvline(x=x0, color='k', linestyle='--', linewidth=3)
        ax[loc_i].axvline(x=x1, color='k', linestyle='--', linewidth=3)

    ax[loc_i].set_xticks([]);
    ax[loc_i].set_yticks([]);

### Gaze similarity by room.

In [None]:
room_labels = gaze_similarity["room_label"]
rdm_all = gaze_similarity['rdm']

# Positions of each room's scanpaths in the RDM.
bathroom_idx = np.where(room_labels == 'Bathroom')[0]
bedroom_idx = np.where(room_labels == 'Bedroom')[0]
guest_idx = np.where(room_labels == 'Guest bedroom')[0]
kitchen_idx = np.where(room_labels == 'Kitchen')[0]
living_idx = np.where(room_labels == 'Living room')[0]
studio_idx = np.where(room_labels == 'Studio')[0]
# all_idx / not_bathroom_idx are not read by any cell visible here; kept in
# case another cell uses them.
all_idx = np.arange(rdm_all.shape[0])
not_bathroom_idx = np.setdiff1d(all_idx, bathroom_idx)

rooms = np.unique(room_labels)
print(rooms)

# Rows: bathroom scanpaths. Columns: every scanpath.
rdm_partial = rdm_all[bathroom_idx,:]

fig, ax = plt.subplots(1,1, figsize=(18,4))
ax.imshow(rdm_partial, cmap='YlGnBu', vmin=0, vmax=0.5)
# Coloured bars along the top edge mark the column span of each other room.
room_bars = ((bedroom_idx, 'r'), (guest_idx, 'g'), (kitchen_idx, 'y'),
             (living_idx, 'c'), (studio_idx, 'b'))
for span_idx, bar_color in room_bars:
    ax.hlines(y=0, xmin=span_idx[0], xmax=span_idx[-1], linewidth=18, color=bar_color);

rdm_bathroom_self = rdm_all[np.ix_(bathroom_idx, bathroom_idx)]
rdm_bathroom_bedroom = rdm_all[np.ix_(bathroom_idx, bedroom_idx)]

# Within-room average over distinct scanpath pairs only (strict upper
# triangle). The earlier formula summed the upper triangle INCLUDING the
# diagonal and divided by n*n - n, i.e. twice the number of distinct pairs,
# which made the within-room mean roughly half of what it should be compared
# with the between-room mean below.
upper = np.triu_indices_from(rdm_bathroom_self, k=1)
bathroom_self_pairs = rdm_bathroom_self[upper]

avg_bathroom_self = np.mean(bathroom_self_pairs)
avg_bathroom_bedroom = np.mean(rdm_bathroom_bedroom)
print(avg_bathroom_self)
print(avg_bathroom_bedroom)

# Distribution of between-room values vs. within-room pair values. The pairs
# are selected by position, so genuine zero entries are kept.
fig, ax = plt.subplots(1,1, figsize=(6,4))
ax.hist(rdm_bathroom_bedroom.flatten())
ax.hist(bathroom_self_pairs);

In [None]:
rooms = np.unique(gaze_similarity["room_label"])
print(rooms)
n_rooms = len(rooms)

rdm_all = gaze_similarity['rdm']
idx_all = np.arange(rdm_all.shape[0])

# Top row of panels: RDM restricted to the scanpaths of each room.
# squeeze=False keeps `ax` indexable even if there is only one room.
fig, ax = plt.subplots(1, n_rooms, figsize=(18,4), squeeze=False)
ax = ax[0]
for i in np.arange(n_rooms):
    idx = np.where(gaze_similarity["room_label"] == rooms[i])[0]
    rdm_partial = rdm_all[np.ix_(idx, idx)]
    ax[i].imshow(rdm_partial, cmap='YlGnBu', vmin=0, vmax=0.5)

# Bottom row: same-sized RDMs built from scanpaths drawn at random from all rooms.
fig, ax = plt.subplots(1, n_rooms, figsize=(18,4), squeeze=False)
ax = ax[0]
for i in np.arange(n_rooms):

    idx = np.where(gaze_similarity["room_label"] == rooms[i])[0]
    n_scrambles = idx.shape[0]

    # Draw WITHOUT replacement so no scanpath appears twice in the scrambled RDM.
    idx_scrambled = np.random.choice(idx_all, n_scrambles, replace=False)
    rdm_scrambled = rdm_all[np.ix_(idx_scrambled, idx_scrambled)]

    ax[i].imshow(rdm_scrambled, cmap='YlGnBu', vmin=0, vmax=0.5)

In [None]:
# For each room (`rooms` is set in the previous cell): compare the mean
# pairwise RDM value among that room's scanpaths (red line) with a null
# distribution built from equally many scanpaths drawn at random from all rooms.
n_rooms = len(rooms)
n_boot = 1000

rdm_all = gaze_similarity['rdm']
idx_all = np.arange(rdm_all.shape[0])

# squeeze=False keeps `ax` indexable even if there is only one room.
fig, ax = plt.subplots(n_rooms, 1, figsize = (4,4), squeeze=False)
ax = ax[:, 0]

for r in np.arange(n_rooms):

    idx = np.where(gaze_similarity["room_label"] == rooms[r])[0]
    n_scrambles = idx.shape[0]

    if n_scrambles < 2:
        # No scanpath pairs to average for this room; leave the panel empty.
        ax[r].set_xticks([]);
        ax[r].set_yticks([]);
        continue

    # Average over distinct pairs only (strict upper triangle). np.mean(np.triu(.))
    # would also average in the zeroed lower triangle and the diagonal.
    upper = np.triu_indices(n_scrambles, k=1)
    rdm_partial = rdm_all[np.ix_(idx, idx)]
    mean_partial = rdm_partial[upper].mean()

    mean_null = np.empty(n_boot)
    for n in np.arange(n_boot):
        # Without replacement, so no scanpath is paired with itself.
        idx_scrambled = np.random.choice(idx_all, n_scrambles, replace=False)
        rdm_scrambled = rdm_all[np.ix_(idx_scrambled, idx_scrambled)]
        mean_null[n] = rdm_scrambled[upper].mean()

    ax[r].hist(mean_null, color='silver')
    ax[r].axvline(x=mean_partial, color='red', linewidth=3)

    ## high density intervals
    hpd_mu, x_mu, y_mu, modes_mu = hpd_grid(mean_null)

    for (x0, x1) in hpd_mu:
        ax[r].hlines(y=0, xmin=x0, xmax=x1, linewidth=5)
        ax[r].axvline(x=x0, color='k', linestyle='--', linewidth=3)
        ax[r].axvline(x=x1, color='k', linestyle='--', linewidth=3)

    ax[r].set_xticks([]);
    ax[r].set_yticks([]);

### Gaze similarity by object set.

In [None]:
room = 'Kitchen'
location = 1
n_trials = 10

rdm_all = gaze_similarity['rdm']
in_room_loc = (gaze_similarity["room_label"] == room) & (gaze_similarity["location_label"] == location)
idx_room_loc = np.where(in_room_loc)[0]

# Top row of panels: RDM restricted to the scanpaths of each trial (object
# set) at this room and location.
fig, ax = plt.subplots(1, n_trials, figsize=(18,4))
for i in np.arange(n_trials):
    idx = np.where(in_room_loc & (gaze_similarity["trial_label"] == i+1))[0]
    rdm_partial = rdm_all[np.ix_(idx, idx)]
    ax[i].imshow(rdm_partial, cmap='YlGnBu', vmin=0, vmax=0.5)

# Bottom row: same-sized RDMs built from scanpaths drawn at random from the
# whole room/location.
fig, ax = plt.subplots(1, n_trials, figsize=(18,4))
for i in np.arange(n_trials):
    idx = np.where(in_room_loc & (gaze_similarity["trial_label"] == i+1))[0]

    n_scrambles = idx.shape[0]
    # Draw WITHOUT replacement so no scanpath appears twice in the scrambled RDM.
    idx_scrambled = np.random.choice(idx_room_loc, n_scrambles, replace=False)
    rdm_scrambled = rdm_all[np.ix_(idx_scrambled, idx_scrambled)]

    ax[i].imshow(rdm_scrambled, cmap='YlGnBu', vmin=0, vmax=0.5)

In [None]:
# For each trial (object set) at `room` / `location` (set in the previous
# cell, along with idx_room_loc): compare the mean pairwise RDM value among
# that trial's scanpaths (red line) with a null distribution built from
# equally many scanpaths drawn at random from the same room and location.
n_boot = 10000
n_trials = 10

rdm_all = gaze_similarity['rdm']
in_room_loc = (gaze_similarity["room_label"] == room) & (gaze_similarity["location_label"] == location)

fig, ax = plt.subplots(n_trials, 1, figsize = (4,18))

for t in np.arange(n_trials):

    idx = np.where(in_room_loc & (gaze_similarity["trial_label"] == t+1))[0]
    n_scrambles = idx.shape[0]

    if n_scrambles < 2:
        # No scanpath pairs to average for this trial; leave the panel empty.
        ax[t].set_xticks([]);
        ax[t].set_yticks([]);
        continue

    # Average over distinct pairs only (strict upper triangle). np.mean(np.triu(.))
    # would also average in the zeroed lower triangle and the diagonal.
    upper = np.triu_indices(n_scrambles, k=1)
    rdm_partial = rdm_all[np.ix_(idx, idx)]
    mean_partial = rdm_partial[upper].mean()

    mean_null = np.empty(n_boot)
    for n in np.arange(n_boot):
        # Without replacement, so no scanpath is paired with itself.
        idx_scrambled = np.random.choice(idx_room_loc, n_scrambles, replace=False)
        rdm_scrambled = rdm_all[np.ix_(idx_scrambled, idx_scrambled)]
        mean_null[n] = rdm_scrambled[upper].mean()

    ax[t].hist(mean_null, color='silver')
    ax[t].axvline(x=mean_partial, color='red', linewidth=5)

    ## high density intervals
    hpd_mu, x_mu, y_mu, modes_mu = hpd_grid(mean_null)

    for (x0, x1) in hpd_mu:
        ax[t].hlines(y=0, xmin=x0, xmax=x1, linewidth=5)
        ax[t].axvline(x=x0, color='k', linestyle='--', linewidth=3)
        ax[t].axvline(x=x1, color='k', linestyle='--', linewidth=3)

    ax[t].set_xticks([]);
    ax[t].set_yticks([]);