# Generate Social Metrics

Using our sampled files from [Data Resampling](1.DataResampling.ipynb) notebook, we generate proxemic disstances from all the participants in Mozilla Hubs in each of the four rooms.

## Using this Notebook, Code, or Data
This notebook and all of the resources included here are released on a [Mozilla Public License 2.0](https://www.mozilla.org/en-US/MPL/2.0/).  The data is released under [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/).  To cite the peper, the bespoke logging client, the dataset, or this notebook please see the [README.md](https://github.com/ayman/hubs-research-2021/blob/main/README.md) or the [DOI in the ACM Digital Library](https://doi.org/10.1145/3411764.3445729).

## Requirements

In [1]:
import pickle
import pandas as pd
import numpy as np
import scipy
import matplotlib as plt
from scipy import spatial

In [2]:
main_room = pd.read_csv('outputs/main_room_resampled_0.167.csv')
a_room = pd.read_csv('outputs/room_a_resampled_0.167.csv')
b_room = pd.read_csv('outputs/room_b_resampled_0.167.csv')
c_room = pd.read_csv('outputs/room_c_resampled_0.167.csv')

In [3]:
keynote_slice = main_room[('2020-04-29 14:30:00' <= main_room.frame_id)
                          & (main_room.frame_id < '2020-04-29 15:00:00')]
break_slice1 = main_room[('2020-04-29 15:05:00' <= main_room.frame_id)
                         & (main_room.frame_id < '2020-04-29 15:20:00')]

short_presentations = main_room[('2020-04-29 15:20:00' <= main_room.frame_id)
                                & (main_room.frame_id < '2020-04-29 16:10:00')]
break_slice2 = main_room[('2020-04-29 16:10:00' <= main_room.frame_id)
                         & (main_room.frame_id < '2020-04-29 16:30:00')]

def break_out_room_slice(df,
                         session1=['2020-04-29 16:30:00', '2020-04-29 17:00:00'],
                         session2=['2020-04-29 17:15:00', '2020-04-29 17:45:00']):
    slices = []
    for session in [session1, session2]:
        slices.append(df[(session[0] <= df.frame_id)
                         & (df.frame_id < session[1])])
    return pd.concat(slices)

a_room_slice = break_out_room_slice(a_room)
b_room_slice = break_out_room_slice(b_room)
c_room_slice = break_out_room_slice(c_room)

In [4]:
def prep_slice(my_slice):
    """Helper Function that sets up a DF for the metric table calculations"""
    # Setup indices for all the UUIDs in this data slice
    uuids = my_slice.uuid.sort_values().unique()
    uuids_ix = {uuid: ix for ix, uuid in enumerate(uuids)}

    # Setup indices for all the FRAMES in this data slice
    frames = my_slice.frame_id.sort_values().unique()
    frames_ix = {frame_id: ix for ix, frame_id in enumerate(frames)}

    by_frame = my_slice.sort_values("frame_id").groupby('frame_id')

    return uuids, uuids_ix, frames, frames_ix, by_frame

# uuids, uuids_ix, frames, frames_ix, by_frame = prep_slice(c_room_slice)
# print (len(uuids), len(frames))
# print (len(main_room.uuid.unique()))

In [5]:
# For each frame in the data slice, create a pairwise matrix of distances between each user
# This is stored in a dictionary where the key is the frame time stamp as a string in the distance matrix is XYZ position for each UUID

# Takes a DF of frames grouped by Frame ID, a list of unique UUIDs in this df, and a dict matching uuids to indices (int)
def distance_mat(by_frame, uuids, uuids_ix):

    # create empty dict, create dictionary where each frame_id (key) matches a matrix of pairwise distances (value)
    frame_dist_mat = {}

    for frame, group in by_frame:
        # Setup
        users_xyz = np.full((len(uuids), 3), np.nan)

        for row_index, row in group.iterrows():
            users_xyz[uuids_ix[row.uuid]] = [row.position_x,
                                             row.position_y,
                                             row.position_z]

        # Pairwise euclidian distance
        user_dist_mat = scipy.spatial.distance.pdist(users_xyz,
                                                     metric='euclidean')
        user_dist_mat = scipy.spatial.distance.squareform(user_dist_mat,
                                                          force='tomatrix',
                                                          checks=False)

        frame_dist_mat[frame] = user_dist_mat

    return frame_dist_mat

In [6]:
# For each frame in the data slice, create a pairwise matrix of angular distances between each user
# This angle is:  given a user A's position and direction, what is the relative position of user B in radians
# This is stored in a dictions where the key is the frame time stamp as a string 

def angular_distance(u, v):
    dif_xyz =   v[:3] -  u[:3]
    len_dif = np.linalg.norm(dif_xyz)
    dif_xyz = dif_xyz/len_dif
    # sin(40deg) = .64 (>.64 is out of FOV)
    d_angle = 1-np.sum(u[3:6] * dif_xyz)
    return d_angle
    

def angular_mat(by_frame, uuids, uuids_ix):
    
    frame_angle_mat = {}

    for frame, group in by_frame:
        # Setup
        users_direction_xyz = np.full((len(uuids), 6), np.nan)
        for row_index, row in group.iterrows():
            users_direction_xyz[uuids_ix[row.uuid]] = [row.position_x, row.position_y, row.position_z, row.direction_x, row.direction_y, row.direction_z]

        # Pairwise angular distance
        user_angle = scipy.spatial.distance.pdist(users_direction_xyz, angular_distance) 
        user_angle = scipy.spatial.distance.squareform(user_angle, force='tomatrix', checks=False)
        frame_angle_mat[frame] = user_angle
    
    return frame_angle_mat

In [7]:
def closest_dist(dist_mat, uuids, frames_ix):
    """Return closest standing person in each frame."""
    # Closest person
    # TODO Check whether 0.0 or nans should be used
    uuid_mins = np.full((len(uuids), len(dist_mat)), np.nan)

    for frame in dist_mat:

        for uuid in range(len(dist_mat[frame])):
            row = dist_mat[frame][uuid]
            closest = np.nanmin(row[np.nonzero(row)])

            if not np.isnan(closest):
                uuid_mins[uuid][frames_ix[frame]] = closest
#     print (uuid_mins)
    return uuid_mins

In [8]:
# For each frame, calculate the number of people at different social distances (Hall Proxemics)

# Intimate Distance .15 - .46 Meters
# Personal Distance (close) .46 - .76 Meters
# Personal Distance (far) .76 - 1.22 Meters
# Social Distance (close) 1.22 - 2.1 Meters
# Social Distance (far) 2.1 - 3.6 Meters
# Public Distance (close) 3.6 - 7.6 Meters
# Public Distance (far) 7.6 + Meters
def distance_counts(min_dist, max_dist, frame_dist, frames_ix, uuids):
    """Count number of people nearby in a given social distance. 
    Return as a 2d array distances[uuid_ix][frame_ix]"""
    count_array = np.full((len(uuids), len(frame_dist)), 0.0)
    # iterate through every frame (dictionary) in the distance matrix
    for frame in frame_dist:
        # For each UUID do a thing
        for uuid in range(len(frame_dist[frame])):
            row = frame_dist[frame][uuid]
            count = np.count_nonzero((row >= min_dist) & (row < max_dist))
            count_array[uuid][frames_ix[frame]] = count
    return count_array

In [9]:
# Helper function for looking at the ...
def print_zone_counts(uuids, frame_dist_mat, frames_ix):
    """Helper function for looking at the various intimate, 
    personal, social, and public distances."""
    uuid_count_intimate = distance_counts(.01, .46,
                                          frame_dist_mat,
                                          frames_ix,
                                          uuids)
    uuid_count_personal = distance_counts(.46,
                                          1.22,
                                          frame_dist_mat,
                                          frames_ix,
                                          uuids)
    uuid_count_social = distance_counts(1.22,
                                        3.6,
                                        frame_dist_mat,
                                        frames_ix,
                                        uuids)
    uuid_count_public = distance_counts(3.6,
                                        7.6,
                                        frame_dist_mat,
                                        frames_ix,
                                        uuids)
    uuid_count_public_far = distance_counts(7.6,
                                            100000000,
                                            frame_dist_mat,
                                            frames_ix,
                                            uuids)

    for distance in [uuid_count_intimate,
                     uuid_count_personal,
                     uuid_count_social,
                     uuid_count_public,
                     uuid_count_public_far]:
        print(np.mean(distance), np.median(distance), np.max(distance))

In [10]:
#  Calculate some matrices for the social metrics for proxemic analysis.

# The distance matrix is defined above in function distance_mat
# The angular distance is defined above in the angular mat
# The closest distance

# Keynote

def social_matrices(slc):
    uuids, uuids_ix, frames, frames_ix, by_frame = prep_slice(slc)
    matrices = {}
    matrices["dist_mat"] = distance_mat(by_frame, uuids, uuids_ix)
    matrices["angular_mat"] = angular_mat(by_frame, uuids, uuids_ix)
    matrices["mins_by_frame"] = closest_dist(matrices["dist_mat"], 
                                             uuids, 
                                             frames_ix)
    matrices["slice"] = slc
    return matrices

named_matrices = {}
for name, slc in [("keynote", keynote_slice), ("break_1", break_slice1),
                  ("a_room", a_room_slice),  ("b_room", b_room_slice),
                  ("c_room", c_room_slice), ("break_2", break_slice2)]:
    named_matrices[name] = social_matrices(slc)

  closest = np.nanmin(row[np.nonzero(row)])


In [11]:
# Example Metric - Visual Attention during a Keynote Talk
uuids, uuids_ix, frames, frames_ix, by_frame = prep_slice(keynote_slice)

# Angles to Keynote Speaker
speaker = "12a9924c-fbc4-4267-a26e-7c65af061f01"

time_in_view = np.full(
    (len(uuids), len(named_matrices["keynote"]["angular_mat"])), False)

for frame in named_matrices["keynote"]["angular_mat"]:
    for i in range(len(named_matrices["keynote"]["angular_mat"][frame])):
        row = named_matrices["keynote"]["angular_mat"][frame][i]

        if (row[uuids_ix[speaker]] < .64):
            time_in_view[i][frames_ix[frame]] = True
perc = []
# Percentage of time attending to Speaker
for uuid in range(len(time_in_view)):
    row = time_in_view[uuid]
    tot = np.sum(row) / len(row)
    if (tot > 0.05) & (tot != 1.0):
        perc.append(tot)
        
print (np.average(perc))        

0.5408637873754153


Pickle all the matrics for these slices for visualizing next.

In [13]:
with open("outputs/social_metrics.pkl", "wb") as f:
    pickle.dump(named_matrices, f, protocol=-1)

Next visit the <a href="3.VisualisingSocialMetrics.ipynb">Visualizing Social Metrics</a> notebook.