# Generate Social Metrics

Using our sampled files from [Data Resampling](1.DataResampling.ipynb) notebook, 
we generate proxemic distances from all the participants in Mozilla Hubs in each 
of the four rooms.

## Using this Notebook, Code, or Data
This notebook and all of the resources included here are released on 
a [Mozilla Public License 2.0](https://www.mozilla.org/en-US/MPL/2.0/).  
The data is released under [CC-BY-NC-SA 4.0](https://creativecommons.org/licenses/by-nc-sa/4.0/). 
To cite the paper, the bespoke logging client, the dataset, or this 
notebook please see the [README.md](https://github.com/ayman/hubs-research-2021/blob/main/README.md) 
or the [DOI in the ACM Digital Library](https://doi.org/10.1145/3411764.3445729).

## Requirements

In [1]:
import pickle
import pandas as pd
import numpy as np
import scipy
import matplotlib as plt
from scipy import spatial

In [2]:
group_1 = pd.read_csv('outputs/group1_resampled_30.csv')
group_2 = pd.read_csv('outputs/group2_resampled_30.csv')
group_3 = pd.read_csv('outputs/group3_resampled_30.csv')
group_4 = pd.read_csv('outputs/group4_resampled_30.csv')

# TODO Text on how small/big groups done

In [3]:
def group_slice(df, t1, t2, uuids):
    dft = df[(t1 <= df.frame_id) & (df.frame_id < t2)]
    dfs = dft[dft.uuid.isin(uuids)]
    return dfs

# Small Group Discussions
group1_1 = group_slice(group_1, '2021-08-10 15:08:51', '2021-08-10 15:15:10', ["135e32be-269f-4a2a-ad76-d2ce80532fb5", "92b6a91e-977b-4538-8312-318de6ad5bbb", "e87375fe-5573-4395-8235-c6151f5cc72f"]) 
group1_2 = group_slice(group_1, '2021-08-10 15:08:51', '2021-08-10 15:20:00', ["1734c592-ac64-402f-9e7a-084f041f0d2d", "d04bfb9a-ce5e-44b4-b9a6-4025e72fe087", "7e934b3e-3b35-4cab-b70e-b68bec2f6409"])

group2_1 = group_slice(group_2, '2021-08-11 11:17:45', '2021-08-11 11:30:00', ["c2231c4a-9606-41c3-ab4b-dd6ac1c17bdc", "8dc86cda-b789-4340-85aa-c22fd19ff729" , "e7d9f358-d6ee-47e6-80eb-65dca3e2e848"])
group2_2 = group_slice(group_2, '2021-08-11 11:18:30', '2021-08-11 11:30:00', ["7c3c1d25-f5e9-4568-8198-137858a491ad" , "a05a754f-f283-40aa-8610-37524bfba3f8", "7ed8e182-2292-461b-82c0-b631b07ed161"])

group3_1 = group_slice(group_3, '2021-08-12 09:10:11', '2021-08-12 09:29:30', ["3263ab7c-14a1-43a4-83d8-88e95dda2141", "465d3391-7d54-4f0b-aabf-349a74e40832", "e977a5f0-55b8-4b91-8621-2fdca1c4ea8d"])
group3_2 = group_slice(group_3, '2021-08-12 09:18:00', '2021-08-12 09:29:30', ["8a1dcfe7-05b3-4b70-bca3-9f97a245721d", "1089c2ae-d956-4fc6-82f2-f5efaff5f17a", "972550a1-fdc8-4257-b608-0e1c1c1595c2"])

group4_1 = group_slice(group_4, '2021-08-12 16:09:30', '2021-08-12 16:20:00', ["9cca61d7-058a-4302-8635-6fa9e3c92c79", "adaefcd7-7ea6-42df-b218-0946f68201dc", "dd8d4f2a-396d-46f2-b8c4-ddd3ea7a5303"])
group4_2 = group_slice(group_4, '2021-08-12 16:10:47', '2021-08-12 16:20:00', ["2b877303-fda6-4d18-8c52-a30fe4db479c", "c61f6fbe-9b43-4f17-b3f4-080e82a9d4b5",  "550e7d0a-69b7-4107-9fd8-9af2d32ae7a8"])

# Large Group Discussions
group_1d = group_1[('2021-08-10 15:20:15' >= group_1.frame_id)& (group_1.frame_id < '2021-08-10 15:44:00')]
group_2d = group_2[('2021-08-11 11:30:20' >= group_2.frame_id)& (group_2.frame_id < '2021-08-11 11:46:00')]
group_3d = group_3[('2021-08-12 09:29:45' >= group_3.frame_id)& (group_3.frame_id < '2021-08-12 09:49:00')]
group_4d = group_4[('2021-08-12 16:20:20' >= group_4.frame_id)& (group_4.frame_id < '2021-08-12 16:42:30')]


In [4]:
def prep_slice(my_slice):
    """Helper Function that sets up a DF for the metric table calculations"""
    # Setup indices for all the UUIDs in this data slice
    uuids = my_slice.uuid.sort_values().unique()
    uuids_ix = {uuid: ix for ix, uuid in enumerate(uuids)}

    # Setup indices for all the FRAMES in this data slice
    frames = my_slice.frame_id.sort_values().unique()
    frames_ix = {frame_id: ix for ix, frame_id in enumerate(frames)}

    by_frame = my_slice.sort_values("frame_id").groupby('frame_id')

    return uuids, uuids_ix, frames, frames_ix, by_frame


In [5]:
# For each frame in the data slice, create a pairwise matrix of
# distances between each user

# This is stored in a dictionary where the key is the frame time stamp
# as a string in the distance matrix is XYZ position for each UUID

# Takes a DF of frames grouped by Frame ID, a list of unique UUIDs in
# this df, and a dict matching uuids to indices (int)
def distance_mat(by_frame, uuids, uuids_ix):

    # create empty dict, create dictionary where each frame_id (key)
    # matches a matrix of pairwise distances (value)
    frame_dist_mat = {}

    for frame, group in by_frame:
        # Setup
        users_xyz = np.full((len(uuids), 3), np.nan)

        for row_index, row in group.iterrows():
            users_xyz[uuids_ix[row.uuid]] = [row.position_x,
                                             row.position_y,
                                             row.position_z]

        # Pairwise euclidian distance
        user_dist_mat = scipy.spatial.distance.pdist(users_xyz,
                                                     metric='euclidean')
        user_dist_mat = scipy.spatial.distance.squareform(user_dist_mat,
                                                          force='tomatrix',
                                                          checks=False)

        frame_dist_mat[frame] = user_dist_mat

    return frame_dist_mat

In [6]:
# For each frame in the data slice, create a pairwise matrix of
# angular distances between each user

# This angle is: given a user A's position and direction, what is the
# relative position of user B in radians

# This is stored in a dictions where the key is the frame time stamp
# as a string

def angular_distance(u, v):
    dif_xyz =   v[:3] -  u[:3]
    len_dif = np.linalg.norm(dif_xyz)
    dif_xyz = dif_xyz/len_dif
    # sin(40deg) = .64 (>.64 is out of FOV)
    d_angle = 1-np.sum(u[3:6] * dif_xyz)
    return d_angle
    

def angle_distance(u, v):
    dif_xyz =   v[:3] -  u[:3]
    len_dif = np.linalg.norm(dif_xyz)
    dif_xyz = dif_xyz/len_dif
    # angle in XZ plane of u->v
    angle_person = np.arctan2(dif_xyz[2], dif_xyz[0])
    # angle of U in world coordinates
    angle_view = np.arctan2(u[5], u[3])
    
    # sin(40deg) = .64 (>.64 is out of FOV)
    #d_angle = 1-np.sum(u[3:6] * dif_xyz)
    return angle_person - angle_view
    
    
def angular_mat(by_frame, uuids, uuids_ix):
    frame_angle_mat = {}

    for frame, group in by_frame:
        # Setup
        users_direction_xyz = np.full((len(uuids), 6), np.nan)
        for row_index, row in group.iterrows():
            users_direction_xyz[uuids_ix[row.uuid]] = [row.position_x,
                                                       row.position_y,
                                                       row.position_z,
                                                       row.direction_x,
                                                       row.direction_y,
                                                       row.direction_z]

        user_angle = np.zeros((len(uuids), len(uuids)))
        for i in range(len(uuids)):
            for j in range(len(uuids)):
                user_angle[i,j] = angle_distance(users_direction_xyz[i], users_direction_xyz[j])
        frame_angle_mat[frame] = user_angle
    
    return frame_angle_mat


In [7]:
def closest_dist(dist_mat, uuids, frames_ix):
    """Return closest standing person in each frame."""
    # Closest person
    # Nan values are not included
    uuid_mins = np.full((len(uuids), len(dist_mat)), np.nan)

    for frame in dist_mat:
        for uuid in range(len(dist_mat[frame])):
            row = dist_mat[frame][uuid]
            closest = np.nanmin(row[np.nonzero(row)])

            if not np.isnan(closest):
                uuid_mins[uuid][frames_ix[frame]] = closest

    return uuid_mins

In [8]:
# For each frame, calculate the number of people at different social
# distances (Hall Proxemics)

# Intimate Distance .15 - .46 Meters
# Personal Distance (close) .46 - .76 Meters
# Personal Distance (far) .76 - 1.22 Meters
# Social Distance (close) 1.22 - 2.1 Meters
# Social Distance (far) 2.1 - 3.6 Meters
# Public Distance (close) 3.6 - 7.6 Meters
# Public Distance (far) 7.6 + Meters

def distance_counts(min_dist, max_dist, frame_dist, frames_ix, uuids):
    """Count number of people nearby in a given social distance. 
    Return as a 2d array distances[uuid_ix][frame_ix]"""
    count_array = np.full((len(uuids), len(frame_dist)), 0.0)
    # iterate through every frame (dictionary) in the distance matrix
    for frame in frame_dist:
        # For each UUID do a thing
        for uuid in range(len(frame_dist[frame])):
            row = frame_dist[frame][uuid]
            count = np.count_nonzero((row >= min_dist) & (row < max_dist))
            count_array[uuid][frames_ix[frame]] = count
    return count_array

In [9]:
def print_zone_counts(uuids, frame_dist_mat, frames_ix):
    """Helper function for looking at the various intimate, 
    personal, social, and public distances."""
    uuid_count_intimate = distance_counts(.01, .46,
                                          frame_dist_mat,
                                          frames_ix,
                                          uuids)
    uuid_count_personal = distance_counts(.46,
                                          1.22,
                                          frame_dist_mat,
                                          frames_ix,
                                          uuids)
    uuid_count_social = distance_counts(1.22,
                                        3.6,
                                        frame_dist_mat,
                                        frames_ix,
                                        uuids)
    uuid_count_public = distance_counts(3.6,
                                        7.6,
                                        frame_dist_mat,
                                        frames_ix,
                                        uuids)
    uuid_count_public_far = distance_counts(7.6,
                                            100000000,
                                            frame_dist_mat,
                                            frames_ix,
                                            uuids)

    for distance in [uuid_count_intimate,
                     uuid_count_personal,
                     uuid_count_social,
                     uuid_count_public,
                     uuid_count_public_far]:
        print(np.mean(distance), np.median(distance), np.max(distance))

In [10]:
#  Calculate some matrices for the social metrics for proxemic analysis.

# The distance matrix is defined above in function distance_mat
# The angular distance is defined above in the angular mat
# The closest distance


def social_matrices(slc):
    uuids, uuids_ix, frames, frames_ix, by_frame = prep_slice(slc)
    matrices = {}
    matrices["dist_mat"] = distance_mat(by_frame, uuids, uuids_ix)
    matrices["angular_mat"] = angular_mat(by_frame, uuids, uuids_ix)
    matrices["mins_by_frame"] = closest_dist(matrices["dist_mat"], 
                                             uuids, 
                                             frames_ix)
    matrices["uuids"] = uuids
    matrices["uuids_ix"] = uuids_ix
    matrices["slice"] = slc
    return matrices

named_matrices = {}
for name, slc in [("group1_1", group1_1), 
                  ("group1_2", group1_2),
                  ("group2_1", group2_1),
                  ("group2_2", group2_2),
                  ("group3_1", group3_1),
                  ("group3_2", group3_2),
                  ("group4_1", group4_1),
                  ("group4_2", group4_2),
                  ("group_1d", group_1d),
                  ("group_2d", group_2d),
                  ("group_3d", group_3d),
                  ("group_4d", group_4d),
                 ]:
    named_matrices[name] = social_matrices(slc)

  dif_xyz = dif_xyz/len_dif
  closest = np.nanmin(row[np.nonzero(row)])


Pickle all the matrics for these slices for visualizing next.

In [11]:
# At 30FPS this takes some time to calculate.

print(named_matrices.keys())
with open("outputs/social_metrics_groups.pkl", "wb") as f:
    pickle.dump(named_matrices, f, protocol=-1)

dict_keys(['group1_1', 'group1_2', 'group2_1', 'group2_2', 'group3_1', 'group3_2', 'group4_1', 'group4_2', 'group_1d', 'group_2d', 'group_3d', 'group_4d'])


Next visit the <a href="3.VisualisingSocialMetrics.ipynb">Visualizing Social Metrics</a> notebook.