In [1]:
from datetime import datetime
from haversine import haversine
from itertools import combinations
from numpy import sqrt
from pandas import DataFrame, read_csv, merge, concat

### Loads the dataset with interpolated positions

In [2]:
# Loads the dataset with interpolated positions from the games played in May 2022
positions_df = read_csv('../data/2022_05/interpolated_positions.csv')

# Converts the entries in column Timestamp into datetime objects.
# The parsing is mapped over the Timestamp column only: a row-wise
# DataFrame.apply(axis=1) builds a full Series for every row just to read one
# field, and does not reliably return a Series when the frame has no rows.
positions_df['Timestamp'] = positions_df['Timestamp'].map(
    lambda stamp: datetime.strptime(stamp, '%Y-%m-%d  %H:%M:%S'))

### Generates the full edgelist for each game and computes the distance between each possible link

In [3]:
def compute_distance(row):
    """Returns the haversine distance, in metres, between the two positions held by a row.

    Parameters
    ----------
    row : object exposing the attributes Latitude_1, Longitude_1, Latitude_2
        and Longitude_2 (e.g. one row of a merged positions frame).

    Returns
    -------
    The value returned by haversine() for the two (latitude, longitude) pairs,
    requested with unit='m'.
    """
    first_position = (row.Latitude_1, row.Longitude_1)
    second_position = (row.Latitude_2, row.Longitude_2)
    return haversine(first_position, second_position, unit='m')

In [4]:
# Columns written to the full edgelist, in output order.
edgelist_columns = ['Game', 'Timestamp', 'distance',
                    'Player_ID_1', 'Latitude_1', 'Longitude_1',
                    'Player_ID_2', 'Latitude_2', 'Longitude_2']

# One frame per (game, pair of players). The frames are concatenated once after
# the loops: growing a DataFrame with concat() inside the loop re-copies every
# previously accumulated row at each iteration, and seeding it with an empty
# DataFrame() relies on concat's deprecated handling of empty frames.
pair_frames = []

for game in positions_df['Game'].unique():
    game_df = positions_df[positions_df.Game == game]

    # Each player's positions for this game, indexed by Timestamp. Computed once
    # per player rather than once per pair; the order follows unique(), so the
    # pairs are generated in the same order as combinations() over the player ids.
    tracks = [game_df[game_df.Player_ID == player].set_index('Timestamp')
              for player in game_df['Player_ID'].unique()]

    for track_1, track_2 in combinations(tracks, 2):
        # Inner join on Timestamp: keeps only the instants at which both players
        # have a position. Every non-index column is shared by both sides, so
        # each one receives the '_1' / '_2' suffix (including Game).
        pair_df = merge(left=track_1, right=track_2,
                        left_index=True, right_index=True,
                        suffixes=('_1', '_2')).reset_index()
        pair_df['distance'] = pair_df.apply(compute_distance, axis=1)
        pair_frames.append(pair_df)

if pair_frames:
    # Both sides of a merge come from the same game, so Game_1 is the game id.
    distances_df = concat(pair_frames, ignore_index=True).rename(columns={'Game_1': 'Game'})
else:
    # No game has two players: still produce a frame with the expected columns
    # so that the export below writes a header-only file instead of failing.
    distances_df = DataFrame(columns=edgelist_columns)

distances_df.to_csv('../data/2022_05/full_edgelist.csv',
                    index=False,
                    columns=edgelist_columns)

### (Re)loads the full edgelist

In [5]:
# Loads the full edgelist from the games played in May 2022.
# NOTE: from this point on, positions_df holds the edgelist (one row per pair of
# players and timestamp) and no longer the per-player positions, so the cell
# that builds the edgelist cannot be re-run without reloading the positions first.
positions_df = read_csv('../data/2022_05/full_edgelist.csv')

# Converts the entries in column Timestamp into datetime objects.
# The parsing is mapped over the Timestamp column only: a row-wise
# DataFrame.apply(axis=1) builds a full Series for every row just to read one
# field, and does not reliably return a Series when the frame has no rows.
positions_df['Timestamp'] = positions_df['Timestamp'].map(
    lambda stamp: datetime.strptime(stamp, '%Y-%m-%d  %H:%M:%S'))

### Example: extracts an edgelist where edges exist if players are within a given distance threshold

In [6]:
# Identifier of the game whose interactions are exported.
game_to_keep = 1

# Two players are linked when their distance is strictly below this value.
distance_threshold = 10

# Row selectors: edges of the chosen game, and edges short enough to count.
in_selected_game = positions_df.Game == game_to_keep
within_threshold = positions_df.distance < distance_threshold

# The file name records both the game and the threshold used.
output_path = '../data/2022_05/edgelist_game{}_thresh{}.csv'.format(game_to_keep, distance_threshold)

# Columns written to the thresholded edgelist.
# Other columns available:
#   Game, distance,
#   Latitude_1, Longitude_1
#   Latitude_2, Longitude_2
exported_columns = ['Player_ID_1', 'Player_ID_2', 'Timestamp']

positions_df[in_selected_game & within_threshold].to_csv(output_path,
                                                         index=False,
                                                         columns=exported_columns)