In [1]:
from datetime import datetime
from haversine import haversine
from itertools import combinations
from numpy import sqrt
from pandas import DataFrame, read_csv, merge, concat

### Loads the dataset with interpolated positions

In [2]:
# Loads the dataset with interpolated positions from the games played in May 2022
df = read_csv('../data/2022_05/interpolated_positions.csv')

# Converts the entries in column Timestamp into datetime objects.
# Only the Timestamp column is mapped: a row-wise apply over the whole frame would
# build a Series for every row just to read a single field from it.
# strptime is kept on purpose, since it treats the whitespace run in the format as
# "one or more whitespace characters" when matching the input strings.
df['Timestamp'] = df['Timestamp'].map(lambda stamp: datetime.strptime(stamp, '%Y-%m-%d  %H:%M:%S'))

### Generates the full edgelist for each game and computes the length of each possible link (the distance between the two players)

In [3]:
def compute_distance(row):
    """Returns the haversine distance (unit 'm') between the two positions of a row.

    The row must expose the attributes Latitude_1, Longitude_1, Latitude_2 and
    Longitude_2. Only the horizontal distance is returned: no elevation term is
    included in the result.
    """
    first_position = (row.Latitude_1, row.Longitude_1)
    second_position = (row.Latitude_2, row.Longitude_2)
    return haversine(first_position, second_position, unit='m')

In [4]:
# Builds, for every game, the edge list of all unordered pairs of players: the rows
# of the two players are aligned on Timestamp (inner join) and the distance between
# their positions is computed for every timestamp they share.

# Columns written to the edgelist file, in output order
edge_columns = ['Game', 'Timestamp', 'distance',
                'Player_ID_1', 'Latitude_1', 'Longitude_1',
                'Player_ID_2', 'Latitude_2', 'Longitude_2']

# Per-pair frames are collected here and concatenated once at the end; growing a
# DataFrame with concat inside the loop copies all previous rows at every step.
pair_frames = []

# sort=False keeps games and players in order of first appearance, which is the
# order Series.unique() yields
for game, game_df in df.groupby('Game', sort=False):

    # One Timestamp-indexed frame per player, built once per game instead of
    # re-filtering the whole dataset for every pair of players
    tracks = {player: player_df.set_index('Timestamp')
              for player, player_df in game_df.groupby('Player_ID', sort=False)}

    for player_1, player_2 in combinations(tracks, 2):
        tmp_df = merge(left=tracks[player_1], right=tracks[player_2],
                       left_index=True, right_index=True,
                       suffixes=('_1', '_2')).reset_index()

        # Players without any shared timestamp produce no link. They must be
        # skipped: apply(axis=1) on an empty frame returns a DataFrame, which
        # cannot be assigned to the single column 'distance'.
        if tmp_df.empty:
            continue

        tmp_df['distance'] = tmp_df.apply(compute_distance, axis=1)
        pair_frames.append(tmp_df)

if pair_frames:
    # Both sides carry the same game, so Game_1 is kept as the Game column
    new_df = concat(pair_frames, ignore_index=True).rename(columns={'Game_1': 'Game'})
else:
    # No link at all: keep the expected schema so that a header-only file is written
    new_df = DataFrame(columns=edge_columns)

new_df.to_csv('../data/2022_05/full_edgelist.csv',
              index=False,
              columns=edge_columns)