In [1]:
from datetime import datetime
from haversine import haversine
from math import atan2, pi
from pandas import DataFrame, read_csv

### Function computing the length, orientation and elapsed time of each step

In [2]:
def extract_steps(df):
    """Computes the length, orientation (in degrees) and elapsed time for each recorded step.

    A step is the move between two consecutive rows of ``df``, taken in the
    order in which the rows are stored (the frame is not re-sorted here).

    Parameters
    ----------
    df : DataFrame
        Positions with the columns 'Latitude', 'Longitude', 'Timestamp',
        'Game' and 'Player_ID'. Subtracting two 'Timestamp' values must give
        an object exposing ``total_seconds()``.

    Returns
    -------
    list of list
        One ``[Game, Player_ID, length, orientation, elapsed]`` entry per step,
        where Game and Player_ID are read from the row that ends the step:
        * length: value returned by ``haversine(..., unit='m')``;
        * orientation: ``atan2(Latitude difference, Longitude difference)``
          shifted by pi and converted to degrees, i.e. a value in [0, 360];
        * elapsed: seconds between the two timestamps.
        The list is empty when ``df`` has fewer than two rows.
    """
    # An empty frame has no first position to start from (``.iloc[0]`` would raise IndexError)
    if len(df) == 0:
        return []

    new_steps = []
    lat0, lon0, t0 = df['Latitude'].iloc[0], df['Longitude'].iloc[0], df['Timestamp'].iloc[0]

    # Explicit positional slice: every row but the first one ends a step
    for row in df.iloc[1:].itertuples():
        lat1, lon1, t1 = row.Latitude, row.Longitude, row.Timestamp
        # NOTE(review): the haversine package documents its points as (lat, lon) tuples,
        # but the Longitude column is passed first here. The order is kept unchanged;
        # confirm it against how the columns of the input data are actually labelled.
        length = haversine((lon1, lat1), (lon0, lat0), unit='m')
        # Assuming flat space (i.e. small angles)
        orientation = 360 * (atan2(lat1 - lat0, lon1 - lon0) + pi) / (2 * pi)
        new_steps.append([row.Game,
                          row.Player_ID,
                          length,
                          orientation,
                          (t1 - t0).total_seconds()])
        # The end of this step is the start of the next one
        lat0, lon0, t0 = lat1, lon1, t1
    return new_steps

# Exposes the function as a DataFrame method so it can be called as df.extract_steps()
DataFrame.extract_steps = extract_steps

### Loads the original dataset

In [3]:
# Loads the original data from the games played in May 2022
filename = '../data/2022_05/Data_Jeu_Pred-Prey_IAOMay2022/Data/All_Tracks.csv'
cols = ['Elevation', 'Latitude', 'Longitude', 'Timestamp', 'Game', 'Player_ID']
positions_df = read_csv(filename, usecols=cols)

# Converts the entries in column Timestamp into datetime objects.
# Mapping over the single column parses each string with the same format while
# avoiding the per-row Series that a row-wise DataFrame.apply(axis=1) builds.
positions_df['Timestamp'] = positions_df['Timestamp'].map(
    lambda stamp: datetime.strptime(stamp, '%Y-%m-%d  %H:%M:%S'))

In [4]:
# Collects the steps (length, orientation in degrees, elapsed time) of every
# (Game, Player_ID) track into one flat list
steps = [
    step
    for _, track_df in positions_df.groupby(['Game', 'Player_ID'])
    for step in track_df.extract_steps()
]

# Builds the steps dataframe, ordered by game and then by player
step_columns = ['Game', 'Player_ID', 'length (m)', 'orientation (deg.)', 'time (s)']
steps_df = DataFrame(steps, columns=step_columns).sort_values(by=['Game', 'Player_ID'])

# Writes the steps to a CSV file (without the index column)
steps_df.to_csv('../data/2022_05/steps_original_positions.csv', index=False)

In [5]:
# Displays the first 10 rows of steps_df
steps_df.head(10)

Unnamed: 0,Game,Player_ID,length (m),orientation (deg.),time (s)
0,1,DB,0.518919,43.898294,11.0
1,1,DB,1.883942,6.215636,8.0
2,1,DB,1.041307,198.898247,3.0
3,1,DB,2.885629,349.36915,7.0
4,1,DB,1.239599,180.430787,8.0
5,1,DB,3.031341,199.440035,9.0
6,1,DB,2.058005,205.717856,2.0
7,1,DB,13.786565,196.49906,11.0
8,1,DB,3.187621,211.588413,19.0
9,1,DB,12.659506,11.756468,11.0


### Dataset with interpolated positions

In [6]:
# Loads the dataset with interpolated positions from the games played in May 2022
positions_df = read_csv('../data/2022_05/interpolated_positions.csv')

# Converts the entries in column Timestamp into datetime objects.
# Mapping over the single column parses each string with the same format while
# avoiding the per-row Series that a row-wise DataFrame.apply(axis=1) builds.
positions_df['Timestamp'] = positions_df['Timestamp'].map(
    lambda stamp: datetime.strptime(stamp, '%Y-%m-%d  %H:%M:%S'))

In [7]:
# Collects the steps (length, orientation in degrees, elapsed time) of every
# (Game, Player_ID) track into one flat list
steps = [
    step
    for _, track_df in positions_df.groupby(['Game', 'Player_ID'])
    for step in track_df.extract_steps()
]

# Builds the steps dataframe, ordered by game and then by player
step_columns = ['Game', 'Player_ID', 'length (m)', 'orientation (deg.)', 'time (s)']
steps_df = DataFrame(steps, columns=step_columns).sort_values(by=['Game', 'Player_ID'])

# Writes the steps to a CSV file (without the index column)
steps_df.to_csv('../data/2022_05/steps_interpolated_positions.csv', index=False)

In [8]:
# Displays the first 10 rows of steps_df
steps_df.head(10)

Unnamed: 0,Game,Player_ID,length (m),orientation (deg.),time (s)
0,1,DB,0.047174,43.898295,1.0
1,1,DB,0.047174,43.898293,1.0
2,1,DB,0.047174,43.898295,1.0
3,1,DB,0.047174,43.898295,1.0
4,1,DB,0.047174,43.898294,1.0
5,1,DB,0.047174,43.898294,1.0
6,1,DB,0.047174,43.898294,1.0
7,1,DB,0.047174,43.898295,1.0
8,1,DB,0.047174,43.898295,1.0
9,1,DB,0.047174,43.898294,1.0
