# Import soccer tracking data into ocel

In [1]:
# imports
import pandas as pd
import pm4py
from pm4py.objects.ocel.util.log_ocel import log_to_ocel_multiple_obj_types as log_to_ocel
import helpers
print(pm4py.__version__)

2.7.15.2


In [2]:
# settings
## Resolution of the grid
x_fields = 10
y_fields = 10

In [3]:
# load data from csvs

tracking_data_away_df = pd.read_csv('sample-data\data\Sample_Game_1\Sample_Game_1_RawTrackingData_Away_Team.csv', skiprows=2)
print('Tracking data away:')
print(tracking_data_away_df.columns)
display(tracking_data_away_df.head())
tracking_data_home_df = pd.read_csv('sample-data\data\Sample_Game_1\Sample_Game_1_RawTrackingData_Home_Team.csv', skiprows=2)
print('Tracking data home:')
print(tracking_data_home_df.columns)
display(tracking_data_home_df.head())

Tracking data away:
Index(['Period', 'Frame', 'Time [s]', 'Player25', 'Unnamed: 4', 'Player15',
       'Unnamed: 6', 'Player16', 'Unnamed: 8', 'Player17', 'Unnamed: 10',
       'Player18', 'Unnamed: 12', 'Player19', 'Unnamed: 14', 'Player20',
       'Unnamed: 16', 'Player21', 'Unnamed: 18', 'Player22', 'Unnamed: 20',
       'Player23', 'Unnamed: 22', 'Player24', 'Unnamed: 24', 'Player26',
       'Unnamed: 26', 'Player27', 'Unnamed: 28', 'Player28', 'Unnamed: 30',
       'Ball', 'Unnamed: 32'],
      dtype='object')


Unnamed: 0,Period,Frame,Time [s],Player25,Unnamed: 4,Player15,Unnamed: 6,Player16,Unnamed: 8,Player17,...,Player24,Unnamed: 24,Player26,Unnamed: 26,Player27,Unnamed: 28,Player28,Unnamed: 30,Ball,Unnamed: 32
0,1,1,0.04,0.90509,0.47462,0.58393,0.20794,0.67658,0.4671,0.6731,...,0.37833,0.27383,,,,,,,0.45472,0.38709
1,1,2,0.08,0.90494,0.47462,0.58393,0.20794,0.67658,0.4671,0.6731,...,0.37833,0.27383,,,,,,,0.49645,0.40656
2,1,3,0.12,0.90434,0.47463,0.58393,0.20794,0.67658,0.4671,0.6731,...,0.37833,0.27383,,,,,,,0.53716,0.42556
3,1,4,0.16,0.90377,0.47463,0.58351,0.20868,0.6764,0.46762,0.67279,...,0.37756,0.27473,,,,,,,0.55346,0.42231
4,1,5,0.2,0.90324,0.47464,0.58291,0.21039,0.67599,0.46769,0.67253,...,0.37663,0.27543,,,,,,,0.55512,0.4057


Tracking data home:
Index(['Period', 'Frame', 'Time [s]', 'Player11', 'Unnamed: 4', 'Player1',
       'Unnamed: 6', 'Player2', 'Unnamed: 8', 'Player3', 'Unnamed: 10',
       'Player4', 'Unnamed: 12', 'Player5', 'Unnamed: 14', 'Player6',
       'Unnamed: 16', 'Player7', 'Unnamed: 18', 'Player8', 'Unnamed: 20',
       'Player9', 'Unnamed: 22', 'Player10', 'Unnamed: 24', 'Player12',
       'Unnamed: 26', 'Player13', 'Unnamed: 28', 'Player14', 'Unnamed: 30',
       'Ball', 'Unnamed: 32'],
      dtype='object')


Unnamed: 0,Period,Frame,Time [s],Player11,Unnamed: 4,Player1,Unnamed: 6,Player2,Unnamed: 8,Player3,...,Player10,Unnamed: 24,Player12,Unnamed: 26,Player13,Unnamed: 28,Player14,Unnamed: 30,Ball,Unnamed: 32
0,1,1,0.04,0.00082,0.48238,0.32648,0.65322,0.33701,0.48863,0.30927,...,0.55243,0.43269,,,,,,,0.45472,0.38709
1,1,2,0.08,0.00096,0.48238,0.32648,0.65322,0.33701,0.48863,0.30927,...,0.55243,0.43269,,,,,,,0.49645,0.40656
2,1,3,0.12,0.00114,0.48238,0.32648,0.65322,0.33701,0.48863,0.30927,...,0.55243,0.43269,,,,,,,0.53716,0.42556
3,1,4,0.16,0.00121,0.48238,0.32622,0.65317,0.33687,0.48988,0.30944,...,0.55236,0.43313,,,,,,,0.55346,0.42231
4,1,5,0.2,0.00129,0.48238,0.32597,0.65269,0.33664,0.49018,0.30948,...,0.55202,0.43311,,,,,,,0.55512,0.4057


In [11]:
# reshape the tracking data to long format (one row per player per time point)
def reshape_tracking(df, team_label):
    long_rows = []
    for col in df.columns:
        if col.startswith("Player"):
            x_col = col
            y_col = f"Unnamed: {int(df.columns.get_loc(col)) + 1}"
            for _, row in df.iterrows():
                # only append if X and Y is not NaN
                if pd.notna(row[x_col]) and pd.notna(row[y_col]):
                    long_rows.append({
                        "Time [s]": row["Time [s]"],
                        "Frame": row["Frame"],
                        "Team": team_label.capitalize(),
                        "Player": f"{team_label.capitalize()}_{col}",
                        "X": row[x_col],
                        "Y": row[y_col]
                    })
    return pd.DataFrame(long_rows)

tracking_long_home_df = reshape_tracking(tracking_data_home_df, "home")
tracking_long_away_df = reshape_tracking(tracking_data_away_df, "away")
tracking_long_df = pd.concat([tracking_long_home_df, tracking_long_away_df])
tracking_long_df.head()

Unnamed: 0,Time [s],Frame,Team,Player,X,Y
0,0.04,1.0,Home,Home_Player11,0.00082,0.48238
1,0.08,2.0,Home,Home_Player11,0.00096,0.48238
2,0.12,3.0,Home,Home_Player11,0.00114,0.48238
3,0.16,4.0,Home,Home_Player11,0.00121,0.48238
4,0.2,5.0,Home,Home_Player11,0.00129,0.48238


In [12]:
# add grid position to tracking long
tracking_long_df['Grid Position'] = tracking_long_df.apply(lambda row: helpers.get_field_position(row['X'], row['Y'], x_fields=x_fields, y_fields=y_fields), axis=1)
tracking_long_df.head()

Unnamed: 0,Time [s],Frame,Team,Player,X,Y,Grid Position
0,0.04,1.0,Home,Home_Player11,0.00082,0.48238,A5
1,0.08,2.0,Home,Home_Player11,0.00096,0.48238,A5
2,0.12,3.0,Home,Home_Player11,0.00114,0.48238,A5
3,0.16,4.0,Home,Home_Player11,0.00121,0.48238,A5
4,0.2,5.0,Home,Home_Player11,0.00129,0.48238,A5


In [13]:
# identify only events when grid position of player changes
def get_grid_change_events(tracking_df):
    tracking_df = tracking_df.sort_values(by=['Player', 'Time [s]'])
    tracking_df['Prev Grid Position'] = tracking_df.groupby('Player')['Grid Position'].shift(1)
    grid_change_events = tracking_df[tracking_df['Grid Position'] != tracking_df['Prev Grid Position']]
    grid_change_events.rename(columns={'Grid Position': 'To Position', 
                                'Prev Grid Position': 'From Position'}, inplace=True
                       )
    return grid_change_events

tracking_grid_change_events_df = get_grid_change_events(tracking_long_df)
display(tracking_grid_change_events_df.head())

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  grid_change_events.rename(columns={'Grid Position': 'To Position',


Unnamed: 0,Time [s],Frame,Team,Player,X,Y,To Position,From Position
145006,0.04,1.0,Away,Away_Player15,0.58393,0.20794,F3,
145029,0.96,24.0,Away,Away_Player15,0.56821,0.19967,F2,F3
145084,3.16,79.0,Away,Away_Player15,0.54274,0.20009,F3,F2
145160,6.2,155.0,Away,Away_Player15,0.54714,0.30046,F4,F3
145223,8.72,218.0,Away,Away_Player15,0.50298,0.40046,F5,F4


In [14]:
# format dataframe for ocel
def format_ocel_df(tracking_df):
    ocel_df = tracking_df.copy()
    ocel_df['concept:name'] = "Player moved to " + ocel_df['To Position'].astype(str)
    ocel_df['case:concept:name'] = ocel_df['Team']
    ocel_df['time:timestamp'] = pd.to_datetime(ocel_df['Time [s]'], unit='s')
    
    return ocel_df

ocel_df = format_ocel_df(tracking_grid_change_events_df)
ocel_df

Unnamed: 0,Time [s],Frame,Team,Player,X,Y,To Position,From Position,concept:name,case:concept:name,time:timestamp
145006,0.04,1.0,Away,Away_Player15,0.58393,0.20794,F3,,Player moved to F3,Away,1970-01-01 00:00:00.040
145029,0.96,24.0,Away,Away_Player15,0.56821,0.19967,F2,F3,Player moved to F2,Away,1970-01-01 00:00:00.960
145084,3.16,79.0,Away,Away_Player15,0.54274,0.20009,F3,F2,Player moved to F3,Away,1970-01-01 00:00:03.160
145160,6.20,155.0,Away,Away_Player15,0.54714,0.30046,F4,F3,Player moved to F4,Away,1970-01-01 00:00:06.200
145223,8.72,218.0,Away,Away_Player15,0.50298,0.40046,F5,F4,Player moved to F5,Away,1970-01-01 00:00:08.720
...,...,...,...,...,...,...,...,...,...,...,...
1317161,5782.60,144565.0,Home,Home_Player9,0.11281,0.79993,B8,B9,Player moved to B8,Home,1970-01-01 01:36:22.600
1317173,5783.08,144577.0,Home,Home_Player9,0.11286,0.80001,B9,B8,Player moved to B9,Home,1970-01-01 01:36:23.080
1317273,5787.08,144677.0,Home,Home_Player9,0.11225,0.79987,B8,B9,Player moved to B8,Home,1970-01-01 01:36:27.080
1317345,5789.96,144749.0,Home,Home_Player9,0.11105,0.80019,B9,B8,Player moved to B9,Home,1970-01-01 01:36:29.960


In [15]:
# count isna in "to position" format_ocel_df
print("Count of NaN in 'To Position':", ocel_df['To Position'].isna().sum())

Count of NaN in 'To Position': 0


In [16]:
# convert data frame to event log
event_log = pm4py.convert_to_event_log(ocel_df)

In [17]:
# convert event log to ocel
ocel = log_to_ocel(event_log, activity_column='concept:name', 
                   timestamp_column='time:timestamp', 
                   obj_types=['Team','Player', 'To Position', 'From Position'])

In [18]:
# show how the ocel looks now
ocel.get_extended_table()

Unnamed: 0,ocel:eid,ocel:activity,ocel:timestamp,ocel:type:Team,ocel:type:Player,ocel:type:To Position,ocel:type:From Position
0,0,Player moved to F3,1970-01-01 00:00:00.040,[Away],[Away_Player15],[F3],
1,1,Player moved to F2,1970-01-01 00:00:00.960,[Away],[Away_Player15],[F2],[F3]
2,2,Player moved to F3,1970-01-01 00:00:03.160,[Away],[Away_Player15],[F3],[F2]
3,3,Player moved to F4,1970-01-01 00:00:06.200,[Away],[Away_Player15],[F4],[F3]
4,4,Player moved to F5,1970-01-01 00:00:08.720,[Away],[Away_Player15],[F5],[F4]
...,...,...,...,...,...,...,...
32343,32343,Player moved to B8,1970-01-01 01:36:22.600,[Home],[Home_Player9],[B8],[B9]
32344,32344,Player moved to B9,1970-01-01 01:36:23.080,[Home],[Home_Player9],[B9],[B8]
32345,32345,Player moved to B8,1970-01-01 01:36:27.080,[Home],[Home_Player9],[B8],[B9]
32346,32346,Player moved to B9,1970-01-01 01:36:29.960,[Home],[Home_Player9],[B9],[B8]


In [19]:
# write ocel out to .jsonocel
path = "tracking_game1.jsonocel"
pm4py.write_ocel(ocel, path)