In [1]:
import mplsoccer.statsbomb as sbapi
import pandas as pd
import os
import glob

This notebook converts the StatsBomb open-data JSON files into Parquet files. Parquet files are extremely fast to load, which makes them well suited to this kind of prototyping analysis.

# Change these paths/parameters
You will need to change these paths/parameters depending on where the StatsBomb open-data is located, how and where you want to save the resulting data, and whether you only want the new files to be processed.

In [2]:
# Location of the StatsBomb open-data 'data' folder, relative to this notebook
# (three directory levels up, then into open-data/data). Change if run elsewhere.
_open_data_parts = ('..', '..', '..', 'open-data', 'data')
STATSBOMB_DATA = os.path.join(*_open_data_parts)
# Folder that receives the generated parquet files (two directory levels up,
# then into data/statsbomb). Change this to save the output somewhere else.
_output_parts = ('..', '..', 'data', 'statsbomb')
DATA_FOLDER = os.path.join(*_output_parts)

# Get the data file paths

In [3]:
# Every json file below a table's folder, at any depth ('**' needs recursive=True)
json_pattern = ('**', '*.json')
event_links = glob.glob(os.path.join(STATSBOMB_DATA, 'events', *json_pattern), recursive=True)
lineup_links = glob.glob(os.path.join(STATSBOMB_DATA, 'lineups', *json_pattern), recursive=True)
match_links = glob.glob(os.path.join(STATSBOMB_DATA, 'matches', *json_pattern), recursive=True)
# the competitions table is a single file directly inside the data folder
competition_path = os.path.join(STATSBOMB_DATA, 'competitions.json')

# Make the directory structure

In [4]:
# Make the directory structure: one sub-folder per raw table inside DATA_FOLDER.
# os.makedirs also creates DATA_FOLDER (and any missing parent folders), which
# os.mkdir would not, and exist_ok=True keeps the cell safe to re-run.
for folder in ['event_raw', 'related_event_raw', 'freeze_frame_raw', 'tactic_raw', 'lineup_raw']:
    path = os.path.join(DATA_FOLDER, folder)
    os.makedirs(path, exist_ok=True)

# Read the competition data

In [5]:
# Load the competition table from competitions.json
df_competition = sbapi.read_competition(competition_path, warn=False)
# pandas stores timestamps at nanosecond resolution; allow_truncated_timestamps=True
# lets the parquet writer drop the extra precision instead of raising.
# The slight loss of precision is not relevant for the analysis.
competition_parquet = os.path.join(DATA_FOLDER, 'competition.parquet')
df_competition.to_parquet(competition_parquet, allow_truncated_timestamps=True)
df_competition.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36 entries, 0 to 35
Data columns (total 8 columns):
 #   Column              Non-Null Count  Dtype         
---  ------              --------------  -----         
 0   competition_id      36 non-null     int64         
 1   season_id           36 non-null     int64         
 2   country_name        36 non-null     object        
 3   competition_name    36 non-null     object        
 4   competition_gender  36 non-null     object        
 5   season_name         36 non-null     object        
 6   match_updated       36 non-null     datetime64[ns]
 7   match_available     36 non-null     datetime64[ns]
dtypes: datetime64[ns](2), int64(2), object(4)
memory usage: 2.4+ KB


# Read the match data

In [6]:
# Read every match json into its own dataframe, then stack them into one table
match_dfs = []
for match_file in match_links:
    match_dfs.append(sbapi.read_match(match_file, warn=False))
df_match = pd.concat(match_dfs)
# As with the competition table, timestamps are truncated when saved;
# this only matters for the last_updated column.
match_parquet = os.path.join(DATA_FOLDER, 'match.parquet')
df_match.to_parquet(match_parquet, allow_truncated_timestamps=True)
df_match.info()

Skipping ..\..\..\open-data\data\matches\16\4.json: empty json
Skipping ..\..\..\open-data\data\matches\16\42.json: empty json
Skipping ..\..\..\open-data\data\matches\16\44.json: empty json
Skipping ..\..\..\open-data\data\matches\16\76.json: empty json
<class 'pandas.core.frame.DataFrame'>
Int64Index: 842 entries, 0 to 51
Data columns (total 48 columns):
 #   Column                           Non-Null Count  Dtype         
---  ------                           --------------  -----         
 0   match_id                         842 non-null    int64         
 1   match_date                       842 non-null    datetime64[ns]
 2   kick_off                         841 non-null    datetime64[ns]
 3   home_score                       842 non-null    int64         
 4   away_score                       842 non-null    int64         
 5   last_updated                     842 non-null    datetime64[ns]
 6   match_week                       842 non-null    int64         
 7   competition_id 

# Read the lineup data

In [7]:
LINEUP_FOLDER = os.path.join(DATA_FOLDER, 'lineup_raw')
# Loop through the links and store each lineup as its own parquet file - small and fast files.
# Files that already have a parquet counterpart are skipped, so re-running this cell
# only processes lineups that have not been converted yet.
for file in lineup_links:
    # e.g. '15946.json' -> '15946.parquet'
    save_path = f'{os.path.splitext(os.path.basename(file))[0]}.parquet'
    if os.path.isfile(os.path.join(LINEUP_FOLDER, save_path)):
        continue
    print('Trying:', file)
    try:
        df_lineup = sbapi.read_lineup(file, warn=False)
        df_lineup.to_parquet(os.path.join(LINEUP_FOLDER, save_path))
    except Exception as error:
        # Best effort: one bad file should not stop the whole conversion.
        # Catching Exception (not a bare except) keeps Ctrl-C / kernel interrupt working,
        # and printing the error shows why the file was skipped.
        print('Skipping:', file, '-', repr(error))

Trying: ..\..\..\open-data\data\lineups\15946.json
Trying: ..\..\..\open-data\data\lineups\15956.json
Trying: ..\..\..\open-data\data\lineups\15973.json
Trying: ..\..\..\open-data\data\lineups\15978.json
Trying: ..\..\..\open-data\data\lineups\15986.json
Trying: ..\..\..\open-data\data\lineups\15998.json
Trying: ..\..\..\open-data\data\lineups\16010.json
Trying: ..\..\..\open-data\data\lineups\16023.json
Trying: ..\..\..\open-data\data\lineups\16029.json
Trying: ..\..\..\open-data\data\lineups\16056.json
Trying: ..\..\..\open-data\data\lineups\16073.json
Trying: ..\..\..\open-data\data\lineups\16079.json
Trying: ..\..\..\open-data\data\lineups\16086.json
Trying: ..\..\..\open-data\data\lineups\16095.json
Trying: ..\..\..\open-data\data\lineups\16109.json
Trying: ..\..\..\open-data\data\lineups\16120.json
Trying: ..\..\..\open-data\data\lineups\16131.json
Trying: ..\..\..\open-data\data\lineups\16136.json
Trying: ..\..\..\open-data\data\lineups\16149.json
Trying: ..\..\..\open-data\data

Trying: ..\..\..\open-data\data\lineups\2275042.json
Trying: ..\..\..\open-data\data\lineups\2275044.json
Trying: ..\..\..\open-data\data\lineups\2275045.json
Trying: ..\..\..\open-data\data\lineups\2275048.json
Trying: ..\..\..\open-data\data\lineups\2275049.json
Trying: ..\..\..\open-data\data\lineups\2275050.json
Trying: ..\..\..\open-data\data\lineups\2275051.json
Trying: ..\..\..\open-data\data\lineups\2275052.json
Trying: ..\..\..\open-data\data\lineups\2275054.json
Trying: ..\..\..\open-data\data\lineups\2275056.json
Trying: ..\..\..\open-data\data\lineups\2275057.json
Trying: ..\..\..\open-data\data\lineups\2275061.json
Trying: ..\..\..\open-data\data\lineups\2275062.json
Trying: ..\..\..\open-data\data\lineups\2275063.json
Trying: ..\..\..\open-data\data\lineups\2275065.json
Trying: ..\..\..\open-data\data\lineups\2275070.json
Trying: ..\..\..\open-data\data\lineups\2275072.json
Trying: ..\..\..\open-data\data\lineups\2275073.json
Trying: ..\..\..\open-data\data\lineups\227507

Trying: ..\..\..\open-data\data\lineups\266516.json
Trying: ..\..\..\open-data\data\lineups\266525.json
Trying: ..\..\..\open-data\data\lineups\266528.json
Trying: ..\..\..\open-data\data\lineups\266531.json
Trying: ..\..\..\open-data\data\lineups\266557.json
Trying: ..\..\..\open-data\data\lineups\266560.json
Trying: ..\..\..\open-data\data\lineups\266574.json
Trying: ..\..\..\open-data\data\lineups\266603.json
Trying: ..\..\..\open-data\data\lineups\266613.json
Trying: ..\..\..\open-data\data\lineups\266620.json
Trying: ..\..\..\open-data\data\lineups\266631.json
Trying: ..\..\..\open-data\data\lineups\266653.json
Trying: ..\..\..\open-data\data\lineups\266664.json
Trying: ..\..\..\open-data\data\lineups\266669.json
Trying: ..\..\..\open-data\data\lineups\266670.json
Trying: ..\..\..\open-data\data\lineups\266724.json
Trying: ..\..\..\open-data\data\lineups\266731.json
Trying: ..\..\..\open-data\data\lineups\266741.json
Trying: ..\..\..\open-data\data\lineups\266770.json
Trying: ..\.

Trying: ..\..\..\open-data\data\lineups\68354.json
Trying: ..\..\..\open-data\data\lineups\68355.json
Trying: ..\..\..\open-data\data\lineups\68356.json
Trying: ..\..\..\open-data\data\lineups\68357.json
Trying: ..\..\..\open-data\data\lineups\68358.json
Trying: ..\..\..\open-data\data\lineups\68359.json
Trying: ..\..\..\open-data\data\lineups\68360.json
Trying: ..\..\..\open-data\data\lineups\68361.json
Trying: ..\..\..\open-data\data\lineups\68362.json
Trying: ..\..\..\open-data\data\lineups\68363.json
Trying: ..\..\..\open-data\data\lineups\68364.json
Trying: ..\..\..\open-data\data\lineups\68365.json
Trying: ..\..\..\open-data\data\lineups\68366.json
Trying: ..\..\..\open-data\data\lineups\69137.json
Trying: ..\..\..\open-data\data\lineups\69138.json
Trying: ..\..\..\open-data\data\lineups\69139.json
Trying: ..\..\..\open-data\data\lineups\69141.json
Trying: ..\..\..\open-data\data\lineups\69142.json
Trying: ..\..\..\open-data\data\lineups\69143.json
Trying: ..\..\..\open-data\data

Trying: ..\..\..\open-data\data\lineups\69318.json
Trying: ..\..\..\open-data\data\lineups\69319.json
Trying: ..\..\..\open-data\data\lineups\69320.json
Trying: ..\..\..\open-data\data\lineups\69321.json
Trying: ..\..\..\open-data\data\lineups\69322.json
Trying: ..\..\..\open-data\data\lineups\69323.json
Trying: ..\..\..\open-data\data\lineups\69324.json
Trying: ..\..\..\open-data\data\lineups\69325.json
Trying: ..\..\..\open-data\data\lineups\69326.json
Trying: ..\..\..\open-data\data\lineups\69327.json
Trying: ..\..\..\open-data\data\lineups\69328.json
Trying: ..\..\..\open-data\data\lineups\69329.json
Trying: ..\..\..\open-data\data\lineups\69330.json
Trying: ..\..\..\open-data\data\lineups\69331.json
Trying: ..\..\..\open-data\data\lineups\69332.json
Trying: ..\..\..\open-data\data\lineups\69333.json
Trying: ..\..\..\open-data\data\lineups\69334.json
Trying: ..\..\..\open-data\data\lineups\69335.json
Trying: ..\..\..\open-data\data\lineups\69336.json
Trying: ..\..\..\open-data\data

Trying: ..\..\..\open-data\data\lineups\8656.json
Trying: ..\..\..\open-data\data\lineups\8657.json
Trying: ..\..\..\open-data\data\lineups\8658.json
Trying: ..\..\..\open-data\data\lineups\9575.json
Trying: ..\..\..\open-data\data\lineups\9581.json
Trying: ..\..\..\open-data\data\lineups\9592.json
Trying: ..\..\..\open-data\data\lineups\9602.json
Trying: ..\..\..\open-data\data\lineups\9609.json
Trying: ..\..\..\open-data\data\lineups\9620.json
Trying: ..\..\..\open-data\data\lineups\9636.json
Trying: ..\..\..\open-data\data\lineups\9642.json
Trying: ..\..\..\open-data\data\lineups\9650.json
Trying: ..\..\..\open-data\data\lineups\9661.json
Trying: ..\..\..\open-data\data\lineups\9673.json
Trying: ..\..\..\open-data\data\lineups\9682.json
Trying: ..\..\..\open-data\data\lineups\9695.json
Trying: ..\..\..\open-data\data\lineups\9700.json
Trying: ..\..\..\open-data\data\lineups\9717.json
Trying: ..\..\..\open-data\data\lineups\9726.json
Trying: ..\..\..\open-data\data\lineups\9736.json


Convert to a single dataframe

In [8]:
# Stack the per-match lineup parquet files into a single dataframe
lineup_files = glob.glob(os.path.join(LINEUP_FOLDER, '*.parquet'))
df_lineup = pd.concat([pd.read_parquet(file) for file in lineup_files])
# Replace some player ids that appear to be duplicated (old id -> id to keep).
# The result is assigned back to the column: calling replace(..., inplace=True) on
# df_lineup.player_id is a chained in-place operation, which warns on recent pandas
# and does not update df_lineup when Copy-on-Write is enabled.
# NOTE(review): the original comment said "Then de-duplicate", but no rows are
# dropped in this cell - confirm whether a drop_duplicates step is still wanted.
df_lineup['player_id'] = df_lineup['player_id'].replace({18103: 38522,  # Dietmar Hamann
                                                         17275: 4656,  # Hannah Jayne Blundell
                                                         17524: 4655,  # Jennifer Beattie
                                                         10172: 4644,  # Jill Scott
                                                         4634: 5088,  # Crystal Dunn
                                                         })
df_lineup.to_parquet(os.path.join(DATA_FOLDER, 'lineup.parquet'))
df_lineup.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 25004 entries, 0 to 35
Data columns (total 9 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   team_id               25004 non-null  int64  
 1   team_name             25004 non-null  object 
 2   match_id              25004 non-null  int64  
 3   player_id             25004 non-null  int64  
 4   player_name           25004 non-null  object 
 5   player_nickname       13399 non-null  object 
 6   player_jersey_number  25004 non-null  int64  
 7   player_country_id     24999 non-null  float64
 8   player_country_name   24999 non-null  object 
dtypes: float64(1), int64(4), object(4)
memory usage: 1.9+ MB


# Read the event data

In [9]:
# Loop through the links and store as parquet files - small and fast files.
# Each event json produces four tables: events, related events, shot freeze frames
# and tactics line-ups. A match is skipped when its event_raw file already exists.
for file in event_links:
    # e.g. '15946.json' -> '15946.parquet'
    save_path = f'{os.path.splitext(os.path.basename(file))[0]}.parquet'
    if os.path.isfile(os.path.join(DATA_FOLDER, 'event_raw', save_path)):
        continue
    print('Trying:', file)
    try:
        dict_event = sbapi.read_event(file, warn=False)
        # save to parquet files
        # using the dictionary key to access the dataframes from the dictionary
        dict_event['related_event'].to_parquet(os.path.join(DATA_FOLDER, 'related_event_raw', save_path))
        dict_event['shot_freeze_frame'].to_parquet(os.path.join(DATA_FOLDER, 'freeze_frame_raw', save_path))
        dict_event['tactics_lineup'].to_parquet(os.path.join(DATA_FOLDER, 'tactic_raw', save_path))
        # The event table is written last on purpose: the skip check above only looks at
        # event_raw, so its presence must mean the other three tables were saved too.
        # Otherwise a failure part-way through would leave them missing on every re-run.
        dict_event['event'].to_parquet(os.path.join(DATA_FOLDER, 'event_raw', save_path))
    except Exception as error:
        # Best effort: one bad file should not stop the whole conversion.
        # Catching Exception (not a bare except) keeps Ctrl-C / kernel interrupt working,
        # and printing the error shows why the file was skipped.
        print('Skipping:', file, '-', repr(error))

Trying: ..\..\..\open-data\data\events\15946.json
Trying: ..\..\..\open-data\data\events\15956.json
Trying: ..\..\..\open-data\data\events\15973.json
Trying: ..\..\..\open-data\data\events\15978.json
Trying: ..\..\..\open-data\data\events\15986.json
Trying: ..\..\..\open-data\data\events\15998.json
Trying: ..\..\..\open-data\data\events\16010.json
Trying: ..\..\..\open-data\data\events\16023.json
Trying: ..\..\..\open-data\data\events\16029.json
Trying: ..\..\..\open-data\data\events\16056.json
Trying: ..\..\..\open-data\data\events\16073.json
Trying: ..\..\..\open-data\data\events\16079.json
Trying: ..\..\..\open-data\data\events\16086.json
Trying: ..\..\..\open-data\data\events\16095.json
Trying: ..\..\..\open-data\data\events\16109.json
Trying: ..\..\..\open-data\data\events\16120.json
Trying: ..\..\..\open-data\data\events\16131.json
Trying: ..\..\..\open-data\data\events\16136.json
Trying: ..\..\..\open-data\data\events\16149.json
Trying: ..\..\..\open-data\data\events\16157.json


Trying: ..\..\..\open-data\data\events\2275044.json
Trying: ..\..\..\open-data\data\events\2275045.json
Trying: ..\..\..\open-data\data\events\2275048.json
Trying: ..\..\..\open-data\data\events\2275049.json
Trying: ..\..\..\open-data\data\events\2275050.json
Trying: ..\..\..\open-data\data\events\2275051.json
Trying: ..\..\..\open-data\data\events\2275052.json
Trying: ..\..\..\open-data\data\events\2275054.json
Trying: ..\..\..\open-data\data\events\2275056.json
Trying: ..\..\..\open-data\data\events\2275057.json
Trying: ..\..\..\open-data\data\events\2275061.json
Trying: ..\..\..\open-data\data\events\2275062.json
Trying: ..\..\..\open-data\data\events\2275063.json
Trying: ..\..\..\open-data\data\events\2275065.json
Trying: ..\..\..\open-data\data\events\2275070.json
Trying: ..\..\..\open-data\data\events\2275072.json
Trying: ..\..\..\open-data\data\events\2275073.json
Trying: ..\..\..\open-data\data\events\2275074.json
Trying: ..\..\..\open-data\data\events\2275075.json
Trying: ..\.

Trying: ..\..\..\open-data\data\events\266531.json
Trying: ..\..\..\open-data\data\events\266557.json
Trying: ..\..\..\open-data\data\events\266560.json
Trying: ..\..\..\open-data\data\events\266574.json
Trying: ..\..\..\open-data\data\events\266603.json
Trying: ..\..\..\open-data\data\events\266613.json
Trying: ..\..\..\open-data\data\events\266620.json
Trying: ..\..\..\open-data\data\events\266631.json
Trying: ..\..\..\open-data\data\events\266653.json
Trying: ..\..\..\open-data\data\events\266664.json
Trying: ..\..\..\open-data\data\events\266669.json
Trying: ..\..\..\open-data\data\events\266670.json
Trying: ..\..\..\open-data\data\events\266724.json
Trying: ..\..\..\open-data\data\events\266731.json
Trying: ..\..\..\open-data\data\events\266741.json
Trying: ..\..\..\open-data\data\events\266770.json
Trying: ..\..\..\open-data\data\events\266794.json
Trying: ..\..\..\open-data\data\events\266815.json
Trying: ..\..\..\open-data\data\events\266827.json
Trying: ..\..\..\open-data\data

Trying: ..\..\..\open-data\data\events\68359.json
Trying: ..\..\..\open-data\data\events\68360.json
Trying: ..\..\..\open-data\data\events\68361.json
Trying: ..\..\..\open-data\data\events\68362.json
Trying: ..\..\..\open-data\data\events\68363.json
Trying: ..\..\..\open-data\data\events\68364.json
Trying: ..\..\..\open-data\data\events\68365.json
Trying: ..\..\..\open-data\data\events\68366.json
Trying: ..\..\..\open-data\data\events\69137.json
Trying: ..\..\..\open-data\data\events\69138.json
Trying: ..\..\..\open-data\data\events\69139.json
Trying: ..\..\..\open-data\data\events\69141.json
Trying: ..\..\..\open-data\data\events\69142.json
Trying: ..\..\..\open-data\data\events\69143.json
Trying: ..\..\..\open-data\data\events\69144.json
Trying: ..\..\..\open-data\data\events\69145.json
Trying: ..\..\..\open-data\data\events\69146.json
Trying: ..\..\..\open-data\data\events\69147.json
Trying: ..\..\..\open-data\data\events\69148.json
Trying: ..\..\..\open-data\data\events\69149.json


Trying: ..\..\..\open-data\data\events\69320.json
Trying: ..\..\..\open-data\data\events\69321.json
Trying: ..\..\..\open-data\data\events\69322.json
Trying: ..\..\..\open-data\data\events\69323.json
Trying: ..\..\..\open-data\data\events\69324.json
Trying: ..\..\..\open-data\data\events\69325.json
Trying: ..\..\..\open-data\data\events\69326.json
Trying: ..\..\..\open-data\data\events\69327.json
Trying: ..\..\..\open-data\data\events\69328.json
Trying: ..\..\..\open-data\data\events\69329.json
Trying: ..\..\..\open-data\data\events\69330.json
Trying: ..\..\..\open-data\data\events\69331.json
Trying: ..\..\..\open-data\data\events\69332.json
Trying: ..\..\..\open-data\data\events\69333.json
Trying: ..\..\..\open-data\data\events\69334.json
Trying: ..\..\..\open-data\data\events\69335.json
Trying: ..\..\..\open-data\data\events\69336.json
Trying: ..\..\..\open-data\data\events\69337.json
Trying: ..\..\..\open-data\data\events\69338.json
Trying: ..\..\..\open-data\data\events\69340.json


Trying: ..\..\..\open-data\data\events\8657.json
Trying: ..\..\..\open-data\data\events\8658.json
Trying: ..\..\..\open-data\data\events\9575.json
Trying: ..\..\..\open-data\data\events\9581.json
Trying: ..\..\..\open-data\data\events\9592.json
Trying: ..\..\..\open-data\data\events\9602.json
Trying: ..\..\..\open-data\data\events\9609.json
Trying: ..\..\..\open-data\data\events\9620.json
Trying: ..\..\..\open-data\data\events\9636.json
Trying: ..\..\..\open-data\data\events\9642.json
Trying: ..\..\..\open-data\data\events\9650.json
Trying: ..\..\..\open-data\data\events\9661.json
Trying: ..\..\..\open-data\data\events\9673.json
Trying: ..\..\..\open-data\data\events\9682.json
Trying: ..\..\..\open-data\data\events\9695.json
Trying: ..\..\..\open-data\data\events\9700.json
Trying: ..\..\..\open-data\data\events\9717.json
Trying: ..\..\..\open-data\data\events\9726.json
Trying: ..\..\..\open-data\data\events\9736.json
Trying: ..\..\..\open-data\data\events\9742.json
Trying: ..\..\..\ope

Single dataframe events

In [10]:
# Stack the per-match event parquet files into a single dataframe and save it
event_files = glob.glob(os.path.join(DATA_FOLDER, 'event_raw', '*.parquet'))
df_event = pd.concat([pd.read_parquet(file) for file in event_files])
df_event.to_parquet(os.path.join(DATA_FOLDER, 'event.parquet'))
# show_counts replaces the null_counts argument, which was deprecated in pandas 1.2
# and removed in pandas 2.0 (requires pandas >= 1.2). It forces the non-null counts
# to be shown even though the frame is large.
df_event.info(verbose=True, show_counts=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3056120 entries, 0 to 3746
Data columns (total 124 columns):
 #   Column                            Non-Null Count    Dtype  
---  ------                            --------------    -----  
 0   match_id                          3056120 non-null  int64  
 1   id                                3056120 non-null  object 
 2   index                             3056120 non-null  int64  
 3   period                            3056120 non-null  int64  
 4   timestamp_minute                  3056120 non-null  int64  
 5   timestamp_second                  3056120 non-null  int64  
 6   timestamp_millisecond             3056120 non-null  int64  
 7   minute                            3056120 non-null  int64  
 8   second                            3056120 non-null  int64  
 9   type_id                           3056120 non-null  int64  
 10  type_name                         3056120 non-null  object 
 11  outcome_id                        42817

Single dataframe shot freeze frames

In [11]:
# Stack the per-match shot freeze-frame parquet files into a single dataframe and save it
freeze_folder = os.path.join(DATA_FOLDER, 'freeze_frame_raw')
freeze_files = glob.glob(os.path.join(freeze_folder, '*.parquet'))
freeze_frames = list(map(pd.read_parquet, freeze_files))
df_freeze = pd.concat(freeze_frames)
freeze_parquet = os.path.join(DATA_FOLDER, 'freeze.parquet')
df_freeze.to_parquet(freeze_parquet)
df_freeze.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 266478 entries, 0 to 341
Data columns (total 10 columns):
 #   Column                Non-Null Count   Dtype  
---  ------                --------------   -----  
 0   id                    266478 non-null  object 
 1   event_freeze_id       266478 non-null  int64  
 2   player_teammate       266478 non-null  bool   
 3   player_id             266478 non-null  int64  
 4   player_name           266478 non-null  object 
 5   player_position_id    266478 non-null  int64  
 6   player_position_name  266478 non-null  object 
 7   x                     266478 non-null  float64
 8   y                     266478 non-null  float64
 9   match_id              266478 non-null  int64  
dtypes: bool(1), float64(2), int64(4), object(3)
memory usage: 20.6+ MB


Single dataframe tactics

In [12]:
# Stack the per-match tactics line-up parquet files into a single dataframe and save it
tactic_folder = os.path.join(DATA_FOLDER, 'tactic_raw')
tactic_files = glob.glob(os.path.join(tactic_folder, '*.parquet'))
tactic_frames = list(map(pd.read_parquet, tactic_files))
df_tactic = pd.concat(tactic_frames)
tactic_parquet = os.path.join(DATA_FOLDER, 'tactic.parquet')
df_tactic.to_parquet(tactic_parquet)
df_tactic.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 35225 entries, 0 to 32
Data columns (total 8 columns):
 #   Column                Non-Null Count  Dtype  
---  ------                --------------  -----  
 0   id                    35225 non-null  object 
 1   event_tactics_id      35225 non-null  int64  
 2   player_jersey_number  35221 non-null  float64
 3   player_id             35225 non-null  int64  
 4   player_name           35225 non-null  object 
 5   player_position_id    35225 non-null  int64  
 6   player_position_name  35225 non-null  object 
 7   match_id              35225 non-null  int64  
dtypes: float64(1), int64(4), object(3)
memory usage: 2.4+ MB


Single dataframe related events

In [13]:
# Stack the per-match related-event parquet files into a single dataframe and save it
related_files = glob.glob(os.path.join(DATA_FOLDER, 'related_event_raw', '*.parquet'))
df_related = pd.concat([pd.read_parquet(file) for file in related_files])
df_related.to_parquet(os.path.join(DATA_FOLDER, 'related.parquet'))
# show_counts replaces the null_counts argument, which was deprecated in pandas 1.2
# and removed in pandas 2.0 (requires pandas >= 1.2). It forces the non-null counts
# to be shown even though the frame is large.
df_related.info(verbose=True, show_counts=True)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5941216 entries, 0 to 7467
Data columns (total 7 columns):
 #   Column             Non-Null Count    Dtype 
---  ------             --------------    ----- 
 0   id                 5941216 non-null  object
 1   id_related         5941216 non-null  object
 2   type_name          5941216 non-null  object
 3   index              5941216 non-null  int64 
 4   type_name_related  5941216 non-null  object
 5   index_related      5941216 non-null  int64 
 6   match_id           5941216 non-null  int64 
dtypes: int64(3), object(4)
memory usage: 362.6+ MB
