In [1]:
import duckdb
import pandas as pd
from statsbombpy import sb

# DuckDB bağlantısı



In [2]:
def pass_data_creation(events_extra_info,match_ID):
    
    events_df = sb.events(match_id=match_ID)
    events_df = events_df.set_index("match_id").join(
        events_extra_info.set_index("match_id"), how="left", rsuffix='_match'
    ).reset_index()

    # Koordinatları ayır
    events_df[['x', 'y']] = events_df['location'].apply(pd.Series)
    events_df[['pass_end_x', 'pass_end_y']] = events_df['pass_end_location'].apply(pd.Series)

    # Paslar tablosunu oluştur
    passes = events_df[(events_df.type == "Pass")]
    passes = passes[passes["pass_outcome"] != "Injury Clearance"]
    shots = events_df[(events_df.type == "Shot")][["player","shot_type","shot_key_pass_id","shot_outcome","shot_statsbomb_xg"]].dropna()
    passes_M = passes.set_index("id").join(
        shots.set_index("shot_key_pass_id"), how="left", rsuffix='_shotFromPass'
    )
    passes_M = passes_M[passes_M["pass_outcome"] != "Injury Clearance"]

    passes_M["pass_succes"] = passes_M.pass_outcome.isna()
    passes_M["f3rd_pass"] = (passes_M.x < 80) & (passes_M.pass_end_x > 80)
    passes_M["pass_progression"] = passes_M["pass_end_x"] - passes_M["x"]
    passes_M.loc[passes_M.pass_outcome.notna(), "pass_progression"] = None
    passes_M["shot_assisted_pass"] = (passes_M["pass_succes"] == True) &  (passes_M["shot_outcome_shotFromPass"].notna())
    passes_M["goal_assisted_pass"] = (passes_M["pass_succes"] == True) & (passes_M["shot_outcome_shotFromPass"] == "Goal")
    
    passes_M["cross_pass"] = (passes_M.pass_cross == True)
    passes_M["box_pass"] = (
        ((passes_M.x < 102) | ((passes_M.y < 18) | (passes_M.y > 62))) & ((passes_M.pass_end_x > 102) & ((passes_M.pass_end_y > 18) & (passes_M.pass_end_y < 62)))
    )

    passes_M["rowID"] = passes_M.index

    return passes_M

In [3]:
def foul_data_creation(events_extra_info,match_ID):

    events_df = sb.events(match_id=match_ID)
    events_df = events_df.set_index("match_id").join(
        events_extra_info.set_index("match_id"), how="left", rsuffix='_match'
    ).reset_index()

    # Koordinatları ayır
    events_df[['x', 'y']] = events_df['location'].apply(pd.Series)
    events_df[['pass_end_x', 'pass_end_y']] = events_df['pass_end_location'].apply(pd.Series)

    # Fauller tablosunu oluştur

    existing_columns = set(events_df.columns)
    # Set of columns you want to check
    columns_to_check = {"foul_committed_advantage", "foul_committed_card"}
    # Find which columns exist and which do not
    missing = columns_to_check - existing_columns
    if len(missing) == 0:
        foul_commited = events_df[(events_df.type == "Foul Committed")][
        ["id", "type", "player", "position","team","timestamp","minute", "foul_committed_advantage", "foul_committed_card", "x", "y"]
        ].set_index("id")
    elif len(missing) > 1:
        print("there was no commited advantage nor commited card in this match ")
        foul_commited = events_df[(events_df.type == "Foul Committed")][
            ["id", "type", "player","position","team","timestamp","minute", "x", "y"]]
        foul_commited["foul_committed_advantage_foul_commitedJ"] = None
        foul_commited["foul_committed_card_foul_commitedJ"] = None
        foul_commited = foul_commited.set_index("id")
    elif "foul_committed_card" in missing:
        print("there was no commited card in this match ")
        foul_commited = events_df[(events_df.type == "Foul Committed")][
            ["id", "type", "player","position","team","timestamp","minute", "foul_committed_advantage", "x", "y"]]
        foul_commited["foul_committed_card_foul_commitedJ"] = None
        foul_commited = foul_commited.set_index("id")
    elif "foul_committed_advantage" in missing:
        print("there was no commited advantage in this match ")
        foul_commited = events_df[(events_df.type == "Foul Committed")][
            ["id", "type", "player","position","team","timestamp","minute", "foul_committed_card", "x", "y"]]
        foul_commited["foul_committed_advantage_foul_commitedJ"] = None
        foul_commited = foul_commited.set_index("id")

    
    fouls = events_df[(events_df.type == "Foul Won")].explode("related_events")
    fouls_M = fouls.set_index("related_events").join(foul_commited, how="left", rsuffix='_foul_commitedJ')
    fouls_M = fouls_M[fouls_M["player_foul_commitedJ"].notna()]
    fouls_M["rowID"] = fouls_M.index
    return fouls_M

In [4]:
def network_data_creation(match_pass_data,timestamp_flag=1,minute_flag=1):

    df_network = match_pass_data.copy()
    
    if timestamp_flag==1:
        df_network['timestamp'] = pd.to_datetime(df_network['timestamp'])
   
    if minute_flag==1:
        df_network.loc[df_network['minute'] >= 45, 'timestamp'] += pd.Timedelta(minutes=45)


    df_network = df_network.sort_values(by='timestamp').reset_index(drop=True)

    # Sekans ID ve türleri için listeler
    sequence_id = []
    in_sequence_id = []
    sequence_type = []  # Sekans türleri: 'Goal', 'Shot', 'NoShot'
    sequence_xg = []
    pass_recipient_positions = []

    latest_positions = {}

    current_sequence_id = 0
    current_in_sequence_id = 0
    previous_player_to = None  # Bir önceki pası alan oyuncu

    for i, row in df_network.iterrows():
        # Yeni bir sekans başlatma kriterleri
        
        if (row['player'] != previous_player_to) :
            # Eğer önceki satırda şut sonucu varsa sekans türünü belirle
            if i > 0:
                previous_row = df_network.iloc[i - 1]
                if previous_row.get('shot_outcome_shotFromPass') == 'Goal':
                    sequence_type[-1] = 'Goal'
                    sequence_xg[-1] = previous_row.get('shot_statsbomb_xg_shotFromPass')

                elif not pd.isna(previous_row['player_shotFromPass']) :
                    sequence_type[-1] = 'Shot'
                    sequence_xg[-1] = previous_row.get('shot_statsbomb_xg_shotFromPass')

                else:
                    sequence_type[-1] = 'NoShot'
                    sequence_xg[-1] = 0

            # Yeni bir sekans başlat
            current_sequence_id += 1
            current_in_sequence_id = 0

        current_in_sequence_id+=1
        # Sekans ID'sini ve geçici türü kaydet
        sequence_id.append(current_sequence_id)
        in_sequence_id.append(current_in_sequence_id)
        sequence_type.append('')  # Tür daha sonra doldurulacak
        sequence_xg.append('')
        # Bir sonraki kontrol için 'pass_recipient' değerini güncelle
        previous_player_to = row['pass_recipient']

        #
        latest_positions.update({row['player']:row['position']})
        recipient = row['pass_recipient']
        pass_recipient_positions.append(latest_positions.get(recipient, None))
        

    # Son sekansı kontrol et
    last_row = df_network.iloc[-1]
    if last_row.get('shot_outcome_shotFromPass') == 'Goal':
        sequence_type[-1] = 'Goal'
        sequence_xg[-1] = previous_row.get('shot_statsbomb_xg_shotFromPass')

    elif not pd.isna(last_row['player_shotFromPass']):
        sequence_type[-1] = 'Shot'
        sequence_xg[-1] = previous_row.get('shot_statsbomb_xg_shotFromPass')
    else:
        sequence_type[-1] = 'NoShot'
        sequence_xg[-1] = 0



    # Sekans türlerini tüm satırlara yayma
    df_network['pass_recipient_position'] = pass_recipient_positions

    df_network['SequenceID'] = sequence_id
    df_network['InSequenceID'] = in_sequence_id
    df_network['SequenceType'] = sequence_type
    df_network['SequenceXG'] = sequence_xg
    df_network['SequenceType'] = df_network.groupby('SequenceID')['SequenceType'].transform('last')
    df_network['SequenceXG'] = df_network.groupby('SequenceID')['SequenceXG'].transform('last')
    df_network['SequenceBuildUpXG'] = df_network.groupby('SequenceID')['SequenceXG'].transform(
        lambda x: [0 if idx == len(x) - 1 else val for idx, val in enumerate(x)]
    )
    df_network['time_past'] = df_network.groupby('SequenceID')['timestamp'].transform(lambda x: (x - x.min()).dt.total_seconds())


    # Create a dictionary to keep track of the latest position of each player

    return df_network
# İlgili sütunları gösterme

In [5]:
def event_data_creation(events_extra_info,match_ID):
    
    events_df = sb.events(match_id=match_ID)
    events_df = events_df.set_index("match_id").join(events_extra_info.set_index("match_id"),how="left", rsuffix='_match').reset_index()
    #events_df.set_index("id")

    events_df['timestamp'] = pd.to_datetime(events_df['timestamp'], format='%H:%M:%S.%f')
    events_df.loc[events_df['minute'] >= 45, 'timestamp'] += pd.Timedelta(minutes=45)
    posessions_sorted = events_df[["duration","kick_off","minute","timestamp","possession","possession_team"]].sort_values("timestamp")
    possession_time = posessions_sorted.groupby("possession").apply(
        lambda x: (x["timestamp"].max() - x["timestamp"].min()).total_seconds(),include_groups=False
    ).reset_index(name="possession_time")
    events_df = events_df.set_index("possession").join(possession_time.set_index("possession"),how="left").reset_index()

    players = (
        events_df[events_df["player"].notnull()][["player", "position", "team"]]
        .groupby("player")["position"]
        .agg(lambda x: x.value_counts().idxmax())
        .reset_index()
    )
    events_df = events_df.set_index("player").join(players.set_index("player"),how="left",rsuffix="_most_played").reset_index()

    events_df[['x', 'y']] = events_df['location'].apply(pd.Series)
    events_df[['pass_end_x', 'pass_end_y']] = events_df['pass_end_location'].apply(pd.Series)
    passes = events_df[(events_df.type=="Pass")]
    passes = passes[passes["pass_outcome"] != "Injury Clearance"]
    events_df = events_df[events_df.type!="Pass"]
    shots_fromPass = events_df[(events_df.type=="Shot")][["player","shot_type","shot_key_pass_id","shot_outcome","shot_statsbomb_xg"]]
    passes_M = passes.set_index("id").join(shots_fromPass.set_index("shot_key_pass_id"),how="left",rsuffix='_shotFromPass').reset_index()
    passes_M["pass_succes"] = passes_M.pass_outcome.isna()
    passes_M["f3rd_pass"] = (passes_M.x < 80) & (passes_M.pass_end_x > 80)
    passes_M["pass_progression"] = passes_M["x"] - passes_M["pass_end_x"]
    passes_M.loc[passes_M.pass_outcome.notna(), "pass_progression"] = None
    passes_M["shot_assisted_pass"] = (passes_M.pass_outcome.isna()) &  (passes_M["shot_outcome_shotFromPass"].notna())
    passes_M["goal_assisted_pass"] = (passes_M.pass_outcome.isna()) & (passes_M["shot_outcome_shotFromPass"] == "Goal")
    passes_M["cross"] = (passes_M.pass_cross == True)
    passes_M["box_pass"] = (
        (passes_M.x < 102)
        & ((passes_M.y < 18) | (passes_M.y > 62))
        & (passes_M.pass_end_x > 102)
        & ((passes_M.pass_end_y > 18) | (passes_M.pass_end_y < 62))
    )
    passes_M = passes_M.set_index("pass_recipient").join(players.set_index("player"),how="left",rsuffix="_re_most_played").reset_index()
    passes_M = network_data_creation(passes_M,0,0)
    
     # Set of existing columns in the DataFrame
    existing_columns = set(events_df.columns)
        # Set of columns you want to check
    columns_to_check = {"foul_committed_advantage", "foul_committed_card"}
    # Find which columns exist and which do not
    missing = columns_to_check - existing_columns
    if len(missing) == 0:
        foul_commited = events_df[(events_df.type == "Foul Committed")][
        ["id", "type", "player", "position","team","timestamp","minute", "foul_committed_advantage", "foul_committed_card", "x", "y"]
        ].set_index("id")
    elif len(missing) > 1:
        print("there was no commited advantage nor commited card in this match ")
        foul_commited = events_df[(events_df.type == "Foul Committed")][
            ["id", "type", "player","position","team","timestamp","minute", "x", "y"]]
        foul_commited["foul_committed_advantage_foul_commitedJ"] = None
        foul_commited["foul_committed_card_foul_commitedJ"] = None
        foul_commited = foul_commited.set_index("id")
    elif "foul_committed_card" in missing:
        print("there was no commited card in this match ")
        foul_commited = events_df[(events_df.type == "Foul Committed")][
            ["id", "type", "player","position","team","timestamp","minute", "foul_committed_advantage", "x", "y"]]
        foul_commited["foul_committed_card_foul_commitedJ"] = None
        foul_commited = foul_commited.set_index("id")
    elif "foul_committed_advantage" in missing:
        print("there was no commited advantage in this match ")
        foul_commited = events_df[(events_df.type == "Foul Committed")][
            ["id", "type", "player","position","team","timestamp","minute", "foul_committed_card", "x", "y"]]
        foul_commited["foul_committed_advantage_foul_commitedJ"] = None
        foul_commited = foul_commited.set_index("id")

    
    
    
    
    fouls = events_df[(events_df["type"]=="Foul Won")].explode("related_events")
    events_df = events_df[~((events_df["type"]=="Foul Won") | (events_df["type"]=="Foul Committed"))]
    fouls_M = fouls.set_index("related_events").join(foul_commited,how="left", rsuffix='_foul_commitedJ').reset_index()
    fouls_M = fouls_M[fouls_M["player_foul_commitedJ"].notna()]

    events_df.set_index("id",inplace=True)
    passes_M.set_index("id",inplace=True)
    fouls_M.set_index("id",inplace=True)
    
    events_df = pd.concat([events_df,passes_M,fouls_M])

    return events_df



### Main

In [6]:
passDF = pd.DataFrame()
foulsDF= pd.DataFrame()
eventsDF = pd.DataFrame()

# # Lig ve sezon bilgileri
# competitions_and_seasons = [
#     (11, 27, "2015/16 La Liga"),
#     (9, 27, "2015/16 Bundesliga"),
#     (2, 27, "2015/16 Premier League"),
#     (12, 27, "2015/16 Serie A"),
#     (7, 27, "2015/16 Ligue 1"),
#     (43, 106, "FIFA World Cup 22"),
#     (43, 3, "FIFA World Cup 18"),
#     (55, 282, "Euro 2024"),
#     (55, 43, "Euro 2020")
# ]




laliga = (11, 27, "2015/16 La Liga")
bundesliga = (9, 27, "2015/16 Bundesliga")
premierleague = (2, 27, "2015/16 Premier League")
seriea = (12, 27, "2015/16 Serie A")
ligue1 = (7, 27, "2015/16 Ligue 1")
worldcup22 = (43, 106, "FIFA World Cup 22")
worldcup18 = (43, 3, "FIFA World Cup 18")
euru24 = (55, 282, "Euro 2024")
euro20 = (55, 43, "Euro 2020")

In [15]:
con = duckdb.connect("big5.db")

In [7]:
import warnings
warnings.filterwarnings('ignore')

#for competition, season, competitions_name in competitions_and_seasons:
    # Maçları al
big5 = [laliga,bundesliga,premierleague,seriea,ligue1]

for league in big5:
    competition, season, competitions_name = league
    matches = sb.matches(competition_id=competition, season_id=season)

    events_extra_info = matches[[
        "match_id",
        "match_date",
        "kick_off",
        "competition",
        "home_team",
        "away_team",
        "home_score",
        "away_score",
        "match_status",
        "match_week",
        "competition_stage",
        "referee",
        "home_managers",
        "away_managers",
    ]]

    matchCount = matches.shape[0]
    counter = 0


    for match_ID in matches["match_id"].unique():
        counter += 1
        info = events_extra_info[events_extra_info["match_id"]==match_ID]
        print(f"{counter}/{matchCount} | league: {info['competition'].values[0]}, match id {match_ID}, {info['home_team'].values[0]}-{info['away_team'].values[0]}")
        #passData = pass_data_creation(events_extra_info,match_ID)
        #foulData = foul_data_creation(events_extra_info,match_ID)
        #passDF = pd.concat([passDF, passData], axis=0, ignore_index=True)
        #foulsDF = pd.concat([foulsDF, foulData], axis=0, ignore_index=True)

        events = event_data_creation(events_extra_info,match_ID)
        eventsDF = pd.concat([eventsDF,events], axis=0, ignore_index=True)
    

1/380 | league: Spain - La Liga, match id 3825848, Levante UD-Eibar
2/380 | league: Spain - La Liga, match id 3825895, Las Palmas-Sevilla
3/380 | league: Spain - La Liga, match id 3825894, RC Deportivo La Coruña-Getafe
4/380 | league: Spain - La Liga, match id 3825855, Málaga-Levante UD
5/380 | league: Spain - La Liga, match id 3825908, Espanyol-Eibar
6/380 | league: Spain - La Liga, match id 3825883, Málaga-Las Palmas
7/380 | league: Spain - La Liga, match id 3825900, Sporting Gijón-Villarreal
8/380 | league: Spain - La Liga, match id 3825902, Rayo Vallecano-Levante UD
9/380 | league: Spain - La Liga, match id 3825876, Real Betis-Getafe
10/380 | league: Spain - La Liga, match id 3825846, RC Deportivo La Coruña-Real Madrid
11/380 | league: Spain - La Liga, match id 3825869, Athletic Club-Sevilla
12/380 | league: Spain - La Liga, match id 3825882, Atlético Madrid-Celta Vigo
13/380 | league: Spain - La Liga, match id 3825849, Valencia-Real Sociedad
14/380 | league: Spain - La Liga, match

KeyboardInterrupt: 

In [None]:
foulsDF=eventsDF[eventsDF["type"]== "Foul Won"]


columns = [
    "competition",
    "home_team",
    "away_team",
    "referee",
    "x",
    "y",
    "player",
    "position",
    "possession_team",
    "type_foul_commitedJ",
    "player_foul_commitedJ",
    "position_foul_commitedJ",
    "team_foul_commitedJ",
    "related_events",
    "foul_committed_advantage_foul_commitedJ",
    "x_foul_commitedJ",
    "y_foul_commitedJ",
    "foul_committed_card_foul_commitedJ",
]


foulsDF[columns].tail(10)

In [10]:
eventsDF['competition'].unique()

array(['Spain - La Liga', 'Germany - 1. Bundesliga',
       'England - Premier League'], dtype=object)

In [11]:
bundesliga = eventsDF[eventsDF['competition']=="Germany - 1. Bundesliga"] 
laliga = eventsDF[eventsDF['competition']=="Spain - La Liga"] 

In [12]:

# Connect to a DuckDB database (or create one if it doesn't exist)
# Use ':memory:' for an in-memory database or specify a filename for a persistent database
con = duckdb.connect("laliga.db")



In [14]:
laliga.tail()

Unnamed: 0,player,possession,match_id,bad_behaviour_card,ball_receipt_outcome,ball_recovery_recovery_failure,block_deflection,carry_end_location,clearance_aerial_won,clearance_body_part,...,shot_saved_off_target,shot_redirect,goalkeeper_lost_out,goalkeeper_lost_in_play,goalkeeper_shot_saved_to_post,shot_saved_to_post,shot_follows_dribble,goalkeeper_success_out,half_end_early_video_end,half_start_late_video_start
1282665,Jordi Alba Ramos,147,266557,,,,,,,,...,,,,,,,,,,
1282666,Rodrigo Moreno Machado,155,266557,,,,,,,,...,,,,,,,,,,
1282667,Sergio Busquets i Burgos,158,266557,,,,,,,,...,,,,,,,,,,
1282668,João Pedro Cavaco Cancelo,169,266557,,,,,,,,...,,,,,,,,,,
1282669,Neymar da Silva Santos Junior,177,266557,,,,,,,,...,,,,,,,,,,


In [18]:
# Tüm sütunlarda boş stringleri kontrol et
problem_columns = []
for col in laliga.columns:
    if (eventsDF[col] == '').any():  # Eğer sütun boş string içeriyorsa
        problem_columns.append(col)

print(f"Boş string içeren sütunlar: {problem_columns}")


Boş string içeren sütunlar: []


In [16]:
import numpy as np

# Tespit edilen sütunlarda boş stringleri NaN ile değiştir
for col in problem_columns:
    eventsDF[col] = eventsDF[col].replace('', np.nan)


In [19]:
# Write the Pandas DataFrame to the DuckDB database
# This creates a table named 'my_table'
con.execute("CREATE TABLE events AS SELECT * FROM laliga")




InvalidInputException: Invalid Input Error: Failed to cast value: Could not convert string '' to DOUBLE

In [None]:
# You can also use the `from_df` function for direct insertion:
# con.from_df(df, "my_table")

# Verify the data was written to the database
result = con.execute("SELECT * FROM events").fetchall()
print(result)

# Close the connection

In [78]:
data = con.execute("SELECT * FROM events").df()
data.tail(5)

Unnamed: 0,player,possession,match_id,bad_behaviour_card,ball_receipt_outcome,ball_recovery_recovery_failure,block_deflection,carry_end_location,clearance_aerial_won,clearance_body_part,...,goalkeeper_shot_saved_off_target,shot_saved_off_target,shot_redirect,goalkeeper_lost_out,goalkeeper_lost_in_play,goalkeeper_shot_saved_to_post,shot_saved_to_post,shot_follows_dribble,goalkeeper_success_out,half_end_early_video_end
1282665,Jordi Alba Ramos,147,266557,,,,,,,,...,,,,,,,,,,
1282666,Rodrigo Moreno Machado,155,266557,,,,,,,,...,,,,,,,,,,
1282667,Sergio Busquets i Burgos,158,266557,,,,,,,,...,,,,,,,,,,
1282668,João Pedro Cavaco Cancelo,169,266557,,,,,,,,...,,,,,,,,,,
1282669,Neymar da Silva Santos Junior,177,266557,,,,,,,,...,,,,,,,,,,


In [12]:
data.shape

(1059472, 162)

In [20]:
con.execute("DROP TABLE events")


CatalogException: Catalog Error: Table with name events does not exist!
Did you mean "pg_views"?

In [79]:
# Close the connection
con.close()



### Test


In [53]:
matches = sb.matches(competition_id=11, season_id=27)

events_extra_info = matches[[
    "match_id",
    "match_date",
    "kick_off",
    "competition",
    "home_team",
    "away_team",
    "home_score",
    "away_score",
    "match_status",
    "match_week",
    "competition_stage",
    "referee",
    "home_managers",
    "away_managers",
]]
events_df = sb.events(match_id=3825895)
events = event_data_creation(events_extra_info,3825895)




In [54]:


columnsp = [
    "match_id",
    "competition",
    "home_team",
    "home_score",
    "away_team",
    "away_score",
    "referee",
    "team",
    "possession",
    "possession_time",
    "minute",
    "timestamp",
    "player",
    "position",
    "position_most_played",
    "x",
    "y",
    "pass_end_x",
    "pass_end_y",
    "pass_length",
    "pass_recipient",
    "pass_recipient_id",
    "position_re_most_played",
    "pass_recipient_position",
    "player_shotFromPass",
    "shot_type_shotFromPass",
    "shot_outcome_shotFromPass",
    "shot_statsbomb_xg_shotFromPass",
    'SequenceXG',
    "SequenceType",
    "SequenceBuildUpXG",
    "pass_succes",
    "pass_outcome",
]

pass_data=events[events["type"]== "Pass"][columnsp]
pass_data


Unnamed: 0_level_0,match_id,competition,home_team,home_score,away_team,away_score,referee,team,possession,possession_time,...,pass_recipient_position,player_shotFromPass,shot_type_shotFromPass,shot_outcome_shotFromPass,shot_statsbomb_xg_shotFromPass,SequenceXG,SequenceType,SequenceBuildUpXG,pass_succes,pass_outcome
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0d4f8d2d-ef7c-447b-a3a0-b50c67e40502,3825895,Spain - La Liga,Las Palmas,2,Sevilla,0,Carlos del Cerro Grande,Sevilla,2,9.047,...,,,,,,0,NoShot,0.000000,True,
2a791f4f-2fdd-4ca1-90d3-98bafc6f035b,3825895,Spain - La Liga,Las Palmas,2,Sevilla,0,Carlos del Cerro Grande,Sevilla,2,9.047,...,,,,,,0,NoShot,0.000000,True,
d382050a-3d28-436a-88be-9115b21ceb3a,3825895,Spain - La Liga,Las Palmas,2,Sevilla,0,Carlos del Cerro Grande,Sevilla,2,9.047,...,,,,,,0,NoShot,0.000000,True,
675a4374-71ba-457f-bed5-2af2936e166b,3825895,Spain - La Liga,Las Palmas,2,Sevilla,0,Carlos del Cerro Grande,Sevilla,2,9.047,...,,,,,,0,NoShot,0.000000,False,Pass Offside
72496900-3aa7-4fe3-a76e-8de7fa9c432a,3825895,Spain - La Liga,Las Palmas,2,Sevilla,0,Carlos del Cerro Grande,Las Palmas,3,5.686,...,,,,,,0.019472,Shot,0.019472,True,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5a525344-3f0a-48ba-94b8-08f67a7b82ce,3825895,Spain - La Liga,Las Palmas,2,Sevilla,0,Carlos del Cerro Grande,Las Palmas,199,51.766,...,,,,,,0,NoShot,0.000000,False,Incomplete
1c183033-971b-4086-a00b-29d1a17dea09,3825895,Spain - La Liga,Las Palmas,2,Sevilla,0,Carlos del Cerro Grande,Sevilla,199,51.766,...,,,,,,0,NoShot,0.000000,False,Incomplete
f733f7cf-51b1-4328-9aa5-b64f9e0e7232,3825895,Spain - La Liga,Las Palmas,2,Sevilla,0,Carlos del Cerro Grande,Sevilla,199,51.766,...,Left Center Back,,,,,0,NoShot,0.000000,True,
0e07c5ab-6573-4b8e-8756-7bcc12d4ec70,3825895,Spain - La Liga,Las Palmas,2,Sevilla,0,Carlos del Cerro Grande,Sevilla,199,51.766,...,Left Back,,,,,0,NoShot,0.000000,True,


In [44]:
pass_data['shot_outcome_shotFromPass'].unique()

array([nan, 'Saved', 'Off T', 'Goal', 'Blocked'], dtype=object)

In [34]:
pass_data['player_shotFromPass'].isna()

id
0d4f8d2d-ef7c-447b-a3a0-b50c67e40502    True
2a791f4f-2fdd-4ca1-90d3-98bafc6f035b    True
d382050a-3d28-436a-88be-9115b21ceb3a    True
675a4374-71ba-457f-bed5-2af2936e166b    True
72496900-3aa7-4fe3-a76e-8de7fa9c432a    True
                                        ... 
5a525344-3f0a-48ba-94b8-08f67a7b82ce    True
1c183033-971b-4086-a00b-29d1a17dea09    True
f733f7cf-51b1-4328-9aa5-b64f9e0e7232    True
0e07c5ab-6573-4b8e-8756-7bcc12d4ec70    True
6e98f50d-55bd-4ea5-ad62-7d0aaf521390    True
Name: player_shotFromPass, Length: 897, dtype: bool

In [51]:
test = pass_data['player_shotFromPass'].iloc[0]
not pd.isna(test)

False