In [3]:
import duckdb
import pandas as pd
from statsbombpy import sb

# DuckDB bağlantısı
#con = duckdb.connect("bitirme.db")


In [4]:
def pass_data_creation(events_extra_info,match_ID):
    
    events_df = sb.events(match_id=match_ID)
    events_df = events_df.set_index("match_id").join(
        events_extra_info.set_index("match_id"), how="left", rsuffix='_match'
    ).reset_index()

    # Koordinatları ayır
    events_df[['x', 'y']] = events_df['location'].apply(pd.Series)
    events_df[['pass_end_x', 'pass_end_y']] = events_df['pass_end_location'].apply(pd.Series)

    # Paslar tablosunu oluştur
    passes = events_df[(events_df.type == "Pass")]
    passes = passes[passes["pass_outcome"] != "Injury Clearance"]
    shots = events_df[(events_df.type == "Shot")][["player","shot_type","shot_key_pass_id","shot_outcome","shot_statsbomb_xg"]].dropna()
    passes_M = passes.set_index("id").join(
        shots.set_index("shot_key_pass_id"), how="left", rsuffix='_shotFromPass'
    )
    passes_M = passes_M[passes_M["pass_outcome"] != "Injury Clearance"]

    passes_M["pass_succes"] = passes_M.pass_outcome.isna()
    passes_M["f3rd_pass"] = (passes_M.x < 80) & (passes_M.pass_end_x > 80)
    passes_M["pass_progression"] = passes_M["pass_end_x"] - passes_M["x"]
    passes_M.loc[passes_M.pass_outcome.notna(), "pass_progression"] = None
    passes_M["shot_assisted_pass"] = (passes_M["pass_succes"] == True) &  (passes_M["shot_outcome_shotFromPass"].notna())
    passes_M["goal_assisted_pass"] = (passes_M["pass_succes"] == True) & (passes_M["shot_outcome_shotFromPass"] == "Goal")
    
    passes_M["cross_pass"] = (passes_M.pass_cross == True)
    passes_M["box_pass"] = (
        ((passes_M.x < 102) | ((passes_M.y < 18) | (passes_M.y > 62))) & ((passes_M.pass_end_x > 102) & ((passes_M.pass_end_y > 18) & (passes_M.pass_end_y < 62)))
    )

    passes_M["rowID"] = passes_M.index

    return passes_M

In [5]:
def foul_data_creation(events_extra_info,match_ID):

    events_df = sb.events(match_id=match_ID)
    events_df = events_df.set_index("match_id").join(
        events_extra_info.set_index("match_id"), how="left", rsuffix='_match'
    ).reset_index()

    # Koordinatları ayır
    events_df[['x', 'y']] = events_df['location'].apply(pd.Series)
    events_df[['pass_end_x', 'pass_end_y']] = events_df['pass_end_location'].apply(pd.Series)

    # Fauller tablosunu oluştur

    existing_columns = set(events_df.columns)
    # Set of columns you want to check
    columns_to_check = {"foul_committed_advantage", "foul_committed_card"}
    # Find which columns exist and which do not
    missing = columns_to_check - existing_columns
    if len(missing) == 0:
        foul_commited = events_df[(events_df.type == "Foul Committed")][
        ["id", "type", "player", "position","team","timestamp","minute", "foul_committed_advantage", "foul_committed_card", "x", "y"]
        ].set_index("id")
    elif len(missing) > 1:
        print("there was no commited advantage nor commited card in this match ")
        foul_commited = events_df[(events_df.type == "Foul Committed")][
            ["id", "type", "player","position","team","timestamp","minute", "x", "y"]]
        foul_commited["foul_committed_advantage_foul_commitedJ"] = None
        foul_commited["foul_committed_card_foul_commitedJ"] = None
        foul_commited = foul_commited.set_index("id")
    elif "foul_committed_card" in missing:
        print("there was no commited card in this match ")
        foul_commited = events_df[(events_df.type == "Foul Committed")][
            ["id", "type", "player","position","team","timestamp","minute", "foul_committed_advantage", "x", "y"]]
        foul_commited["foul_committed_card_foul_commitedJ"] = None
        foul_commited = foul_commited.set_index("id")
    elif "foul_committed_advantage" in missing:
        print("there was no commited advantage in this match ")
        foul_commited = events_df[(events_df.type == "Foul Committed")][
            ["id", "type", "player","position","team","timestamp","minute", "foul_committed_card", "x", "y"]]
        foul_commited["foul_committed_advantage_foul_commitedJ"] = None
        foul_commited = foul_commited.set_index("id")

    
    fouls = events_df[(events_df.type == "Foul Won")].explode("related_events")
    fouls_M = fouls.set_index("related_events").join(foul_commited, how="left", rsuffix='_foul_commitedJ')
    fouls_M = fouls_M[fouls_M["player_foul_commitedJ"].notna()]
    fouls_M["rowID"] = fouls_M.index
    return fouls_M

In [6]:
def network_data_creation(match_pass_data,timestamp_flag=1,minute_flag=1):

    df_network = match_pass_data.copy()
    
    if timestamp_flag==1:
        df_network['timestamp'] = pd.to_datetime(df_network['timestamp'])
   
    if minute_flag==1:
        df_network.loc[df_network['minute'] >= 45, 'timestamp'] += pd.Timedelta(minutes=45)


    df_network = df_network.sort_values(by='timestamp').reset_index(drop=True)

    # Sekans ID ve türleri için listeler
    sequence_id = []
    in_sequence_id = []
    sequence_type = []  # Sekans türleri: 'Goal', 'Shot', 'NoShot'
    sequence_xg = []
    pass_recipient_positions = []

    latest_positions = {}

    current_sequence_id = 0
    current_in_sequence_id = 0
    previous_player_to = None  # Bir önceki pası alan oyuncu

    for i, row in df_network.iterrows():
        # Yeni bir sekans başlatma kriterleri
        
        if (row['player'] != previous_player_to) :
            # Eğer önceki satırda şut sonucu varsa sekans türünü belirle
            if i > 0:
                previous_row = df_network.iloc[i - 1]
                if previous_row.get('shot_outcome_shotFromPass') == 'Goal':
                    sequence_type[-1] = 'Goal'
                    sequence_xg[-1] = previous_row.get('shot_statsbomb_xg_shotFromPass')

                elif previous_row['player_shotFromPass'] is not None:
                    sequence_type[-1] = 'Shot'
                    sequence_xg[-1] = previous_row.get('shot_statsbomb_xg_shotFromPass')

                else:
                    sequence_type[-1] = 'NoShot'
                    sequence_xg[-1] = 0

            # Yeni bir sekans başlat
            current_sequence_id += 1
            current_in_sequence_id = 0

        current_in_sequence_id+=1
        # Sekans ID'sini ve geçici türü kaydet
        sequence_id.append(current_sequence_id)
        in_sequence_id.append(current_in_sequence_id)
        sequence_type.append('')  # Tür daha sonra doldurulacak
        sequence_xg.append('')
        # Bir sonraki kontrol için 'pass_recipient' değerini güncelle
        previous_player_to = row['pass_recipient']

        #
        latest_positions.update({row['player']:row['position']})
        recipient = row['pass_recipient']
        pass_recipient_positions.append(latest_positions.get(recipient, None))
        

    # Son sekansı kontrol et
    last_row = df_network.iloc[-1]
    if last_row.get('shot_outcome_shotFromPass') == 'goal':
        sequence_type[-1] = 'Goal'
        sequence_xg[-1] = previous_row.get('shot_statsbomb_xg_shotFromPass')

    elif last_row['player_shotFromPass'] is not None:
        sequence_type[-1] = 'Shot'
        sequence_xg[-1] = previous_row.get('shot_statsbomb_xg_shotFromPass')
    else:
        sequence_type[-1] = 'NoShot'
        sequence_xg[-1] = 0



    # Sekans türlerini tüm satırlara yayma
    df_network['pass_recipient_position'] = pass_recipient_positions

    df_network['SequenceID'] = sequence_id
    df_network['InSequenceID'] = in_sequence_id
    df_network['SequenceType'] = sequence_type
    df_network['SequenceXG'] = sequence_xg
    df_network['SequenceType'] = df_network.groupby('SequenceID')['SequenceType'].transform('last')
    df_network['SequenceXG'] = df_network.groupby('SequenceID')['SequenceXG'].transform('last')
    df_network['SequenceBuildUpXG'] = df_network.groupby('SequenceID')['SequenceXG'].transform(
        lambda x: [0 if idx == len(x) - 1 else val for idx, val in enumerate(x)]
    )
    df_network['time_past'] = df_network.groupby('SequenceID')['timestamp'].transform(lambda x: (x - x.min()).dt.total_seconds())


    # Create a dictionary to keep track of the latest position of each player

    return df_network
# İlgili sütunları gösterme

In [9]:
def event_data_creation(events_extra_info,match_ID):
    
    events_df = sb.events(match_id=match_ID)
    events_df = events_df.set_index("match_id").join(events_extra_info.set_index("match_id"),how="left", rsuffix='_match').reset_index()
    #events_df.set_index("id")

    events_df['timestamp'] = pd.to_datetime(events_df['timestamp'], format='%H:%M:%S.%f')
    events_df.loc[events_df['minute'] >= 45, 'timestamp'] += pd.Timedelta(minutes=45)
    posessions_sorted = events_df[["duration","kick_off","minute","timestamp","possession","possession_team"]].sort_values("timestamp")
    possession_time = posessions_sorted.groupby("possession").apply(
        lambda x: (x["timestamp"].max() - x["timestamp"].min()).total_seconds(),include_groups=False
    ).reset_index(name="possession_time")
    events_df = events_df.set_index("possession").join(possession_time.set_index("possession"),how="left").reset_index()

    players = (
        events_df[events_df["player"].notnull()][["player", "position", "team"]]
        .groupby("player")["position"]
        .agg(lambda x: x.value_counts().idxmax())
        .reset_index()
    )
    events_df = events_df.set_index("player").join(players.set_index("player"),how="left",rsuffix="_most_played").reset_index()

    events_df[['x', 'y']] = events_df['location'].apply(pd.Series)
    events_df[['pass_end_x', 'pass_end_y']] = events_df['pass_end_location'].apply(pd.Series)
    passes = events_df[(events_df.type=="Pass")]
    passes = passes[passes["pass_outcome"] != "Injury Clearance"]
    events_df = events_df[events_df.type!="Pass"]
    shots_fromPass = events_df[(events_df.type=="Shot")][["player","shot_type","shot_key_pass_id","shot_outcome","shot_statsbomb_xg"]]
    passes_M = passes.set_index("id").join(shots_fromPass.set_index("shot_key_pass_id"),how="left",rsuffix='_shotFromPass').reset_index()
    passes_M["pass_succes"] = passes_M.pass_outcome.isna()
    passes_M["f3rd_pass"] = (passes_M.x < 80) & (passes_M.pass_end_x > 80)
    passes_M["pass_progression"] = passes_M["x"] - passes_M["pass_end_x"]
    passes_M.loc[passes_M.pass_outcome.notna(), "pass_progression"] = None
    passes_M["shot_assisted_pass"] = (passes_M.pass_outcome.isna()) &  (passes_M["shot_outcome_shotFromPass"].notna())
    passes_M["goal_assisted_pass"] = (passes_M.pass_outcome.isna()) & (passes_M["shot_outcome_shotFromPass"] == "Goal")
    passes_M["cross"] = (passes_M.pass_cross == True)
    passes_M["box_pass"] = (
        (passes_M.x < 102)
        & ((passes_M.y < 18) | (passes_M.y > 62))
        & (passes_M.pass_end_x > 102)
        & ((passes_M.pass_end_y > 18) | (passes_M.pass_end_y < 62))
    )
    passes_M = passes_M.set_index("pass_recipient").join(players.set_index("player"),how="left",rsuffix="_re_most_played").reset_index()
    passes_M = network_data_creation(passes_M,0,0)
    
     # Set of existing columns in the DataFrame
    existing_columns = set(events_df.columns)
        # Set of columns you want to check
    columns_to_check = {"foul_committed_advantage", "foul_committed_card"}
    # Find which columns exist and which do not
    missing = columns_to_check - existing_columns
    if len(missing) == 0:
        foul_commited = events_df[(events_df.type == "Foul Committed")][
        ["id", "type", "player", "position","team","timestamp","minute", "foul_committed_advantage", "foul_committed_card", "x", "y"]
        ].set_index("id")
    elif len(missing) > 1:
        print("there was no commited advantage nor commited card in this match ")
        foul_commited = events_df[(events_df.type == "Foul Committed")][
            ["id", "type", "player","position","team","timestamp","minute", "x", "y"]]
        foul_commited["foul_committed_advantage_foul_commitedJ"] = None
        foul_commited["foul_committed_card_foul_commitedJ"] = None
        foul_commited = foul_commited.set_index("id")
    elif "foul_committed_card" in missing:
        print("there was no commited card in this match ")
        foul_commited = events_df[(events_df.type == "Foul Committed")][
            ["id", "type", "player","position","team","timestamp","minute", "foul_committed_advantage", "x", "y"]]
        foul_commited["foul_committed_card_foul_commitedJ"] = None
        foul_commited = foul_commited.set_index("id")
    elif "foul_committed_advantage" in missing:
        print("there was no commited advantage in this match ")
        foul_commited = events_df[(events_df.type == "Foul Committed")][
            ["id", "type", "player","position","team","timestamp","minute", "foul_committed_card", "x", "y"]]
        foul_commited["foul_committed_advantage_foul_commitedJ"] = None
        foul_commited = foul_commited.set_index("id")

    
    
    
    
    fouls = events_df[(events_df["type"]=="Foul Won")].explode("related_events")
    events_df = events_df[~((events_df["type"]=="Foul Won") | (events_df["type"]=="Foul Committed"))]
    fouls_M = fouls.set_index("related_events").join(foul_commited,how="left", rsuffix='_foul_commitedJ').reset_index()
    fouls_M = fouls_M[fouls_M["player_foul_commitedJ"].notna()]

    events_df.set_index("id",inplace=True)
    passes_M.set_index("id",inplace=True)
    fouls_M.set_index("id",inplace=True)
    
    events_df = pd.concat([events_df,passes_M,fouls_M])

    return events_df



In [8]:
passDF = pd.DataFrame()
foulsDF= pd.DataFrame()
eventsDF = pd.DataFrame()

# # Lig ve sezon bilgileri
# competitions_and_seasons = [
#     (11, 27, "2015/16 La Liga"),
#     (9, 27, "2015/16 Bundesliga"),
#     (2, 27, "2015/16 Premier League"),
#     (12, 27, "2015/16 Serie A"),
#     (7, 27, "2015/16 Ligue 1"),
#     (43, 106, "FIFA World Cup 22"),
#     (43, 3, "FIFA World Cup 18"),
#     (55, 282, "Euro 2024"),
#     (55, 43, "Euro 2020")
# ]




laliga = (11, 27, "2015/16 La Liga")
bundesliga = (9, 27, "2015/16 Bundesliga")
premierleague = (2, 27, "2015/16 Premier League")
seriea = (12, 27, "2015/16 Serie A")
ligue1 = (7, 27, "2015/16 Ligue 1")
worldcup22 = (43, 106, "FIFA World Cup 22")
worldcup18 = (43, 3, "FIFA World Cup 18")
euru24 = (55, 282, "Euro 2024")
euro20 = (55, 43, "Euro 2020")

In [17]:


#for competition, season, competitions_name in competitions_and_seasons:
    # Maçları al

competition, season, competitions_name = laliga
matches = sb.matches(competition_id=competition, season_id=season)

events_extra_info = matches[[
    "match_id",
    "match_date",
    "kick_off",
    "competition",
    "home_team",
    "away_team",
    "home_score",
    "away_score",
    "match_status",
    "match_week",
    "competition_stage",
    "referee",
    "home_managers",
    "away_managers",
]]

matchCount = matches.shape[0]
counter = 0


for match_ID in matches["match_id"].unique():
    counter += 1
    info = events_extra_info[events_extra_info["match_id"]==match_ID]
    print(f"{counter}/{matchCount} | league: {info['competition'].values[0]}, match id {match_ID}, {info['home_team'].values[0]}-{info['away_team'].values[0]}")
    #passData = pass_data_creation(events_extra_info,match_ID)
    #foulData = foul_data_creation(events_extra_info,match_ID)
    #passDF = pd.concat([passDF, passData], axis=0, ignore_index=True)
    #foulsDF = pd.concat([foulsDF, foulData], axis=0, ignore_index=True)

    events = event_data_creation(events_extra_info,match_ID)
    eventsDF = pd.concat([eventsDF,events], axis=0, ignore_index=True)
    

363/380 | league: Spain - La Liga, match id 266815, Sporting Gijón-Barcelona




364/380 | league: Spain - La Liga, match id 267506, Granada-Barcelona




365/380 | league: Spain - La Liga, match id 266664, Real Sociedad-Barcelona




366/380 | league: Spain - La Liga, match id 266106, Villarreal-Barcelona




367/380 | league: Spain - La Liga, match id 266670, Espanyol-Barcelona




368/380 | league: Spain - La Liga, match id 266149, Barcelona-Athletic Club




369/380 | league: Spain - La Liga, match id 266620, Barcelona-Granada




370/380 | league: Spain - La Liga, match id 266056, Eibar-Barcelona




371/380 | league: Spain - La Liga, match id 267327, Barcelona-Real Betis




372/380 | league: Spain - La Liga, match id 266166, Atlético Madrid-Barcelona




373/380 | league: Spain - La Liga, match id 266160, Levante UD-Barcelona




374/380 | league: Spain - La Liga, match id 265839, Barcelona-Sevilla




375/380 | league: Spain - La Liga, match id 266424, Real Madrid-Barcelona




376/380 | league: Spain - La Liga, match id 266467, Celta Vigo-Barcelona




377/380 | league: Spain - La Liga, match id 267273, Las Palmas-Barcelona




378/380 | league: Spain - La Liga, match id 266490, Barcelona-Levante UD




379/380 | league: Spain - La Liga, match id 266254, Barcelona-Celta Vigo




380/380 | league: Spain - La Liga, match id 266557, Barcelona-Valencia




there was no commited advantage in this match 


In [18]:
foulsDF=eventsDF[eventsDF["type"]== "Foul Won"]


columns = [
    "competition",
    "home_team",
    "away_team",
    "referee",
    "x",
    "y",
    "player",
    "position",
    "possession_team",
    "type_foul_commitedJ",
    "player_foul_commitedJ",
    "position_foul_commitedJ",
    "team_foul_commitedJ",
    "related_events",
    "foul_committed_advantage_foul_commitedJ",
    "x_foul_commitedJ",
    "y_foul_commitedJ",
    "foul_committed_card_foul_commitedJ",
]


foulsDF[columns].head(10)

Unnamed: 0,competition,home_team,away_team,referee,x,y,player,position,possession_team,type_foul_commitedJ,player_foul_commitedJ,position_foul_commitedJ,team_foul_commitedJ,related_events,foul_committed_advantage_foul_commitedJ,x_foul_commitedJ,y_foul_commitedJ,foul_committed_card_foul_commitedJ
2540,Spain - La Liga,Levante UD,Eibar,,6.3,44.2,Zouhair Feddal,Center Back,Eibar,Foul Committed,Borja González Tomás,Center Forward,Eibar,ff9a47b4-1a6d-44c7-ba3e-c51ee74831cc,,113.8,35.9,
2541,Spain - La Liga,Levante UD,Eibar,,57.8,67.3,Daniel García Carrillo,Left Defensive Midfield,Eibar,Foul Committed,Víctor Casadesús Castaño,Left Center Forward,Levante UD,ed724098-9d33-48a8-a7c2-2d742d2b05ca,,62.3,12.8,
2542,Spain - La Liga,Levante UD,Eibar,,40.5,32.3,David Juncà Reñé,Left Back,Eibar,Foul Committed,Nabil Ghilas,Right Center Forward,Levante UD,1d9c8d0e-aa35-4fcb-ad54-359f1cbf4ad6,,79.6,47.8,
2543,Spain - La Liga,Levante UD,Eibar,,54.2,54.5,Víctor Casadesús Castaño,Left Center Forward,Levante UD,Foul Committed,Gonzalo Escalante,Right Defensive Midfield,Eibar,e5962e06-bb4d-4008-9885-ed0e3db06c4b,,65.9,25.6,
2544,Spain - La Liga,Levante UD,Eibar,,43.7,56.0,José María Martín-Bejarano Serrano,Right Center Midfield,Levante UD,Foul Committed,Takashi Inui,Left Wing,Eibar,a1f1fdee-0358-4d0e-a328-4dc0be869f26,,76.4,24.1,
2545,Spain - La Liga,Levante UD,Eibar,,50.3,14.7,José María Martín-Bejarano Serrano,Right Center Midfield,Eibar,Foul Committed,Borja González Tomás,Center Forward,Eibar,972d8d75-eafa-463d-b9b0-5e86d14502b9,,69.8,65.4,
2546,Spain - La Liga,Levante UD,Eibar,,74.5,71.0,Sergio Gontán Gallardo,Right Wing,Eibar,Foul Committed,José María Martín-Bejarano Serrano,Right Center Midfield,Levante UD,c7c1d113-ea5f-4c55-bde7-135888ea55e6,,45.6,9.1,
2547,Spain - La Liga,Levante UD,Eibar,,67.2,24.8,Adrián González Morales,Center Attacking Midfield,Eibar,Foul Committed,José María Martín-Bejarano Serrano,Right Center Midfield,Levante UD,94bd2b4b-24b0-43e7-93e4-91a4cfb576da,,52.9,55.3,
2548,Spain - La Liga,Levante UD,Eibar,,68.3,74.6,José Luis Morales Nogales,Right Wing Back,Levante UD,Foul Committed,Takashi Inui,Left Wing,Eibar,3017cde7-ea96-4d70-8d00-b859d19a3c8a,,51.8,5.5,
2549,Spain - La Liga,Levante UD,Eibar,,51.6,41.4,José María Martín-Bejarano Serrano,Right Center Midfield,Levante UD,Foul Committed,Borja González Tomás,Center Forward,Eibar,0c70671c-b627-4735-aa92-7a9c7b883eae,,68.5,38.7,


In [19]:
passDF=eventsDF[eventsDF["type"]== "Pass"]

passDF[[
    "match_id",
    "competition",
    "home_team",
    "away_team",
    "referee",
    "player",
    "x",
    "y",
    "pass_end_x",
    "pass_end_y",
    "player_shotFromPass",
    "shot_outcome_shotFromPass",
    "shot_statsbomb_xg_shotFromPass",
    "pass_succes",
    "f3rd_pass",
    "pass_progression",
    "shot_assisted_pass",
    "goal_assisted_pass",
    "box_pass"
]][passDF["shot_outcome_shotFromPass"]=="Goal"]

Unnamed: 0,match_id,competition,home_team,away_team,referee,player,x,y,pass_end_x,pass_end_y,player_shotFromPass,shot_outcome_shotFromPass,shot_statsbomb_xg_shotFromPass,pass_succes,f3rd_pass,pass_progression,shot_assisted_pass,goal_assisted_pass,box_pass
1717,3825848,Spain - La Liga,Levante UD,Eibar,,Mauro Javier Dos Santos,116.2,24.6,115.1,39.5,Borja González Tomás,Goal,0.394774,True,False,1.1,True,False,False
2093,3825848,Spain - La Liga,Levante UD,Eibar,,Takashi Inui,114.8,26.0,108.5,35.6,Borja González Tomás,Goal,0.116855,True,False,6.3,True,False,False
2126,3825848,Spain - La Liga,Levante UD,Eibar,,Víctor Camarasa Ferrando,65.4,41.2,81.0,72.1,José Luis Morales Nogales,Goal,0.091734,True,True,-15.6,True,False,False
2537,3825848,Spain - La Liga,Levante UD,Eibar,,José Luis Morales Nogales,87.9,71.4,106.2,37.3,Deyverson Brum Silva Acosta,Goal,0.070940,True,False,-18.3,True,False,True
5062,3825895,Spain - La Liga,Las Palmas,Sevilla,Carlos del Cerro Grande,Sergio Ezequiel Araújo,93.4,32.1,104.9,51.5,Roque Mesa Quevedo,Goal,0.191248,True,False,-11.5,True,False,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1279268,266254,Spain - La Liga,Barcelona,Celta Vigo,,Lionel Andrés Messi Cuccittini,91.8,48.6,110.5,45.1,Luis Alberto Suárez Díaz,Goal,0.256660,True,False,-18.7,True,False,False
1279522,266254,Spain - La Liga,Barcelona,Celta Vigo,,Luis Alberto Suárez Díaz,85.3,20.7,104.7,38.1,Ivan Rakitić,Goal,0.318462,True,False,-19.4,True,False,False
1279565,266254,Spain - La Liga,Barcelona,Celta Vigo,,Luis Alberto Suárez Díaz,75.0,65.0,100.5,27.4,Neymar da Silva Santos Junior,Goal,0.184366,True,True,-25.5,True,False,False
1282905,266557,Spain - La Liga,Barcelona,Valencia,David Fernández Borbalan,Jordi Alba Ramos,110.6,22.8,109.3,37.1,Lionel Andrés Messi Cuccittini,Goal,0.101879,True,False,1.3,True,False,False


In [20]:
eventsDF["test"] = 1

In [6]:

# Connect to a DuckDB database (or create one if it doesn't exist)
# Use ':memory:' for an in-memory database or specify a filename for a persistent database
con = duckdb.connect("example.db")



In [22]:
# Write the Pandas DataFrame to the DuckDB database
# This creates a table named 'my_table'
con.execute("CREATE TABLE events AS SELECT * FROM eventsDF")




<duckdb.duckdb.DuckDBPyConnection at 0x3fef62670>

In [None]:
# You can also use the `from_df` function for direct insertion:
# con.from_df(df, "my_table")

# Verify the data was written to the database
result = con.execute("SELECT * FROM events").fetchall()
print(result)

# Close the connection

In [3]:
data = con.execute("SELECT * FROM events").df()
data.tail(5)

Unnamed: 0,player,possession,match_id,bad_behaviour_card,ball_receipt_outcome,ball_recovery_recovery_failure,block_deflection,carry_end_location,clearance_aerial_won,clearance_body_part,...,goalkeeper_shot_saved_off_target,shot_saved_off_target,shot_redirect,goalkeeper_lost_out,goalkeeper_lost_in_play,goalkeeper_shot_saved_to_post,shot_saved_to_post,shot_follows_dribble,goalkeeper_success_out,half_end_early_video_end
1283213,Jordi Alba Ramos,147,266557,,,,,,,,...,,,,,,,,,,
1283214,Rodrigo Moreno Machado,155,266557,,,,,,,,...,,,,,,,,,,
1283215,Sergio Busquets i Burgos,158,266557,,,,,,,,...,,,,,,,,,,
1283216,João Pedro Cavaco Cancelo,169,266557,,,,,,,,...,,,,,,,,,,
1283217,Neymar da Silva Santos Junior,177,266557,,,,,,,,...,,,,,,,,,,


In [4]:
data.shape

(1283218, 164)

In [21]:
con.execute("DROP TABLE events")


<duckdb.duckdb.DuckDBPyConnection at 0x3fef62670>

In [5]:
# Close the connection
con.close()

