# Playground for Exploring the Data

### Import packages

In [1]:
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from collections import OrderedDict
from datetime import datetime

import pandas as pd
import numpy as np
import random
import json
import glob
import os

from utils import load_processed_frames
from settings import *

### Global variables

In [None]:
n_matches = 10

## Load files

In [2]:
# Load every frames_df to a list
frames_dfs = load_processed_frames(n_matches=n_matches)

### Start Playing Around

### Store as xlsx

In [None]:
# Store frames_df as xslx
frames_df_head = frames_df.head(19979)

# Specify the file path for the Excel file
excel_file_path = f"{DATA_LOCAL_FOLDER}/Brommapojkarna_vs_Sirius.xlsx"

# Write the DataFrame to an Excel file
frames_df_head.to_excel(excel_file_path, index=False)

print(f"DataFrame saved to {excel_file_path}")

### Extract all unique player names

In [3]:
# Initialize a set to store unique player names along with their teams
player_names = set()

# Iterate through each game DataFrame
for frames_df in frames_dfs:
    # Extract unique player names and their teams
    players = frames_df[['player', 'team_name']].drop_duplicates()
    
    # Update the set of unique player names
    player_names.update(zip(players['player'], players['team_name']))

# Convert to a DataFrame
players_df = pd.DataFrame(list(player_names), columns=['Player', 'Team'])

# Sort values
players_df = players_df.sort_values(by=['Player', 'Team'], ascending=[True, True])

# Store as xlsx
players_df.to_excel(f"{DATA_LOCAL_FOLDER}/data/players/Players_2023.xlsx", index=False)

players_df

Unnamed: 0,Player,Team
114,Abdelkarim Mammar Chaouche,Degerfors IF
176,Abdelrahman Boudah Saidi,Hammarby
411,Abdelrahman Saidi,Hammarby
96,Abdihakin Ali,AIK
145,Abdussalam Magashy,AIK
...,...,...
407,Yassine El Ouatki,Varbergs BoIS FC
390,Zachary Elbouzedi,AIK
243,Zeidane Inoussa,IF Brommapojkarna
165,ball,ball


## Only used buildup

In [4]:
# Read the file as a DataFrame
build_up_events_df = pd.read_csv(f"{DATA_LOCAL_FOLDER}/data/buildup_events_2023.csv")
build_up_df = pd.read_csv(f"{DATA_LOCAL_FOLDER}/data/buildup_synced_2023.csv")

  build_up_df = pd.read_csv(f"{DATA_LOCAL_FOLDER}/data/buildup_synced_2023.csv")


In [5]:
# build_up_events_df.iloc[0:20]
# for column in build_up_events_df.columns:
#     print(column)
build_up_ev_ef = build_up_events_df.copy()

build_up_ev_ef = build_up_ev_ef[build_up_ev_ef['first_event']]
build_up_ev_ef['possession_duration']  = (np.floor(build_up_ev_ef['possession_duration'])).astype(int)
build_up_ev_ef['match_time_event_start'] = build_up_ev_ef['match_time']
build_up_ev_ef['match_time_event_end'] = build_up_ev_ef['match_time'] + build_up_ev_ef['possession_duration']
build_up_ev_ef[['match_id', 'minute', 'second', 'match_time_event_start', 'match_time_event_end','possession_duration']]

Unnamed: 0,match_id,minute,second,match_time_event_start,match_time_event_end,possession_duration
0,5420892,23,56,1436,1443,7
4,5420892,44,11,2651,2742,91
36,5420892,49,39,2979,3002,23
47,5420892,73,47,4427,4437,10
55,5420892,79,1,4741,4779,38
...,...,...,...,...,...,...
44399,5420663,80,59,4859,4867,8
44500,5420663,17,3,1023,1039,16
44507,5420663,22,10,1330,1343,13
44549,5420663,74,28,4468,4502,34


In [None]:
# Group by 'match_id' and sum 'possession_duration' for each group
match_possession_duration = build_up_ev_ef.groupby('match_id')['possession_duration'].sum().reset_index()

# Calculate the average possession duration
average_possession_duration = match_possession_duration['possession_duration'].mean()

# Display the average possession duration
print("Average Possession Duration:", average_possession_duration)

In [7]:
for column in build_up_events_df.columns:
    print(column)

Unnamed: 0
index
id
player_id
team_id
minute
second
period
start_x
start_y
end_x
end_y
cutback
switch
successful
video_start
possession_team_id
possession_id
time_diff
pass_accurate
shot_xg
shot_xgot
pass_length
possession_duration
first_event
event_index
possession_events
type
shot_goal
shot_body_part
shot_on_target
shot_on_post
shot_off_target
shot_blocked
pass_high
pass_low
pass_blocked
pass_recipient_id
possession_xg
possession_goal
player_position
xA
xT_start
xT_end
take_on
stopped_progress
recovered_possession
retain_possession
kept_possession
progressed_with_ball
aerial_won
match_state
team_win_state
possession_state
xg_after_5s
shot_id
pass
back_pass
short_or_medium_pass
forward_pass
loss
pass_to_final_third
progressive_pass
recovery
smart_pass
offside
free_kick
lateral_pass
acceleration
progressive_run
carry
cross
pass_to_penalty_area
interception
game_interruption
ball_out
corner
head_pass
shot
head_shot
opportunity
shot_after_corner
touch_in_box
goal_kick
long_pass
under_pre