# JM0250 Data Visualization 
### Academic year 2022-2023

## FIFA World Cup 2022 Data Exploration
Data sources:

- FIFA World Cup 2022 Player Data (https://www.kaggle.com/datasets/swaptr/fifa-world-cup-2022-player-data)
- FIFA World Cup 2022 Match Data (https://www.kaggle.com/datasets/swaptr/fifa-world-cup-2022-match-data)
- FIFA World Cup 2022 Team Data (https://www.kaggle.com/datasets/swaptr/fifa-world-cup-2022-statistics)
- FIFA World Cup 2022 Twitter Dataset (https://www.kaggle.com/datasets/kumari2000/fifa-world-cup-twitter-dataset-2022)
- FIFA World Cup 2022 Prediction (https://www.kaggle.com/datasets/shilongzhuang/soccer-world-cup-challenge)
- FIFA World Cup 2022 Player Images (https://www.kaggle.com/datasets/soumendraprasad/fifa-2022-all-players-image-dataset)
- FIFA World Cup Historic (https://www.kaggle.com/datasets/piterfm/fifa-football-world-cup)
- FIFA World Cup Penalty Shootouts (https://www.kaggle.com/datasets/pablollanderos33/world-cup-penalty-shootouts, https://www.kaggle.com/datasets/jandimovski/world-cup-penalty-shootouts-2022)

Data dictionaries and additional info can be found in the respective data folders.

In [1]:
# Import libraries
import pandas as pd
import plotly.express as px
import matplotlib.pyplot as plt
import numpy as np
from PIL import Image
import os
from functools import reduce


# Do not truncate tables
pd.set_option('display.max_columns', None)

In [2]:
# Load the data

# Match data
df_match_data = pd.read_csv('../Data/FIFA World Cup 2022 Match Data/data.csv', delimiter=',')

# Player data
df_player_defense       = pd.read_csv('../Data/FIFA World Cup 2022 Player Data/player_defense.csv', delimiter=',')
df_player_gca           = pd.read_csv('../Data/FIFA World Cup 2022 Player Data/player_gca.csv', delimiter=',')
df_player_keepers       = pd.read_csv('../Data/FIFA World Cup 2022 Player Data/player_keepers.csv', delimiter=',')
df_player_keepersadv    = pd.read_csv('../Data/FIFA World Cup 2022 Player Data/player_keepersadv.csv', delimiter=',')
df_player_misc          = pd.read_csv('../Data/FIFA World Cup 2022 Player Data/player_misc.csv', delimiter=',')
df_player_passing       = pd.read_csv('../Data/FIFA World Cup 2022 Player Data/player_passing.csv', delimiter=',')
df_player_passing_types = pd.read_csv('../Data/FIFA World Cup 2022 Player Data/player_passing_types.csv', delimiter=',')
df_player_playingtime   = pd.read_csv('../Data/FIFA World Cup 2022 Player Data/player_playingtime.csv', delimiter=',')
df_player_possession    = pd.read_csv('../Data/FIFA World Cup 2022 Player Data/player_possession.csv', delimiter=',')
df_player_shooting      = pd.read_csv('../Data/FIFA World Cup 2022 Player Data/player_shooting.csv', delimiter=',')
df_player_stats         = pd.read_csv('../Data/FIFA World Cup 2022 Player Data/player_stats.csv', delimiter=',')

# Team data
df_team_data        = pd.read_csv('../Data/FIFA World Cup 2022 Team Data/team_data.csv', delimiter=',')
df_team_group_stats = pd.read_csv('../Data/FIFA World Cup 2022 Team Data/group_stats.csv', delimiter=',')

# Historic data
df_historic_fifa_ranking      = pd.read_csv('../Data/FIFA World Cup Historic/fifa_ranking_2022-10-06.csv', delimiter=',')
df_historic_matches_1930_2022 = pd.read_csv('../Data/FIFA World Cup Historic/matches_1930_2022.csv', delimiter=',')
df_historic_world_cup         = pd.read_csv('../Data/FIFA World Cup Historic/world_cup.csv', delimiter=',')

# Penalty shootouts
df_penalty_shootouts = pd.read_csv('../Data/FIFA World Cup Penalty Shootouts/WorldCupShootouts.csv', delimiter=',')

# Twitter data
df_tweets_01 = pd.read_csv('../Data/FIFA World Cup 2022 Twitter Dataset/tweets1.csv', delimiter=';')
df_tweets_02 = pd.read_csv('../Data/FIFA World Cup 2022 Twitter Dataset/tweets2.csv', delimiter=';')
df_tweets = pd.concat([df_tweets_01, df_tweets_02])

# Prediction data
df_prediction_groups  = pd.read_csv('../Data/FIFA World Cup 2022 Prediction/2022_world_cup_groups.csv', delimiter=',')
df_prediction_matches = pd.read_csv('../Data/FIFA World Cup 2022 Prediction/2022_world_cup_matches.csv', delimiter=',')
df_prediction_international_matches = pd.read_csv('../Data/FIFA World Cup 2022 Prediction/international_matches.csv', delimiter=',')
df_prediction_world_cup_matches = pd.read_csv('../Data/FIFA World Cup 2022 Prediction/world_cup_matches.csv', delimiter=',')
df_prediction_world_cups = pd.read_csv('../Data/FIFA World Cup 2022 Prediction/world_cups.csv', delimiter=',')

# Player images
def list_full_paths(directory):
    return [os.path.join(directory, file) for file in os.listdir(directory)]

def img_reshape(img):
    img = Image.open(img).convert('RGB')
    img = img.resize((300,300))
    img = np.asarray(img)
    return img

def showImages(group, land, player):
    images  = list_full_paths('../Data/FIFA World Cup 2022 Player Images/Images/Images/Group ' + group + '/' + land + ' Players/Images_' + player)
    img_arr = []
    
    for image in images:
        img_arr.append(img_reshape(image))
        
    rows = 5
    cols = 5
    img_count = 0
    fig, axes = plt.subplots(nrows=rows, ncols=cols, figsize=((5,5)))
                             
    for i in range(rows):
        for j in range(cols):
            if img_count < len(img_arr):
                axes[i,j].imshow(img_arr[img_count])
                axes[i,j].axis('off')
                img_count+=1
                
    plt.subplots_adjust(wspace=0, hspace=0)
    

In [7]:
dataframes = [df_player_misc, df_player_passing, df_player_passing_types, df_player_playingtime, df_player_possession, df_player_shooting, df_player_stats]

In [36]:
df_player_stats_combined = pd.merge(df_player_gca, df_player_defense, on=['player'])

In [6]:
def merge_dataframes(dfs):
    df_result = pd.merge(df_player_defense, df_player_gca, on=['player'])
    for df in dfs:
        print(list(df.columns))
        df_result = pd.merge(df_result, df, on=['player'])
        df_result.columns = df_result.columns.str.removesuffix("_x")
        # df_result = df_result.rename(columns={'position_x' : 'position', 'age_x' : 'age', 'birth_year_x' : 'birth_year', 'team_x' : 'team', 'minutes_90s_x' : 'minutes_90s'})
    return df_result

In [8]:
df_player_stats_combined = merge_dataframes(dataframes)

['player', 'position', 'team', 'age', 'birth_year', 'minutes_90s', 'cards_yellow', 'cards_red', 'cards_yellow_red', 'fouls', 'fouled', 'offsides', 'crosses', 'interceptions', 'tackles_won', 'pens_won', 'pens_conceded', 'own_goals', 'ball_recoveries', 'aerials_won', 'aerials_lost', 'aerials_won_pct']
['player', 'position', 'team', 'age', 'birth_year', 'minutes_90s', 'passes_completed', 'passes', 'passes_pct', 'passes_total_distance', 'passes_progressive_distance', 'passes_completed_short', 'passes_short', 'passes_pct_short', 'passes_completed_medium', 'passes_medium', 'passes_pct_medium', 'passes_completed_long', 'passes_long', 'passes_pct_long', 'assists', 'xg_assist', 'pass_xa', 'xg_assist_net', 'assisted_shots', 'passes_into_final_third', 'passes_into_penalty_area', 'crosses_into_penalty_area', 'progressive_passes']
['player', 'position', 'team', 'age', 'birth_year', 'minutes_90s', 'passes', 'passes_live', 'passes_dead', 'passes_free_kicks', 'through_balls', 'passes_switches', 'cross

In [9]:
df_player_stats_combined.head()

Unnamed: 0,player,position,team,age,birth_year,minutes_90s,tackles,tackles_won,tackles_def_3rd,tackles_mid_3rd,tackles_att_3rd,dribble_tackles,dribbles_vs,dribble_tackles_pct,dribbled_past,blocks,blocked_shots,blocked_passes,interceptions,tackles_interceptions,clearances,errors,position_y,team_y,age_y,birth_year_y,minutes_90s_y,sca,sca_per90,sca_passes_live,sca_passes_dead,sca_dribbles,sca_shots,sca_fouled,sca_defense,gca,gca_per90,gca_passes_live,gca_passes_dead,gca_dribbles,gca_shots,gca_fouled,gca_defense,position.1,team.1,age.1,birth_year.1,minutes_90s.1,cards_yellow,cards_red,cards_yellow_red,fouls,fouled,offsides,crosses,interceptions_y,tackles_won_y,pens_won,pens_conceded,own_goals,ball_recoveries,aerials_won,aerials_lost,aerials_won_pct,position_y.1,team_y.1,age_y.1,birth_year_y.1,minutes_90s_y.1,passes_completed,passes,passes_pct,passes_total_distance,passes_progressive_distance,passes_completed_short,passes_short,passes_pct_short,passes_completed_medium,passes_medium,passes_pct_medium,passes_completed_long,passes_long,passes_pct_long,assists,xg_assist,pass_xa,xg_assist_net,assisted_shots,passes_into_final_third,passes_into_penalty_area,crosses_into_penalty_area,progressive_passes,position_y.2,team_y.2,age_y.2,birth_year_y.2,minutes_90s_y.2,passes_y,passes_live,passes_dead,passes_free_kicks,through_balls,passes_switches,crosses_y,throw_ins,corner_kicks,corner_kicks_in,corner_kicks_out,corner_kicks_straight,passes_completed_y,passes_offsides,passes_blocked,position_y.3,team_y.3,age_y.3,birth_year_y.3,games,minutes,minutes_per_game,minutes_pct,minutes_90s_y.3,games_starts,minutes_per_start,games_complete,games_subs,minutes_per_sub,unused_subs,points_per_game,on_goals_for,on_goals_against,plus_minus,plus_minus_per90,plus_minus_wowy,on_xg_for,on_xg_against,xg_plus_minus,xg_plus_minus_per90,xg_plus_minus_wowy,position_y.4,team_y.4,age_y.4,birth_year_y.4,minutes_90s_y.4,touches,touches_def_pen_area,touches_def_3rd,touches_mid_3rd,touches_att_3rd,touches_att_pen_area,touches_live_ball,dribbles_completed,dribbles,dribbles_completed_pct,miscontrols,dispossessed,passes_received,progressive_passes_received,position_y.5,team_y.5,age_y.5,birth_year_y.5,minutes_90s_y.5,goals,shots,shots_on_target,shots_on_target_pct,shots_per90,shots_on_target_per90,goals_per_shot,goals_per_shot_on_target,average_shot_distance,shots_free_kicks,pens_made,pens_att,xg,npxg,npxg_per_shot,xg_net,npxg_net,position_y.6,team_y.6,age_y.6,club,birth_year_y.6,games_y,games_starts_y,minutes_y,minutes_90s_y.6,goals_y,assists_y,goals_pens,pens_made_y,pens_att_y,cards_yellow_y,cards_red_y,goals_per90,assists_per90,goals_assists_per90,goals_pens_per90,goals_assists_pens_per90,xg_y,npxg_y,xg_assist_y,npxg_xg_assist,xg_per90,xg_assist_per90,xg_xg_assist_per90,npxg_per90,npxg_xg_assist_per90
0,Aaron Mooy,MF,Australia,32-094,1990,4.0,9.0,6,4.0,4.0,1.0,6.0,8.0,75.0,2.0,6.0,4.0,2.0,3,12.0,9.0,0.0,MF,Australia,32-094,1990,4.0,5.0,1.25,2.0,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,MF,Australia,32-094,1990,4.0,1,0,0,4,1,0,10,3,6,0.0,0.0,0,35.0,2.0,3.0,40.0,MF,Australia,32-094,1990,4.0,170.0,217.0,78.3,2819.0,951.0,78.0,90.0,86.7,59.0,73.0,80.8,18.0,31.0,58.1,0,0.1,0.1,-0.1,1.0,22.0,1.0,0.0,14.0,MF,Australia,32-094,1990,4.0,217.0,206.0,11.0,8.0,0.0,4.0,10,0.0,3.0,0.0,3.0,0.0,170.0,0.0,4.0,MF,Australia,32-094,1990,4,360.0,90.0,100.0,4.0,4,90.0,4,0,,0,1.5,4.0,6.0,-2.0,-0.5,,2.3,7.2,-4.9,-1.23,,MF,Australia,32-094,1990,4.0,255.0,14.0,62.0,168.0,26.0,0.0,255.0,2.0,3.0,66.7,5.0,4.0,152.0,1.0,MF,Australia,32-094,1990,4.0,0,1,0,0.0,0.25,0.0,0.0,,29.1,0.0,0,0,0.0,0.0,0.05,0.0,0.0,MF,Australia,32-094,Celtic,1990,4,4,360,4.0,0,0,0,0,0,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.1,0.01,0.02,0.03,0.01,0.03
1,Aaron Ramsey,MF,Wales,31-357,1990,3.0,2.0,0,0.0,2.0,0.0,0.0,2.0,0.0,2.0,4.0,1.0,3.0,0,2.0,2.0,1.0,MF,Wales,31-357,1990,3.0,3.0,1.02,2.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,MF,Wales,31-357,1990,3.0,1,0,0,3,3,0,5,0,0,0.0,0.0,0,19.0,0.0,2.0,0.0,MF,Wales,31-357,1990,3.0,88.0,112.0,78.6,1796.0,339.0,37.0,41.0,90.2,27.0,35.0,77.1,18.0,23.0,78.3,0,0.0,0.1,0.0,1.0,7.0,1.0,0.0,5.0,MF,Wales,31-357,1990,3.0,112.0,101.0,10.0,3.0,0.0,3.0,5,0.0,1.0,0.0,1.0,0.0,88.0,1.0,3.0,MF,Wales,31-357,1990,3,266.0,89.0,98.5,3.0,3,89.0,2,0,,0,0.33,1.0,4.0,-3.0,-1.02,43.98,2.6,4.1,-1.5,-0.49,9.06,MF,Wales,31-357,1990,3.0,147.0,6.0,32.0,73.0,42.0,5.0,147.0,2.0,8.0,25.0,9.0,4.0,98.0,7.0,MF,Wales,31-357,1990,3.0,0,1,0,0.0,0.34,0.0,0.0,,18.0,0.0,0,0,0.0,0.0,0.04,0.0,0.0,MF,Wales,31-357,Nice,1990,3,3,266,3.0,0,0,0,0,0,1,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.01,0.01,0.02,0.01,0.02
2,Abdelhamid Sabiri,MF,Morocco,26-020,1996,2.0,3.0,1,1.0,2.0,0.0,2.0,6.0,33.3,4.0,3.0,1.0,2.0,5,8.0,3.0,0.0,MF,Morocco,26-020,1996,2.0,4.0,2.4,2.0,1.0,0.0,1.0,0.0,0.0,1.0,0.6,0.0,1.0,0.0,0.0,0.0,0.0,MF,Morocco,26-020,1996,2.0,1,0,0,2,3,0,1,5,1,0.0,0.0,0,7.0,2.0,2.0,50.0,MF,Morocco,26-020,1996,2.0,45.0,58.0,77.6,690.0,122.0,30.0,33.0,90.9,7.0,13.0,53.8,7.0,10.0,70.0,1,0.9,0.1,0.1,3.0,3.0,0.0,0.0,0.0,MF,Morocco,26-020,1996,2.0,58.0,55.0,3.0,2.0,0.0,2.0,1,0.0,1.0,0.0,0.0,0.0,45.0,0.0,1.0,MF,Morocco,26-020,1996,5,181.0,36.0,27.4,2.0,2,55.0,0,3,24.0,2,1.6,5.0,3.0,2.0,0.99,1.18,3.1,1.6,1.5,0.9,1.3,MF,Morocco,26-020,1996,2.0,86.0,5.0,23.0,50.0,13.0,1.0,86.0,0.0,3.0,0.0,0.0,3.0,54.0,0.0,MF,Morocco,26-020,1996,2.0,0,3,0,0.0,1.49,0.0,0.0,,34.2,1.0,0,0,0.1,0.1,0.04,-0.1,-0.1,MF,Morocco,26-020,Sampdoria,1996,5,2,181,2.0,0,1,0,0,0,1,0,0.0,0.5,0.5,0.0,0.5,0.1,0.1,0.9,1.0,0.08,0.53,0.6,0.08,0.6
3,Abdelkarim Hassan,DF,Qatar,29-112,1993,3.0,7.0,3,5.0,2.0,0.0,3.0,4.0,75.0,1.0,3.0,1.0,2.0,1,8.0,5.0,0.0,DF,Qatar,29-112,1993,3.0,4.0,1.33,4.0,0.0,0.0,0.0,0.0,0.0,1.0,0.33,1.0,0.0,0.0,0.0,0.0,0.0,DF,Qatar,29-112,1993,3.0,0,0,0,1,4,1,6,1,3,0.0,0.0,0,11.0,3.0,1.0,75.0,DF,Qatar,29-112,1993,3.0,122.0,161.0,75.8,2462.0,813.0,57.0,63.0,90.5,44.0,51.0,86.3,20.0,37.0,54.1,0,0.0,0.1,0.0,1.0,13.0,1.0,0.0,8.0,DF,Qatar,29-112,1993,3.0,161.0,148.0,13.0,11.0,0.0,10.0,6,2.0,0.0,0.0,0.0,0.0,122.0,0.0,8.0,DF,Qatar,29-112,1993,3,270.0,90.0,100.0,3.0,3,90.0,3,0,,0,0.0,1.0,7.0,-6.0,-2.0,,1.4,3.5,-2.1,-0.71,,DF,Qatar,29-112,1993,3.0,193.0,15.0,83.0,95.0,17.0,2.0,193.0,1.0,5.0,20.0,2.0,0.0,138.0,1.0,DF,Qatar,29-112,1993,3.0,0,6,0,0.0,2.0,0.0,0.0,,23.6,0.0,0,0,0.3,0.3,0.05,-0.3,-0.3,DF,Qatar,29-112,Al Sadd SC,1993,3,3,270,3.0,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.3,0.3,0.0,0.3,0.1,0.01,0.11,0.1,0.11
4,Abderrazak Hamdallah,FW,Morocco,32-001,1990,0.8,0.0,0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0,0.0,2.0,0.0,FW,Morocco,32-001,1990,0.8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,FW,Morocco,32-001,1990,0.8,0,0,0,3,2,0,0,0,0,0.0,0.0,0,4.0,1.0,7.0,12.5,FW,Morocco,32-001,1990,0.8,8.0,15.0,53.3,64.0,11.0,7.0,9.0,77.8,0.0,4.0,0.0,0.0,0.0,,0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,FW,Morocco,32-001,1990,0.8,15.0,14.0,1.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,8.0,0.0,1.0,FW,Morocco,32-001,1990,4,68.0,17.0,10.3,0.8,0,,0,4,17.0,3,1.75,2.0,1.0,1.0,1.32,1.32,1.9,1.0,0.9,1.16,1.42,FW,Morocco,32-001,1990,0.8,28.0,2.0,2.0,15.0,12.0,5.0,28.0,2.0,3.0,66.7,4.0,3.0,18.0,3.0,FW,Morocco,32-001,1990,0.8,0,2,1,50.0,2.65,1.32,0.0,0.0,7.8,0.0,0,0,0.4,0.4,0.2,-0.4,-0.4,FW,Morocco,32-001,Al-Ittihad,1990,4,0,68,0.8,0,0,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0,0.4,0.4,0.0,0.4,0.52,0.0,0.52,0.52,0.52


In [94]:
df_player_stats_combined.loc[df_player_stats_combined['player']=="Harry Kane"]

Unnamed: 0,player,position,team,age,birth_year,minutes_90s,tackles,tackles_won,tackles_def_3rd,tackles_mid_3rd,tackles_att_3rd,dribble_tackles,dribbles_vs,dribble_tackles_pct,dribbled_past,blocks,blocked_shots,blocked_passes,interceptions,tackles_interceptions,clearances,errors,position_y,team_y,age_y,birth_year_y,minutes_90s_y,sca,sca_per90,sca_passes_live,sca_passes_dead,sca_dribbles,sca_shots,sca_fouled,sca_defense,gca,gca_per90,gca_passes_live,gca_passes_dead,gca_dribbles,gca_shots,gca_fouled,gca_defense,position.1,team.1,age.1,birth_year.1,minutes_90s.1,cards_yellow,cards_red,cards_yellow_red,fouls,fouled,offsides,crosses,interceptions_y,tackles_won_y,pens_won,pens_conceded,own_goals,ball_recoveries,aerials_won,aerials_lost,aerials_won_pct,position_y.1,team_y.1,age_y.1,birth_year_y.1,minutes_90s_y.1,passes_completed,passes,passes_pct,passes_total_distance,passes_progressive_distance,passes_completed_short,passes_short,passes_pct_short,passes_completed_medium,passes_medium,passes_pct_medium,passes_completed_long,passes_long,passes_pct_long,assists,xg_assist,pass_xa,xg_assist_net,assisted_shots,passes_into_final_third,passes_into_penalty_area,crosses_into_penalty_area,progressive_passes,position_y.2,team_y.2,age_y.2,birth_year_y.2,minutes_90s_y.2,passes_y,passes_live,passes_dead,passes_free_kicks,through_balls,passes_switches,crosses_y,throw_ins,corner_kicks,corner_kicks_in,corner_kicks_out,corner_kicks_straight,passes_completed_y,passes_offsides,passes_blocked,position_y.3,team_y.3,age_y.3,birth_year_y.3,games,minutes,minutes_per_game,minutes_pct,minutes_90s_y.3,games_starts,minutes_per_start,games_complete,games_subs,minutes_per_sub,unused_subs,points_per_game,on_goals_for,on_goals_against,plus_minus,plus_minus_per90,plus_minus_wowy,on_xg_for,on_xg_against,xg_plus_minus,xg_plus_minus_per90,xg_plus_minus_wowy,position_y.4,team_y.4,age_y.4,birth_year_y.4,minutes_90s_y.4,touches,touches_def_pen_area,touches_def_3rd,touches_mid_3rd,touches_att_3rd,touches_att_pen_area,touches_live_ball,dribbles_completed,dribbles,dribbles_completed_pct,miscontrols,dispossessed,passes_received,progressive_passes_received,position_y.5,team_y.5,age_y.5,birth_year_y.5,minutes_90s_y.5,goals,shots,shots_on_target,shots_on_target_pct,shots_per90,shots_on_target_per90,goals_per_shot,goals_per_shot_on_target,average_shot_distance,shots_free_kicks,pens_made,pens_att,xg,npxg,npxg_per_shot,xg_net,npxg_net,position_y.6,team_y.6,age_y.6,club,birth_year_y.6,games_y,games_starts_y,minutes_y,minutes_90s_y.6,goals_y,assists_y,goals_pens,pens_made_y,pens_att_y,cards_yellow_y,cards_red_y,goals_per90,assists_per90,goals_assists_per90,goals_pens_per90,goals_assists_pens_per90,xg_y,npxg_y,xg_assist_y,npxg_xg_assist,xg_per90,xg_assist_per90,xg_xg_assist_per90,npxg_per90,npxg_xg_assist_per90
240,Harry Kane,FW,England,29-143,1993,4.5,2.0,1,1.0,1.0,0.0,1.0,1.0,100.0,0.0,2.0,1.0,1.0,0,2.0,5.0,0.0,FW,England,29-143,1993,4.5,10.0,2.24,5.0,0.0,1.0,3.0,1.0,0.0,3.0,0.67,3.0,0.0,0.0,0.0,0.0,0.0,FW,England,29-143,1993,4.5,0,0,0,1,9,0,6,0,1,0.0,0.0,0,16.0,9.0,13.0,40.9,FW,England,29-143,1993,4.5,62.0,98.0,63.3,1027.0,390.0,33.0,43.0,76.7,24.0,36.0,66.7,5.0,7.0,71.4,3,1.5,0.7,1.5,5.0,12.0,5.0,1.0,16.0,FW,England,29-143,1993,4.5,98.0,94.0,3.0,0.0,2.0,1.0,6,1.0,0.0,0.0,0.0,0.0,62.0,1.0,5.0,FW,England,29-143,1993,5,402.0,80.0,89.3,4.5,5,80.0,3,0,,0,2.0,11.0,3.0,8.0,1.79,-0.08,6.8,2.8,3.9,0.88,-0.4,FW,England,29-143,1993,4.5,138.0,7.0,8.0,61.0,71.0,13.0,136.0,4.0,8.0,50.0,10.0,5.0,110.0,22.0,FW,England,29-143,1993,4.5,2,10,5,50.0,2.24,1.12,0.1,0.2,20.4,0.0,1,2,2.7,1.1,0.11,-0.7,-0.1,FW,England,29-143,Tottenham,1993,5,5,402,4.5,2,3,1,1,2,0,0,0.45,0.67,1.12,0.22,0.9,2.7,1.1,1.5,2.6,0.6,0.34,0.94,0.25,0.59


In [18]:
df_player_stats_combined = df_player_stats_combined.loc[:,~df_player_stats_combined.columns.str.contains('^_y', case=False)] 
df_player_stats_combined['age'] = df_player_stats_combined['age'].apply(lambda x: int(x.split('-')[0].strip()))
df_player_stats_combined = df_player_stats_combined.loc[:,~df_player_stats_combined.columns.duplicated()].copy()
df_player_stats_combined.to_csv('../Data/FIFA World Cup 2022 Player Data/stats_combined.csv', index=False)

In [108]:
df_player_stats_combined.loc[df_player_stats_combined['age'] > 32]

Unnamed: 0,player,position,team,age,birth_year,minutes_90s,tackles,tackles_won,tackles_def_3rd,tackles_mid_3rd,tackles_att_3rd,dribble_tackles,dribbles_vs,dribble_tackles_pct,dribbled_past,blocks,blocked_shots,blocked_passes,interceptions,tackles_interceptions,clearances,errors,position_y,team_y,age_y,birth_year_y,minutes_90s_y,sca,sca_per90,sca_passes_live,sca_passes_dead,sca_dribbles,sca_shots,sca_fouled,sca_defense,gca,gca_per90,gca_passes_live,gca_passes_dead,gca_dribbles,gca_shots,gca_fouled,gca_defense,cards_yellow,cards_red,cards_yellow_red,fouls,fouled,offsides,crosses,interceptions_y,tackles_won_y,pens_won,pens_conceded,own_goals,ball_recoveries,aerials_won,aerials_lost,aerials_won_pct,passes_completed,passes,passes_pct,passes_total_distance,passes_progressive_distance,passes_completed_short,passes_short,passes_pct_short,passes_completed_medium,passes_medium,passes_pct_medium,passes_completed_long,passes_long,passes_pct_long,assists,xg_assist,pass_xa,xg_assist_net,assisted_shots,passes_into_final_third,passes_into_penalty_area,crosses_into_penalty_area,progressive_passes,passes_y,passes_live,passes_dead,passes_free_kicks,through_balls,passes_switches,crosses_y,throw_ins,corner_kicks,corner_kicks_in,corner_kicks_out,corner_kicks_straight,passes_completed_y,passes_offsides,passes_blocked,games,minutes,minutes_per_game,minutes_pct,games_starts,minutes_per_start,games_complete,games_subs,minutes_per_sub,unused_subs,points_per_game,on_goals_for,on_goals_against,plus_minus,plus_minus_per90,plus_minus_wowy,on_xg_for,on_xg_against,xg_plus_minus,xg_plus_minus_per90,xg_plus_minus_wowy,touches,touches_def_pen_area,touches_def_3rd,touches_mid_3rd,touches_att_3rd,touches_att_pen_area,touches_live_ball,dribbles_completed,dribbles,dribbles_completed_pct,miscontrols,dispossessed,passes_received,progressive_passes_received,goals,shots,shots_on_target,shots_on_target_pct,shots_per90,shots_on_target_per90,goals_per_shot,goals_per_shot_on_target,average_shot_distance,shots_free_kicks,pens_made,pens_att,xg,npxg,npxg_per_shot,xg_net,npxg_net,club,games_y,games_starts_y,minutes_y,goals_y,assists_y,goals_pens,pens_made_y,pens_att_y,cards_yellow_y,cards_red_y,goals_per90,assists_per90,goals_assists_per90,goals_pens_per90,goals_assists_pens_per90,xg_y,npxg_y,xg_assist_y,npxg_xg_assist,xg_per90,xg_assist_per90,xg_xg_assist_per90,npxg_per90,npxg_xg_assist_per90
34,Ali Al-Bulaihi,DF,Saudi Arabia,33,1989,2.4,4.0,2,2.0,2.0,0.0,2.0,4.0,50.0,2.0,1.0,1.0,0.0,3,7.0,1.0,0.0,DF,Saudi Arabia,33-027,1989,2.4,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,1,1,0,0,3,2,0.0,0.0,0,5.0,3.0,3.0,50.0,78.0,97.0,80.4,1353.0,379.0,31.0,36.0,86.1,39.0,45.0,86.7,6.0,13.0,46.2,0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,5.0,97.0,87.0,10.0,4.0,0.0,1.0,0,6.0,0.0,0.0,0.0,0.0,78.0,0.0,0.0,3,216.0,72.0,80.0,3,72.0,2,0,,0,1.00,2.0,3.0,-1.0,-0.42,1.25,2.0,4.5,-2.6,-1.06,-0.27,110.0,7.0,54.0,55.0,1.0,0.0,110.0,0.0,0.0,,2.0,0.0,65.0,0.0,0,0,0,,0.00,0.00,,,,0.0,0,0,0.0,0.0,,0.0,0.0,Al-Hilal,3,3,216,0,0,0,0,0,1,0,0.00,0.00,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00
55,André Ayew,MF,Ghana,33,1989,2.2,4.0,1,0.0,4.0,0.0,1.0,2.0,50.0,1.0,1.0,0.0,1.0,0,4.0,3.0,0.0,MF,Ghana,33-001,1989,2.2,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,1,0,0,2,6,1,0,0,1,0.0,0.0,0,6.0,3.0,13.0,18.8,30.0,37.0,81.1,411.0,47.0,18.0,22.0,81.8,9.0,11.0,81.8,1.0,1.0,100.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,37.0,30.0,7.0,1.0,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,30.0,0.0,0.0,3,198.0,66.0,73.3,3,66.0,0,0,,0,1.00,4.0,5.0,-1.0,-0.45,0.80,2.9,4.2,-1.3,-0.58,-0.37,61.0,3.0,12.0,31.0,19.0,4.0,60.0,0.0,2.0,0.0,5.0,4.0,35.0,5.0,1,1,1,100.0,0.45,0.45,1.00,1.00,17.3,0.0,0,1,1.1,0.4,0.36,-0.1,0.6,Al Sadd SC,3,3,198,1,0,1,0,1,1,0,0.45,0.00,0.45,0.45,0.45,1.1,0.4,0.0,0.4,0.52,0.00,0.52,0.16,0.16
58,Andrés Guardado,MF,Mexico,36,1986,0.5,4.0,2,0.0,4.0,0.0,2.0,3.0,66.7,1.0,0.0,0.0,0.0,0,4.0,1.0,0.0,MF,Mexico,36-081,1986,0.5,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,1,0,0,0,0,2,0.0,0.0,0,5.0,0.0,0.0,,11.0,16.0,68.8,190.0,15.0,5.0,5.0,100.0,5.0,7.0,71.4,1.0,3.0,33.3,0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,0.0,16.0,14.0,2.0,1.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,1.0,1,41.0,41.0,15.2,1,41.0,0,0,,2,0.00,0.0,0.0,0.0,0.00,0.39,0.2,0.1,0.1,0.23,-0.08,22.0,0.0,6.0,16.0,1.0,0.0,22.0,0.0,0.0,,0.0,1.0,10.0,0.0,0,0,0,,0.00,0.00,,,,0.0,0,0,0.0,0.0,,0.0,0.0,Betis,1,1,41,0,0,0,0,0,0,0,0.00,0.00,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00
72,Artur Jędrzejczyk,DF,Poland,35,1987,0.2,0.0,0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0,0.0,1.0,0.0,DF,Poland,35-044,1987,0.2,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0.0,0.0,0.0,,3.0,4.0,75.0,88.0,44.0,1.0,1.0,100.0,1.0,2.0,50.0,1.0,1.0,100.0,0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,4.0,3.0,1.0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,1,19.0,19.0,5.3,0,,0,1,19.0,3,0.00,0.0,0.0,0.0,0.00,0.53,0.0,0.7,-0.7,-3.31,-2.77,5.0,0.0,2.0,3.0,0.0,0.0,5.0,0.0,0.0,,0.0,0.0,3.0,0.0,0,0,0,,0.00,0.00,,,,0.0,0,0,0.0,0.0,,0.0,0.0,Legia Warsaw,1,0,19,0,0,0,0,0,0,0,0.00,0.00,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00
75,Atiba Hutchinson,MF,Canada,39,1983,1.8,4.0,1,4.0,0.0,0.0,2.0,2.0,100.0,0.0,3.0,0.0,3.0,1,5.0,3.0,0.0,MF,Canada,39-313,1983,1.8,2.0,1.13,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,2,2,0,0,1,1,0.0,0.0,0,10.0,5.0,0.0,100.0,100.0,112.0,89.3,1399.0,519.0,57.0,63.0,90.5,34.0,35.0,97.1,3.0,4.0,75.0,0,0.0,0.0,0.0,0.0,12.0,0.0,0.0,13.0,112.0,108.0,4.0,4.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,3.0,3,159.0,53.0,58.9,2,64.0,0,1,31.0,0,0.00,1.0,4.0,-3.0,-1.70,-0.08,3.2,2.1,1.1,0.60,1.21,131.0,5.0,38.0,72.0,21.0,2.0,131.0,1.0,1.0,100.0,1.0,2.0,90.0,0.0,0,2,0,0.0,1.13,0.00,0.00,,16.8,0.0,0,0,0.1,0.1,0.06,-0.1,-0.1,Beşiktaş,3,2,159,0,0,0,0,0,0,0,0.00,0.00,0.00,0.00,0.00,0.1,0.1,0.0,0.1,0.07,0.00,0.07,0.07,0.07
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
650,Wéverton,GK,Brazil,35,1987,0.1,0.0,0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,GK,Brazil,35-005,1987,0.1,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0.0,0.0,0.0,,3.0,5.0,60.0,145.0,126.0,0.0,0.0,,0.0,0.0,,3.0,5.0,60.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,2.0,3.0,1.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,1,11.0,11.0,2.3,0,,0,1,11.0,4,3.00,0.0,0.0,0.0,0.00,-0.96,0.3,0.1,0.2,1.71,-0.16,5.0,5.0,5.0,0.0,0.0,0.0,5.0,0.0,0.0,,0.0,0.0,2.0,0.0,0,0,0,,0.00,0.00,,,,0.0,0,0,0.0,0.0,,0.0,0.0,Palmeiras,1,0,11,0,0,0,0,0,0,0,0.00,0.00,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00
655,Yann Sommer,GK,Switzerland,34,1988,3.0,0.0,0,0.0,0.0,0.0,0.0,0.0,,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,GK,Switzerland,34-001,1988,3.0,1.0,0.33,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,5.0,0.0,1.0,0.0,108.0,118.0,91.5,2284.0,1327.0,27.0,27.0,100.0,64.0,65.0,98.5,16.0,25.0,64.0,0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,118.0,97.0,21.0,2.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,108.0,0.0,0.0,3,270.0,90.0,75.0,3,90.0,3,0,,0,1.00,2.0,7.0,-5.0,-1.67,-2.67,3.0,4.0,-1.1,-0.35,-1.73,126.0,99.0,126.0,0.0,0.0,0.0,126.0,0.0,0.0,,0.0,0.0,76.0,0.0,0,0,0,,0.00,0.00,,,,0.0,0,0,0.0,0.0,,0.0,0.0,M'Gladbach,3,3,270,0,0,0,0,0,0,0,0.00,0.00,0.00,0.00,0.00,0.0,0.0,0.0,0.0,0.00,0.00,0.00,0.00,0.00
670,Yuto Nagatomo,MF,Japan,36,1986,2.3,4.0,2,1.0,2.0,1.0,1.0,1.0,100.0,0.0,2.0,0.0,2.0,0,4.0,3.0,0.0,MF,Japan,36-097,1986,2.3,2.0,0.86,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.00,0.0,0.0,0.0,0.0,0.0,0.0,0,0,0,6,0,0,6,0,2,0.0,0.0,0,11.0,1.0,2.0,33.3,78.0,109.0,71.6,1171.0,383.0,51.0,57.0,89.5,22.0,31.0,71.0,3.0,14.0,21.4,0,0.1,0.1,-0.1,1.0,7.0,2.0,2.0,2.0,109.0,88.0,17.0,3.0,0.0,0.0,6,14.0,0.0,0.0,0.0,0.0,78.0,4.0,1.0,4,209.0,52.0,53.6,4,52.0,0,0,,0,1.75,1.0,3.0,-2.0,-0.86,-2.35,1.3,3.4,-2.1,-0.89,-1.59,123.0,6.0,27.0,59.0,37.0,0.0,123.0,3.0,4.0,75.0,1.0,1.0,67.0,2.0,0,0,0,,0.00,0.00,,,,0.0,0,0,0.0,0.0,,0.0,0.0,FC Tokyo,4,4,209,0,0,0,0,0,0,0,0.00,0.00,0.00,0.00,0.00,0.0,0.0,0.1,0.1,0.00,0.05,0.05,0.00,0.05
675,Ángel Di María,MF,Argentina,34,1988,3.2,3.0,1,2.0,0.0,1.0,1.0,3.0,33.3,2.0,3.0,0.0,3.0,1,4.0,0.0,0.0,MF,Argentina,34-307,1988,3.2,21.0,6.77,16.0,3.0,1.0,0.0,1.0,0.0,4.0,1.29,2.0,0.0,1.0,0.0,1.0,0.0,0,0,0,0,7,5,27,1,1,1.0,0.0,0,7.0,0.0,1.0,0.0,130.0,168.0,77.4,2080.0,480.0,73.0,77.0,94.8,43.0,53.0,81.1,10.0,23.0,43.5,1,0.6,0.7,0.4,10.0,3.0,11.0,3.0,17.0,168.0,151.0,17.0,4.0,0.0,2.0,27,3.0,10.0,4.0,2.0,0.0,130.0,0.0,7.0,5,288.0,58.0,41.7,4,70.0,1,1,9.0,2,1.60,5.0,2.0,3.0,0.94,0.04,6.8,0.8,6.0,1.92,0.91,201.0,1.0,13.0,59.0,132.0,17.0,201.0,13.0,25.0,52.0,10.0,6.0,163.0,24.0,1,6,3,50.0,1.87,0.94,0.17,0.33,20.7,0.0,0,0,0.8,0.8,0.14,0.2,0.2,Juventus,5,4,288,1,1,1,0,0,0,0,0.31,0.31,0.62,0.31,0.62,0.8,0.8,0.6,1.4,0.27,0.20,0.46,0.27,0.46


In [6]:
df_keeper_stats_combined = pd.merge(df_player_keepers, df_player_keepersadv, on=['player'])
df_keeper_stats_combined.columns = df_keeper_stats_combined.columns.str.removesuffix("_x")
df_keeper_stats_combined.columns = df_keeper_stats_combined.columns.str.removesuffix("_y")
df_keeper_stats_combined = df_keeper_stats_combined.loc[:,~df_keeper_stats_combined.columns.duplicated()].copy()
df_keeper_stats_combined['age'] = df_keeper_stats_combined['age'].apply(lambda x: int(x.split('-')[0].strip()))
df_keeper_stats_combined.head()

Unnamed: 0,player,position,team,age,club,birth_year,gk_games,gk_games_starts,gk_minutes,minutes_90s,gk_goals_against,gk_goals_against_per90,gk_shots_on_target_against,gk_saves,gk_save_pct,gk_wins,gk_ties,gk_losses,gk_clean_sheets,gk_clean_sheets_pct,gk_pens_att,gk_pens_allowed,gk_pens_saved,gk_pens_missed,gk_pens_save_pct,gk_free_kick_goals_against,gk_corner_kick_goals_against,gk_own_goals_against,gk_psxg,gk_psnpxg_per_shot_on_target_against,gk_psxg_net,gk_psxg_net_per90,gk_passes_completed_launched,gk_passes_launched,gk_passes_pct_launched,gk_passes,gk_passes_throws,gk_pct_passes_launched,gk_passes_length_avg,gk_goal_kicks,gk_pct_goal_kicks_launched,gk_goal_kick_length_avg,gk_crosses,gk_crosses_stopped,gk_crosses_stopped_pct,gk_def_actions_outside_pen_area,gk_def_actions_outside_pen_area_per90,gk_avg_distance_def_actions
0,Aimen Dahmen,GK,Tunisia,25,CS Sfaxien,1997,3,3,270,3.0,1,0.33,10,9,90.0,1,1,1,2,66.7,0,0,0,0,,0,0,0,2.4,0.24,1.4,0.46,15,41,36.6,59,7,49.2,39.9,18,66.7,51.1,52,0,0.0,0,0.0,12.0
1,Alireza Beiranvand,GK,IR Iran,30,Persepolis,1992,2,2,109,1.2,1,0.83,5,4,80.0,0,0,1,0,0.0,0,0,0,0,,0,0,0,1.8,0.35,0.8,0.64,5,16,31.3,22,3,40.9,33.5,9,77.8,53.1,19,0,0.0,0,0.0,4.7
2,Alisson,GK,Brazil,30,Liverpool,1992,4,4,379,4.2,2,0.47,7,5,71.4,2,1,0,2,50.0,0,0,0,0,,0,0,0,1.9,0.28,-0.1,-0.02,5,14,35.7,74,12,13.5,23.4,19,21.1,25.6,44,0,0.0,7,1.81,22.0
3,Andries Noppert,GK,Netherlands,28,Heerenveen,1994,5,5,480,5.3,4,0.75,22,18,86.4,3,2,0,2,40.0,1,1,0,0,0.0,0,0,0,6.5,0.25,2.5,0.5,22,55,40.0,152,34,25.7,29.2,40,40.0,35.7,63,6,9.5,5,1.0,13.3
4,André Onana,GK,Cameroon,26,Inter,1996,1,1,80,0.9,1,1.13,3,2,66.7,0,0,1,0,0.0,0,0,0,0,,0,0,0,0.9,0.31,-0.1,-0.06,6,20,30.0,50,6,36.0,31.0,9,22.2,28.6,17,0,0.0,0,0.0,13.0


In [7]:
df_keeper_stats_combined.to_csv('../Data/FIFA World Cup 2022 Player Data/stats_combined_keepers.csv', index=False)

In [19]:
df_keeper_stats_combined.head()

Unnamed: 0,player,position,team,age,club,birth_year,gk_games,gk_games_starts,gk_minutes,minutes_90s,gk_goals_against,gk_goals_against_per90,gk_shots_on_target_against,gk_saves,gk_save_pct,gk_wins,gk_ties,gk_losses,gk_clean_sheets,gk_clean_sheets_pct,gk_pens_att,gk_pens_allowed,gk_pens_saved,gk_pens_missed,gk_pens_save_pct,position_y,team_y,age_y,birth_year_y,minutes_90s_y,gk_goals_against_y,gk_pens_allowed_y,gk_free_kick_goals_against,gk_corner_kick_goals_against,gk_own_goals_against,gk_psxg,gk_psnpxg_per_shot_on_target_against,gk_psxg_net,gk_psxg_net_per90,gk_passes_completed_launched,gk_passes_launched,gk_passes_pct_launched,gk_passes,gk_passes_throws,gk_pct_passes_launched,gk_passes_length_avg,gk_goal_kicks,gk_pct_goal_kicks_launched,gk_goal_kick_length_avg,gk_crosses,gk_crosses_stopped,gk_crosses_stopped_pct,gk_def_actions_outside_pen_area,gk_def_actions_outside_pen_area_per90,gk_avg_distance_def_actions
0,Aimen Dahmen,GK,Tunisia,25,CS Sfaxien,1997,3,3,270,3.0,1,0.33,10,9,90.0,1,1,1,2,66.7,0,0,0,0,,GK,Tunisia,25-324,1997,3.0,1,0,0,0,0,2.4,0.24,1.4,0.46,15,41,36.6,59,7,49.2,39.9,18,66.7,51.1,52,0,0.0,0,0.0,12.0
1,Alireza Beiranvand,GK,IR Iran,30,Persepolis,1992,2,2,109,1.2,1,0.83,5,4,80.0,0,0,1,0,0.0,0,0,0,0,,GK,IR Iran,30-088,1992,1.2,1,0,0,0,0,1.8,0.35,0.8,0.64,5,16,31.3,22,3,40.9,33.5,9,77.8,53.1,19,0,0.0,0,0.0,4.7
2,Alisson,GK,Brazil,30,Liverpool,1992,4,4,379,4.2,2,0.47,7,5,71.4,2,1,0,2,50.0,0,0,0,0,,GK,Brazil,30-077,1992,4.2,2,0,0,0,0,1.9,0.28,-0.1,-0.02,5,14,35.7,74,12,13.5,23.4,19,21.1,25.6,44,0,0.0,7,1.81,22.0
3,Andries Noppert,GK,Netherlands,28,Heerenveen,1994,5,5,480,5.3,4,0.75,22,18,86.4,3,2,0,2,40.0,1,1,0,0,0.0,GK,Netherlands,28-255,1994,5.3,4,1,0,0,0,6.5,0.25,2.5,0.5,22,55,40.0,152,34,25.7,29.2,40,40.0,35.7,63,6,9.5,5,1.0,13.3
4,André Onana,GK,Cameroon,26,Inter,1996,1,1,80,0.9,1,1.13,3,2,66.7,0,0,1,0,0.0,0,0,0,0,,GK,Cameroon,26-260,1996,0.9,1,0,0,0,0,0.9,0.31,-0.1,-0.06,6,20,30.0,50,6,36.0,31.0,9,22.2,28.6,17,0,0.0,0,0.0,13.0


In [4]:
## Explore Match data
df_match_data.head(5)

Unnamed: 0,match,dayofweek,match_time,home_team,away_team,home_xg,away_xg,score,attendance,venue,referee,home_formation,away_formation,home_captain,away_captain,home_manager,away_manager,home_possession,away_possession,home_completed_passes,home_attempted_pases,away_completed_passes,away_attempted_pases,home_sot,away_sot,home_total_shots,away_total_shots,home_saves,away_saves,home_fouls,away_fouls,home_corners,away_corners,home_crosses,away_crosses,home_touches,away_touches,home_tackles,away_tackles,home_interceptions,away_interceptions,home_aerials_won,away_aerials_won,home_clearances,away_clearances,home_offsides,away_offsides,home_gks,away_gks,home_throw_ins,away_throw_ins,home_long_balls,away_long_balls
0,1,Sun,2022-11-20 19:00:00,Qatar,Ecuador,0.3,1.2,0–2,67372,Al Bayt Stadium,Daniele Orsato,5-3-2,4-4-2,Hassan Al-Haydos,Enner Valencia,Félix Sánchez,Gustavo Alfaro,47,53,377,469,430,522,0,2,5,5,1,0,15,15,1,3,8,14,549,601,10,14,2,12,14,13,18,7,3,4,4,7,20,17,51,70
1,2,Mon,2022-11-21 16:00:00,England,IR Iran,2.1,1.4,6–2,45334,Khalifa International Stadium,Raphael Claus,4-2-3-1,5-4-1,Harry Kane,Ehsan Hajsafi,Gareth Southgate,Carlos Queiroz,77,23,740,838,162,248,7,2,13,7,1,1,9,14,8,0,21,8,922,341,13,12,7,16,10,10,4,23,2,2,5,5,18,17,87,50
2,3,Mon,2022-11-21 19:00:00,Senegal,Netherlands,0.9,0.7,0–2,41721,Al Thumama Stadium,Wilton Sampaio,4-2-3-1,3-4-1-2,Kalidou Koulibaly,Virgil van Dijk,Aliou Cissé,Louis van Gaal,46,54,333,427,387,494,4,3,15,10,1,4,13,13,6,7,19,25,531,573,14,10,10,6,22,20,32,16,2,1,6,10,17,28,64,66
3,4,Mon,2022-11-21 22:00:00,United States,Wales,0.8,1.5,1–1,43418,Ahmed bin Ali Stadium,Abdulrahman Ibrahim Al Jassim,4-3-3,3-5-2,Tyler Adams,Gareth Bale,Gregg Berhalter,Rob Page,58,42,524,624,335,446,1,2,6,6,2,0,15,10,5,3,30,14,724,568,14,18,6,8,14,12,18,31,1,1,5,10,22,25,56,90
4,5,Tue,2022-11-22 13:00:00,Argentina,Saudi Arabia,2.2,0.1,1–2,88012,Lusail Iconic Stadium,Slavko Vinčič,4-4-2,4-1-4-1,Lionel Messi,Salman Al-Faraj,Lionel Scaloni,Hervé Renard,69,31,546,659,195,294,5,2,14,3,0,5,7,21,9,2,23,10,753,411,17,18,9,14,16,7,11,26,10,1,3,9,24,13,55,59


In [5]:
df_match_data.describe()

Unnamed: 0,match,home_xg,away_xg,attendance,home_possession,away_possession,home_completed_passes,home_attempted_pases,away_completed_passes,away_attempted_pases,home_sot,away_sot,home_total_shots,away_total_shots,home_saves,away_saves,home_fouls,away_fouls,home_corners,away_corners,home_crosses,away_crosses,home_touches,away_touches,home_tackles,away_tackles,home_interceptions,away_interceptions,home_aerials_won,away_aerials_won,home_clearances,away_clearances,home_offsides,away_offsides,home_gks,away_gks,home_throw_ins,away_throw_ins,home_long_balls,away_long_balls
count,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0,64.0
mean,32.5,1.379687,1.270312,53191.4375,50.890625,49.109375,444.84375,545.765625,427.515625,529.265625,4.0625,3.4375,11.125,11.265625,2.46875,2.640625,12.625,12.34375,4.484375,4.4375,16.640625,17.375,651.109375,633.0625,16.484375,15.84375,8.421875,8.296875,13.9375,13.09375,19.3125,18.203125,1.984375,1.984375,7.625,7.5,20.921875,19.765625,65.90625,65.125
std,18.618987,0.889655,0.953636,17108.666048,12.74427,12.74427,156.147084,156.710403,165.301071,169.349572,2.455153,2.617857,4.952152,5.853346,2.189214,1.938476,5.244801,3.788951,2.777416,2.782456,8.241142,6.936217,151.298099,165.782117,4.649602,5.909956,3.517777,3.27414,5.258025,5.019703,9.098884,9.123804,1.74112,1.713552,3.359422,2.678545,6.347865,5.479376,14.937668,14.217695
min,1.0,0.1,0.0,39089.0,18.0,19.0,165.0,245.0,162.0,248.0,0.0,0.0,2.0,0.0,0.0,0.0,3.0,5.0,0.0,0.0,2.0,4.0,356.0,341.0,8.0,5.0,2.0,3.0,6.0,5.0,4.0,3.0,0.0,0.0,0.0,2.0,9.0,8.0,41.0,39.0
25%,16.75,0.7,0.6,42424.0,41.75,40.75,332.75,424.5,322.5,417.5,2.75,2.0,8.0,7.0,1.0,1.0,9.0,10.0,2.0,2.0,11.0,13.0,526.75,530.0,13.0,12.0,7.0,5.0,10.0,10.0,12.75,12.0,1.0,1.0,5.75,5.0,16.0,15.0,55.0,55.75
50%,32.5,1.25,1.1,44093.0,53.0,47.0,446.5,552.5,396.5,494.0,4.0,3.0,10.0,10.5,2.0,2.0,13.0,12.0,4.5,4.0,16.5,16.0,652.5,604.0,16.0,14.5,8.0,8.0,13.5,12.0,19.0,17.0,2.0,2.0,7.0,8.0,21.0,20.0,64.0,64.0
75%,48.25,1.825,1.7,66851.5,59.25,58.25,532.25,630.75,509.5,612.5,6.0,5.0,14.0,13.0,3.25,4.0,15.0,14.25,6.0,6.0,22.0,21.25,735.75,702.75,20.25,19.0,10.0,11.0,18.0,15.0,24.0,22.25,3.0,3.0,9.0,10.0,24.0,23.25,74.75,75.0
max,64.0,4.0,5.7,88966.0,81.0,82.0,1009.0,1093.0,995.0,1106.0,10.0,11.0,25.0,32.0,10.0,8.0,30.0,24.0,12.0,14.0,46.0,35.0,1179.0,1190.0,28.0,37.0,19.0,16.0,26.0,33.0,45.0,50.0,10.0,7.0,18.0,13.0,40.0,33.0,99.0,106.0


In [6]:
# Remove penalty data from score
df_match_data['score'] = df_match_data['score'].str.replace(r"\(.\)","")
           
# Split match score               
df_match_data[['score_home', 'score_away']] = df_match_data.score.str.split("–", expand=True,)
# df_match_data['score_home'] = df_match_data['score_home'].str.replace(" ","").astype('int')
# df_match_data['score_away'] = df_match_data['score_away'].str.replace(" ","").astype('int')

fig = px.scatter(df_match_data, x="home_possession", y="score_home",
                 color="venue", size="attendance",
                 width=1000, height=800)
fig.show()

In [111]:
showImages('A', 'Senegal', 'Édouard Mendy')

FileNotFoundError: [WinError 3] The system cannot find the path specified: '../Data/FIFA World Cup 2022 Player Images/Images/Images/Group A/Senegal Players/Images_Édouard Mendy'

In [26]:
df_team_data

Unnamed: 0,team,players_used,avg_age,possession,games,games_starts,minutes,minutes_90s,goals,assists,goals_pens,pens_made,pens_att,cards_yellow,cards_red,goals_per90,assists_per90,goals_assists_per90,goals_pens_per90,goals_assists_pens_per90,xg,npxg,xg_assist,npxg_xg_assist,xg_per90,xg_assist_per90,xg_xg_assist_per90,npxg_per90,npxg_xg_assist_per90,gk_games,gk_games_starts,gk_minutes,gk_goals_against,gk_goals_against_per90,gk_shots_on_target_against,gk_saves,gk_save_pct,gk_wins,gk_ties,gk_losses,gk_clean_sheets,gk_clean_sheets_pct,gk_pens_att,gk_pens_allowed,gk_pens_saved,gk_pens_missed,gk_pens_save_pct,gk_free_kick_goals_against,gk_corner_kick_goals_against,gk_own_goals_against,gk_psxg,gk_psnpxg_per_shot_on_target_against,gk_psxg_net,gk_psxg_net_per90,gk_passes_completed_launched,gk_passes_launched,gk_passes_pct_launched,gk_passes,gk_passes_throws,gk_pct_passes_launched,gk_passes_length_avg,gk_goal_kicks,gk_pct_goal_kicks_launched,gk_goal_kick_length_avg,gk_crosses,gk_crosses_stopped,gk_crosses_stopped_pct,gk_def_actions_outside_pen_area,gk_def_actions_outside_pen_area_per90,gk_avg_distance_def_actions,shots,shots_on_target,shots_on_target_pct,shots_per90,shots_on_target_per90,goals_per_shot,goals_per_shot_on_target,average_shot_distance,shots_free_kicks,npxg_per_shot,xg_net,npxg_net,passes_completed,passes,passes_pct,passes_total_distance,passes_progressive_distance,passes_completed_short,passes_short,passes_pct_short,passes_completed_medium,passes_medium,passes_pct_medium,passes_completed_long,passes_long,passes_pct_long,pass_xa,xg_assist_net,assisted_shots,passes_into_final_third,passes_into_penalty_area,crosses_into_penalty_area,progressive_passes,passes_live,passes_dead,passes_free_kicks,through_balls,passes_switches,crosses,throw_ins,corner_kicks,corner_kicks_in,corner_kicks_out,corner_kicks_straight,passes_offsides,passes_blocked,sca,sca_per90,sca_passes_live,sca_passes_dead,sca_dribbles,sca_shots,sca_fouled,sca_defense,gca,gca_per90,gca_passes_live,gca_passes_dead,gca_dribbles,gca_shots,gca_fouled,gca_defense,tackles,tackles_won,tackles_def_3rd,tackles_mid_3rd,tackles_att_3rd,dribble_tackles,dribbles_vs,dribble_tackles_pct,dribbled_past,blocks,blocked_shots,blocked_passes,interceptions,tackles_interceptions,clearances,errors,touches,touches_def_pen_area,touches_def_3rd,touches_mid_3rd,touches_att_3rd,touches_att_pen_area,touches_live_ball,dribbles_completed,dribbles,dribbles_completed_pct,miscontrols,dispossessed,passes_received,progressive_passes_received,minutes_per_game,minutes_pct,minutes_per_start,games_complete,games_subs,minutes_per_sub,unused_subs,points_per_game,on_goals_for,on_goals_against,plus_minus,plus_minus_per90,on_xg_for,on_xg_against,xg_plus_minus,xg_plus_minus_per90,cards_yellow_red,fouls,fouled,offsides,pens_won,pens_conceded,own_goals,ball_recoveries,aerials_won,aerials_lost,aerials_won_pct
0,Argentina,24,28.4,57.4,7,77,690,7.7,15,8,11,4,5,17,0,1.96,1.04,3.0,1.43,2.48,15.2,11.4,7.8,19.3,1.98,1.02,3.0,1.49,2.51,7,7,690,8,1.04,13,6,53.8,4,2,1,3,42.9,2,2,0,0,0.0,0,0,1,5.4,0.26,-1.6,-0.21,15,78,19.2,147,34,38.8,34.4,39,53.8,46.0,87,12,13.8,4,0.52,12.1,95,41,43.2,12.39,5.35,0.12,0.27,18.3,3,0.12,-0.2,-0.4,3911,4625,84.6,60635,18633,2131,2336,91.2,1453,1636,88.8,233,431,54.1,7.1,0.2,76,242,64,7,217,4237,365,119,10,21,94,151,39,11,17,0,23,62,179,23.35,134,13,6,9,10,7,25,3.26,16,1,3,1,4,0,123,69,70,41,12,59,100,59.0,41,84,14,70,52,175,124,0,5388,352,1424,2716,1293,157,5383,45,112,40.2,99,81,3868,210,99,100,87,41,36,24,66,2.0,15,8,7,0.91,15.2,4.6,10.6,1.38,0,100,115,23,5,2,1,357,83,90,48.0
1,Australia,20,28.7,37.8,4,44,360,4.0,3,3,3,0,0,7,0,0.75,0.75,1.5,0.75,1.5,2.3,2.3,1.9,4.2,0.58,0.48,1.06,0.58,1.06,4,4,360,6,1.5,18,12,66.7,2,0,2,2,50.0,0,0,0,0,-1.0,0,1,0,5.5,0.31,-0.5,-0.13,38,85,44.7,127,13,50.4,38.5,37,56.8,46.2,63,3,4.8,6,1.5,15.5,26,8,30.8,6.5,2.0,0.12,0.38,18.5,0,0.09,0.7,0.7,1254,1696,73.9,22489,8922,546,643,84.9,499,612,81.5,145,306,47.4,1.1,1.1,17,77,15,2,68,1493,202,48,1,13,54,98,8,3,5,0,1,41,42,10.5,29,6,3,1,3,0,6,1.5,5,0,1,0,0,0,58,30,34,21,3,34,58,58.6,24,64,20,44,40,98,104,3,2155,306,882,910,378,46,2155,13,46,28.3,61,36,1236,65,90,100,82,25,19,19,40,1.5,4,6,-2,-0.5,2.3,7.2,-4.9,-1.23,0,52,34,1,0,0,0,200,72,72,50.0
2,Belgium,20,30.6,57.0,3,33,270,3.0,1,1,1,0,0,5,0,0.33,0.33,0.67,0.33,0.67,4.7,4.7,3.8,8.5,1.57,1.27,2.85,1.57,2.85,3,3,270,2,0.67,11,8,81.8,1,1,1,2,66.7,1,0,1,0,100.0,0,0,0,4.1,0.28,2.1,0.69,10,23,43.5,87,15,17.2,26.7,36,22.2,33.5,36,1,2.8,2,0.67,14.0,35,9,25.7,11.67,3.0,0.03,0.11,15.6,1,0.14,-3.7,-3.7,1598,1885,84.8,30387,9967,575,637,90.3,866,955,90.7,148,237,62.4,2.9,-2.8,29,102,18,2,99,1733,149,42,10,9,53,49,17,5,10,0,3,34,65,21.67,46,8,4,4,2,1,2,0.67,2,0,0,0,0,0,48,27,29,17,2,26,41,63.4,15,33,8,25,17,65,59,1,2172,251,828,995,369,52,2172,19,45,42.2,47,24,1579,95,90,100,79,19,14,25,29,1.33,1,2,-1,-0.33,4.7,4.6,0.2,0.05,0,30,35,3,0,1,0,132,33,28,54.1
3,Brazil,26,28.5,56.2,5,55,480,5.3,8,6,7,1,1,6,0,1.5,1.13,2.62,1.31,2.44,12.0,11.2,8.2,19.4,2.24,1.54,3.79,2.09,3.64,5,5,480,3,0.56,10,7,70.0,2,1,1,2,40.0,0,0,0,0,-1.0,0,0,0,3.1,0.31,0.1,0.02,10,22,45.5,102,22,14.7,24.7,27,25.9,29.6,52,0,0.0,9,1.69,20.3,95,40,42.1,17.81,7.5,0.07,0.18,17.5,7,0.12,-4.0,-4.2,2750,3203,85.9,45228,14850,1305,1430,91.3,1190,1336,89.1,180,297,60.6,7.9,-2.2,70,209,55,11,190,2942,253,78,7,15,106,102,37,21,5,0,8,40,170,31.87,127,9,7,15,9,3,13,2.44,9,0,1,2,1,0,88,51,38,29,21,33,56,58.9,23,60,10,50,40,128,63,1,3772,254,938,1839,1020,162,3771,32,110,29.1,88,58,2718,186,96,100,85,30,25,24,40,2.0,8,3,5,0.94,12.0,2.0,9.9,1.86,0,63,74,8,1,0,0,271,43,56,43.4
4,Cameroon,22,28.0,41.7,3,33,270,3.0,4,4,4,0,0,8,1,1.33,1.33,2.67,1.33,2.67,3.4,3.4,2.0,5.4,1.14,0.66,1.8,1.14,1.8,3,3,260,4,1.38,15,11,73.3,1,1,1,1,33.3,0,0,0,0,-1.0,0,0,0,4.9,0.33,0.9,0.31,29,67,43.3,108,17,42.6,34.9,34,61.8,46.8,57,3,5.3,0,0.0,8.9,28,16,57.1,9.33,5.33,0.14,0.25,16.4,1,0.13,0.6,0.6,960,1252,76.7,17831,7268,455,516,88.2,367,444,82.7,121,234,51.7,2.2,2.0,21,90,11,3,62,1105,145,45,4,10,53,47,12,5,5,0,2,24,50,16.67,32,5,7,4,2,0,7,2.33,5,0,1,1,0,0,42,20,27,13,2,12,27,44.4,15,22,6,16,40,82,71,1,1571,226,604,666,324,50,1571,24,55,43.6,39,24,943,58,90,100,83,20,12,19,28,1.33,4,4,0,0.0,3.4,5.8,-2.3,-0.78,1,32,38,2,0,0,0,142,42,36,53.8
5,Canada,19,28.2,52.0,3,33,270,3.0,1,1,1,0,1,8,0,0.33,0.33,0.67,0.33,0.67,4.2,3.6,2.8,6.4,1.41,0.95,2.35,1.18,2.13,3,3,270,7,2.33,15,8,53.3,0,0,3,0,0.0,0,0,0,0,-1.0,0,0,0,5.4,0.37,-1.6,-0.52,11,29,37.9,82,22,31.7,30.1,15,20.0,28.3,29,0,0.0,2,0.67,15.1,34,4,11.8,11.33,1.33,0.03,0.25,17.0,1,0.11,-3.2,-2.6,1307,1586,82.4,22064,7915,635,704,90.2,519,578,89.8,117,210,55.7,2.6,-1.8,27,103,24,12,84,1424,154,41,1,11,54,76,12,5,5,0,8,31,61,20.33,47,4,3,5,2,0,2,0.67,2,0,0,0,0,0,41,23,21,13,7,20,40,50.0,20,26,5,21,18,59,41,2,1889,155,517,946,441,56,1889,31,82,37.8,52,30,1289,83,90,100,78,18,15,26,30,0.0,2,7,-5,-1.67,4.2,3.9,0.3,0.11,0,33,37,8,0,0,0,145,30,21,58.8
6,Costa Rica,22,30.6,31.3,3,33,270,3.0,3,1,3,0,0,6,0,1.0,0.33,1.33,1.0,1.33,1.4,1.4,0.6,2.0,0.48,0.2,0.68,0.48,0.68,3,3,270,11,3.67,22,11,54.5,1,0,2,1,33.3,1,1,0,0,0.0,0,0,0,9.6,0.39,-1.4,-0.46,20,58,34.5,68,14,50.0,41.7,33,72.7,52.0,51,2,3.9,2,0.67,9.9,12,7,58.3,4.0,2.33,0.25,0.43,17.0,0,0.13,1.6,1.6,823,1101,74.8,15308,6006,362,425,85.2,352,411,85.6,92,199,46.2,0.4,0.4,7,35,7,2,38,953,135,42,1,15,21,43,1,1,0,0,13,22,20,6.67,13,2,0,3,2,0,3,1.0,1,0,0,2,0,0,67,35,43,20,4,33,65,50.8,32,42,14,28,31,98,83,1,1436,248,688,594,170,18,1436,17,37,45.9,43,31,812,36,90,100,82,19,14,19,30,1.0,3,11,-8,-2.67,1.4,10.1,-8.7,-2.9,0,24,37,13,0,1,0,142,38,30,55.9
7,Croatia,21,29.2,54.3,7,77,690,7.7,8,8,8,0,0,8,0,1.04,1.04,2.09,1.04,2.09,7.0,7.0,5.6,12.6,0.91,0.74,1.65,0.91,1.65,7,7,690,7,0.91,31,24,80.6,2,4,1,2,28.6,1,1,0,0,0.0,0,1,0,10.5,0.31,3.5,0.46,31,63,49.2,178,46,24.2,26.7,50,40.0,34.9,99,8,8.1,3,0.39,10.8,79,26,32.9,10.3,3.39,0.1,0.31,18.5,1,0.09,1.0,1.0,3766,4523,83.3,62507,20561,1911,2082,91.8,1413,1642,86.1,325,526,61.8,6.0,2.4,61,246,59,22,236,4153,359,102,12,36,153,161,30,12,5,0,11,85,132,17.22,104,9,6,7,4,2,14,1.83,13,1,0,0,0,0,132,83,59,56,17,56,101,55.4,45,77,28,49,53,185,143,0,5288,505,1540,2699,1114,152,5288,40,99,40.4,106,70,3724,231,99,100,89,46,32,23,70,1.43,8,7,1,0.13,7.0,11.0,-4.0,-0.53,0,90,90,11,0,1,0,409,87,87,50.0
8,Denmark,20,27.5,60.0,3,33,270,3.0,1,1,1,0,0,5,0,0.33,0.33,0.67,0.33,0.67,2.7,2.7,1.8,4.5,0.9,0.59,1.49,0.9,1.49,3,3,270,3,1.0,11,8,72.7,0,1,2,1,33.3,0,0,0,0,-1.0,0,0,0,3.7,0.34,0.7,0.24,9,24,37.5,71,12,26.8,30.2,16,31.3,35.8,38,0,0.0,5,1.67,19.2,34,10,29.4,11.33,3.33,0.03,0.1,17.1,0,0.09,-1.7,-1.7,1598,1944,82.2,28745,9562,711,803,88.5,670,753,89.0,176,292,60.3,1.8,-0.8,27,154,25,6,133,1796,143,27,0,29,67,73,21,3,9,8,5,27,57,19.0,42,7,1,7,0,0,1,0.33,0,0,0,1,0,0,42,27,18,20,4,18,39,46.2,21,43,16,27,23,65,49,0,2255,164,584,1162,525,68,2255,10,47,21.3,50,25,1570,128,90,100,80,20,13,27,30,0.33,1,3,-2,-0.67,2.7,3.9,-1.2,-0.39,0,28,23,5,0,0,0,171,51,48,51.5
9,Ecuador,18,25.8,53.3,3,33,270,3.0,4,2,3,1,1,3,0,1.33,0.67,2.0,1.0,1.67,3.7,2.9,1.8,4.7,1.24,0.6,1.84,0.98,1.58,3,3,270,3,1.0,4,1,50.0,1,1,1,1,33.3,1,1,0,0,0.0,0,0,0,2.1,0.27,-0.9,-0.31,11,40,27.5,53,6,47.2,38.3,21,71.4,52.0,26,1,3.8,2,0.67,13.0,29,10,34.5,9.67,3.33,0.1,0.3,18.2,1,0.11,0.3,0.1,1153,1473,78.3,19926,6755,548,615,89.1,468,544,86.0,112,235,47.7,1.6,0.2,20,60,17,10,52,1312,153,46,4,3,57,69,11,7,4,0,8,23,47,15.67,31,4,1,6,4,1,6,2.0,3,0,0,2,1,0,46,30,21,23,2,12,36,33.3,24,33,4,29,28,74,42,1,1761,119,578,879,316,49,1760,13,43,30.2,65,25,1145,51,90,100,85,22,11,16,33,1.33,4,3,1,0.33,3.7,2.6,1.2,0.39,0,50,37,8,1,1,0,135,50,49,50.5


In [31]:
def read_data_position(df, column, row):
    cols = df.columns.values.tolist()
    return df.loc[row, cols[column]]

In [33]:
data = read_data_position(df_team_data, 0, 1)
data

'Australia'

In [45]:
def value_count_attribute(df, attribute):
    print(df[attribute].value_counts())
    print(f"distribution of {attribute} is: {df[attribute].max()} - {df[attribute].min()}")

In [46]:
value_count_attribute(df_team_data, 'players_used')

players_used
20    9
21    8
24    4
22    4
19    2
18    2
26    1
25    1
23    1
Name: count, dtype: int64
distribution of players_used is: 26 - 18


In [24]:
df_team_data.columns.values, df_team_data.columns.values.shape

(array(['team', 'players_used', 'avg_age', 'possession', 'games',
        'games_starts', 'minutes', 'minutes_90s', 'goals', 'assists',
        'goals_pens', 'pens_made', 'pens_att', 'cards_yellow', 'cards_red',
        'goals_per90', 'assists_per90', 'goals_assists_per90',
        'goals_pens_per90', 'goals_assists_pens_per90', 'xg', 'npxg',
        'xg_assist', 'npxg_xg_assist', 'xg_per90', 'xg_assist_per90',
        'xg_xg_assist_per90', 'npxg_per90', 'npxg_xg_assist_per90',
        'gk_games', 'gk_games_starts', 'gk_minutes', 'gk_goals_against',
        'gk_goals_against_per90', 'gk_shots_on_target_against', 'gk_saves',
        'gk_save_pct', 'gk_wins', 'gk_ties', 'gk_losses',
        'gk_clean_sheets', 'gk_clean_sheets_pct', 'gk_pens_att',
        'gk_pens_allowed', 'gk_pens_saved', 'gk_pens_missed',
        'gk_pens_save_pct', 'gk_free_kick_goals_against',
        'gk_corner_kick_goals_against', 'gk_own_goals_against', 'gk_psxg',
        'gk_psnpxg_per_shot_on_target_against', 'g

In [9]:
df_tweets.head()

Unnamed: 0,Tweet Id,Tweet URL,Tweet Posted Time,Tweet Content,Tweet Type,Client,Retweets Received,Likes Received,Tweet Location,Tweet Language,User Id,Name,Username,User Bio,Verified or Non-Verified,Profile URL,User Followers,User Following,User Account Creation Date
0,"""1603309229871452160""",https://twitter.com/imrmkhan1/status/160330922...,15 Dec 2022 08:41:23,"""Field par Messi aur Mbappe… studio mein @Wayn...",ReTweet,"""Twitter for Android""",0,0,"""""",Hindi,"""1584973275628699648""","""iamrmkhan""",imrmkhan1,"""""",Non-Verified,https://twitter.com/imrmkhan1,11,12,25-Oct-2022 18:21:42
1,"""1603309229980487681""",https://twitter.com/Shahxaze1/status/160330922...,15 Dec 2022 08:41:23,"""𝐈𝐭. 𝐂𝐚𝐧𝐧𝐨𝐭. 𝐆𝐞𝐭. 𝐁𝐢𝐠𝐠𝐞𝐫. 𝐓𝐡𝐚𝐧. 𝐓𝐡𝐢𝐬. 🔥\n\nGet...",ReTweet,"""Twitter for iPhone""",0,0,"""""",English,"""1558508894925869056""","""Shahxaze""",Shahxaze1,"""Shah Rukh Khan Aze 🇦🇿""",Non-Verified,https://twitter.com/Shahxaze1,247,232,13-Aug-2022 17:41:00
2,"""1603309226469642240""",https://twitter.com/SorareAlexandre/status/160...,15 Dec 2022 08:41:22,"""🎁 CONCOURS SORARE 🎁\n\nJOUEUR À GAGNER : SEKO...",ReTweet,"""Twitter for Android""",0,0,"""""",French,"""1592150953871958016""","""Pawastyle45""",SorareAlexandre,"""Je viens de découvrir Sorare et je surkiffe. ...",Non-Verified,https://twitter.com/SorareAlexandre,18,154,14-Nov-2022 13:42:50
3,"""1603309226604236800""",https://twitter.com/Woozie178/status/160330922...,15 Dec 2022 08:41:22,"""It’ll be a tough night for Europe today.\n\n#...",ReTweet,"""Twitter for Android""",0,0,"""""",English,"""1492945970295808003""","""Woozie""",Woozie178,"""Truth seeker, nationalist, engineer, gamer""",Non-Verified,https://twitter.com/Woozie178,19,131,13-Feb-2022 19:37:47
4,"""1603309227392618496""",https://twitter.com/thug_boiiii/status/1603309...,15 Dec 2022 08:41:22,"""In defeat or in victory, always say Alhamduli...",ReTweet,"""Twitter for Android""",0,0,"""""",English,"""999001409231212544""","""Itx_madan 🇵🇰💪🇵🇸""",thug_boiiii,"""Content Creator.......https://www.instagram.c...",Non-Verified,https://twitter.com/thug_boiiii,803,778,22-May-2018 18:57:51


In [18]:
df_viewership  = pd.read_csv('../Data/FIFA World Cup 2022 Viewership/FIFA World Cup Global Audience Reach.csv', delimiter=',')
df_women_matches_1991_2023  = pd.read_csv('../Data/FIFA Womens World Cup Dataset/matches_1991_2023.csv', delimiter=',')
df_women_world_cup  = pd.read_csv('../Data/FIFA Womens World Cup Dataset/womens-world-cup.csv', delimiter=',')

In [11]:
df_viewership.head()

Unnamed: 0,Year,Hosts,Total_Attendance,Matches,Global_Reach,Global_Consecutive,Finals_Reach,Finals_Consecutive
0,2002,South Korea Japan,2705197,64,1500000000.0,,1100000000,
1,2006,Germany,3359439,64,2978360000.0,2156611000.0,871060228,592464845.0
2,2010,South Africa,3178856,64,3203443000.0,2211577000.0,909566803,619709456.0
3,2014,Brazil,3429873,64,3190800000.0,2130000000.0,1013000000,695000000.0
4,2018,Russia,3031768,64,3572000000.0,2650000000.0,1116900000,


In [13]:
df_historic_fifa_ranking.head()

Unnamed: 0,team,team_code,association,rank,previous_rank,points,previous_points
0,Brazil,BRA,CONMEBOL,1,1,1841.3,1837.56
1,Belgium,BEL,UEFA,2,2,1816.71,1821.92
2,Argentina,ARG,CONMEBOL,3,3,1773.88,1770.65
3,France,FRA,UEFA,4,4,1759.78,1764.85
4,England,ENG,UEFA,5,5,1728.47,1737.46


In [16]:
df_historic_matches_1930_2022.head()

Unnamed: 0,home_team,away_team,home_score,home_xg,home_penalty,away_score,away_xg,away_penalty,home_manager,home_captain,away_manager,away_captain,Attendance,Venue,Officials,Round,Date,Score,Referee,Notes,Host,Year,home_goal,away_goal,home_goal_long,away_goal_long,home_own_goal,away_own_goal,home_penalty_goal,away_penalty_goal,home_penalty_miss_long,away_penalty_miss_long,home_penalty_shootout_goal_long,away_penalty_shootout_goal_long,home_penalty_shootout_miss_long,away_penalty_shootout_miss_long,home_red_card,away_red_card,home_yellow_red_card,away_yellow_red_card,home_yellow_card_long,away_yellow_card_long,home_substitute_in_long,away_substitute_in_long
0,Argentina,France,3,3.3,4.0,3,2.2,2.0,Lionel Scaloni,Lionel Messi,Didier Deschamps,Hugo Lloris,88966,"Lusail Iconic Stadium, Lusail",Szymon Marciniak (Referee) · Paweł Sokolnicki ...,Final,2022-12-18,(4) 3–3 (2),Szymon Marciniak,Argentina won on penalty kicks following extra...,Qatar,2022,Ángel Di María · 36|Lionel Messi · 108,Kylian Mbappé · 81,['36&rsquor;|2:0|Ángel Di María|Assist:|Alexis...,['81&rsquor;|2:2|Kylian Mbappé|Assist:|Marcus ...,,,Lionel Messi (P) · 23,Kylian Mbappé (P) · 80|Kylian Mbappé (P) · 118,,,"['2|1:1|Lionel Messi', '4|2:1|Paulo Dybala', '...","['1|0:1|Kylian Mbappé', '7|3:2|Randal Kolo Mua...",,"['3|1:1|Kingsley Coman', '5|2:1|Aurélien Tchou...",,,,,"['45+7&rsquor;|2:0|Enzo Fernández', '90+8&rsqu...","['55&rsquor;|2:0|Adrien Rabiot', '87&rsquor;|2...",['64&rsquor;|2:0|Marcos Acuña|for Ángel Di Mar...,['41&rsquor;|2:0|Randal Kolo Muani|for Ousmane...
1,Croatia,Morocco,2,0.7,,1,1.2,,Zlatko Dalić,Luka Modrić,Hoalid Regragui,Hakim Ziyech,44137,"Khalifa International Stadium, Doha",Abdulrahman Ibrahim Al Jassim (Referee) · Tale...,Third-place match,2022-12-17,2–1,Abdulrahman Ibrahim Al Jassim,,Qatar,2022,Joško Gvardiol · 7|Mislav Oršić · 42,Achraf Dari · 9,['7&rsquor;|1:0|Joško Gvardiol|Assist:|Ivan Pe...,['9&rsquor;|1:1|Achraf Dari'],,,,,,,,,,,,,,,,"['69&rsquor;|2:1|Azzedine Ounahi', '84&rsquor;...",['61&rsquor;|2:1|Nikola Vlašić|for Andrej Kram...,['46&rsquor;|2:1|Ilias Chair|for Abdelhamid Sa...
2,France,Morocco,2,2.0,,0,0.9,,Didier Deschamps,Hugo Lloris,Hoalid Regragui,Romain Saïss,68294,"Al Bayt Stadium, Al Khor",César Arturo Ramos (Referee) · Alberto Morín (...,Semi-finals,2022-12-14,2–0,César Arturo Ramos,,Qatar,2022,Theo Hernández · 5|Randal Kolo Muani · 79,,"['5&rsquor;|1:0|Theo Hernández', '79&rsquor;|2...",,,,,,,,,,,,,,,,,['27&rsquor;|1:0|Sofiane Boufal'],['65&rsquor;|1:0|Marcus Thuram|for Olivier Gir...,['21&rsquor;|1:0|Selim Amallah|for Romain Saïs...
3,Argentina,Croatia,3,2.3,,0,0.5,,Lionel Scaloni,Lionel Messi,Zlatko Dalić,Luka Modrić,88966,"Lusail Iconic Stadium, Lusail",Daniele Orsato (Referee) · Ciro Carbone (AR1) ...,Semi-finals,2022-12-13,3–0,Daniele Orsato,,Qatar,2022,Julián Álvarez · 39|Julián Álvarez · 69,,"['39&rsquor;|2:0|Julián Álvarez', '69&rsquor;|...",,,,Lionel Messi (P) · 34,,,,,,,,,,,,"['68&rsquor;|2:0|Cristian Romero', '71&rsquor;...","['32&rsquor;|0:0|Mateo Kovačić', '32&rsquor;|0...",['62&rsquor;|2:0|Lisandro Martínez|for Leandro...,"['46&rsquor;|2:0|Mislav Oršić|for Borna Sosa',..."
4,Morocco,Portugal,1,1.4,,0,0.9,,Hoalid Regragui,Romain Saïss,Fernando Santos,Pepe,44198,"Al Thumama Stadium, ath-Thumāma",Facundo Tello (Referee) · Ezequiel Brailovsky ...,Quarter-finals,2022-12-10,1–0,Facundo Tello,,Qatar,2022,Youssef En-Nesyri · 42,,['42&rsquor;|1:0|Youssef En-Nesyri|Assist:|Yah...,,,,,,,,,,,,,,Walid Cheddira · 90+3,,"['70&rsquor;|1:0|Achraf Dari', '90+1&rsquor;|1...",['87&rsquor;|1:0|Vitinha'],['57&rsquor;|1:0|Achraf Dari|for Romain Saïss'...,['51&rsquor;|1:0|João Cancelo|for Raphaël Guer...


In [15]:
df_historic_world_cup

Unnamed: 0,Year,Host,Teams,Champion,Runner-Up,TopScorrer,Attendance,AttendanceAvg,Matches
0,2022,Qatar,32,Argentina,France,Kylian Mbappé - 8,3404252,53191,64
1,2018,Russia,32,France,Croatia,Harry Kane - 6,3031768,47371,64
2,2014,Brazil,32,Germany,Argentina,James Rodríguez - 6,3429873,53592,64
3,2010,South Africa,32,Spain,Netherlands,"Wesley Sneijder, Thomas Müller... - 5",3178856,49670,64
4,2006,Germany,32,Italy,France,Miroslav Klose - 5,3352605,52384,64
5,2002,"Korea Republic, Japan",32,Brazil,Germany,Ronaldo - 8,2705337,42271,64
6,1998,France,32,France,Brazil,Davor Šuker - 6,2903477,45367,64
7,1994,United States,24,Brazil,Italy,"Hristo Stoichkov, Oleg Salenko - 6",3587538,68991,52
8,1990,Italy,24,West Germany,Argentina,Salvatore Schillaci - 6,2516215,48389,52
9,1986,Mexico,24,Argentina,West Germany,Gary Lineker - 6,2394031,46039,52


In [19]:
df_women_matches_1991_2023.head()

Unnamed: 0,home_team,away_team,home_score,home_xg,home_penalty,away_score,away_xg,away_penalty,home_manager,home_captain,away_manager,away_captain,Attendance,Venue,Officials,Round,Date,Score,Referee,Notes,Host,Year,home_goal,away_goal,home_goal_long,away_goal_long,home_own_goal,away_own_goal,home_penalty_goal,away_penalty_goal,home_penalty_miss_long,away_penalty_miss_long,home_penalty_shootout_goal_long,away_penalty_shootout_goal_long,home_penalty_shootout_miss_long,away_penalty_shootout_miss_long,home_red_card,away_red_card,home_yellow_red_card,away_yellow_red_card,home_yellow_card_long,away_yellow_card_long,home_substitute_in_long,away_substitute_in_long
0,Spain,England,1,2.1,,0,0.5,,Jorge Vilda,Olga Carmona,Sarina Wiegman,Millie Bright,75784,"Accor Stadium, Sydney",Tori Penso (Referee) · Brooke Mayo (AR1) · Kat...,Final,2023-08-20,1–0,Tori Penso,,"Australia, New Zealand",2023,Olga Carmona · 29’,,['29’|1:0|Olga Carmona|Assist:|Mariona Caldent...,,,,,,['69’|1:0|Jennifer Hermoso|Penalty saved by Ma...,,,,,,,,,,['78’|1:0|Salma Paralluelo'],['55’|1:0|Lauren Hemp'],"['60’|1:0|Oihane Hernández|for Alba Redondo', ...","['46’|1:0|Lauren James|for Alessia Russo', '46..."
1,Sweden,Australia,2,1.8,,0,0.8,,Peter Gerhardsson,Kosovare Asllani,Tony Gustavsson,Sam Kerr,49461,"Suncorp Stadium, Brisbane",,Third-place match,2023-08-19,2–0,,,"Australia, New Zealand",2023,Kosovare Asllani · 62’,,['62’|2:0|Kosovare Asllani|Assist:|Stina Black...,,,,Fridolina Rolfö (P) · 30’,,,,,,,,,,,,"['88’|2:0|Elin Rubensson', '90+5’|2:0|Lina Hur...",['45+1’|1:0|Katrina Gorry'],['67’|2:0|Rebecka Blomqvist|for Stina Blackste...,"['60’|1:0|Cortnee Vine|for Hayley Raso', '60’|..."
2,Australia,England,1,1.4,,3,1.3,,Tony Gustavsson,Sam Kerr,Sarina Wiegman,Millie Bright,75784,"Accor Stadium, Sydney",Tori Penso (Referee) · Brooke Mayo (AR1) · Mij...,Semi-finals,2023-08-16,1–3,Tori Penso,,"Australia, New Zealand",2023,Sam Kerr · 63’,Ella Toone · 36’|Lauren Hemp · 71’|Alessia Rus...,['63’|1:1|Sam Kerr|Assist:|Katrina Gorry'],"['36’|0:1|Ella Toone', '71’|1:2|Lauren Hemp|As...",,,,,,,,,,,,,,,,"['10’|0:0|Alex Greenwood', '90+5’|1:3|Chloe Ke...","['72’|1:2|Cortnee Vine|for Hayley Raso', '81’|...","['87’|1:3|Chloe Kelly|for Alessia Russo', '90’..."
3,Spain,Sweden,2,1.6,,1,0.9,,Jorge Vilda,Olga Carmona,Peter Gerhardsson,Kosovare Asllani,43217,"Eden Park, Auckland",Edina Alves Batista (Referee) · Neuza Back (AR...,Semi-finals,2023-08-15,2–1,Edina Alves Batista,,"Australia, New Zealand",2023,Salma Paralluelo · 81’|Olga Carmona · 89’,Rebecka Blomqvist · 88’,"['81’|1:0|Salma Paralluelo', '89’|2:1|Olga Car...",['88’|1:1|Rebecka Blomqvist|Assist:|Lina Hurtig'],,,,,,,,,,,,,,,,,['57’|0:0|Salma Paralluelo|for Alexia Putellas...,['77’|0:0|Rebecka Blomqvist|for Stina Blackste...
4,Australia,France,0,1.6,7.0,0,2.0,6.0,Tony Gustavsson,Steph Catley,Hervé Renard,Wendie Renard,49461,"Suncorp Stadium, Brisbane",María Carvajal (Referee) · Leslie Vásquez (AR1...,Quarter-finals,2023-08-12,(7) 0–0 (6),María Carvajal,Australia won on penalty kicks following extra...,"Australia, New Zealand",2023,,,,,,,,,,,"['2|1:0|Caitlin Foord', '6|2:2|Sam Kerr', '8|3...","['3|1:1|Kadidiatou Diani', '5|1:2|Wendie Renar...","['4|1:1|Steph Catley', '10|3:3|Mackenzie Arnol...","['1|0:0|Selma Bacha', '9|3:3|Ève Périsset', '1...",,,,,['92’|0:0|Katrina Gorry'],,"['55’|0:0|Sam Kerr|for Emily van Egmond', '104...","['64’|0:0|Vicki Bècho|for Sandie Toletti', '12..."


In [20]:
df_women_world_cup.head()

Unnamed: 0,id,squad,year,players,age,possesion,matches_played,starts,min_playing_time,minutes_played_90s,goals,assists,non_penalty_goals,penalty_kicks_made,penalty_kicks_attempted,yellow_cards,red_cards,goals_per_90,assists_per_90,goals_plus_assists_per_90,goals_minus_penalty_kicks_per_90,goals_plus_assists_minus_penalty_kicks_per_90
0,1,Argentina,2019,18,26.8,34.7,3,33,270,3.0,2,1,1,1,1,3.0,0.0,0.67,0.33,1.0,0.33,0.67
1,2,Australia,2019,18,25.4,61.3,4,44,390,4.3,8,4,8,0,1,2.0,0.0,1.85,0.92,2.77,1.85,2.77
2,3,Brazil,2019,18,29.7,51.5,4,44,390,4.3,7,3,5,2,3,7.0,0.0,1.62,0.69,2.31,1.15,1.85
3,4,Cameroon,2019,20,27.7,36.0,4,44,360,4.0,3,3,3,0,0,6.0,0.0,0.75,0.75,1.5,0.75,1.5
4,5,Canada,2019,16,27.0,63.0,4,44,360,4.0,4,3,4,0,1,2.0,0.0,1.0,0.75,1.75,1.0,1.75


In [5]:
def nan_values(df: pd.DataFrame):
    cols = df.columns.values.tolist()
    rows = df.index.values.tolist()
    for col in cols:
        if df[col].isna().sum() == 0:
            pass
        else:
            print(f"for {col}: {df[col].isna().sum()} NaN values")
    for row in rows:
        row_nans = df.isnull().sum(axis=1).tolist()
        for i, row_nan_item in enumerate(row_nans):
            if row_nan_item == 0:
                pass
            else:
                print(f"for row {i}: {row_nan_item} NaN values")

In [7]:
df_tweets.size

1900000