# Feature 2: Match Statistics

In [68]:
import numpy as np
import pandas as pd
import joblib
from typing import List, Tuple, Dict
from IPython.display import display

In [89]:
# Load every dataset used in this notebook and record its column list and shape.
# NOTE: the .pkl files are read with joblib.load, which unpickles them; only load files from a trusted source.

# 1. Master Data
master_data = joblib.load('master_data.pkl')
master_cols, master_shape = master_data.columns.tolist(), master_data.shape

# 2. Teams & Matches
teams_matches = joblib.load('combined_teams_matches.pkl')
teams_matches_cols, teams_matches_shape = teams_matches.columns.tolist(), teams_matches.shape

# 3. Players & Matches (2024)
pms_24 = joblib.load('pms_24.pkl')
pms_24_cols, pms_24_shape = pms_24.columns.tolist(), pms_24.shape

# 4. Players & Matches (2025)
pms_25 = joblib.load('pms_25.pkl')
pms_25_cols, pms_25_shape = pms_25.columns.tolist(), pms_25.shape

# 5. Players Data (2024)
players_24 = joblib.load('players_24.pkl')
players_24_cols, players_24_shape = players_24.columns.tolist(), players_24.shape

# 6. Players Data (2025)
players_25 = joblib.load('players_25.pkl')
players_25_cols, players_25_shape = players_25.columns.tolist(), players_25.shape

# 7. Teams Data (2024)
teams_24 = pd.read_csv('teams24.csv')
teams_24_cols, teams_24_shape = teams_24.columns.tolist(), teams_24.shape

# 8. Teams Data (2025)
teams_25 = pd.read_csv('teams25.csv')
teams_25_cols, teams_25_shape = teams_25.columns.tolist(), teams_25.shape

# Shape report, grouped by dataset family (a trailing "\n" leaves a blank line between groups)
print(f"Shape of Master Data: {master_shape}")
print(f"Shape of Teams + Matches Data: {teams_matches_shape}\n")
print(f"Shape of Players + Matches (2024) Data: {pms_24_shape}")
print(f"Shape of Players + Matches (2025) Data: {pms_25_shape}\n")
print(f"Shape of Players (2024) Data: {players_24_shape}")
print(f"Shape of Players (2025) Data: {players_25_shape}\n")
print(f"Shape of Teams (2024) Data: {teams_24_shape}")
print(f"Shape of Teams (2025) Data: {teams_25_shape}")

Shape of Master Data: (550, 491)
Shape of Teams + Matches Data: (550, 115)

Shape of Players + Matches (2024) Data: (11567, 55)
Shape of Players + Matches (2025) Data: (5360, 65)

Shape of Players (2024) Data: (804, 7)
Shape of Players (2025) Data: (12657, 8)

Shape of Teams (2024) Data: (20, 13)
Shape of Teams (2025) Data: (20, 14)


## 1. Working with the Master Data

### i) Adding Match ID to the final Master Data

In [6]:
# Columns that identify a fixture in both frames
fixture_keys = ['Date', 'gameweek', 'HomeTeam', 'AwayTeam']

# Left-join match_id onto the master data; validate='m:1' makes pandas raise if
# teams_matches holds more than one row for the same fixture key.
# NOTE(review): if master_data already carries a 'match_id' column, this merge produces
# 'match_id_x' / 'match_id_y' instead of a single 'match_id' — confirm against the data.
match_id_lookup = teams_matches[fixture_keys + ['match_id']]
final_master_data = master_data.merge(match_id_lookup, on=fixture_keys, how='left', validate='m:1')

final_master_shape = final_master_data.shape
final_master_cols = final_master_data.columns.tolist()

### ii) We will remove the betting columns, which are not needed for our project

In [7]:
# Betting-odds columns that are dropped from the master data.
betting_cols = ['GBH', 'GBD', 'GBA', 'GB>2.5', 'GB<2.5', 'B365>2.5', 'B365<2.5', 'B365AHH', 'B365AHA', 'BbMxH', 'BbMxD', 'BbMxA', 
               'BbMx>2.5', 'BbAv>2.5', 'BbMx<2.5', 'BbAv<2.5', 'BbAHh', 'BbAvAHH', 'BbAvAHA', 'PSH', 'PSD', 'PSA', 'P>2.5', 'P<2.5', 
               'Max>2.5', 'Max<2.5', 'Avg>2.5', 'Avg<2.5', 'AHh', 'PAHH', 'PAHA', 'MaxAHH', 'MaxAHA', 'AvgAHH', 'AvgAHA', 
               'MaxCH', 'MaxCD', 'MaxCA', 'B365C>2.5', 'B365C<2.5', 'MaxC>2.5', 'MaxC<2.5', 'B365CAHH', 'B365CAHA', 
               'MaxCAHH', 'MaxCAHA', 'NormIP_Margin', 'NormIP_BbAvH', 'NormIP_BbAvD', 'NormIP_BbAvA', 'NormIP_AvgCH', 'NormIP_AvgCD', 
               'NormIP_AvgCA', 'NormIP_PSCH', 'NormIP_PSCD', 'NormIP_PSCA', 'NormIP_MaxH', 'NormIP_MaxD', 'NormIP_MaxA', 'NormIP_AvgH', 
               'NormIP_AvgD', 'NormIP_AvgA', 'NormIP_B365H', 'NormIP_B365D', 'NormIP_B365A', 'NormIP_B365CH', 'NormIP_B365CD', 
               'NormIP_B365CA', 'NormIP_AvgC>2.5', 'NormIP_AvgC<2.5', 'NormIP_PC>2.5', 'NormIP_PC<2.5', 'IP_AHO_AvgCAHH', 'IP_AHO_AvgCAHA', 
               'IP_AHO_PCAHH', 'IP_AHO_PCAHA']

# Rebuilding from master_data throws away the match_id that the previous step merged in.
# That is only harmless when master_data already has its own 'match_id'; otherwise the
# merged frame must be the starting point, or 'match_id' is missing further down.
source_frame = master_data if 'match_id' in master_data.columns else final_master_data

usable_master_cols = [col for col in source_frame.columns if col not in betting_cols]

# .copy() gives an independent frame, so later .loc assignments do not act on a selection of master_data
final_master_data = source_frame[usable_master_cols].copy()

### iii) Analyzing the null values in the dataset to make note of unavailable statistics for specific matches 

In [8]:
# Missing-value count per column, computed once; keep only the columns that have at least one NaN
null_counts = master_data.isnull().sum()
null_ft_dict = dict(null_counts[null_counts > 0])

In [9]:
# Rows of the master data whose gameweek is missing
gameweek_missing = master_data['gameweek'].isna()
df_containing_null = master_data.loc[gameweek_missing]
# it was found that all the features containing NaN values correspond to the same three rows (index = 447, 463, 468)

### iv) Segregating all the usable features into different categories according to our needs

In [10]:
# Column groups used to slice the master data. The home ('HT') and away ('AT') columns
# share the same stat names, so each group is generated from a single list of stats.
_SIDES = ('HT', 'AT')


def _position_columns(position, stats):
    """Return the L5-average column names for one position: all home columns, then all away columns."""
    return [f'{side}_{position}_L5_Avg_{stat}' for side in _SIDES for stat in stats]


# Fixture identifiers plus the per-match team statistics
basic_stats = [
    'Date', 'season', 'gameweek', 'match_id', 'HomeTeam', 'AwayTeam',
    'FTHG', 'FTAG', 'FTR', 'Referee',
    'HS', 'AS', 'HST', 'AST', 'HC', 'AC', 'HF', 'AF', 'HY', 'AY', 'HR', 'AR',
    'home_possession', 'away_possession',
    'home_accurate_passes', 'home_accurate_passes_pct',
    'away_accurate_passes', 'away_accurate_passes_pct',
    'home_successful_dribbles', 'home_successful_dribbles_pct',
    'away_successful_dribbles', 'away_successful_dribbles_pct',
    'home_tackles_won', 'home_tackles_won_pct',
    'away_tackles_won', 'away_tackles_won_pct',
    'home_expected_goals_xg', 'away_expected_goals_xg',
    'home_passes', 'away_passes',
    'home_interceptions', 'away_interceptions',
    'home_keeper_saves', 'away_keeper_saves',
    'home_duels_won', 'away_duels_won',
]

# Team-level "_L5" columns; home and away alternate for each stat
_ROLLING_STATS = ['AvgGF', 'AvgGA', 'AvgShots', 'ShotAccuracy', 'ShotConversion', 'CS', 'WinRate']
rolling_features = [f'{side}_{stat}_L5' for stat in _ROLLING_STATS for side in _SIDES]

# Goalkeeper columns
_GK_STATS = [
    'gk_accurate_passes', 'gk_accurate_long_balls', 'saves', 'saves_inside_box',
    'goals_conceded', 'team_goals_conceded', 'xgot_faced', 'goals_prevented',
    'sweeper_actions', 'high_claim',
]
gk_specific = _position_columns('GK', _GK_STATS)

# Defender columns
_DEF_STATS = [
    'xg', 'xa', 'accurate_passes', 'accurate_long_balls', 'final_third_passes',
    'tackles_won', 'interceptions', 'recoveries', 'blocks', 'clearances',
    'headed_clearances', 'dribbled_past', 'duels_won', 'ground_duels_won',
    'aerial_duels_won', 'was_fouled', 'fouls_committed', 'tackles_won_percentage',
]
def_specific = _position_columns('DEF', _DEF_STATS)

# Midfielder columns
_MID_STATS = [
    'goals', 'assists', 'xg', 'xa', 'accurate_passes', 'accurate_crosses',
    'accurate_long_balls', 'final_third_passes', 'total_shots', 'shots_on_target',
    'chances_created', 'touches', 'successful_dribbles', 'corners',
    'penalties_scored', 'penalties_missed', 'tackles_won', 'interceptions',
    'recoveries', 'blocks', 'clearances', 'dribbled_past', 'duels_won',
    'ground_duels_won', 'aerial_duels_won', 'was_fouled', 'fouls_committed',
]
mid_specific = _position_columns('MID', _MID_STATS)

# Forward columns
_FWD_STATS = [
    'goals', 'assists', 'xg', 'xa', 'xgot', 'accurate_passes', 'final_third_passes',
    'total_shots', 'shots_on_target', 'chances_created', 'big_chances_missed',
    'touches', 'touches_opposition_box', 'successful_dribbles', 'corners', 'offsides',
    'penalties_scored', 'penalties_missed', 'duels_won', 'ground_duels_won',
    'aerial_duels_won', 'was_fouled', 'fouls_committed',
]
fwd_specific = _position_columns('FWD', _FWD_STATS)

# Keep only the basic and rolling columns. Take an explicit copy: the gameweek fixes that
# follow assign into this frame with .loc, and assigning into a bare column selection can
# raise pandas' SettingWithCopyWarning.
final_master_data = final_master_data[basic_stats + rolling_features].copy()

### v) Filling the missing Game Week values of the NaN rows using neighbouring matches

In [11]:
# Each mask picks a fixture by date and home team; the matching rows get their gameweek set by hand.

# Brentford at home on 2025-10-05 -> gameweek 7
mask1 = final_master_data['Date'].eq('2025-10-05') & final_master_data['HomeTeam'].eq('Brentford')
final_master_data.loc[mask1, 'gameweek'] = 7.0

# Newcastle at home on 2025-10-25 -> gameweek 9
mask2 = final_master_data['Date'].eq('2025-10-25') & final_master_data['HomeTeam'].eq('Newcastle')
final_master_data.loc[mask2, 'gameweek'] = 9.0

# Arsenal at home on 2025-10-26 -> gameweek 9
mask3 = final_master_data['Date'].eq('2025-10-26') & final_master_data['HomeTeam'].eq('Arsenal')
final_master_data.loc[mask3, 'gameweek'] = 9.0

In [12]:
final_master_data.isna().sum()  # per-column NaN count: some features are unavailable for some matches

Date                            0
season                          0
gameweek                        0
match_id                        3
HomeTeam                        0
AwayTeam                        0
FTHG                            0
FTAG                            0
FTR                             0
Referee                         0
HS                              0
AS                              0
HST                             0
AST                             0
HC                              0
AC                              0
HF                              0
AF                              0
HY                              0
AY                              0
HR                              0
AR                              0
home_possession                 3
away_possession                 3
home_accurate_passes            3
home_accurate_passes_pct        3
away_accurate_passes            3
away_accurate_passes_pct        3
home_successful_dribbles        3
home_successfu

In [13]:
# Refresh the cached column list and shape now that the master data is in its final form
final_master_cols = final_master_data.columns.tolist()
final_master_shape = final_master_data.shape
print(f"Final Shape of Master Data: {final_master_data.shape}")

Final Shape of Master Data: (550, 60)


## 2. Working with Players + Match Data (2024 & 2025)

### i) For each match, we will only showcase the players who have either scored or assisted. Any other player's data will be dropped from the dataset and will not be used for our feature.

In [23]:
# Players with at least one goal or assist in a match
scored_or_assisted_24 = pms_24['goals'].gt(0) | pms_24['assists'].gt(0)
pms_24_ga = pms_24.loc[scored_or_assisted_24].reset_index(drop=True)

# Columns without a single NaN among those rows
null_counts_24 = pms_24_ga.isnull().sum()
pms_24_cols_dict = dict(null_counts_24[null_counts_24 == 0])
usable_pms_24_cols = list(pms_24_cols_dict)

# Final players + match data for analysis; 'Game Week' is renamed to 'gameweek' when present
pms_24_ga = pms_24_ga[usable_pms_24_cols].rename(columns={'Game Week': 'gameweek'}, errors='ignore')

pms_24_ga_cols = pms_24_ga.columns.tolist()
pms_24_ga_shape = pms_24_ga.shape
print(f"Final Shape of Players + Match Data (2024): {pms_24_ga_shape}")

Final Shape of Players + Match Data (2024): (1627, 55)


In [24]:
# Players with at least one goal or assist in a match
pms_25_ga = pms_25[(pms_25['goals'] > 0) | (pms_25['assists'] > 0)].reset_index(drop=True)

# Columns without a single NaN among those rows
null_counts_25 = pms_25_ga.isnull().sum()
pms_25_cols_dict = dict(null_counts_25[null_counts_25 == 0])
usable_pms_25_cols = list(pms_25_cols_dict)

# Final players + match data for analysis; 'Game Week' is renamed to 'gameweek' when present
pms_25_ga = pms_25_ga[usable_pms_25_cols]
pms_25_ga = pms_25_ga.rename(columns={'Game Week': 'gameweek'}, errors='ignore')

pms_25_ga_cols = list(pms_25_ga.columns)
pms_25_ga_shape = tuple(pms_25_ga.shape)
# The label previously said "(2024)" although this is the 2025 data
print(f"Final Shape of Players + Match Data (2025): {pms_25_ga_shape}")

Final Shape of Players + Match Data (2024): (665, 63)


## 3. Working with the Teams + Matches Data

### i) Rectifying the data modified earlier using masks

In [31]:
# Each mask picks a fixture by gameweek and home team; the matching rows get their Date set by hand.
# Note that this writes into teams_matches itself.

# Gameweek 7, Brentford at home
mask_date_1 = teams_matches['gameweek'].eq(7) & teams_matches['HomeTeam'].eq('Brentford')
teams_matches.loc[mask_date_1, 'Date'] = '2025-10-05'

# Gameweek 9, Newcastle at home
mask_date_2 = teams_matches['gameweek'].eq(9) & teams_matches['HomeTeam'].eq('Newcastle')
teams_matches.loc[mask_date_2, 'Date'] = '2025-10-25'

# Gameweek 9, Arsenal at home
mask_date_3 = teams_matches['gameweek'].eq(9) & teams_matches['HomeTeam'].eq('Arsenal')
teams_matches.loc[mask_date_3, 'Date'] = '2025-10-26'

### ii) Sorting the data by Date, in ascending order

In [32]:
# Oldest fixture first, with a fresh 0..n-1 index
final_teams_matches = teams_matches.sort_values('Date').reset_index(drop=True)

## 4. Working with the Players Data (2024 & 2025)

### i) We will merge the Players + Match Data & Teams Data with the Players Data, to include the names of the players along with their team codes, team names and their position as the current data lacks all these features

In [104]:
# Use the same key name ('code') in both player tables; a missing 'team_code' column is ignored
team_code_rename = {'team_code': 'code'}
players_24 = players_24.rename(columns=team_code_rename, errors='ignore')
players_25 = players_25.rename(columns=team_code_rename, errors='ignore')

In [106]:
# Player attributes attached to every scorer/assister row
player_info_cols = ['first_name', 'second_name', 'position', 'code']

# 2024: look players up by player_id (validate='m:1' raises if players_24 repeats a player_id),
# tag the season, then attach the team name through the team code.
players_matches_24 = (
    pms_24_ga
    .merge(players_24[['player_id'] + player_info_cols], on='player_id', how='left', validate='m:1')
    .assign(season=2024)
    .merge(teams_24[['code', 'name']], on='code', how='left')
)

# 2025: same steps, but players are looked up by (player_id, gameweek)
players_25 = players_25.rename(columns={'Game Week': 'gameweek'})
players_matches_25 = (
    pms_25_ga
    .merge(
        players_25[['player_id', 'gameweek'] + player_info_cols],
        on=['player_id', 'gameweek'],
        how='left',
        validate='m:1',
    )
    .assign(season=2025)
    .merge(teams_25[['code', 'name']], on='code', how='left')
)

### ii) Keeping only the features in which fewer than 60% of the values are 0.0

In [154]:
# Zero count per column, and the count a column must stay below to be kept (60% of the rows)
zero_counts_24 = players_matches_24.eq(0.0).sum()
zero_limit_24 = (players_matches_24.shape[0]) * 0.6
players_matches_24_cols = [col for col, n_zero in zero_counts_24.items() if n_zero < zero_limit_24]
players_matches_24_final = players_matches_24[players_matches_24_cols]

zero_counts_25 = players_matches_25.eq(0.0).sum()
zero_limit_25 = (players_matches_25.shape[0]) * 0.6
players_matches_25_cols = [col for col, n_zero in zero_counts_25.items() if n_zero < zero_limit_25]
players_matches_25_final = players_matches_25[players_matches_25_cols]

# Before / after shapes for both seasons
print(f"Original Shape of Players + Match (2024) Data: {players_matches_24.shape}")
print(f"New Shape of Players + Match (2024) Data: {players_matches_24_final.shape}\n")
print(f"Original Shape of Players + Match (2025) Data: {players_matches_25.shape}")
print(f"New Shape of Players + Match (2025) Data: {players_matches_25_final.shape}")

Original Shape of Players + Match (2024) Data: (1627, 61)
New Shape of Players + Match (2024) Data: (1627, 43)

Original Shape of Players + Match (2025) Data: (665, 69)
New Shape of Players + Match (2025) Data: (665, 36)


## Working with the Data according to the User

In [112]:
# Ask for a season and matchday, then list that matchday's fixtures.
# Highest matchday accepted per season (the 2025 limit is the current matchday).
max_gameweek = {2024: 38, 2025: 17}
fixture_cols = ['season', 'gameweek', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG']

# Start from an empty selection so that a rejected input can neither raise a NameError
# below nor leave the fixtures of an earlier run in place.
gw = None
display_matches = pd.DataFrame(columns=fixture_cols)
hometeams, awayteams = [], []

season = int(input("Enter Year: "))
if season not in max_gameweek:
    print("Please enter a valid year (2024 or 2025)")
else:
    gw = int(input("Enter Matchday No: "))
    if 1 <= gw <= max_gameweek[season]:
        in_matchday = (final_master_data['season'] == season) & (final_master_data['gameweek'] == gw)
        display_matches = final_master_data.loc[in_matchday, fixture_cols].reset_index(drop=True)
        hometeams = list(display_matches['HomeTeam'].unique())
        awayteams = list(display_matches['AwayTeam'].unique())
    elif season == 2024:
        print("Please enter a valid matchday number between 1 and 38.")
    else:
        print("Please enter a valid matchday number between 1 and 17 (current matchday)")

display_matches

Enter Year:  2025
Enter Matchday No:  17


Unnamed: 0,season,gameweek,HomeTeam,AwayTeam,FTHG,FTAG
0,2025,17.0,Wolves,Brentford,0.0,2.0
1,2025,17.0,Tottenham,Liverpool,1.0,2.0
2,2025,17.0,Man City,West Ham,3.0,0.0
3,2025,17.0,Everton,Arsenal,0.0,1.0
4,2025,17.0,Bournemouth,Burnley,1.0,1.0
5,2025,17.0,Newcastle,Chelsea,2.0,2.0
6,2025,17.0,Brighton,Sunderland,0.0,0.0
7,2025,17.0,Leeds,Crystal Palace,4.0,1.0
8,2025,17.0,Aston Villa,Man United,2.0,1.0
9,2025,17.0,Fulham,Nott'm Forest,1.0,0.0


In [131]:
# Ask for the two teams of the fixture and report precisely what is wrong with an entry.
# Surrounding whitespace is stripped so a stray space does not cause a mismatch.
hometeam = input("Enter Home Team: ").strip()
if hometeam not in hometeams:
    if hometeam in awayteams:
        print(f"{hometeam} was the away team in matchday {gw}.")
    else:
        # Not on either side of any fixture: a wrong or misspelled name
        print(f"{hometeam} did not play in matchday {gw} (check the spelling against the fixture list).")

awayteam = input("Enter Away Team: ").strip()
if awayteam not in awayteams:
    if awayteam in hometeams:
        print(f"{awayteam} was the home team in matchday {gw}.")
    else:
        print(f"{awayteam} did not play in matchday {gw} (check the spelling against the fixture list).")
elif hometeam in hometeams:
    # Both names are valid on their own; make sure they actually met each other
    is_fixture = (display_matches['HomeTeam'] == hometeam) & (display_matches['AwayTeam'] == awayteam)
    if not is_fixture.any():
        print(f"{hometeam} did not host {awayteam} in matchday {gw}.")

Enter Home Team:  Wolves
Enter Away Team:  Brentford


In [132]:
# Show either the basic statistics or the rolling features of the selected fixture.
print("1. Basic Information")
print("2. Rolling Features")
choice = int(input("Enter your choice: "))

# The fixture is looked up once and shared by both menu options
selected_match = final_master_data[
    (final_master_data['season'] == season) &
    (final_master_data['gameweek'] == gw) &
    (final_master_data['HomeTeam'] == hometeam) &
    (final_master_data['AwayTeam'] == awayteam)
]

# matchid is needed by the player table that follows, so set it whichever option is chosen;
# it is None when no fixture matches the selection.
matchid = selected_match['match_id'].iloc[0] if not selected_match.empty else None

if selected_match.empty:
    print(f"No match found for {hometeam} vs {awayteam} in matchday {gw} of season {season}.")
elif choice == 1:
    match_stats_basic = selected_match[basic_stats]
    display(match_stats_basic)
    print(f"Match Id: {matchid}")
elif choice == 2:
    match_stats_l5 = selected_match[rolling_features]
    display(match_stats_l5)
else:
    print("Please enter a valid choice (1 or 2)")

1. Basic Information
2. Rolling Features


Enter your choice:  1


Unnamed: 0,Date,season,gameweek,match_id,HomeTeam,AwayTeam,FTHG,FTAG,FTR,Referee,...,home_expected_goals_xg,away_expected_goals_xg,home_passes,away_passes,home_interceptions,away_interceptions,home_keeper_saves,away_keeper_saves,home_duels_won,away_duels_won
540,2025-12-20,2025,17.0,25-26-prem-wolverhampton-wanderers-vs-brentford,Wolves,Brentford,0.0,2.0,A,M Donohue,...,1.29,1.28,358.0,470.0,10.0,17.0,4.0,2.0,49.0,47.0


Match Id: 25-26-prem-wolverhampton-wanderers-vs-brentford


In [157]:
# Scorers and assisters of the selected match, taken from the player table of the selected
# season (previously the 2025 table was used even for a 2024 match).
# NOTE(review): assumes players_matches_24_final also has a 'match_id' column — confirm.
season_player_tables = {2024: players_matches_24_final, 2025: players_matches_25_final}
players_for_season = season_player_tables.get(season, players_matches_25_final)

# The variable keeps its original name so that existing references to it keep working
pms_25_to_display = players_for_season[players_for_season['match_id'] == matchid]
display(pms_25_to_display)

Unnamed: 0,player_id,match_id,minutes_played,goals,assists,total_shots,xg,xa,shots_on_target,successful_dribbles,...,tackles,finish_min,team_goals_conceded,gameweek,first_name,second_name,position,code,season,name
643,107,25-26-prem-wolverhampton-wanderers-vs-brentford,90,2,0,3,0.48,0.28,3,1,...,1,90,0,17,Keane,Lewis-Potter,Defender,94,2025,Brentford
644,125,25-26-prem-wolverhampton-wanderers-vs-brentford,90,0,1,0,0.0,0.12,0,0,...,3,90,0,17,Vitaly,Janelt,Midfielder,94,2025,Brentford
645,121,25-26-prem-wolverhampton-wanderers-vs-brentford,26,0,1,1,0.26,0.07,0,0,...,1,90,0,17,Mikkel,Damsgaard,Midfielder,94,2025,Brentford
