# Feature 2: Match Statistics

In [1]:
import numpy as np
import pandas as pd
import joblib
from typing import List, Tuple, Dict
from IPython.display import display

In [2]:
# NOTE: joblib.load unpickles its input, which can execute arbitrary code --
# only load .pkl files that were produced by this project.

# Pickled frames
master_data = joblib.load('master_data.pkl')       # 1. Master Data
teams_matches = joblib.load('combined_tm.pkl')     # 2. Teams & Matches
pms_24 = joblib.load('pms_24.pkl')                 # 3. Players & Matches (2024)
pms_25 = joblib.load('pms_25.pkl')                 # 4. Players & Matches (2025)
players_24 = joblib.load('players_24.pkl')         # 5. Players Data (2024)
players_25 = joblib.load('players_25.pkl')         # 6. Players Data (2025)

# CSV frames
teams_24 = pd.read_csv('teams24.csv')              # 7. Teams Data (2024)
teams_25 = pd.read_csv('teams25.csv')              # 8. Teams Data (2025)

# Column names and (rows, columns) of every frame as loaded, before any
# later cell renames or filters them.
master_cols, master_shape = list(master_data.columns), tuple(master_data.shape)
teams_matches_cols, teams_matches_shape = list(teams_matches.columns), tuple(teams_matches.shape)
pms_24_cols, pms_24_shape = list(pms_24.columns), tuple(pms_24.shape)
pms_25_cols, pms_25_shape = list(pms_25.columns), tuple(pms_25.shape)
players_24_cols, players_24_shape = list(players_24.columns), tuple(players_24.shape)
players_25_cols, players_25_shape = list(players_25.columns), tuple(players_25.shape)
teams_24_cols, teams_24_shape = list(teams_24.columns), tuple(teams_24.shape)
teams_25_cols, teams_25_shape = list(teams_25.columns), tuple(teams_25.shape)

# Shape report; the trailing "\n" separates the related pairs in the output.
print(f"Shape of Master Data: {master_shape}")
print(f"Shape of Teams + Matches Data: {teams_matches_shape}\n")
print(f"Shape of Players + Matches (2024) Data: {pms_24_shape}")
print(f"Shape of Players + Matches (2025) Data: {pms_25_shape}\n")
print(f"Shape of Players (2024) Data: {players_24_shape}")
print(f"Shape of Players (2025) Data: {players_25_shape}\n")
print(f"Shape of Teams (2024) Data: {teams_24_shape}")
print(f"Shape of Teams (2025) Data: {teams_25_shape}")

Shape of Master Data: (590, 396)
Shape of Teams + Matches Data: (590, 115)

Shape of Players + Matches (2024) Data: (11567, 55)
Shape of Players + Matches (2025) Data: (6527, 65)

Shape of Players (2024) Data: (804, 7)
Shape of Players (2025) Data: (15797, 8)

Shape of Teams (2024) Data: (20, 13)
Shape of Teams (2025) Data: (20, 14)


## 1. Working with the Master Data

### i) Merging

In [25]:
# Attach the teams/matches columns to every master row. The six `on` columns
# are the join key; validate='m:1' makes pandas raise a MergeError if
# teams_matches holds more than one row for the same key.
final_master_data = master_data.merge(
    teams_matches,
    on=['Date', 'season', 'gameweek', 'HomeTeam', 'AwayTeam', 'match_id'],
    how='left',
    validate='m:1',
)

# Non-key columns present in both frames come back as "<name>_x" (from
# master_data) and "<name>_y" (from teams_matches). Keep the teams_matches
# copy by dropping every "_x" column.
cols_to_drop = [col for col in final_master_data.columns if col.endswith('_x')]
final_master_data = final_master_data.drop(columns=cols_to_drop)

# Remove the "_y" suffix from the surviving duplicates. str.rstrip('_y')
# must not be used here: it strips any trailing run of the characters '_'
# and 'y', so unrelated names ending in 'y' or '_' would be truncated too.
# Slicing off exactly two characters touches only a literal "_y" suffix.
final_master_data = final_master_data.rename(
    columns=lambda name: name[:-2] if name.endswith('_y') else name
)

### ii) We will remove the betting columns, which are not needed for our project

In [26]:
# Betting-related columns to exclude from the master data (order preserved).
betting_cols = [
    # un-prefixed odds columns
    'GBH', 'GBD', 'GBA', 'GB>2.5', 'GB<2.5',
    'B365>2.5', 'B365<2.5', 'B365AHH', 'B365AHA',
    'BbMxH', 'BbMxD', 'BbMxA',
    'BbMx>2.5', 'BbAv>2.5', 'BbMx<2.5', 'BbAv<2.5',
    'BbAHh', 'BbAvAHH', 'BbAvAHA',
    'PSH', 'PSD', 'PSA', 'P>2.5', 'P<2.5',
    'Max>2.5', 'Max<2.5', 'Avg>2.5', 'Avg<2.5',
    'AHh', 'PAHH', 'PAHA', 'MaxAHH', 'MaxAHA', 'AvgAHH', 'AvgAHA',
    'MaxCH', 'MaxCD', 'MaxCA',
    'B365C>2.5', 'B365C<2.5', 'MaxC>2.5', 'MaxC<2.5',
    'B365CAHH', 'B365CAHA', 'MaxCAHH', 'MaxCAHA',
    # "NormIP_" columns
    'NormIP_Margin',
    'NormIP_BbAvH', 'NormIP_BbAvD', 'NormIP_BbAvA',
    'NormIP_AvgCH', 'NormIP_AvgCD', 'NormIP_AvgCA',
    'NormIP_PSCH', 'NormIP_PSCD', 'NormIP_PSCA',
    'NormIP_MaxH', 'NormIP_MaxD', 'NormIP_MaxA',
    'NormIP_AvgH', 'NormIP_AvgD', 'NormIP_AvgA',
    'NormIP_B365H', 'NormIP_B365D', 'NormIP_B365A',
    'NormIP_B365CH', 'NormIP_B365CD', 'NormIP_B365CA',
    'NormIP_AvgC>2.5', 'NormIP_AvgC<2.5', 'NormIP_PC>2.5', 'NormIP_PC<2.5',
    # "IP_AHO_" columns
    'IP_AHO_AvgCAHH', 'IP_AHO_AvgCAHA', 'IP_AHO_PCAHH', 'IP_AHO_PCAHA',
]

# Master columns (in their original order) that are not betting columns.
_betting_lookup = frozenset(betting_cols)
usable_master_cols = [name for name in master_cols if name not in _betting_lookup]

# NOTE(review): this selects from master_data, so it replaces whatever
# final_master_data held before this cell (including any merged result).
final_master_data = master_data.loc[:, usable_master_cols]

### iii) Analyzing the null values in the dataset to make note of unavailable statistics for specific matches 

In [27]:
# {column name: NaN count} for every master_data column with at least one NaN.
null_ft_dict = dict(master_data.isnull().sum().loc[lambda counts: counts > 0])

In [28]:
# Rows of master_data whose 'gameweek' is missing.
# It was found that all the features containing NaN values correspond to the
# same three rows (index = 447, 463, 468).
df_containing_null = master_data.loc[master_data['gameweek'].isna()]

### iv) Segregating all the usable features into different categories according to our needs

In [29]:
def _home_away(template, stats, *, side_major=True):
    """Expand `template` over the 'HT'/'AT' prefixes and the given stat names.

    `template` must contain `{side}` and `{stat}` placeholders.
    side_major=True  -> every HT name first, then every AT name.
    side_major=False -> HT/AT pair for the first stat, then the next stat, ...
    """
    sides = ('HT', 'AT')
    if side_major:
        return [template.format(side=side, stat=stat) for side in sides for stat in stats]
    return [template.format(side=side, stat=stat) for stat in stats for side in sides]


# Per-match identifiers, result and raw match statistics.
basic_stats = [
    'Date', 'season', 'gameweek', 'match_id', 'HomeTeam', 'AwayTeam',
    'FTHG', 'FTAG', 'FTR', 'Referee',
    'HS', 'AS', 'HST', 'AST', 'HC', 'AC', 'HF', 'AF', 'HY', 'AY', 'HR', 'AR',
    'home_possession', 'away_possession',
    'home_accurate_passes', 'home_accurate_passes_pct',
    'away_accurate_passes', 'away_accurate_passes_pct',
    'home_successful_dribbles', 'home_successful_dribbles_pct',
    'away_successful_dribbles', 'away_successful_dribbles_pct',
    'home_tackles_won', 'home_tackles_won_pct',
    'away_tackles_won', 'away_tackles_won_pct',
    'home_expected_goals_xg', 'away_expected_goals_xg',
    'home_passes', 'away_passes',
    'home_interceptions', 'away_interceptions',
    'home_keeper_saves', 'away_keeper_saves',
    'home_duels_won', 'away_duels_won',
]

# "<side>_<stat>_L5" team columns, listed as HT/AT pairs per stat.
rolling_features = _home_away(
    '{side}_{stat}_L5',
    ['AvgGF', 'AvgGA', 'AvgShots', 'ShotAccuracy', 'ShotConversion', 'CS', 'WinRate'],
    side_major=False,
)

# "<side>_GK_L5_Avg_<stat>" columns: all HT names, then all AT names.
gk_specific = _home_away(
    '{side}_GK_L5_Avg_{stat}',
    [
        'gk_accurate_passes', 'gk_accurate_long_balls', 'saves', 'saves_inside_box',
        'goals_conceded', 'team_goals_conceded', 'xgot_faced', 'goals_prevented',
        'sweeper_actions', 'high_claim',
    ],
)

# "<side>_DEF_L5_Avg_<stat>" columns: all HT names, then all AT names.
def_specific = _home_away(
    '{side}_DEF_L5_Avg_{stat}',
    [
        'xg', 'xa', 'accurate_passes', 'accurate_long_balls', 'final_third_passes',
        'tackles_won', 'interceptions', 'recoveries', 'blocks', 'clearances',
        'headed_clearances', 'dribbled_past', 'duels_won', 'ground_duels_won',
        'aerial_duels_won', 'was_fouled', 'fouls_committed', 'tackles_won_percentage',
    ],
)

# "<side>_MID_L5_Avg_<stat>" columns: all HT names, then all AT names.
mid_specific = _home_away(
    '{side}_MID_L5_Avg_{stat}',
    [
        'goals', 'assists', 'xg', 'xa', 'accurate_passes', 'accurate_crosses',
        'accurate_long_balls', 'final_third_passes', 'total_shots', 'shots_on_target',
        'chances_created', 'touches', 'successful_dribbles', 'corners',
        'penalties_scored', 'penalties_missed', 'tackles_won', 'interceptions',
        'recoveries', 'blocks', 'clearances', 'dribbled_past', 'duels_won',
        'ground_duels_won', 'aerial_duels_won', 'was_fouled', 'fouls_committed',
    ],
)

# "<side>_FWD_L5_Avg_<stat>" columns: all HT names, then all AT names.
fwd_specific = _home_away(
    '{side}_FWD_L5_Avg_{stat}',
    [
        'goals', 'assists', 'xg', 'xa', 'xgot', 'accurate_passes', 'final_third_passes',
        'total_shots', 'shots_on_target', 'chances_created', 'big_chances_missed',
        'touches', 'touches_opposition_box', 'successful_dribbles', 'corners',
        'offsides', 'penalties_scored', 'penalties_missed', 'duels_won',
        'ground_duels_won', 'aerial_duels_won', 'was_fouled', 'fouls_committed',
    ],
)

# Narrow the working frame to the basic stats followed by the rolling features.
final_master_data = final_master_data.loc[:, [*basic_stats, *rolling_features]]

In [43]:
# First non-null HT_elo per HomeTeam among the season-2024 rows, highest first.
(
    teams_matches
    .loc[teams_matches['season'].eq(2024)]
    .groupby('HomeTeam')
    .first()
    [['HT_elo']]
    .sort_values('HT_elo', ascending=False)
)

Unnamed: 0_level_0,HT_elo
HomeTeam,Unnamed: 1_level_1
Arsenal,1991
Liverpool,1991
Man City,1958
Chelsea,1892
Aston Villa,1870
Newcastle,1866
Crystal Palace,1833
Brighton,1825
Brentford,1809
Bournemouth,1806


In [44]:
# dtype of the HT_elo column
teams_matches['HT_elo'].dtype

dtype('int64')

In [50]:
# Load the transformed master data and check whether it has an HT_elo column.
# NOTE: joblib.load unpickles its input -- only load trusted files.
tr = joblib.load('master_data_transformed.pkl')
'HT_elo' in tr.columns

True

In [49]:
# Is HT_elo one of the columns of the transformed frame?
'HT_elo' in tr.columns

True

## 2. Working with Players + Match Data (2024 & 2025)

### i) For each match, we will only showcase the players who have either scored or assisted. Any other player's data will simply be ignored, dropped from the dataset, and not used for our feature.

In [31]:
# Keep only the player-match rows with at least one goal or one assist.
scored_or_assisted_24 = pms_24['goals'].gt(0) | pms_24['assists'].gt(0)
pms_24_ga = pms_24.loc[scored_or_assisted_24].reset_index(drop=True)

# Columns of the filtered frame that contain no NaN at all, as {column: 0}.
null_counts_24 = pms_24_ga.isnull().sum()
pms_24_cols_dict = dict(null_counts_24[null_counts_24 == 0])
usable_pms_24_cols = list(pms_24_cols_dict)

# Final players + match frame for the analysis; 'Game Week' (if present)
# is renamed to 'gameweek'.
pms_24_ga = pms_24_ga[usable_pms_24_cols].rename(columns={'Game Week': 'gameweek'}, errors='ignore')
pms_24_ga_cols = list(pms_24_ga.columns)
pms_24_ga_shape = tuple(pms_24_ga.shape)
print(f"Final Shape of Players + Match Data (2024): {pms_24_ga_shape}")

Final Shape of Players + Match Data (2024): (1627, 55)


In [32]:
# Keep only the 2025 player-match rows with at least one goal or one assist.
pms_25_ga = pms_25[(pms_25['goals'] > 0) | (pms_25['assists'] > 0)].reset_index(drop=True)

# Columns of the filtered frame that contain no NaN at all, as {column: 0}.
# The null counts are computed once instead of twice.
null_counts_25 = pms_25_ga.isnull().sum()
pms_25_cols_dict = dict(null_counts_25[null_counts_25 == 0])
usable_pms_25_cols = list(pms_25_cols_dict)  # NaN-free feature names

# Final players + match frame for the analysis; 'Game Week' (if present)
# is renamed to 'gameweek'.
pms_25_ga = pms_25_ga[usable_pms_25_cols]
pms_25_ga = pms_25_ga.rename(columns={'Game Week': 'gameweek'}, errors='ignore')
pms_25_ga_cols = list(pms_25_ga.columns)
pms_25_ga_shape = tuple(pms_25_ga.shape)

# The label previously said "(2024)" although this frame is the 2025 data.
print(f"Final Shape of Players + Match Data (2025): {pms_25_ga_shape}")

Final Shape of Players + Match Data (2024): (815, 63)


In [33]:
# Teams + matches rows in ascending Date order, with a fresh 0..n-1 index.
final_teams_matches = teams_matches.sort_values('Date').reset_index(drop=True)

## 3. Working with the Players Data (2024 & 2025)

### i) We will merge the Players + Match Data & Teams Data with the Players Data, to include the names of the players along with their team codes, team names and their position as the current data lacks all these features

In [34]:
# Expose the team key as 'code' in both player tables (a missing 'team_code'
# column is silently ignored).
players_24, players_25 = (
    frame.rename(columns={'team_code': 'code'}, errors='ignore')
    for frame in (players_24, players_25)
)

In [35]:
# 2024 -- player details are joined on player_id alone (validate='m:1'
# raises if players_24 repeats a player_id), then the season is stamped and
# the team name is looked up through 'code'.
players_matches_24 = (
    pms_24_ga
    .merge(
        players_24[['player_id', 'first_name', 'second_name', 'position', 'code']],
        on='player_id',
        how='left',
        validate='m:1',
    )
    .assign(season=2024)
    .merge(teams_24[['code', 'name']], on='code', how='left')
)

# 2025 -- player details are joined on (player_id, gameweek), so the players
# table needs its 'Game Week' column renamed to 'gameweek' first.
players_25 = players_25.rename(columns={'Game Week': 'gameweek'})
players_matches_25 = (
    pms_25_ga
    .merge(
        players_25[['player_id', 'gameweek', 'first_name', 'second_name', 'position', 'code']],
        on=['player_id', 'gameweek'],
        how='left',
        validate='m:1',
    )
    .assign(season=2025)
    .merge(teams_25[['code', 'name']], on='code', how='left')
)

### ii) Keeping only the features which do not contain more than 60% of their values as 0.0

In [36]:
def _columns_below_zero_share(frame, max_zero_share=0.6):
    """Return the columns of `frame` whose number of 0.0 values is strictly
    below `max_zero_share` times the number of rows, in column order."""
    zero_counts = dict(frame.eq(0.0).sum())
    row_limit = frame.shape[0] * max_zero_share
    return [column for column, n_zero in zero_counts.items() if n_zero < row_limit]


players_matches_24_cols = _columns_below_zero_share(players_matches_24)
players_matches_25_cols = _columns_below_zero_share(players_matches_25)

players_matches_24_final = players_matches_24[players_matches_24_cols]
players_matches_25_final = players_matches_25[players_matches_25_cols]

print(f"Original Shape of Players + Match (2024) Data: {players_matches_24.shape}")
print(f"New Shape of Players + Match (2024) Data: {players_matches_24_final.shape}\n")
print(f"Original Shape of Players + Match (2025) Data: {players_matches_25.shape}")
print(f"New Shape of Players + Match (2025) Data: {players_matches_25_final.shape}")

Original Shape of Players + Match (2024) Data: (1627, 61)
New Shape of Players + Match (2024) Data: (1627, 43)

Original Shape of Players + Match (2025) Data: (815, 69)
New Shape of Players + Match (2025) Data: (815, 36)


## Working with the Data according to the User

In [42]:
# Last 15 rows in which Arsenal is either the home or the away team.
final_master_data.loc[
    lambda matches: matches[['HomeTeam', 'AwayTeam']].eq('Arsenal').any(axis=1)
].tail(15)

Unnamed: 0,Date,season,gameweek,match_id,HomeTeam,AwayTeam,FTHG,FTAG,FTR,Referee,...,HT_AvgShots_L5,AT_AvgShots_L5,HT_ShotAccuracy_L5,AT_ShotAccuracy_L5,HT_ShotConversion_L5,AT_ShotConversion_L5,HT_CS_L5,AT_CS_L5,HT_WinRate_L5,AT_WinRate_L5
442,2025-10-04,2025,7,25-26-prem-arsenal-vs-west-ham-united,Arsenal,West Ham,2.0,0.0,H,J Brooks,...,15.310585,10.818975,0.29025,0.406001,0.117135,0.112327,0.376934,0.152254,0.630324,0.232961
450,2025-10-18,2025,8,25-26-prem-fulham-vs-arsenal,Fulham,Arsenal,0.0,1.0,A,A Taylor,...,11.344429,16.254194,0.334179,0.2816,0.108364,0.113503,0.122131,0.480271,0.300559,0.691636
465,2025-10-26,2025,9,25-26-prem-arsenal-vs-crystal-palace,Arsenal,Crystal Palace,1.0,0.0,H,T Bramall,...,16.21404,14.227871,0.286481,0.424609,0.105446,0.131433,0.562372,0.27565,0.740347,0.359151
474,2025-11-01,2025,10,25-26-prem-burnley-vs-arsenal,Burnley,Arsenal,0.0,2.0,A,C Kavanagh,...,8.28273,15.296372,0.42129,0.288477,0.216169,0.104642,0.215512,0.626999,0.38254,0.778692
480,2025-11-08,2025,11,25-26-prem-sunderland-vs-arsenal,Sunderland,Arsenal,2.0,2.0,D,C Pawson,...,10.687927,14.830744,0.272953,0.341898,0.115497,0.113403,0.340365,0.679687,0.476774,0.809953
498,2025-11-23,2025,12,25-26-prem-arsenal-vs-tottenham-hotspur,Arsenal,Tottenham,4.0,1.0,H,M Oliver,...,15.158479,8.858905,0.352454,0.368643,0.114045,0.167789,0.576999,0.24797,0.687584,0.344483
507,2025-11-30,2025,13,25-26-prem-chelsea-vs-arsenal,Chelsea,Arsenal,1.0,1.0,D,A Taylor,...,14.765153,15.423737,0.393738,0.36947,0.13212,0.13151,0.590933,0.493887,0.682526,0.732585
513,2025-12-03,2025,14,25-26-prem-arsenal-vs-brentford,Arsenal,Brentford,2.0,0.0,H,T Harrington,...,14.450627,11.701985,0.38658,0.399953,0.130656,0.140647,0.429148,0.122549,0.636557,0.509797
526,2025-12-06,2025,15,25-26-prem-aston-villa-vs-arsenal,Aston Villa,Arsenal,2.0,1.0,H,P Bankes,...,12.427314,14.516942,0.387223,0.396247,0.13982,0.130979,0.371725,0.498055,0.719487,0.680428
533,2025-12-13,2025,16,25-26-prem-arsenal-vs-wolverhampton-wanderers,Arsenal,Wolves,2.0,1.0,H,R Jones,...,14.574192,8.6989,0.420396,0.28392,0.123357,0.050668,0.439027,0.013417,0.599786,0.021932


In [65]:
def _read_int(prompt):
    """Prompt once and return the reply as an int, or None if it is not a whole number."""
    try:
        return int(input(prompt))
    except ValueError:
        return None


# Last selectable matchday per season, with the message shown when the
# entered matchday is outside 1..last.
matchday_limits = {
    2024: (38, "Please enter a valid matchday number between 1 and 38."),
    2025: (17, "Please enter a valid matchday number between 1 and 17 (current matchday)"),
}
display_columns = ['season', 'gameweek', 'HomeTeam', 'AwayTeam', 'FTHG', 'FTAG', 'match_id']

# Reset everything this cell produces so that invalid input cannot fall
# through to values left in the kernel by an earlier run (previously an
# invalid year/matchday left these names undefined or stale).
gw = None
display_matches = None
hometeams, awayteams = [], []

season = _read_int("Enter Year: ")
if season not in matchday_limits:
    print("Please enter a valid year (2024 or 2025)")
else:
    last_matchday, invalid_matchday_msg = matchday_limits[season]
    gw = _read_int("Enter Matchday No: ")
    if gw is not None and 1 <= gw <= last_matchday:
        # Fixtures of the chosen season and matchday, re-indexed from 0.
        display_matches = final_master_data[
            (final_master_data['season'] == season) & (final_master_data['gameweek'] == gw)
        ][display_columns].reset_index(drop=True)
        hometeams = list(display_matches['HomeTeam'].unique())
        awayteams = list(display_matches['AwayTeam'].unique())
    else:
        print(invalid_matchday_msg)

# Shown as the cell output; nothing is rendered when the input was invalid.
display_matches

Enter Year:  2025
Enter Matchday No:  7


Unnamed: 0,season,gameweek,HomeTeam,AwayTeam,FTHG,FTAG,match_id
0,2025,7,Bournemouth,Fulham,3.0,1.0,25-26-prem-afc-bournemouth-vs-fulham
1,2025,7,Leeds,Tottenham,1.0,2.0,25-26-prem-leeds-united-vs-tottenham-hotspur
2,2025,7,Arsenal,West Ham,2.0,0.0,25-26-prem-arsenal-vs-west-ham-united
3,2025,7,Man United,Sunderland,2.0,0.0,25-26-prem-manchester-united-vs-sunderland
4,2025,7,Chelsea,Liverpool,2.0,1.0,25-26-prem-chelsea-vs-liverpool
5,2025,7,Aston Villa,Burnley,2.0,1.0,25-26-prem-aston-villa-vs-burnley
6,2025,7,Everton,Crystal Palace,2.0,1.0,25-26-prem-everton-vs-crystal-palace
7,2025,7,Newcastle,Nott'm Forest,2.0,0.0,25-26-prem-newcastle-united-vs-nottingham-forest
8,2025,7,Wolves,Brighton,1.0,1.0,25-26-prem-wolverhampton-wanderers-vs-brighton...
9,2025,7,Brentford,Man City,0.0,1.0,25-26-prem-brentford-vs-manchester-city


In [66]:
# Ask for the fixture's two teams and check each against the matchday's
# fixture list. A name that is missing from the expected side is only
# reported as playing on the other side when it really is listed there;
# any other name (typo, team not listed) gets its own message instead of
# the previous, incorrect "was the away/home team" claim.
hometeam = input("Enter Home Team: ").strip()
if hometeam not in hometeams:
    if hometeam in awayteams:
        print(f"{hometeam} was the away team in matchday {gw}.")
    else:
        print(f"{hometeam} is not one of the teams listed for matchday {gw}.")

awayteam = input("Enter Away Team: ").strip()
if awayteam not in awayteams:
    if awayteam in hometeams:
        print(f"{awayteam} was the home team in matchday {gw}.")
    else:
        print(f"{awayteam} is not one of the teams listed for matchday {gw}.")

Enter Home Team:  Brentford
Enter Away Team:  Man City


In [68]:
print("1. Basic Information")
print("2. Rolling Features")
choice = int(input("Enter your choice: "))

# Row(s) of the selected fixture; filtered once and reused by both choices.
selected_match = final_master_data[
    (final_master_data['season'] == season) &
    (final_master_data['gameweek'] == gw) &
    (final_master_data['HomeTeam'] == hometeam) &
    (final_master_data['AwayTeam'] == awayteam)
]

if selected_match.empty:
    # Previously this case raised IndexError on .iloc[0] (choice 1) or fell
    # through to a matchid left over from an earlier run (choice 2).
    matchid = None
    print(f"No match found for {hometeam} vs {awayteam} in matchday {gw} of {season}.")
else:
    # matchid is derived for every choice: the player table below needs it,
    # and it used to be assigned only when choice == 1.
    matchid = selected_match['match_id'].iloc[0]

    if choice == 1:
        match_stats_basic = selected_match[basic_stats]
        display(match_stats_basic)
        print(f"Match Id: {matchid}")
    elif choice == 2:
        match_stats_l5 = selected_match[rolling_features]
        display(match_stats_l5)
    else:
        print("Please enter a valid choice (1 or 2).")

    # Goal/assist contributors of the selected match, from the table of the
    # matching season (any season other than 2024 uses the 2025 table).
    if season == 2024:
        pms_24_to_display = players_matches_24_final[(players_matches_24_final['match_id'] == matchid)]
        display(pms_24_to_display)
    else:
        pms_25_to_display = players_matches_25_final[(players_matches_25_final['match_id'] == matchid)]
        display(pms_25_to_display)

1. Basic Information
2. Rolling Features


Enter your choice:  2


Unnamed: 0,HT_AvgGF_L5,AT_AvgGF_L5,HT_AvgGA_L5,AT_AvgGA_L5,HT_AvgShots_L5,AT_AvgShots_L5,HT_ShotAccuracy_L5,AT_ShotAccuracy_L5,HT_ShotConversion_L5,AT_ShotConversion_L5,HT_CS_L5,AT_CS_L5,HT_WinRate_L5,AT_WinRate_L5
449,1.616537,2.301246,1.680312,0.895787,9.464707,13.793887,0.490594,0.389434,0.176434,0.162957,0.155631,0.37985,0.355331,0.569836


Unnamed: 0,player_id,match_id,minutes_played,goals,assists,total_shots,xg,xa,shots_on_target,successful_dribbles,...,tackles,finish_min,team_goals_conceded,gameweek,first_name,second_name,position,code,season,name
242,403,25-26-prem-brentford-vs-manchester-city,90,0,1,1,0.06,0.04,1,0,...,1,90,0,7,Joško,Gvardiol,Defender,43,2025,Man City
243,430,25-26-prem-brentford-vs-manchester-city,90,1,0,1,0.29,0.01,1,0,...,0,90,0,7,Erling,Haaland,Forward,43,2025,Man City
