In [47]:
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.svm import SVC
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score, f1_score

In [76]:
# Importing Data File
df_matches = pd.read_csv('epl-training.csv')
df_managers = pd.read_excel('PremierLeagueManagers.xlsx')
df_spending = pd.read_excel('Spending_data.xlsx')
# Ensure the match date is in datetime format
df_matches['Date'] = pd.to_datetime(df_matches['Date'],dayfirst = True)

# Sort the df_matches dataframe by ascending date order
df_matches = df_matches.sort_values(by='Date', ascending=True).reset_index(drop=True)

# Drop any rows where all the values are nan
df_matches = df_matches.dropna()

# Ensure Season_Start and Season_End are in datetime format
df_managers['Season_Start'] = pd.to_datetime(df_managers['Season_Start'],dayfirst = True)
df_managers['Season_End'] = pd.to_datetime(df_managers['Season_End'],dayfirst = True)
# Ensure Season_Start and Season_End are in datetime format
df_spending['Season_Start'] = pd.to_datetime(df_spending['Season_Start'],dayfirst = True)
df_spending['Season_End'] = pd.to_datetime(df_spending['Season_End'],dayfirst = True)
#Showing What Each Training Dataset Looks Like
display(df_matches)
display(df_managers)
display(df_spending)

df_matches.dtypes

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Referee,...,HST,AST,HC,AC,HF,AF,HY,AY,HR,AR
0,2000-08-19,Charlton,Man City,4.0,0.0,H,2.0,0.0,H,Rob Harris,...,14.0,4.0,6.0,6.0,13.0,12.0,1.0,2.0,0.0,0.0
1,2000-08-19,Chelsea,West Ham,4.0,2.0,H,1.0,0.0,H,Graham Barber,...,10.0,5.0,7.0,7.0,19.0,14.0,1.0,2.0,0.0,0.0
2,2000-08-19,Coventry,Middlesbrough,1.0,3.0,A,1.0,1.0,D,Barry Knight,...,3.0,9.0,8.0,4.0,15.0,21.0,5.0,3.0,1.0,0.0
3,2000-08-19,Derby,Southampton,2.0,2.0,D,1.0,2.0,A,Andy D'Urso,...,4.0,6.0,5.0,8.0,11.0,13.0,1.0,1.0,0.0,0.0
4,2000-08-19,Leeds,Everton,2.0,0.0,H,2.0,0.0,H,Dermot Gallagher,...,8.0,6.0,6.0,4.0,21.0,20.0,1.0,3.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9215,2024-05-19,Brighton,Man United,0.0,2.0,A,0.0,0.0,D,C Pawson,...,3.0,4.0,10.0,9.0,7.0,5.0,1.0,3.0,0.0,0.0
9216,2024-05-19,Brentford,Newcastle,2.0,4.0,A,0.0,3.0,A,S Hooper,...,5.0,7.0,15.0,11.0,3.0,0.0,4.0,4.0,0.0,0.0
9217,2024-05-19,Arsenal,Everton,2.0,1.0,H,1.0,1.0,D,M Oliver,...,5.0,2.0,8.0,11.0,8.0,1.0,4.0,3.0,0.0,0.0
9218,2024-05-19,Burnley,Nott'm Forest,1.0,2.0,A,0.0,2.0,A,G Scott,...,3.0,6.0,11.0,5.0,4.0,3.0,1.0,0.0,0.0,0.0


Unnamed: 0,Season_Start,Season_End,Manager,Club,Manager_Nationality
0,2024-08-16,2025-05-25,Mikel Arteta,Arsenal,Spain
1,2024-08-16,2025-05-25,Unai Emery,Aston Villa,Spain
2,2024-08-16,2025-05-25,Andoni Iraola,Bournemouth,Spain
3,2024-08-16,2025-05-25,Thomas Frank,Brentford,Denmark
4,2024-08-16,2025-05-25,Fabian Hürzeler,Brighton,Germany
...,...,...,...,...,...
696,2000-08-19,2001-05-19,David Pleat,Tottenham,England
697,2000-08-19,2001-05-19,George Graham,Tottenham,Scotland
698,2000-08-19,2001-05-19,Glenn Hoddle,Tottenham,England
699,2000-08-19,2001-05-19,Glenn Roeder,West Ham,England


Unnamed: 0,Team,Expenditure,Season_Start,Season_End
0,Brighton,244.20,2024-08-16,2025-05-25
1,Chelsea,238.50,2024-08-16,2025-05-25
2,Man United,214.50,2024-08-16,2025-05-25
3,Aston Villa,176.20,2024-08-16,2025-05-25
4,Tottenham,148.85,2024-08-16,2025-05-25
...,...,...,...,...
495,Man United,11.70,2000-08-19,2001-05-19
496,Leicester,11.70,2000-08-19,2001-05-19
497,Ipswich,9.90,2000-08-19,2001-05-19
498,Bradford,6.61,2000-08-19,2001-05-19


Date        datetime64[ns]
HomeTeam            object
AwayTeam            object
FTHG               float64
FTAG               float64
FTR                 object
HTHG               float64
HTAG               float64
HTR                 object
Referee             object
HS                 float64
AS                 float64
HST                float64
AST                float64
HC                 float64
AC                 float64
HF                 float64
AF                 float64
HY                 float64
AY                 float64
HR                 float64
AR                 float64
dtype: object

In [77]:
# Function to get the manager for a specific team and match date
def get_spending(team, match_date):
    spending_row = df_spending[
        (df_spending['Team'] == team) &
        (df_spending['Season_Start'] <= match_date) &
        (df_spending['Season_End'] >= match_date)
    ]
    return spending_row['Expenditure'].iloc[0] if not spending_row.empty else None

# Add HomeManager and AwayManager columns to df_matches
df_matches['HomeSpending'] = df_matches.apply(lambda row: get_spending(row['HomeTeam'], row['Date']), axis=1)
df_matches['AwaySpending'] = df_matches.apply(lambda row: get_spending(row['AwayTeam'], row['Date']), axis=1)

In [78]:
# Calculating Rolling Average Statistics
k = 2 # Number of matches to look backwards to.

# Function to Calculate Rolling Average Statistics for the Past k Matches. Each row's kAvg does not include the current results.
def generate_kAvg(df, attribute, HomeTeam=True):
    if HomeTeam:
        group_by_team = "HomeTeam"
    elif not HomeTeam:
        group_by_team = "AwayTeam"
    
    kattribute = "k" + attribute
    df[kattribute] = (
    df.groupby(group_by_team)[attribute]     # Group by HomeTeam or AwayTeam
    .transform(lambda x: x.shift(1).rolling(window=k, min_periods=1).mean())  # Shift by 1 to exclude the current match, and create a rolling window of up to k past matches
    )
    df[kattribute] = df[kattribute].fillna(0) # Filling nan values with 0

Home_kAvg_features = ['FTHG','HTHG','HS','HC','HF','HY','HR']
Away_kAvg_features = ['FTAG','HTAG','AS','AC','AF','AY','AR']

for feature in Home_kAvg_features:
    generate_kAvg(df_matches, feature,True)
for feature in Away_kAvg_features:
    generate_kAvg(df_matches, feature,False)
    
display(df_matches)

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Referee,...,kHF,kHY,kHR,kFTAG,kHTAG,kAS,kAC,kAF,kAY,kAR
0,2000-08-19,Charlton,Man City,4.0,0.0,H,2.0,0.0,H,Rob Harris,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,2000-08-19,Chelsea,West Ham,4.0,2.0,H,1.0,0.0,H,Graham Barber,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,2000-08-19,Coventry,Middlesbrough,1.0,3.0,A,1.0,1.0,D,Barry Knight,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,2000-08-19,Derby,Southampton,2.0,2.0,D,1.0,2.0,A,Andy D'Urso,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,2000-08-19,Leeds,Everton,2.0,0.0,H,2.0,0.0,H,Dermot Gallagher,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9215,2024-05-19,Brighton,Man United,0.0,2.0,A,0.0,0.0,D,C Pawson,...,5.5,2.0,0.0,1.0,0.5,7.5,11.0,2.5,0.5,0.0
9216,2024-05-19,Brentford,Newcastle,2.0,4.0,A,0.0,3.0,A,S Hooper,...,5.0,2.0,0.0,3.0,1.5,22.0,7.5,10.0,2.5,0.0
9217,2024-05-19,Arsenal,Everton,2.0,1.0,H,1.0,1.0,D,M Oliver,...,5.0,2.0,0.0,0.5,0.5,10.0,16.5,7.5,3.5,0.0
9218,2024-05-19,Burnley,Nott'm Forest,1.0,2.0,A,0.0,2.0,A,G Scott,...,4.0,1.5,0.0,1.5,0.5,11.5,8.0,3.5,1.5,0.0


In [79]:
# Function to get the manager for a specific team and match date
def get_manager(team, match_date):
    manager_row = df_managers[
        (df_managers['Club'] == team) &
        (df_managers['Season_Start'] <= match_date) &
        (df_managers['Season_End'] >= match_date)
    ]
    return manager_row['Manager'].iloc[0] if not manager_row.empty else None

# Add HomeManager and AwayManager columns to df_matches
df_matches['HomeManager'] = df_matches.apply(lambda row: get_manager(row['HomeTeam'], row['Date']), axis=1)
df_matches['AwayManager'] = df_matches.apply(lambda row: get_manager(row['AwayTeam'], row['Date']), axis=1)

In [80]:
# Create a new column to store home wins and away wins
df_matches['HomeWin'] = (df_matches['FTR'] == 'H').astype(int)
df_matches['AwayWin'] = (df_matches['FTR'] == 'A').astype(int)

# Calculate rolling win rate for home games
df_matches['HomeWinRate'] = (
    df_matches.groupby('HomeTeam')['HomeWin']    # Group by HomeTeam
    .cumsum()                                    # Cumulative sum of home wins
    / df_matches.groupby('HomeTeam').cumcount()  # Divide by cumulative games played
    .add(1)                                      # To avoid division by zero
)

# Calculate rolling win rate for away games
df_matches['AwayWinRate'] = (
    df_matches.groupby('AwayTeam')['AwayWin']    # Group by AwayTeam
    .cumsum()                                    # Cumulative sum of away wins
    / df_matches.groupby('AwayTeam').cumcount()  # Divide by cumulative games played
    .add(1)                                      # To avoid division by zero
)

# Display relevant columns
display(df_matches)

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Referee,...,kAC,kAF,kAY,kAR,HomeManager,AwayManager,HomeWin,AwayWin,HomeWinRate,AwayWinRate
0,2000-08-19,Charlton,Man City,4.0,0.0,H,2.0,0.0,H,Rob Harris,...,0.0,0.0,0.0,0.0,Alan Curbishley,Joe Royle,1,0,1.000000,0.000000
1,2000-08-19,Chelsea,West Ham,4.0,2.0,H,1.0,0.0,H,Graham Barber,...,0.0,0.0,0.0,0.0,Claudio Ranieri,Glenn Roeder,1,0,1.000000,0.000000
2,2000-08-19,Coventry,Middlesbrough,1.0,3.0,A,1.0,1.0,D,Barry Knight,...,0.0,0.0,0.0,0.0,Gordon Strachan,Terry Venables,0,1,0.000000,1.000000
3,2000-08-19,Derby,Southampton,2.0,2.0,D,1.0,2.0,A,Andy D'Urso,...,0.0,0.0,0.0,0.0,Jim Smith,Stuart Gray,0,0,0.000000,0.000000
4,2000-08-19,Leeds,Everton,2.0,0.0,H,2.0,0.0,H,Dermot Gallagher,...,0.0,0.0,0.0,0.0,David O'Leary,Walter Smith,1,0,1.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9215,2024-05-19,Brighton,Man United,0.0,2.0,A,0.0,0.0,D,C Pawson,...,11.0,2.5,0.5,0.0,Roberto De Zerbi,Erik ten Hag,0,1,0.345324,0.513043
9216,2024-05-19,Brentford,Newcastle,2.0,4.0,A,0.0,3.0,A,S Hooper,...,7.5,10.0,2.5,0.0,Thomas Frank,Eddie Howe,0,1,0.370968,0.248227
9217,2024-05-19,Arsenal,Everton,2.0,1.0,H,1.0,1.0,D,M Oliver,...,16.5,7.5,3.5,0.0,Mikel Arteta,Sean Dyche,1,0,0.679654,0.266811
9218,2024-05-19,Burnley,Nott'm Forest,1.0,2.0,A,0.0,2.0,A,G Scott,...,8.0,3.5,1.5,0.0,Vincent Kompany,Nuno Herlander Simões Espírito Santo,0,1,0.306818,0.136364


In [81]:
# Number of matches to look back
n = 5

# Precompute rolling averages for all attributes
def compute_pairwise_rolling(data, attributes, n):
    for attribute in attributes:
        # Group by HomeTeam and AwayTeam
        grouped = data.groupby(['HomeTeam', 'AwayTeam'])[attribute]
        # Calculate rolling averages and align the index
        rolling_avg = grouped.apply(lambda x: x.shift(1).rolling(window=n, min_periods=1).mean())
        # Align the new column with the original DataFrame's index
        data[f"n{attribute}_Pairwise"] = rolling_avg.values
    return data

# List of attributes to calculate rolling averages for
attributes = ['FTHG', 'HTHG', 'HS', 'HC', 'HF', 'HY', 'HR',
              'FTAG', 'HTAG', 'AS', 'AC', 'AF', 'AY', 'AR']

# Apply the optimized function
df_matches = compute_pairwise_rolling(df_matches, attributes, n)

display(df_matches)

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Referee,...,nHF_Pairwise,nHY_Pairwise,nHR_Pairwise,nFTAG_Pairwise,nHTAG_Pairwise,nAS_Pairwise,nAC_Pairwise,nAF_Pairwise,nAY_Pairwise,nAR_Pairwise
0,2000-08-19,Charlton,Man City,4.0,0.0,H,2.0,0.0,H,Rob Harris,...,,,,,,,,,,
1,2000-08-19,Chelsea,West Ham,4.0,2.0,H,1.0,0.0,H,Graham Barber,...,12.000000,1.000000,0.0,0.00,0.000000,8.000000,4.000000,20.0,4.000000,1.000000
2,2000-08-19,Coventry,Middlesbrough,1.0,3.0,A,1.0,1.0,D,Barry Knight,...,15.000000,2.500000,0.0,1.00,1.000000,8.000000,3.500000,17.0,2.000000,0.500000
3,2000-08-19,Derby,Southampton,2.0,2.0,D,1.0,2.0,A,Andy D'Urso,...,13.666667,2.333333,0.0,1.00,0.666667,9.333333,5.333333,18.0,2.333333,0.333333
4,2000-08-19,Leeds,Everton,2.0,0.0,H,2.0,0.0,H,Dermot Gallagher,...,14.250000,2.500000,0.0,0.75,0.500000,8.500000,5.500000,17.5,2.750000,0.250000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9215,2024-05-19,Brighton,Man United,0.0,2.0,A,0.0,0.0,D,C Pawson,...,8.200000,1.400000,0.0,0.80,0.600000,7.200000,2.600000,9.0,1.800000,0.000000
9216,2024-05-19,Brentford,Newcastle,2.0,4.0,A,0.0,3.0,A,S Hooper,...,8.200000,1.400000,0.0,0.60,0.600000,8.600000,2.600000,8.8,2.000000,0.000000
9217,2024-05-19,Arsenal,Everton,2.0,1.0,H,1.0,1.0,D,M Oliver,...,,,,,,,,,,
9218,2024-05-19,Burnley,Nott'm Forest,1.0,2.0,A,0.0,2.0,A,G Scott,...,14.000000,4.000000,1.0,2.00,0.000000,20.000000,3.000000,16.0,3.000000,1.000000


In [82]:
# Team Names
le_teams = LabelEncoder()

# Using the same label encoder for the home and away teams.
le_teams.fit(pd.concat([df_matches['HomeTeam'],df_matches['AwayTeam']])) 

# Using the label encoder to encode the home and away teams
df_matches['HomeTeam_Encoded'] = le_teams.transform(df_matches['HomeTeam'])
df_matches['AwayTeam_Encoded'] = le_teams.transform(df_matches['AwayTeam'])

# FTR and HTR
le_results = LabelEncoder()
le_results.fit(pd.concat([df_matches['FTR'],df_matches['HTR']]))

# Using the label encoder to encode the FTR and HTR
df_matches['FTR_Encoded'] = le_results.transform(df_matches['FTR'])
df_matches['HTR_Encoded'] = le_results.transform(df_matches['HTR'])

# Referee
le_referee = LabelEncoder()
le_referee.fit(df_matches['Referee'])

df_matches['Referee_Encoded'] = le_referee.transform(df_matches['Referee'])

# Managers
le_managers = LabelEncoder() 
le_managers.fit(pd.concat([df_matches['HomeManager'],df_matches['AwayManager']]))
df_matches['HomeManager_Encoded'] = le_managers.transform(df_matches['HomeManager'])
df_matches['AwayManager_Encoded'] = le_managers.transform(df_matches['AwayManager'])

display(df_matches)

Unnamed: 0,Date,HomeTeam,AwayTeam,FTHG,FTAG,FTR,HTHG,HTAG,HTR,Referee,...,nAF_Pairwise,nAY_Pairwise,nAR_Pairwise,HomeTeam_Encoded,AwayTeam_Encoded,FTR_Encoded,HTR_Encoded,Referee_Encoded,HomeManager_Encoded,AwayManager_Encoded
0,2000-08-19,Charlton,Man City,4.0,0.0,H,2.0,0.0,H,Rob Harris,...,,,,12,26,2,2,143,2,75
1,2000-08-19,Chelsea,West Ham,4.0,2.0,H,1.0,0.0,H,Graham Barber,...,20.0,4.000000,1.000000,13,43,2,2,65,29,58
2,2000-08-19,Coventry,Middlesbrough,1.0,3.0,A,1.0,1.0,D,Barry Knight,...,17.0,2.000000,0.500000,14,28,0,1,19,59,158
3,2000-08-19,Derby,Southampton,2.0,2.0,D,1.0,2.0,A,Andy D'Urso,...,18.0,2.333333,0.333333,16,36,1,0,12,74,154
4,2000-08-19,Leeds,Everton,2.0,0.0,H,2.0,0.0,H,Dermot Gallagher,...,17.5,2.750000,0.250000,22,17,2,2,46,35,168
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9215,2024-05-19,Brighton,Man United,0.0,2.0,A,0.0,0.0,D,C Pawson,...,9.0,1.800000,0.000000,9,27,0,1,24,130,46
9216,2024-05-19,Brentford,Newcastle,2.0,4.0,A,0.0,3.0,A,S Hooper,...,8.8,2.000000,0.000000,8,29,0,0,151,159,43
9217,2024-05-19,Arsenal,Everton,2.0,1.0,H,1.0,1.0,D,M Oliver,...,,,,0,17,2,1,95,103,143
9218,2024-05-19,Burnley,Nott'm Forest,1.0,2.0,A,0.0,2.0,A,G Scott,...,16.0,3.000000,1.000000,10,31,0,0,59,166,108


In [83]:
input_features = ['HomeTeam_Encoded','AwayTeam_Encoded','HomeManager_Encoded',
                 'AwayManager_Encoded','Referee_Encoded','kFTHG', 'kHTHG','kHS', 'kHC', 'kHF', 'kHY', 
                  'kHR', 'kFTAG', 'kHTAG', 'kAS', 'kAC', 'kAF', 'kAY', 'kAR', 
                  'HomeWinRate', 'AwayWinRate', 
                  'nFTHG_Pairwise', 'nHTHG_Pairwise', 'nHS_Pairwise', 'nHC_Pairwise',
                   'nHF_Pairwise', 'nHY_Pairwise', 'nHR_Pairwise', 'nFTAG_Pairwise',
                   'nHTAG_Pairwise', 'nAS_Pairwise', 'nAC_Pairwise', 'nAF_Pairwise',
                   'nAY_Pairwise', 'nAR_Pairwise','HomeSpending','AwaySpending']
df_input = df_matches[input_features]
df_input = df_matches[input_features].fillna(0)
display(df_input)
output_features = ['FTR_Encoded']
df_output = df_matches['FTR_Encoded']
display(df_output)

Unnamed: 0,HomeTeam_Encoded,AwayTeam_Encoded,HomeManager_Encoded,AwayManager_Encoded,Referee_Encoded,kFTHG,kHTHG,kHS,kHC,kHF,...,nHR_Pairwise,nFTAG_Pairwise,nHTAG_Pairwise,nAS_Pairwise,nAC_Pairwise,nAF_Pairwise,nAY_Pairwise,nAR_Pairwise,HomeSpending,AwaySpending
0,12,26,2,75,143,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,20.80,20.28
1,13,43,29,58,65,0.0,0.0,0.0,0.0,0.0,...,0.0,0.00,0.000000,8.000000,4.000000,20.0,4.000000,1.000000,52.39,18.65
2,14,28,59,158,19,0.0,0.0,0.0,0.0,0.0,...,0.0,1.00,1.000000,8.000000,3.500000,17.0,2.000000,0.500000,0.00,0.00
3,16,36,74,154,12,0.0,0.0,0.0,0.0,0.0,...,0.0,1.00,0.666667,9.333333,5.333333,18.0,2.333333,0.333333,13.33,0.00
4,22,17,35,168,46,0.0,0.0,0.0,0.0,0.0,...,0.0,0.75,0.500000,8.500000,5.500000,17.5,2.750000,0.250000,53.15,34.48
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
9215,9,27,130,46,24,1.0,0.0,13.5,13.0,5.5,...,0.0,0.80,0.600000,7.200000,2.600000,9.0,1.800000,0.000000,109.50,0.00
9216,8,29,159,43,151,1.0,0.0,8.0,5.5,5.0,...,0.0,0.60,0.600000,8.600000,2.600000,8.8,2.000000,0.000000,72.35,148.10
9217,0,17,103,143,95,4.0,1.0,26.0,14.0,5.0,...,0.0,0.00,0.000000,0.000000,0.000000,0.0,0.000000,0.000000,235.10,40.50
9218,10,31,166,108,59,1.0,0.0,14.0,13.0,4.0,...,1.0,2.00,0.000000,20.000000,3.000000,16.0,3.000000,1.000000,111.05,127.70


0       2
1       2
2       0
3       1
4       2
       ..
9215    0
9216    0
9217    2
9218    0
9219    0
Name: FTR_Encoded, Length: 9220, dtype: int32

In [84]:
# Features and target
X = df_input
y = df_output

# Train/test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# Scaling
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# SVM
svm_model = SVC(kernel='rbf', C=1.0, gamma='scale', class_weight='balanced')
svm_model.fit(X_train, y_train)
y_pred = svm_model.predict(X_test)

# Predictions
print("Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred, zero_division=0))



Accuracy: 0.48734801183043047
Classification Report:
               precision    recall  f1-score   support

           0       0.46      0.54      0.50       874
           1       0.31      0.35      0.33       745
           2       0.64      0.53      0.58      1424

    accuracy                           0.49      3043
   macro avg       0.47      0.47      0.47      3043
weighted avg       0.51      0.49      0.49      3043



In [89]:
# Fit the LabelEncoder during preprocessing
le_teams = LabelEncoder()
le_teams.fit(pd.concat([df_matches['HomeTeam'], df_matches['AwayTeam']]))

def predict_match(home_team, away_team):
    # Encode the team names
    home_team_encoded = le_teams.transform([home_team])[0]
    away_team_encoded = le_teams.transform([away_team])[0]

    # Scale the features
    match_features_scaled = scaler.transform(df_input)

    # Predict the outcome
    predicted_result = svm_model.predict(match_features_scaled)

    # Map encoded result to human-readable outcome
    result_mapping = {0: "A", 1: "D", 2: "H"}
    return result_mapping[predicted_result[0]]

# Test Prediction
home_team = 'Bournemouth'
away_team = 'Liverpool'
outcome = predict_match(home_team, away_team)
print(f"The predicted outcome for {home_team} vs {away_team} is: {outcome}")



The predicted outcome for Bournemouth vs Liverpool is: H
