In [2]:
import pandas as pd
import numpy as np
import random
from IPython.display import display, HTML
from sklearn.svm import SVR
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [3]:
euro = pd.read_csv('Euro_2024_Matches.csv')
euro.head()

Unnamed: 0,stadium,attendance,home_team,away_team,home_goals,away_goals,Home Expected goals(xG),Home Total shots,Home Shots on target,Home Big chances,...,Away Red cards,Away Tackles won,Away Interceptions,Away Blocks,Away Clearances,Away Keeper saves,Away Duels won,Away Ground duels won,Away Aerial duels won,Away Successful dribbles
0,Fuball Arena Mnchen,65052,Germany,Scotland,5,1,2.15,20,10,1,...,1,8(53%),6,5,22,5,41,34(58%),7(44%),5(56%)
1,Cologne Stadium,41676,Hungary,Switzerland,1,3,1.25,6,2,3,...,0,9(69%),6,1,17,1,41,26(45%),15(48%),5(38%)
2,Olympiastadion Berlin,68844,Spain,Croatia,3,0,2.01,11,5,4,...,0,10(71%),9,3,4,2,47,37(51%),10(34%),10(77%)
3,BVB Stadion Dortmund,60512,Italy,Albania,2,1,1.56,17,5,5,...,0,5(63%),13,6,15,3,26,16(39%),10(63%),2(29%)
4,Volksparkstadion,48117,Poland,Netherlands,1,2,1.32,12,7,2,...,0,11(79%),3,1,21,6,54,38(56%),16(64%),14(70%)


In [4]:
euro.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 51 entries, 0 to 50
Data columns (total 86 columns):
 #   Column                          Non-Null Count  Dtype  
---  ------                          --------------  -----  
 0   stadium                         51 non-null     object 
 1   attendance                      51 non-null     object 
 2   home_team                       51 non-null     object 
 3   away_team                       51 non-null     object 
 4   home_goals                      51 non-null     int64  
 5   away_goals                      51 non-null     int64  
 6   Home Expected goals(xG)         51 non-null     float64
 7   Home Total shots                51 non-null     int64  
 8   Home Shots on target            51 non-null     int64  
 9   Home Big chances                51 non-null     int64  
 10  Home Big chances missed         51 non-null     int64  
 11  Home Accurate passes            51 non-null     object 
 12  Home Fouls committed            51 non

In [5]:
euro['new_column'] = [0]*36 + [1]*(len(euro) - 36)

euro['knockoutstage_winner'] = pd.Series([0] * len(euro), dtype='int64')

# Set the 37th to 50th entries to the specified values
specified_values = [1, 1, 1, 1, 1, 1, -1, -1, 1, -1, 1, 1, 1, 1]
euro.loc[36:49, 'knockoutstage_winner'] = specified_values

# Display the DataFrame to check the new columns
euro.head()

Unnamed: 0,stadium,attendance,home_team,away_team,home_goals,away_goals,Home Expected goals(xG),Home Total shots,Home Shots on target,Home Big chances,...,Away Interceptions,Away Blocks,Away Clearances,Away Keeper saves,Away Duels won,Away Ground duels won,Away Aerial duels won,Away Successful dribbles,new_column,knockoutstage_winner
0,Fuball Arena Mnchen,65052,Germany,Scotland,5,1,2.15,20,10,1,...,6,5,22,5,41,34(58%),7(44%),5(56%),0,0
1,Cologne Stadium,41676,Hungary,Switzerland,1,3,1.25,6,2,3,...,6,1,17,1,41,26(45%),15(48%),5(38%),0,0
2,Olympiastadion Berlin,68844,Spain,Croatia,3,0,2.01,11,5,4,...,9,3,4,2,47,37(51%),10(34%),10(77%),0,0
3,BVB Stadion Dortmund,60512,Italy,Albania,2,1,1.56,17,5,5,...,13,6,15,3,26,16(39%),10(63%),2(29%),0,0
4,Volksparkstadion,48117,Poland,Netherlands,1,2,1.32,12,7,2,...,3,1,21,6,54,38(56%),16(64%),14(70%),0,0


In [6]:
int_columns=euro.select_dtypes(include=['int64']).columns

euro_filtered =euro[int_columns]
columns_list=euro_filtered.columns.tolist()
columns_list=columns_list[:-2]
columns_list

['home_goals',
 'away_goals',
 'Home Total shots',
 'Home Shots on target',
 'Home Big chances',
 'Home Big chances missed',
 'Home Fouls committed',
 'Home Corners',
 'Home Total shots.',
 'Home Shots off target',
 'Home Shots on target.',
 'Home Blocked shots',
 'Home Hit woodwork',
 'Home Shots inside box',
 'Home Shots outside box',
 'Home Passes',
 'Home Own half',
 'Home Opposition half',
 'Home Throws',
 'Home Touches in opposition box',
 'Home Offsides',
 'Home Yellow cards',
 'Home Red cards',
 'Home Interceptions',
 'Home Blocks',
 'Home Clearances',
 'Home Keeper saves',
 'Home Duels won',
 'Away Total shots',
 'Away Shots on target',
 'Away Big chances',
 'Away Big chances missed',
 'Away Fouls committed',
 'Away Corners',
 'Away Total shots.',
 'Away Shots off target',
 'Away Shots on target.',
 'Away Blocked shots',
 'Away Hit woodwork',
 'Away Shots inside box',
 'Away Shots outside box',
 'Away Passes',
 'Away Own half',
 'Away Opposition half',
 'Away Throws',
 'Away T

In [7]:
view= euro[['home_team','away_team','home_goals','away_goals','Home Passes','Away Passes','Home Big chances',
           'Away Big chances']]

In [8]:
def calculate_and_sort(column_home,column_away,new_column_name):
    home=view.groupby('home_team')[column_home].sum()
    away=view.groupby('away_team')[column_away].sum()
    total=home.add(away,fill_value=0).reset_index()
    total.columns=['team',new_column_name]
    sorted_total=total.sort_values(by=new_column_name,ascending=False).reset_index(drop=True)
    return sorted_total

sorted_goals=calculate_and_sort('home_goals','away_goals','total_goals')

sorted_passes=calculate_and_sort('Home Passes','Away Passes','total_passes')

sorted_big_chances=calculate_and_sort('Home Big chances', 'Away Big chances','total_big_chances')

print('Teams sorted by total goals:\n',sorted_goals.to_string(index=False))
print("\nTeams sorted by total passes:\n",sorted_passes.to_string(index=False))
print("\nTeams sorted by total big chances:\n",sorted_big_chances.to_string(index=False))

Teams sorted by total goals:
        team  total_goals
      Spain           15
    Germany           11
Netherlands           10
    Turkiye            8
Switzerland            8
    England            8
    Austria            7
    Georgia            5
   Portugal            5
   Slovakia            4
     France            4
    Romania            4
     Poland            3
    Albania            3
      Italy            3
    Czechia            3
    Croatia            3
    Hungary            2
   Scotland            2
    Denmark            2
   Slovenia            2
    Belgium            2
    Ukraine            2
     Serbia            1

Teams sorted by total passes:
        team  total_passes
    England          4223
      Spain          4037
   Portugal          3599
     France          3195
    Germany          3161
Netherlands          2855
Switzerland          2339
      Italy          2338
    Turkiye          2258
    Denmark          2203
    Belgium          2064
 

In [10]:
import warnings
warnings.filterwarnings("ignore",category=FutureWarning)
euro['result']=euro['home_goals']- euro['away_goals']
euro['result']=euro['result'].apply(lambda x:1 if x>0 else (0 if x==0 else -1))

X=euro[columns_list]
y_home=view['home_goals']
y_away=view['away_goals']

X_train_home, X_test_home, y_train_home, y_test_home = train_test_split(X, y_home, test_size=0.2, random_state=333)
X_train_away, X_test_away, y_train_away, y_test_away = train_test_split(X, y_away, test_size=0.2, random_state=333)

model_home = LinearRegression()
model_home.fit(X_train_home, y_train_home)

model_away = LinearRegression()
model_away.fit(X_train_away, y_train_away)

y_pred_home = model_home.predict(X_test_home)
y_pred_away = model_away.predict(X_test_away)


y_pred_home = y_pred_home.round().astype(int)
y_pred_away = y_pred_away.round().astype(int)

results = X_test_home.copy()
results['home_team'] = view.loc[X_test_home.index, 'home_team']
results['away_team'] = view.loc[X_test_home.index, 'away_team']
results['actual_home_goals'] = y_test_home
results['predicted_home_goals'] = y_pred_home
results['actual_away_goals'] = y_test_away
results['predicted_away_goals'] = y_pred_away
results['new_column'] = euro.loc[X_test_home.index, 'new_column']
results['knockoutstage_winner'] = euro.loc[X_test_home.index, 'knockoutstage_winner']

results['predicted_winner'] = results.apply(
    lambda row: row['home_team'] if row['predicted_home_goals'] > row['predicted_away_goals']
    else (row['away_team'] if row['predicted_home_goals'] < row['predicted_away_goals']
    else (row['home_team'] if row['new_column'] == 1 else row['away_team'])
    if row['new_column'] == 1 else 'Draw'), axis=1)

results['actual_winner'] = results.apply(
    lambda row: row['home_team'] if row['actual_home_goals'] > row['actual_away_goals']
    else (row['away_team'] if row['actual_home_goals'] < row['actual_away_goals']
    else (row['home_team'] if row['knockoutstage_winner'] == 1 else row['away_team'])
    if row['new_column'] == 1 else 'Draw'), axis=1)


def evaluate_performance(mse, r2):
    if mse > 3:
        return 'poor'
    elif mse > 1.5:
        return 'fair'
    elif mse > 0.75:
        return 'good'
    elif mse > 0.3:
        return 'great'
    else:
        return 'excellent'

mse_home = mean_squared_error(y_test_home, y_pred_home)
r2_home = r2_score(y_test_home, y_pred_home)
mse_away = mean_squared_error(y_test_away, y_pred_away)
r2_away = r2_score(y_test_away, y_pred_away)

# Convert MSE to evaluation categories
evaluation_home = evaluate_performance(mse_home, r2_home)
evaluation_away = evaluate_performance(mse_away, r2_away)

# Determine if predictions match actual outcomes or are close for ties
results['prediction_accuracy'] = results.apply(lambda row: 
    'Correct' if row['predicted_winner'] == row['actual_winner']
    else 'Close' if abs(row['predicted_home_goals'] - row['predicted_away_goals'] - row['actual_home_goals'] + row['actual_away_goals']) < 0.001
    else 'Incorrect', axis=1)

# Calculate percentage of correctly predicted winners
results['correct_prediction'] = results['predicted_winner'] == results['actual_winner']
percentage_correct = results['correct_prediction'].mean() * 100

# Create a final results DataFrame
final_results = results[['home_team', 'away_team', 'actual_home_goals', 'predicted_home_goals', 'actual_away_goals', 'predicted_away_goals', 'actual_winner', 'predicted_winner', 'prediction_accuracy']].copy()

# Display final predicted results as a DataFrame with styled output
print("Final Predicted Results:")
styled_final_results = final_results.style.set_table_styles([
    {'selector': 'th',
     'props': [('background-color', 'lightgrey'), ('color', 'black'), ('font-weight', 'bold'), ('text-align', 'center')]},
    {'selector': 'td',
     'props': [('text-align', 'center')]},
    {'selector': 'tr:nth-child(even)',
     'props': [('background-color', 'whitesmoke')]},
    {'selector': 'tr:nth-child(odd)',
     'props': [('background-color', 'white')]},
])

# Display styled results
display(styled_final_results)

# Print evaluation
print("\nEvaluation:")
print(f"Home Goals - Evaluation: {evaluation_home}")
print(f"Away Goals - Evaluation: {evaluation_away}")
print(f"Percentage of Correctly Predicted Winners: {percentage_correct:.2f}%")

Final Predicted Results:


Unnamed: 0,home_team,away_team,actual_home_goals,predicted_home_goals,actual_away_goals,predicted_away_goals,actual_winner,predicted_winner,prediction_accuracy
18,Slovakia,Ukraine,1,1,2,2,Ukraine,Ukraine,Correct
6,Serbia,England,0,0,1,1,England,England,Correct
38,England,Slovakia,2,2,1,2,England,England,Correct
32,Slovakia,Romania,1,1,1,1,Draw,Draw,Correct
11,Portugal,Czechia,2,1,1,1,Portugal,Draw,Incorrect
22,Turkiye,Portugal,0,0,3,3,Portugal,Portugal,Correct
4,Poland,Netherlands,1,1,2,2,Netherlands,Netherlands,Correct
14,Scotland,Switzerland,1,1,1,1,Draw,Draw,Correct
37,Germany,Denmark,2,2,0,0,Germany,Germany,Correct
41,Portugal,Slovenia,0,-1,0,1,Portugal,Slovenia,Incorrect



Evaluation:
Home Goals - Evaluation: excellent
Away Goals - Evaluation: excellent
Percentage of Correctly Predicted Winners: 72.73%


In [11]:
model_home = SVR(kernel='linear')
model_home.fit(X_train_home, y_train_home)

# Create and train the SVM model for away goals
model_away = SVR(kernel='linear')
model_away.fit(X_train_away, y_train_away)

# Make predictions on the test set for home and away goals
y_pred_home = model_home.predict(X_test_home)
y_pred_away = model_away.predict(X_test_away)

# Round predictions to the nearest integer
y_pred_home = y_pred_home.round().astype(int)
y_pred_away = y_pred_away.round().astype(int)

# Combine results into a DataFrame for comparison
results = X_test_home.copy()
results['home_team'] = euro.loc[X_test_home.index, 'home_team']
results['away_team'] = euro.loc[X_test_home.index, 'away_team']
results['actual_home_goals'] = y_test_home
results['predicted_home_goals'] = y_pred_home
results['actual_away_goals'] = y_test_away
results['predicted_away_goals'] = y_pred_away
results['new_column'] = euro.loc[X_test_home.index, 'new_column']
results['knockoutstage_winner'] = euro.loc[X_test_home.index, 'knockoutstage_winner']

# Determine predicted winners, actual winners, and handle draws
results['predicted_winner'] = results.apply(
    lambda row: row['home_team'] if row['predicted_home_goals'] > row['predicted_away_goals']
    else (row['away_team'] if row['predicted_home_goals'] < row['predicted_away_goals']
    else (row['home_team'] if row['new_column'] == 1 else row['away_team'])
    if row['knockoutstage_winner'] == 1 else 'Draw'), axis=1)

results['actual_winner'] = results.apply(
    lambda row: row['home_team'] if row['actual_home_goals'] > row['actual_away_goals']
    else (row['away_team'] if row['actual_home_goals'] < row['actual_away_goals']
    else (row['home_team'] if row['knockoutstage_winner'] == 1 else row['away_team'])
    if row['knockoutstage_winner'] == 1 else 'Draw'), axis=1)

# Evaluate predictions
def evaluate_performance(mse, r2):
    if mse > 3:
        return 'poor'
    elif mse > 1.5:
        return 'fair'
    elif mse > 0.75:
        return 'good'
    elif mse > 0.3:
        return 'great'
    else:
        return 'excellent'

mse_home = mean_squared_error(y_test_home, y_pred_home)
r2_home = r2_score(y_test_home, y_pred_home)
mse_away = mean_squared_error(y_test_away, y_pred_away)
r2_away = r2_score(y_test_away, y_pred_away)

# Convert MSE to evaluation categories
evaluation_home = evaluate_performance(mse_home, r2_home)
evaluation_away = evaluate_performance(mse_away, r2_away)

# Determine if predictions match actual outcomes or are close for ties
results['prediction_accuracy'] = results.apply(lambda row: 
    'Correct' if row['predicted_winner'] == row['actual_winner']
    else 'Close' if abs(row['predicted_home_goals'] - row['predicted_away_goals'] - row['actual_home_goals'] + row['actual_away_goals']) < 0.001
    else 'Incorrect', axis=1)

# Calculate percentage of correctly predicted winners
results['correct_prediction'] = results['predicted_winner'] == results['actual_winner']
percentage_correct = results['correct_prediction'].mean() * 100

# Create a final results DataFrame
final_results = results[['home_team', 'away_team', 'actual_home_goals', 'predicted_home_goals', 'actual_away_goals', 'predicted_away_goals', 'actual_winner', 'predicted_winner', 'prediction_accuracy']].copy()

# Display final predicted results as a DataFrame with styled output
print("Final Predicted Results:")
styled_final_results = final_results.style.set_table_styles([
    {'selector': 'th',
     'props': [('background-color', 'lightgrey'), ('color', 'black'), ('font-weight', 'bold'), ('text-align', 'center')]},
    {'selector': 'td',
     'props': [('text-align', 'center')]},
    {'selector': 'tr:nth-child(even)',
     'props': [('background-color', 'whitesmoke')]},
    {'selector': 'tr:nth-child(odd)',
     'props': [('background-color', 'white')]},
])

# Display styled results
display(styled_final_results)

# Print evaluation
print("\nEvaluation:")
print(f"Home Goals - Evaluation: {evaluation_home}")
print(f"Away Goals - Evaluation: {evaluation_away}")
print(f"Percentage of Correctly Predicted Winners: {percentage_correct:.2f}%")

Final Predicted Results:


Unnamed: 0,home_team,away_team,actual_home_goals,predicted_home_goals,actual_away_goals,predicted_away_goals,actual_winner,predicted_winner,prediction_accuracy
18,Slovakia,Ukraine,1,1,2,3,Ukraine,Ukraine,Correct
6,Serbia,England,0,0,1,0,England,Draw,Incorrect
38,England,Slovakia,2,1,1,1,England,England,Correct
32,Slovakia,Romania,1,1,1,1,Draw,Draw,Correct
11,Portugal,Czechia,2,2,1,2,Portugal,Draw,Incorrect
22,Turkiye,Portugal,0,0,3,3,Portugal,Portugal,Correct
4,Poland,Netherlands,1,1,2,1,Netherlands,Draw,Incorrect
14,Scotland,Switzerland,1,1,1,1,Draw,Draw,Correct
37,Germany,Denmark,2,2,0,0,Germany,Germany,Correct
41,Portugal,Slovenia,0,0,0,1,Portugal,Slovenia,Incorrect



Evaluation:
Home Goals - Evaluation: excellent
Away Goals - Evaluation: great
Percentage of Correctly Predicted Winners: 63.64%


In [12]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Initialize XGBRegressor models for home and away goals
model_home = XGBRegressor(objective='reg:squarederror', random_state=333)
model_away = XGBRegressor(objective='reg:squarederror', random_state=333)

# Train the models
model_home.fit(X_train_home, y_train_home)
model_away.fit(X_train_away, y_train_away)

# Make predictions
y_pred_home = model_home.predict(X_test_home)
y_pred_away = model_away.predict(X_test_away)

# Round predictions to the nearest integer
y_pred_home = y_pred_home.round().astype(int)
y_pred_away = y_pred_away.round().astype(int)

# Calculate MSE thresholds for evaluation categories
def evaluate_performance(mse):
    if mse > 3:
        return 'poor'
    elif mse > 1.5:
        return 'fair'
    elif mse > 0.75:
        return 'good'
    elif mse > 0.3:
        return 'great'
    else:
        return 'excellent'

# Calculate MSE and R^2 for home and away goals
mse_home = mean_squared_error(y_test_home, y_pred_home)
r2_home = r2_score(y_test_home, y_pred_home)
mse_away = mean_squared_error(y_test_away, y_pred_away)
r2_away = r2_score(y_test_away, y_pred_away)

# Evaluate performance using MSE for both home and away goals
evaluation_home = evaluate_performance(mse_home)
evaluation_away = evaluate_performance(mse_away)

# Create DataFrame for results
results = X_test_home.copy().reset_index(drop=True)
results['home_team'] = euro.loc[X_test_home.index, 'home_team'].reset_index(drop=True)
results['away_team'] = euro.loc[X_test_home.index, 'away_team'].reset_index(drop=True)
results['actual_home_goals'] = y_test_home.reset_index(drop=True)
results['predicted_home_goals'] = y_pred_home
results['actual_away_goals'] = y_test_away.reset_index(drop=True)
results['predicted_away_goals'] = y_pred_away

# Determine predicted winners and actual winners
results['predicted_winner'] = results.apply(lambda row: row['home_team'] if row['predicted_home_goals'] > row['predicted_away_goals'] else row['away_team'], axis=1)
results['actual_winner'] = results.apply(lambda row: row['home_team'] if row['actual_home_goals'] > row['actual_away_goals'] else row['away_team'], axis=1)

# Calculate percentage of correctly predicted winners
results['correct_prediction'] = results['predicted_winner'] == results['actual_winner']
percentage_correct = results['correct_prediction'].mean() * 100

# Determine prediction accuracy
results['prediction_accuracy'] = results.apply(lambda row: 
    'Correct' if row['predicted_winner'] == row['actual_winner']
    else 'Incorrect', axis=1)

# Styling the results DataFrame
styled_results = results[['home_team', 'away_team', 'actual_home_goals', 'predicted_home_goals', 'actual_away_goals', 'predicted_away_goals', 'actual_winner', 'predicted_winner', 'prediction_accuracy']].style \
    .set_table_styles([
        {'selector': 'th',
         'props': [('background-color', 'lightgrey'), ('color', 'black'), ('font-weight', 'bold'), ('text-align', 'center')]},
        {'selector': 'td',
         'props': [('text-align', 'center')]},
        {'selector': 'tr:nth-child(even)',
         'props': [('background-color', 'whitesmoke')]},
        {'selector': 'tr:nth-child(odd)',
         'props': [('background-color', 'white')]},
    ]) 

# Display styled results
print("Final Predicted Results:")
display(styled_results)

# Print evaluation
print("\nEvaluation:")
print(f"Home Goals - Evaluation: {evaluation_home}")
print(f"Away Goals - Evaluation: {evaluation_away}")
print(f"Percentage of Correctly Predicted Winners: {percentage_correct:.2f}%")

Final Predicted Results:


Unnamed: 0,home_team,away_team,actual_home_goals,predicted_home_goals,actual_away_goals,predicted_away_goals,actual_winner,predicted_winner,prediction_accuracy
0,Slovakia,Ukraine,1,1,2,2,Ukraine,Ukraine,Correct
1,Serbia,England,0,0,1,1,England,England,Correct
2,England,Slovakia,2,2,1,1,England,England,Correct
3,Slovakia,Romania,1,1,1,1,Romania,Romania,Correct
4,Portugal,Czechia,2,2,1,1,Portugal,Portugal,Correct
5,Turkiye,Portugal,0,0,3,3,Portugal,Portugal,Correct
6,Poland,Netherlands,1,1,2,2,Netherlands,Netherlands,Correct
7,Scotland,Switzerland,1,1,1,1,Switzerland,Switzerland,Correct
8,Germany,Denmark,2,2,0,0,Germany,Germany,Correct
9,Portugal,Slovenia,0,0,0,0,Slovenia,Slovenia,Correct



Evaluation:
Home Goals - Evaluation: excellent
Away Goals - Evaluation: excellent
Percentage of Correctly Predicted Winners: 100.00%


In [13]:
models = {
    'Linear Regression': LinearRegression(),
    'SVR': SVR(kernel='linear'),
    'XGBoost': XGBRegressor(objective='reg:squarederror', random_state=555)
}

results = {}

# Train and evaluate models
for model_name, model in models.items():
    if model_name == 'Linear Regression' or model_name == 'SVR':
        model.fit(X_train_home, y_train_home)
        y_pred_home = model.predict(X_test_home)
        model.fit(X_train_away, y_train_away)
        y_pred_away = model.predict(X_test_away)
    else:
        model.fit(X_train_home, y_train_home)
        y_pred_home = model.predict(X_test_home)
        model.fit(X_train_away, y_train_away)
        y_pred_away = model.predict(X_test_away)
    
    # Round predictions to integers for goal counts
    y_pred_home_rounded = y_pred_home.round().astype(int)
    y_pred_away_rounded = y_pred_away.round().astype(int)
    
    # Count correct predictions
    correct_home = sum((y_pred_home_rounded > y_pred_away_rounded) == (y_test_home > y_test_away))
    correct_away = sum((y_pred_away_rounded > y_pred_home_rounded) == (y_test_away > y_test_home))
    
    # Store results
    results[model_name] = {
        'Home Goals': correct_home,
        'Away Goals': correct_away
    }

# Determine the most effective model
best_model_home = max(results, key=lambda x: results[x]['Home Goals'])
best_model_away = max(results, key=lambda x: results[x]['Away Goals'])

# Display results
for model_name, result in results.items():
    print(f"Model: {model_name}")
    print(f"  Correct Home Goals Predictions: {result['Home Goals']}")
    print(f"  Correct Away Goals Predictions: {result['Away Goals']}")
    print()

print(f"Most effective model for Home Goals: {best_model_home}")
print(f"Most effective model for Away Goals: {best_model_away}")

Model: Linear Regression
  Correct Home Goals Predictions: 9
  Correct Away Goals Predictions: 9

Model: SVR
  Correct Home Goals Predictions: 9
  Correct Away Goals Predictions: 8

Model: XGBoost
  Correct Home Goals Predictions: 11
  Correct Away Goals Predictions: 11

Most effective model for Home Goals: XGBoost
Most effective model for Away Goals: XGBoost


In [14]:
warnings.filterwarnings('ignore')

# Define the features
features = [
    'Home Total shots', 'Home Shots on target', 'Home Big chances', 'Home Big chances missed', 'Home Fouls committed',
    'Home Corners', 'Home Total shots.', 'Home Shots off target', 'Home Shots on target.', 'Home Blocked shots',
    'Home Hit woodwork', 'Home Shots inside box', 'Home Shots outside box', 'Home Passes', 'Home Own half',
    'Home Opposition half', 'Home Throws', 'Home Touches in opposition box', 'Home Offsides', 'Home Yellow cards',
    'Home Red cards', 'Home Interceptions', 'Home Blocks', 'Home Clearances', 'Home Keeper saves', 'Home Duels won',
    'Away Total shots', 'Away Shots on target', 'Away Big chances', 'Away Big chances missed', 'Away Fouls committed',
    'Away Corners', 'Away Total shots.', 'Away Shots off target', 'Away Shots on target.', 'Away Blocked shots',
    'Away Hit woodwork', 'Away Shots inside box', 'Away Shots outside box', 'Away Passes', 'Away Own half',
    'Away Opposition half', 'Away Throws', 'Away Touches in opposition box', 'Away Offsides', 'Away Yellow cards',
    'Away Red cards', 'Away Interceptions', 'Away Blocks', 'Away Clearances', 'Away Keeper saves', 'Away Duels won'
]

# Function to prepare the data for both teams
def prepare_team_data(matches, team_name):
    home_matches = matches[matches['home_team'] == team_name][features + ['home_goals']].rename(columns={'home_goals': 'goals'})
    away_matches = matches[matches['away_team'] == team_name][features + ['away_goals']].rename(columns={'away_goals': 'goals'})
    team_data = pd.concat([home_matches, away_matches], ignore_index=True)
    return team_data

portugal_data = prepare_team_data(euro, 'Portugal')
france_data = prepare_team_data(euro, 'France')

# Combine the data
combined_data = pd.concat([portugal_data, france_data], ignore_index=True)

# Separate features and target variable
X = combined_data[features]
y = combined_data['goals']

# Train Linear Regression models
model_goals = LinearRegression()
model_goals.fit(X, y)

# Predict goals for a hypothetical match between England and Spain
portugal_predict = portugal_data.iloc[0][features].values.reshape(1, -1)
france_predict = france_data.iloc[0][features].values.reshape(1, -1)

predicted_home_goals = model_goals.predict(portugal_predict).astype(int)[0]
predicted_away_goals = model_goals.predict(france_predict).astype(int)[0]

# Determine predicted winner
if predicted_home_goals > predicted_away_goals:
    predicted_winner = 'Portugal'
elif predicted_home_goals < predicted_away_goals:
    predicted_winner = 'France'
else:
    predicted_winner = 'Draw'

# Print or display the predicted goals and winner
print(f"Predicted Home Goals (Portugal): {predicted_home_goals}")
print(f"Predicted Away Goals (France): {predicted_away_goals}")
print(f"Predicted Winner: {predicted_winner}")


Predicted Home Goals (Portugal): 2
Predicted Away Goals (France): 0
Predicted Winner: Portugal


In [16]:
warnings.filterwarnings('ignore')

# Train SVM model
model_goals = SVR()
model_goals.fit(X, y)

# Predict goals for a hypothetical match between England and Spain
portugal_predict = portugal_data.iloc[0][features].values.reshape(1, -1)
france_predict = france_data.iloc[0][features].values.reshape(1, -1)

predicted_home_goals = model_goals.predict(portugal_predict).astype(int)[0]
predicted_away_goals = model_goals.predict(france_predict).astype(int)[0]

# Get feature importances (weights) based on correlation with goals
feature_weights = {}
for feature in features:
    portugal_correlation = abs(portugal_data[feature].corr(portugal_data['goals']))
    france_correlation = abs(france_data[feature].corr(france_data['goals']))
    feature_weights[feature] = portugal_correlation + france_correlation

# Calculate scores based on feature weights
portugal_score = (feature_weights['Home Keeper saves'] +
                 feature_weights['Home Shots on target'] +
                 feature_weights['Home Total shots'] +
                 feature_weights['Home Big chances'] -
                 feature_weights['Home Big chances missed'])

france_score = (feature_weights['Away Keeper saves'] +
               feature_weights['Away Shots on target'] +
               feature_weights['Away Total shots'] +
               feature_weights['Away Big chances'] -
               feature_weights['Away Big chances missed'])

# Determine predicted winner based on scores
if portugal_score > france_score:
    predicted_winner = 'Portugal'
elif portugal_score < france_score:
    predicted_winner = 'France'
else:
    # In case of a draw, make a random choice
    predicted_winner = random.choice(['Portugal', 'France'])

# Print or display the predicted goals and winner
print(f"Predicted Home Goals (Portugal): {predicted_home_goals}")
print(f"Predicted Away Goals (France): {predicted_away_goals}")
print(f"Predicted Winner: {predicted_winner}")

Predicted Home Goals (Portugal): 0
Predicted Away Goals (France): 0
Predicted Winner: France


In [17]:
model_goals = XGBRegressor(objective='reg:squarederror')
model_goals.fit(X, y)

# Predict goals for a hypothetical match between England and Spain
portugal_predict = portugal_data.iloc[0][features].values.reshape(1, -1)
france_predict = france_data.iloc[0][features].values.reshape(1, -1)

predicted_home_goals = model_goals.predict(pd.DataFrame(portugal_predict, columns=features)).astype(int)[0]
predicted_away_goals = model_goals.predict(pd.DataFrame(france_predict, columns=features)).astype(int)[0]

# Determine predicted winner based on goal differential
if predicted_home_goals > predicted_away_goals:
    predicted_winner = 'Portugal'
elif predicted_home_goals < predicted_away_goals:
    predicted_winner = 'France'
else:
    # Calculate scores based on feature weights for each team
    portugal_score = (england_features['keeper_saves'] +
                     england_features['shots_on_target'] +
                     england_features['total_shots'] +
                     england_features['big_chances'] -
                     england_features['big_chances_missed'])

    france_score = (spain_features['keeper_saves'] +
                   spain_features['shots_on_target'] +
                   spain_features['total_shots'] +
                   spain_features['big_chances'] -
                   spain_features['big_chances_missed'])

    # Determine predicted winner based on scores
    if portugal_score > france_score:
        predicted_winner = 'Portugal'
    elif portugal_score < france_score:
        predicted_winner = 'France'
    else:
        # If still tied in weighted score, make a random choice
        predicted_winner = random.choice(['Portugal', 'France'])

# Print or display the predicted goals and winner
print(f"Predicted Home Goals (Portugal): {predicted_home_goals}")
print(f"Predicted Away Goals (France): {predicted_away_goals}")
print(f"Predicted Winner: {predicted_winner}")

Predicted Home Goals (Portugal): 1
Predicted Away Goals (France): 0
Predicted Winner: Portugal
