In [47]:
import os
from util import util
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
from joblib import dump, load
data_folder = os.path.join('.', 'files/data')
leagues = ['E0', 'E1', 'E2', 'E3', 'I1', 'SP1', 'D1', 'F1', 'N1']


## Fetch data

In [18]:
data = util.fetch_data(data_folder, 2005, 2025, leagues)
data.describe()

Fetching data for 0506 E0
Fetching data for 0506 E1
Fetching data for 0506 E2
Fetching data for 0506 E3
Fetching data for 0506 I1
Fetching data for 0506 SP1
Fetching data for 0506 D1
Fetching data for 0506 F1
Fetching data for 0506 N1
Fetching data for 0607 E0
Fetching data for 0607 E1
Fetching data for 0607 E2
Fetching data for 0607 E3
Fetching data for 0607 I1
Fetching data for 0607 SP1
Fetching data for 0607 D1
Fetching data for 0607 F1
Fetching data for 0607 N1
Fetching data for 0708 E0
Fetching data for 0708 E1
Fetching data for 0708 E2
Fetching data for 0708 E3
Fetching data for 0708 I1
Fetching data for 0708 SP1
Fetching data for 0708 D1
Fetching data for 0708 F1
Fetching data for 0708 N1
No data for 0708 N1 https://www.football-data.co.uk/mmz4281/0708/N1.csv
Fetching data for 0809 E0
Fetching data for 0809 E1
Fetching data for 0809 E2
Fetching data for 0809 E3
Fetching data for 0809 I1
Fetching data for 0809 SP1
Fetching data for 0809 D1
Fetching data for 0809 F1
Fetching data 

Unnamed: 0,Date,FTHG,FTAG,HTHG,HTAG,HS,AS,HST,AST,HF,AF,HC,AC,HY,AY,HR,AR
count,72748,72748.0,72748.0,72744.0,72744.0,69364.0,69364.0,69058.0,69058.0,68605.0,68605.0,68989.0,68989.0,69378.0,69379.0,69379.0,69379.0
mean,2015-03-16 18:18:41.152471552,1.50143,1.173833,0.663972,0.513582,13.134724,10.699109,5.150265,4.162892,12.400219,12.896611,5.718955,4.640696,1.637926,1.943268,0.08246,0.111749
min,2005-07-29 00:00:00,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25%,2010-04-13 00:00:00,1.0,0.0,0.0,0.0,10.0,7.0,3.0,2.0,9.0,10.0,4.0,3.0,1.0,1.0,0.0,0.0
50%,2015-02-28 00:00:00,1.0,1.0,0.0,0.0,13.0,10.0,5.0,4.0,12.0,12.0,5.0,4.0,1.0,2.0,0.0,0.0
75%,2019-12-26 00:00:00,2.0,2.0,1.0,1.0,16.0,13.0,7.0,6.0,15.0,16.0,7.0,6.0,2.0,3.0,0.0,0.0
max,2024-12-15 00:00:00,10.0,13.0,7.0,6.0,46.0,45.0,24.0,23.0,48.0,77.0,26.0,21.0,11.0,9.0,3.0,4.0
std,,1.266918,1.125692,0.820266,0.722636,5.033386,4.486435,2.775056,2.441338,4.4624,4.558298,2.945662,2.626113,1.279434,1.354867,0.290411,0.337757


## Prep and Train

In [19]:
def categorize_preds(pred_arr, hw, hl):
    categories = np.where(pred_arr > hw, 1, 
                          np.where((pred_arr <= hw) & (pred_arr > hl), 0, 
                                   -1))
    return np.array(categories)


def categorize_goal_diff(y_test):
    categories = np.where(y_test > 0, 1, 
                        np.where((y_test == 0 ), 0, 
                                -1))
    return np.array(categories)

def remove_uncertain(predictions, targets):
    # Create a boolean mask where predictions is not 0
    mask = predictions != 0
    
    # Use the mask to filter both predictions and targets
    filtered_predictions = predictions[mask]
    filtered_targets = targets[mask]
    
    return filtered_predictions, filtered_targets

In [20]:
data = util.clean_data(data)
draw_factor = data['FTR'].value_counts(normalize=True)['D']
ELO = util.ELO(data, init_rating=1500, draw_factor=draw_factor, k_factor=32, home_advantage=50)
data = ELO.perform_simulations(data)

for league in leagues:
	league_data = data[data['Div'] == league]
	league_data = util.add_form_column(league_data, 'FTHG', 'FTAG', n=5, operation='Sum', regard_opponent=False, include_current=False)
	league_data['Diff_goals_scored'] = league_data['FTHG_Sum_5'] - league_data['FTAG_Sum_5']
	league_data = util.add_form_column(league_data, 'FTHG', 'FTAG', n=5, operation='Sum', regard_opponent=True, include_current=False)
	league_data['Diff_goals_conceded'] = league_data['FTHG_Sum_5_opponent'] - league_data['FTAG_Sum_5_opponent']
	league_data['Home Goal Difference last 5'] = league_data['FTHG_Sum_5'] - league_data['FTHG_Sum_5_opponent']
	league_data['Away Goal Difference last 5'] = league_data['FTAG_Sum_5'] - league_data['FTAG_Sum_5_opponent']
	league_data['Diff_goal_diff'] = league_data['Home Goal Difference last 5'] - league_data['Away Goal Difference last 5']
	league_data = util.add_form_column(league_data, 'Home', 'Away', n=5, operation='Points', regard_opponent=False, include_current=False)
	league_data['Diff_points'] = league_data['Home_Points_5'] - league_data['Away_Points_5']
	league_data = util.add_form_column(league_data, 'Home ELO', 'Away ELO', n=5, operation='Change', regard_opponent=False, include_current=True)
	league_data['Diff_change_in_ELO'] = league_data['Home ELO_Change_5'] - league_data['Away ELO_Change_5']
	league_data = util.add_form_column(league_data, 'Home ELO', 'Away ELO', n=5, operation='Mean', regard_opponent=True, include_current=False)
	league_data['Diff_opposition_mean_ELO'] = league_data['Home ELO_Mean_5_opponent'] - league_data['Away ELO_Mean_5_opponent']
	league_data = util.add_form_column(league_data, 'HST', 'AST', n=5, operation='Sum', regard_opponent=False, include_current=False)
	league_data['Diff_shots_on_target_attempted'] = league_data['HST_Sum_5'] - league_data['AST_Sum_5']
	league_data = util.add_form_column(league_data, 'HST', 'AST', n=5, operation='Sum', regard_opponent=True, include_current=False)
	league_data['Diff_shots_on_target_allowed'] = league_data['HST_Sum_5_opponent'] - league_data['AST_Sum_5_opponent']
	league_data = util.add_form_column(league_data, 'HS', 'AS', n=5, operation='Sum', regard_opponent=False, include_current=False)
	league_data['Diff_shots_attempted'] = league_data['HS_Sum_5'] - league_data['AS_Sum_5']
	league_data = util.add_form_column(league_data, 'HS', 'AS', n=5, operation='Sum', regard_opponent=True, include_current=False)
	league_data['Diff_shots_allowed'] = league_data['HS_Sum_5_opponent'] - league_data['AS_Sum_5_opponent']
	league_data = util.add_form_column(league_data, 'HC', 'AC', n=5, operation='Sum', regard_opponent=False, include_current=False)
	league_data['Diff_corners_awarded'] = league_data['HC_Sum_5'] - league_data['AC_Sum_5']
	league_data = util.add_form_column(league_data, 'HC', 'AC', n=5, operation='Sum', regard_opponent=True, include_current=False)
	league_data['Diff_corners_conceded'] = league_data['HC_Sum_5_opponent'] - league_data['AC_Sum_5_opponent']
	league_data = util.add_form_column(league_data, 'HF', 'AF', n=5, operation='Sum', regard_opponent=False, include_current=False)
	league_data['Diff_fouls_commited'] = league_data['HF_Sum_5'] - league_data['AF_Sum_5']
	league_data = util.add_form_column(league_data, 'HF', 'AF', n=5, operation='Sum', regard_opponent=True, include_current=False)
	league_data['Diff_fouls_suffered'] = league_data['HF_Sum_5_opponent'] - league_data['AF_Sum_5_opponent']
	league_data = util.add_form_column(league_data, 'HY', 'AY', n=5, operation='Sum', regard_opponent=False, include_current=False)
	league_data['Diff_yellow_cards'] = league_data['HY_Sum_5'] - league_data['AY_Sum_5']
	league_data = util.add_form_column(league_data, 'HR', 'AR', n=5, operation='Sum', regard_opponent=False, include_current=False)
	league_data['Diff_red_cards'] = league_data['HR_Sum_5'] - league_data['AR_Sum_5']
	columns_to_remove = [
		"FTR",
		"HTHG",
		"HTAG",
		"HTR",
		"HS",
		"AS",
		"HST",
		"AST",
		"HF",
		"AF",
		"HC",
		"AC",
		"HY",
		"AY",
		"HR",
		"AR",
		"Home ELO",
		"Away ELO",
		"FTHG_Sum_5",
		"FTAG_Sum_5",
		"FTHG_Sum_5_opponent",
		"FTAG_Sum_5_opponent",
		"Home Goal Difference last 5",
		"Away Goal Difference last 5",
		"Home_Points_5",
		"Away_Points_5",
		"Home ELO_Change_5",
		"Away ELO_Change_5",
		"Home ELO_Mean_5_opponent",
		"Away ELO_Mean_5_opponent",
		"HST_Sum_5",
		"AST_Sum_5",
		"HST_Sum_5_opponent",
		"AST_Sum_5_opponent",
		"HS_Sum_5",
		"AS_Sum_5",
		"HS_Sum_5_opponent",
		"AS_Sum_5_opponent",
		"HC_Sum_5",
		"AC_Sum_5",
		"HC_Sum_5_opponent",
		"AC_Sum_5_opponent",
		"HF_Sum_5",
		"AF_Sum_5",
		"HF_Sum_5_opponent",
		"AF_Sum_5_opponent",
		"HY_Sum_5",
		"AY_Sum_5",
		"HR_Sum_5",
		"AR_Sum_5",
	]
	league_data.drop(columns=columns_to_remove, inplace=True)

	league_data["Outcome"] = league_data.apply(
		lambda row: (row["FTHG"] - row["FTAG"]),
		axis=1,
	)

	X = league_data.copy().drop(
		columns=["Outcome", "FTHG", "FTAG", "Season", "Div", "Date", "HomeTeam", "AwayTeam"],
	)
	y = league_data["Outcome"]
	rf = RandomForestRegressor(n_estimators=200, random_state=42)
	X_train, X_test, y_train, y_test = train_test_split(
		X, y, test_size=0.2, random_state=42)
	rf.fit(X_train,y_train)
	predictions = rf.predict(X_test)
	print('Ordinary stats for model for', league)
	categorized_preds = categorize_preds(predictions, 1, -1)
	categorized_goal_diff = categorize_goal_diff(y_test)

	report = classification_report(categorized_goal_diff, categorized_preds)
	print(report)
	filtered_predictions, filtered_targets = remove_uncertain(categorized_preds, categorized_goal_diff)
	print('Filtered (-1,1) stats for model for', league)
	report = classification_report(filtered_predictions, filtered_targets)
	print(report)
	file_path = f'files/models/{league}_model.joblib'
	dump(rf, file_path)
	print(f'Saved model for {league} to {file_path}')
	
	

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[new_column_name_home] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[new_column_name_away] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[new_column_name_home] = pd.to_numeric(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer

Ordinary stats for model for E0
              precision    recall  f1-score   support

          -1       0.63      0.16      0.26       456
           0       0.26      0.75      0.39       347
           1       0.69      0.36      0.48       673

    accuracy                           0.39      1476
   macro avg       0.53      0.43      0.37      1476
weighted avg       0.57      0.39      0.39      1476

Filtered (-1,1) stats for model for E0
              precision    recall  f1-score   support

          -1       0.63      0.63      0.63       116
           0       0.00      0.00      0.00         0
           1       0.91      0.69      0.79       355

    accuracy                           0.68       471
   macro avg       0.51      0.44      0.47       471
weighted avg       0.84      0.68      0.75       471

Saved model for E0 to files/models/E0_model.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[new_column_name_home] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[new_column_name_away] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata

Ordinary stats for model for E1
              precision    recall  f1-score   support

          -1       0.67      0.01      0.01       626
           0       0.26      0.92      0.41       563
           1       0.63      0.11      0.19       958

    accuracy                           0.29      2147
   macro avg       0.52      0.35      0.20      2147
weighted avg       0.54      0.29      0.20      2147

Filtered (-1,1) stats for model for E1
              precision    recall  f1-score   support

          -1       0.17      0.67      0.27         6
           0       0.00      0.00      0.00         0
           1       1.00      0.63      0.77       175

    accuracy                           0.63       181
   macro avg       0.39      0.43      0.35       181
weighted avg       0.97      0.63      0.76       181



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


Saved model for E1 to files/models/E1_model.joblib


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[new_column_name_home] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[new_column_name_away] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[new_column_name_home] = pd.to_numeric(
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer

Ordinary stats for model for E2
              precision    recall  f1-score   support

          -1       0.47      0.01      0.02       634
           0       0.26      0.92      0.40       527
           1       0.61      0.13      0.21       952

    accuracy                           0.29      2113
   macro avg       0.45      0.35      0.21      2113
weighted avg       0.48      0.29      0.20      2113

Filtered (-1,1) stats for model for E2
              precision    recall  f1-score   support

          -1       0.17      0.47      0.25        17
           0       0.00      0.00      0.00         0
           1       0.96      0.61      0.75       198

    accuracy                           0.60       215
   macro avg       0.38      0.36      0.33       215
weighted avg       0.90      0.60      0.71       215

Saved model for E2 to files/models/E2_model.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[new_column_name_home] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[new_column_name_away] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata

Ordinary stats for model for E3
              precision    recall  f1-score   support

          -1       0.60      0.00      0.01       661
           0       0.28      0.94      0.43       599
           1       0.52      0.06      0.10       861

    accuracy                           0.29      2121
   macro avg       0.47      0.34      0.18      2121
weighted avg       0.48      0.29      0.17      2121

Filtered (-1,1) stats for model for E3
              precision    recall  f1-score   support

          -1       0.19      0.60      0.29         5
           0       0.00      0.00      0.00         0
           1       1.00      0.52      0.69        92

    accuracy                           0.53        97
   macro avg       0.40      0.37      0.32        97
weighted avg       0.96      0.53      0.67        97

Saved model for E3 to files/models/E3_model.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[new_column_name_home] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[new_column_name_away] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata

Ordinary stats for model for I1
              precision    recall  f1-score   support

          -1       0.67      0.13      0.22       453
           0       0.27      0.77      0.40       382
           1       0.63      0.31      0.42       637

    accuracy                           0.37      1472
   macro avg       0.52      0.40      0.35      1472
weighted avg       0.55      0.37      0.35      1472

Filtered (-1,1) stats for model for I1
              precision    recall  f1-score   support

          -1       0.59      0.67      0.62        90
           0       0.00      0.00      0.00         0
           1       0.93      0.63      0.75       313

    accuracy                           0.64       403
   macro avg       0.51      0.43      0.46       403
weighted avg       0.86      0.64      0.73       403

Saved model for I1 to files/models/I1_model.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[new_column_name_home] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[new_column_name_away] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata

Ordinary stats for model for SP1
              precision    recall  f1-score   support

          -1       0.61      0.09      0.16       417
           0       0.26      0.84      0.39       347
           1       0.77      0.30      0.44       714

    accuracy                           0.37      1478
   macro avg       0.55      0.41      0.33      1478
weighted avg       0.60      0.37      0.35      1478

Filtered (-1,1) stats for model for SP1
              precision    recall  f1-score   support

          -1       0.60      0.61      0.61        62
           0       0.00      0.00      0.00         0
           1       0.96      0.77      0.85       283

    accuracy                           0.74       345
   macro avg       0.52      0.46      0.49       345
weighted avg       0.90      0.74      0.81       345

Saved model for SP1 to files/models/SP1_model.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[new_column_name_home] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[new_column_name_away] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata

Ordinary stats for model for D1
              precision    recall  f1-score   support

          -1       0.64      0.13      0.21       343
           0       0.27      0.79      0.40       284
           1       0.64      0.29      0.40       500

    accuracy                           0.36      1127
   macro avg       0.52      0.40      0.34      1127
weighted avg       0.55      0.36      0.34      1127

Filtered (-1,1) stats for model for D1
              precision    recall  f1-score   support

          -1       0.59      0.64      0.61        69
           0       0.00      0.00      0.00         0
           1       0.91      0.64      0.75       223

    accuracy                           0.64       292
   macro avg       0.50      0.43      0.45       292
weighted avg       0.83      0.64      0.72       292

Saved model for D1 to files/models/D1_model.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[new_column_name_home] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[new_column_name_away] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata

Ordinary stats for model for F1
              precision    recall  f1-score   support

          -1       0.60      0.06      0.10       368
           0       0.31      0.88      0.46       376
           1       0.60      0.19      0.29       540

    accuracy                           0.36      1284
   macro avg       0.50      0.38      0.28      1284
weighted avg       0.51      0.36      0.29      1284

Filtered (-1,1) stats for model for F1
              precision    recall  f1-score   support

          -1       0.38      0.60      0.47        35
           0       0.00      0.00      0.00         0
           1       0.95      0.60      0.74       172

    accuracy                           0.60       207
   macro avg       0.45      0.40      0.40       207
weighted avg       0.86      0.60      0.69       207

Saved model for F1 to files/models/F1_model.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[new_column_name_home] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data[new_column_name_away] = None
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata

Ordinary stats for model for N1
              precision    recall  f1-score   support

          -1       0.62      0.27      0.38       142
           0       0.29      0.64      0.40       114
           1       0.66      0.46      0.54       187

    accuracy                           0.45       443
   macro avg       0.52      0.46      0.44       443
weighted avg       0.55      0.45      0.45       443

Filtered (-1,1) stats for model for N1
              precision    recall  f1-score   support

          -1       0.72      0.62      0.67        63
           0       0.00      0.00      0.00         0
           1       0.87      0.66      0.75       131

    accuracy                           0.64       194
   macro avg       0.53      0.43      0.47       194
weighted avg       0.82      0.64      0.72       194

Saved model for N1 to files/models/N1_model.joblib


  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


## Lagre nåværende data om alle lag

In [41]:
df_tmp = []
for league in leagues:
	league_data = util.fetch_data(data_folder, 2024, 2025, [league])
	teams = league_data['HomeTeam'].unique()
	for team in teams:
		#ELO, goals scored, goals conceded, goal difference, points, change in ELO, opposition mean ELO, shots on target attempted, shots on target allowed, shot attempted, shots allows, corner awarded, corners conceded, fouls commited, fouls suffered, yellow cards, red cards
		elo = ELO.ratings[team]
		last_five_matches = util.get_all_matches_of_team(data, team).tail(5)
		goals_scored = 0
		goals_conceded = 0
		goal_difference = 0
		points = 0
		change_in_ELO = 0 #, ta ELO ved nåværende minus første kamp ELO
		oppoisition_mean_ELO = 0 #Er først sum
		shots_on_target_attemped = 0
		shots_on_target_allowed = 0
		shots_attempted = 0
		shots_allowed = 0
		corners_awarded = 0
		corners_allowed = 0
		fouls_commited = 0
		fouls_suffered = 0
		yellow_cards = 0
		red_cards = 0
		i = 0
		for index, match in last_five_matches.iterrows():
			i += 1
			if team == match['HomeTeam']:
				goals_scored += match['FTHG']
				goals_conceded += match['FTAG']
				goal_difference += match['FTHG'] - match['FTAG']
				if match['FTR'] == 'H':
					points += 3
				elif match['FTR'] == 'D':
					points += 1
				if i == len(last_five_matches):
					change_in_ELO = elo - match['Home ELO']
				oppoisition_mean_ELO += match['Away ELO']
				shots_on_target_attemped += match['HST']
				shots_on_target_allowed += match['AST']
				shots_attempted += match['HS']
				shots_allowed += match['AS']
				corners_awarded += match['HC']
				corners_allowed += match['AC']
				fouls_commited +=  match['HF']
				fouls_suffered += match['AF']
				yellow_cards += match['HY']
				red_cards += match['HR']
				
			elif team == match['AwayTeam']:
				goals_scored += match['FTAG']
				goals_conceded += match['FTHG']
				goal_difference += match['FTAG'] - match['FTHG']
				if match['FTR'] == 'A':
					points += 3
				elif match['FTR'] == 'D':
					points += 1
				if i == len(last_five_matches):
					change_in_ELO = elo - match['Away ELO']
				oppoisition_mean_ELO += match['Home ELO']
				shots_on_target_attemped += match['AST']
				shots_on_target_allowed += match['HST']
				shots_attempted += match['AS']
				shots_allowed += match['HS']
				corners_awarded += match['AC']
				corners_allowed += match['HC']
				fouls_commited +=  match['AF']
				fouls_suffered += match['HF']
				yellow_cards += match['AY']
				red_cards += match['AR']
		oppoisition_mean_ELO = oppoisition_mean_ELO / len(last_five_matches)
		df_dict = {
			'Div': league,
			'Team': team,
			'ELO': elo,
			'Goals scored': goals_scored,
			'Goals conceded': goals_conceded,
			'Goals difference': goal_difference,
			'Points': points,
			'Change in ELO': change_in_ELO,
			'Opposition mean ELO': oppoisition_mean_ELO,
			'Shots on target attempted': shots_on_target_attemped,
			'Shots on target allows': shots_on_target_allowed,
			'Shots attemped': shots_attempted,
			'Shots allowed': shots_allowed,
			'Corners awarded': corners_awarded,
			'Corners allowed': corners_allowed,
			'Fouls commited': fouls_commited,
			'Fouls suffered': fouls_suffered,
			'Yellow cards': yellow_cards,
			'Red cards': red_cards
		}	
		df_tmp.append(df_dict)
df_final = pd.DataFrame(df_tmp)
df_final.to_csv('files/data/current_data.csv', index=False)
		

Fetching data for 2425 E0
Fetching data for 2425 E1
Fetching data for 2425 E2
Fetching data for 2425 E3
Fetching data for 2425 I1
Fetching data for 2425 SP1
Fetching data for 2425 D1
Fetching data for 2425 F1
Fetching data for 2425 N1


## Get stats for team

In [46]:
def get_team_row(team_name):
    df = pd.read_csv('files/data/current_data.csv')
    # Filter the DataFrame for the given team name
    team_row = df.loc[df['Team'] == team_name]

    if not team_row.empty:
        return team_row.iloc[0]  # Return the first row (should be the only one if unique)
    else:
        return None  # Return None if the team is not found
    
print(get_team_row('Man United'))
print('_________')
print(get_team_row('Man City'))

Div                                   E0
Team                          Man United
ELO                          1614.352197
Goals scored                         9.0
Goals conceded                       7.0
Goals difference                     2.0
Points                                 7
Change in ELO                  25.819413
Opposition mean ELO          1615.921566
Shots on target attempted           21.0
Shots on target allows              20.0
Shots attemped                      54.0
Shots allowed                       54.0
Corners awarded                     12.0
Corners allowed                     30.0
Fouls commited                      54.0
Fouls suffered                      53.0
Yellow cards                         6.0
Red cards                            0.0
Name: 0, dtype: object
_________
Div                                   E0
Team                            Man City
ELO                          1761.080021
Goals scored                         6.0
Goals conceded          

## Get diff_stats for two competing teams

In [None]:
def get_diff_dataframe(home_team, away_team) -> pd.DataFrame:
	home_row = get_team_row(home_team)
	away_row = get_team_row(away_team)
	diff_df = {
		'ELO diff': home_row['ELO'] - away_row['ELO'],
		'Diff_goals_scored': home_row['Goals scored'] - away_row['Goals scored'],
		'Diff_goals_conceded': home_row['Goals conceded'] - away_row['Goals conceded'],
		'Diff_goal_diff': home_row['Goals difference'] - away_row['Goals difference'],
		'Diff_points': home_row['Points'] - away_row['Points'],
		'Diff_change_in_ELO': home_row['Change in ELO'] - away_row['Change in ELO'],
		'Diff_opposition_mean_ELO': home_row['Opposition mean ELO'] - away_row['Opposition mean ELO'],
		'Diff_shots_on_target_attempted': home_row['Shots on target attempted'] - away_row['Shots on target attempted'],
		'Diff_shots_on_target_allowed': home_row['Shots on target allows'] - away_row['Shots on target allows'],
		'Diff_shots_attempted': home_row['Shots attemped'] - away_row['Shots attemped'],
		'Diff_shots_allowed': home_row['Shots allowed'] - away_row['Shots allowed'],
		'Diff_corners_awarded': home_row['Corners awarded'] - away_row['Corners awarded'],
		'Diff_corners_conceded': home_row['Corners allowed'] - away_row['Corners allowed'],
		'Diff_fouls_commited': home_row['Fouls commited'] - away_row['Fouls commited'],
		'Diff_fouls_suffered': home_row['Fouls suffered'] - away_row['Fouls suffered'],
		'Diff_yellow_cards': home_row['Yellow cards'] - away_row['Yellow cards'],
		'Diff_red_cards': home_row['Red cards'] - away_row['Red cards']
	}
	return pd.DataFrame([diff_df])

get_diff_dataframe('Bournemouth', 'West Ham')

Unnamed: 0,ELO diff,Diff_goals_scored,Diff_goals_conceded,Diff_goal_diff,Diff_points,Diff_change_in_ELO,Diff_opposition_mean_ELO,Diff_shots_on_target_attempted,Diff_shots_on_target_allowed,Diff_shots_attempted,Diff_shots_allowed,Diff_corners_awarded,Diff_corners_conceded,Diff_fouls_commited,Diff_fouls_suffered,Diff_yellow_cards,Diff_red_cards
0,-146.727824,3.0,-3.0,6.0,3,51.638826,0.871191,0.0,-3.0,-16.0,-7.0,-25.0,10.0,5.0,2.0,-4.0,-1.0


In [59]:
model = load('files/models/E0_model.joblib')
input = get_diff_dataframe('Tottenham', "Liverpool").values

pred = model.predict(input)
pred



array([-1.09])

# Alt under er kladd

In [21]:
'''data = util.clean_data(data)
draw_factor = data['FTR'].value_counts(normalize=True)['D']
ELO = util.ELO(data, init_rating=1500, draw_factor=draw_factor, k_factor=32, home_advantage=50)
data = ELO.perform_simulations(data)
#for league in leagues:

data = util.add_form_column(data, 'FTHG', 'FTAG', n=5, operation='Sum', regard_opponent=False, include_current=False)
data['Diff_goals_scored'] = data['FTHG_Sum_5'] - data['FTAG_Sum_5']
data = util.add_form_column(data, 'FTHG', 'FTAG', n=5, operation='Sum', regard_opponent=True, include_current=False)
data['Diff_goals_conceded'] = data['FTHG_Sum_5_opponent'] - data['FTAG_Sum_5_opponent']
data['Home Goal Difference last 5'] = data['FTHG_Sum_5'] - data['FTHG_Sum_5_opponent']
data['Away Goal Difference last 5'] = data['FTAG_Sum_5'] - data['FTAG_Sum_5_opponent']
data['Diff_goal_diff'] = data['Home Goal Difference last 5'] - data['Away Goal Difference last 5']
data = util.add_form_column(data, 'Home', 'Away', n=5, operation='Points', regard_opponent=False, include_current=False)
data['Diff_points'] = data['Home_Points_5'] - data['Away_Points_5']
data = util.add_form_column(data, 'Home ELO', 'Away ELO', n=5, operation='Change', regard_opponent=False, include_current=True)
data['Diff_change_in_ELO'] = data['Home ELO_Change_5'] - data['Away ELO_Change_5']
data = util.add_form_column(data, 'Home ELO', 'Away ELO', n=5, operation='Mean', regard_opponent=True, include_current=False)
data['Diff_opposition_mean_ELO'] = data['Home ELO_Mean_5_opponent'] - data['Away ELO_Mean_5_opponent']
data = util.add_form_column(data, 'HST', 'AST', n=5, operation='Sum', regard_opponent=False, include_current=False)
data['Diff_shots_on_target_attempted'] = data['HST_Sum_5'] - data['AST_Sum_5']
data = util.add_form_column(data, 'HST', 'AST', n=5, operation='Sum', regard_opponent=True, include_current=False)
data['Diff_shots_on_target_allowed'] = data['HST_Sum_5_opponent'] - data['AST_Sum_5_opponent']
data = util.add_form_column(data, 'HS', 'AS', n=5, operation='Sum', regard_opponent=False, include_current=False)
data['Diff_shots_attempted'] = data['HS_Sum_5'] - data['AS_Sum_5']
data = util.add_form_column(data, 'HS', 'AS', n=5, operation='Sum', regard_opponent=True, include_current=False)
data['Diff_shots_allowed'] = data['HS_Sum_5_opponent'] - data['AS_Sum_5_opponent']
data = util.add_form_column(data, 'HC', 'AC', n=5, operation='Sum', regard_opponent=False, include_current=False)
data['Diff_corners_awarded'] = data['HC_Sum_5'] - data['AC_Sum_5']
data = util.add_form_column(data, 'HC', 'AC', n=5, operation='Sum', regard_opponent=True, include_current=False)
data['Diff_corners_conceded'] = data['HC_Sum_5_opponent'] - data['AC_Sum_5_opponent']
data = util.add_form_column(data, 'HF', 'AF', n=5, operation='Sum', regard_opponent=False, include_current=False)
data['Diff_fouls_commited'] = data['HF_Sum_5'] - data['AF_Sum_5']
data = util.add_form_column(data, 'HF', 'AF', n=5, operation='Sum', regard_opponent=True, include_current=False)
data['Diff_fouls_suffered'] = data['HF_Sum_5_opponent'] - data['AF_Sum_5_opponent']
data = util.add_form_column(data, 'HY', 'AY', n=5, operation='Sum', regard_opponent=False, include_current=False)
data['Diff_yellow_cards'] = data['HY_Sum_5'] - data['AY_Sum_5']
data = util.add_form_column(data, 'HR', 'AR', n=5, operation='Sum', regard_opponent=False, include_current=False)
data['Diff_red_cards'] = data['HR_Sum_5'] - data['AR_Sum_5']
columns_to_remove = [
	"FTR",
	"HTHG",
	"HTAG",
	"HTR",
	"HS",
	"AS",
	"HST",
	"AST",
	"HF",
	"AF",
	"HC",
	"AC",
	"HY",
	"AY",
	"HR",
	"AR",
	"Home ELO",
	"Away ELO",
	"FTHG_Sum_5",
	"FTAG_Sum_5",
	"FTHG_Sum_5_opponent",
	"FTAG_Sum_5_opponent",
	"Home Goal Difference last 5",
	"Away Goal Difference last 5",
	"Home_Points_5",
	"Away_Points_5",
	"Home ELO_Change_5",
	"Away ELO_Change_5",
	"Home ELO_Mean_5_opponent",
	"Away ELO_Mean_5_opponent",
	"HST_Sum_5",
	"AST_Sum_5",
	"HST_Sum_5_opponent",
	"AST_Sum_5_opponent",
	"HS_Sum_5",
	"AS_Sum_5",
	"HS_Sum_5_opponent",
	"AS_Sum_5_opponent",
	"HC_Sum_5",
	"AC_Sum_5",
	"HC_Sum_5_opponent",
	"AC_Sum_5_opponent",
	"HF_Sum_5",
	"AF_Sum_5",
	"HF_Sum_5_opponent",
	"AF_Sum_5_opponent",
	"HY_Sum_5",
	"AY_Sum_5",
	"HR_Sum_5",
	"AR_Sum_5",
]
data.drop(columns=columns_to_remove, inplace=True)
#data.to_csv("files/data/Prepared_data_E0.csv", index=False)'''

'data = util.clean_data(data)\ndraw_factor = data[\'FTR\'].value_counts(normalize=True)[\'D\']\nELO = util.ELO(data, init_rating=1500, draw_factor=draw_factor, k_factor=32, home_advantage=50)\ndata = ELO.perform_simulations(data)\n#for league in leagues:\n\ndata = util.add_form_column(data, \'FTHG\', \'FTAG\', n=5, operation=\'Sum\', regard_opponent=False, include_current=False)\ndata[\'Diff_goals_scored\'] = data[\'FTHG_Sum_5\'] - data[\'FTAG_Sum_5\']\ndata = util.add_form_column(data, \'FTHG\', \'FTAG\', n=5, operation=\'Sum\', regard_opponent=True, include_current=False)\ndata[\'Diff_goals_conceded\'] = data[\'FTHG_Sum_5_opponent\'] - data[\'FTAG_Sum_5_opponent\']\ndata[\'Home Goal Difference last 5\'] = data[\'FTHG_Sum_5\'] - data[\'FTHG_Sum_5_opponent\']\ndata[\'Away Goal Difference last 5\'] = data[\'FTAG_Sum_5\'] - data[\'FTAG_Sum_5_opponent\']\ndata[\'Diff_goal_diff\'] = data[\'Home Goal Difference last 5\'] - data[\'Away Goal Difference last 5\']\ndata = util.add_form_column(d

## Train model

In [22]:
'''data["Outcome"] = data.apply(
	lambda row: (row["FTHG"] - row["FTAG"]),
	axis=1,
)

X = data.copy().drop(
	columns=["Outcome", "FTHG", "FTAG", "Season", "Div", "Date", "HomeTeam", "AwayTeam"],
)
y = data["Outcome"]

rf = RandomForestRegressor(n_estimators=200, random_state=42)

##----------
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)
rf.fit(X_train,y_train)
predictions = rf.predict(X_test)

def categorize_preds(pred_arr, hw, hl):
    categories = np.where(pred_arr > hw, 1, 
                          np.where((pred_arr <= hw) & (pred_arr > hl), 0, 
                                   -1))
    return np.array(categories)


def categorize_goal_diff(y_test):
    categories = np.where(y_test > 0, 1, 
                        np.where((y_test == 0 ), 0, 
                                -1))
    return np.array(categories)

categorized_preds = categorize_preds(predictions, 1, -1)
categorized_goal_diff = categorize_goal_diff(y_test)

report = classification_report(categorized_goal_diff, categorized_preds)
print(report)

def remove_uncertain(predictions, targets):
    # Create a boolean mask where predictions is not 0
    mask = predictions != 0
    
    # Use the mask to filter both predictions and targets
    filtered_predictions = predictions[mask]
    filtered_targets = targets[mask]
    
    return filtered_predictions, filtered_targets

filtered_predictions, filtered_targets = remove_uncertain(categorized_preds, categorized_goal_diff)

report = classification_report(filtered_predictions, filtered_targets)
print(report)

##---------
data['Predicted_Outcome'] = cross_val_predict(rf, X, y, cv=5)
# Correctly predicted over 0
correct_home_win = ((data['Predicted_Outcome'] > 0) & (data['Outcome'] > 0)).sum()

# Correctly predicted below 0
correct_away_win = ((data['Predicted_Outcome'] < 0) & (data['Outcome'] < 0)).sum()

# Predicted over 0 but was under 0
wrongly_predicted_home_win = ((data['Predicted_Outcome'] > 0) & (data['Outcome'] <= 0)).sum()

# Predicted below 0 but was over 0
wrongly_predicted_away_win = ((data['Predicted_Outcome'] < 0) & (data['Outcome'] >= 0)).sum()
print('Home wins correct: ', correct_home_win, 'Wrongly predict home win: ', wrongly_predicted_home_win, ' Amount of home wins picks correct: ', correct_home_win/(wrongly_predicted_home_win+correct_home_win))
print('Away wins correct: ', correct_away_win, 'Wrongly predict away win: ', wrongly_predicted_away_win, ' Amount of away wins picks correct: ', correct_away_win/(wrongly_predicted_away_win+correct_away_win))
print('Correct picks: ', (correct_home_win+correct_away_win), 'Amount correct: ', (correct_home_win+correct_away_win)/(correct_home_win+correct_away_win+wrongly_predicted_home_win+wrongly_predicted_away_win))

data = data[(data['Predicted_Outcome'] <= -1) | (data['Predicted_Outcome'] >= 1)]
correct_home_win = ((data['Predicted_Outcome'] > 0) & (data['Outcome'] > 0)).sum()

# Correctly predicted below 0
correct_away_win = ((data['Predicted_Outcome'] < 0) & (data['Outcome'] < 0)).sum()

# Predicted over 0 but was under 0
wrongly_predicted_home_win = ((data['Predicted_Outcome'] > 0) & (data['Outcome'] <= 0)).sum()

# Predicted below 0 but was over 0
wrongly_predicted_away_win = ((data['Predicted_Outcome'] < 0) & (data['Outcome'] >= 0)).sum()
print('Home wins correct: ', correct_home_win, 'Wrongly predict home win: ', wrongly_predicted_home_win, ' Amount of home wins picks correct: ', correct_home_win/(wrongly_predicted_home_win+correct_home_win))
print('Away wins correct: ', correct_away_win, 'Wrongly predict away win: ', wrongly_predicted_away_win, ' Amount of away wins picks correct: ', correct_away_win/(wrongly_predicted_away_win+correct_away_win))
print('Correct picks: ', (correct_home_win+correct_away_win), 'Amount correct: ', (correct_home_win+correct_away_win)/(correct_home_win+correct_away_win+wrongly_predicted_home_win+wrongly_predicted_away_win))
'''
'''


SyntaxError: incomplete input (3385044623.py, line 85)

In [70]:
'''X_full = data.drop(columns=["Outcome", "FTHG", "FTAG", "Season", "Div", "Date", "HomeTeam", "AwayTeam"])
data["Predicted_Outcome"] = rf.predict(X_full)

# Step 3: Classify predictions based on thresholds
def classify_prediction(pred):
    if pred >= 1:
        return "Predicted Home Win"
    elif pred <= -1:
        return "Predicted Away Win"
    else:
        return "Too Close To Call"

# Apply the classification function to the predictions
data["Prediction_Category"] = data["Predicted_Outcome"].apply(classify_prediction)

# Step 4: Display or save the updated dataset
print(data[["Date", "HomeTeam", 'AwayTeam', "Outcome", "Predicted_Outcome", "Prediction_Category"]].tail(20))

correct_home_win = ((data['Predicted_Outcome'] > 0) & (data['Outcome'] > 0)).sum()

# Correctly predicted below 0
correct_away_win = ((data['Predicted_Outcome'] < 0) & (data['Outcome'] < 0)).sum()

# Predicted over 0 but was under 0
wrongly_predicted_home_win = ((data['Predicted_Outcome'] > 0) & (data['Outcome'] <= 0)).sum()

# Predicted below 0 but was over 0
wrongly_predicted_away_win = ((data['Predicted_Outcome'] < 0) & (data['Outcome'] >= 0)).sum()
print('Home wins correct: ', correct_home_win, 'Wrongly predict home win: ', wrongly_predicted_home_win, ' Amount of home wins picks correct: ', correct_home_win/(wrongly_predicted_home_win+correct_home_win))
print('Away wins correct: ', correct_away_win, 'Wrongly predict away win: ', wrongly_predicted_away_win, ' Amount of away wins picks correct: ', correct_away_win/(wrongly_predicted_away_win+correct_away_win))
print('Correct picks: ', (correct_home_win+correct_away_win), 'Amount correct: ', (correct_home_win+correct_away_win)/(correct_home_win+correct_away_win+wrongly_predicted_home_win+wrongly_predicted_away_win))

data = data[(data['Predicted_Outcome'] <= -1) | (data['Predicted_Outcome'] >= 1)]
correct_home_win = ((data['Predicted_Outcome'] > 0) & (data['Outcome'] > 0)).sum()

# Correctly predicted below 0
correct_away_win = ((data['Predicted_Outcome'] < 0) & (data['Outcome'] < 0)).sum()

# Predicted over 0 but was under 0
wrongly_predicted_home_win = ((data['Predicted_Outcome'] > 0) & (data['Outcome'] <= 0)).sum()

# Predicted below 0 but was over 0
wrongly_predicted_away_win = ((data['Predicted_Outcome'] < 0) & (data['Outcome'] >= 0)).sum()
print('Home wins correct: ', correct_home_win, 'Wrongly predict home win: ', wrongly_predicted_home_win, ' Amount of home wins picks correct: ', correct_home_win/(wrongly_predicted_home_win+correct_home_win))
print('Away wins correct: ', correct_away_win, 'Wrongly predict away win: ', wrongly_predicted_away_win, ' Amount of away wins picks correct: ', correct_away_win/(wrongly_predicted_away_win+correct_away_win))
print('Correct picks: ', (correct_home_win+correct_away_win), 'Amount correct: ', (correct_home_win+correct_away_win)/(correct_home_win+correct_away_win+wrongly_predicted_home_win+wrongly_predicted_away_win))'''

           Date        HomeTeam        AwayTeam  Outcome  Predicted_Outcome  \
7358 2024-12-05          Fulham        Brighton      2.0              1.565   
7359 2024-12-05     Bournemouth       Tottenham      1.0              0.515   
7360 2024-12-07     Aston Villa     Southampton      1.0              1.250   
7361 2024-12-07       Brentford       Newcastle      2.0              0.900   
7362 2024-12-07  Crystal Palace        Man City      0.0             -0.385   
7363 2024-12-07      Man United   Nott'm Forest     -1.0             -0.520   
7364 2024-12-08          Fulham         Arsenal      0.0             -0.230   
7365 2024-12-08         Ipswich     Bournemouth     -1.0             -0.865   
7366 2024-12-08       Leicester        Brighton      0.0             -0.150   
7367 2024-12-08       Tottenham         Chelsea     -1.0             -0.655   
7368 2024-12-09        West Ham          Wolves      1.0              0.990   
7369 2024-12-14         Arsenal         Everton     