In [15]:
import pandas as pd
import numpy as np

In [16]:
# Read every source table from the data/ folder.
def load_table(csv_path):
    """Read one CSV file and convert its columns with DataFrame.convert_dtypes()."""
    return pd.read_csv(csv_path).convert_dtypes()


teams = load_table('data/teams.csv')
results = load_table('data/results.csv')
fixtures = load_table('data/fixtures.csv')
players = load_table('data/players.csv')
startingXI = load_table('data/startingXI.csv')
odds = load_table('data/odds.csv')

In [17]:
# List the column names of the results table.
results.columns

Index(['SeasonID', 'Gameweek', 'MatchID', 'HomeTeamID', 'HomeScore',
       'HomeShots', 'AwayTeamID', 'AwayScore', 'AwayShots'],
      dtype='object')

In [18]:

# Total goals scored and conceded by each team, split by venue.
# NOTE(review): `results` is aggregated unfiltered; if it also contains the
# matches that are predicted further down, these averages already include
# their outcomes — confirm whether that is intended.
team_stats_home = (
    results.groupby('HomeTeamID')
    .agg({'HomeScore': 'sum', 'AwayScore': 'sum'})
    .reset_index()
    .rename(columns={'HomeTeamID': 'team_id', 'HomeScore': 'home_goals_scored', 'AwayScore': 'home_goals_conceded'})
)

team_stats_away = (
    results.groupby('AwayTeamID')
    .agg({'HomeScore': 'sum', 'AwayScore': 'sum'})
    .reset_index()
    .rename(columns={'AwayTeamID': 'team_id', 'HomeScore': 'away_goals_conceded', 'AwayScore': 'away_goals_scored'})
)

# Inner join: only teams that have both home and away matches are kept.
team_stats = team_stats_home.merge(team_stats_away, on='team_id')

# Matches played per team at each venue. The counts are looked up by team id
# (reindex) rather than taken positionally, so each count is guaranteed to sit
# on its own team's row even if the merge dropped or reordered teams.
home_matches = results['HomeTeamID'].value_counts().reindex(team_stats['team_id']).values
away_matches = results['AwayTeamID'].value_counts().reindex(team_stats['team_id']).values

# Average goals per match = venue total / matches played at that venue.
team_stats['avg_home_goals_scored'] = team_stats['home_goals_scored'] / home_matches
team_stats['avg_home_goals_conceded'] = team_stats['home_goals_conceded'] / home_matches
team_stats['avg_away_goals_scored'] = team_stats['away_goals_scored'] / away_matches
team_stats['avg_away_goals_conceded'] = team_stats['away_goals_conceded'] / away_matches



In [19]:
# Show the per-team goal totals and per-match averages.
team_stats

Unnamed: 0,team_id,home_goals_scored,home_goals_conceded,away_goals_conceded,away_goals_scored,avg_home_goals_scored,avg_home_goals_conceded,avg_away_goals_scored,avg_away_goals_conceded
0,1,99,61,74,73,1.833333,1.12963,1.351852,1.37037
1,2,64,107,147,43,1.185185,1.981481,0.796296,2.722222
2,3,66,108,105,60,1.222222,2.0,1.111111,1.944444
3,4,122,38,46,113,2.259259,0.703704,2.092593,0.851852
4,5,130,48,59,113,2.407407,0.888889,2.092593,1.092593
5,6,106,53,56,105,1.962963,0.981481,1.944444,1.037037
6,7,95,60,83,98,1.759259,1.111111,1.814815,1.537037
7,8,148,48,53,102,2.740741,0.888889,1.888889,0.981481
8,9,61,79,93,52,1.12963,1.462963,0.962963,1.722222
9,10,57,110,140,34,1.055556,2.037037,0.62963,2.592593


In [20]:
# Attach each side's venue-specific averages to every fixture: home-venue
# averages for the home team first, then away-venue averages for the away team.
# Both joins bring in a `team_id` column; the suffixes on the second join turn
# the clash into team_id_home / team_id_away.
fixtures_with_stats = (
    fixtures
    .merge(
        team_stats[['team_id', 'avg_home_goals_scored', 'avg_home_goals_conceded']],
        left_on='HomeTeamID',
        right_on='team_id',
    )
    .merge(
        team_stats[['team_id', 'avg_away_goals_scored', 'avg_away_goals_conceded']],
        left_on='AwayTeamID',
        right_on='team_id',
        suffixes=('_home', '_away'),
    )
)

# Expected goals: midpoint between what one side usually scores and what the
# other side usually concedes at the relevant venue.
fixtures_with_stats['expected_home_goals'] = (
    fixtures_with_stats['avg_home_goals_scored']
    .add(fixtures_with_stats['avg_away_goals_conceded'])
    .div(2)
)
fixtures_with_stats['expected_away_goals'] = (
    fixtures_with_stats['avg_away_goals_scored']
    .add(fixtures_with_stats['avg_home_goals_conceded'])
    .div(2)
)

# Simulate one scoreline per fixture: a single Poisson draw per side, with the
# expected goals as the rate. The fixed seed makes the draws repeatable; home
# goals are drawn before away goals.
np.random.seed(42)
fixtures_with_stats['predicted_home_goals'] = np.random.poisson(lam=fixtures_with_stats['expected_home_goals'])
fixtures_with_stats['predicted_away_goals'] = np.random.poisson(lam=fixtures_with_stats['expected_away_goals'])

# Compact view of the simulated scorelines (stored here, not displayed).
predicted_fixtures_with_stats = fixtures_with_stats.loc[
    :, ['HomeTeamID', 'AwayTeamID', 'predicted_home_goals', 'predicted_away_goals']
]

In [21]:
# Put each fixture's simulated score next to the score recorded in `results`
# for the same MatchID (inner join: fixtures without a result row drop out).
predict_actual_results = pd.merge(
    fixtures_with_stats[['MatchID', 'team_id_home', 'team_id_away', 'predicted_home_goals', 'predicted_away_goals']],
    results[['MatchID','HomeScore', 'AwayScore']],
    on='MatchID',
)

In [22]:
# Show simulated scores alongside the recorded scores.
predict_actual_results

Unnamed: 0,MatchID,team_id_home,team_id_away,predicted_home_goals,predicted_away_goals,HomeScore,AwayScore
0,757,2,1,2,2,1,2
1,758,28,3,1,1,5,1
2,759,27,4,0,4,0,1
3,760,26,5,0,1,2,6
4,761,25,6,2,3,0,3
...,...,...,...,...,...,...,...
751,1508,10,19,0,2,0,6
752,1509,11,18,3,1,5,2
753,1510,12,17,1,0,3,3
754,1511,13,16,0,1,5,1


In [23]:
# Calculate actual and predicted match points: win 3, draw 1, loss 0

def match_points(goals_for, goals_against):
    """Return the points earned by the side that scored `goals_for`.

    Parameters
    ----------
    goals_for, goals_against : pandas.Series
        Goals scored by the side of interest and by its opponent, one entry
        per match, sharing the same index.

    Returns
    -------
    pandas.Series
        int64 Series on the same index: 3 where goals_for > goals_against,
        1 where they are equal, 0 otherwise.

    Raises
    ------
    ValueError
        If any match has a missing score, since no result can be decided.
    """
    margin = (goals_for - goals_against).astype('float64')
    if margin.isna().any():
        raise ValueError('cannot award points: at least one match has a missing score')
    gap = margin.to_numpy()
    # Conditions are checked in order: win first, then draw; anything else is a loss.
    points = np.select([gap > 0, gap == 0], [3, 1], default=0)
    return pd.Series(points, index=margin.index, dtype='int64')


# Points from the recorded scores.
predict_actual_results['Actual_HomePoints'] = match_points(predict_actual_results['HomeScore'], predict_actual_results['AwayScore'])
predict_actual_results['Actual_AwayPoints'] = match_points(predict_actual_results['AwayScore'], predict_actual_results['HomeScore'])

# Points from the simulated scores.
predict_actual_results['Pred_HomePoints'] = match_points(predict_actual_results['predicted_home_goals'], predict_actual_results['predicted_away_goals'])
predict_actual_results['Pred_AwayPoints'] = match_points(predict_actual_results['predicted_away_goals'], predict_actual_results['predicted_home_goals'])


In [26]:
# Show the table again, now including the actual and predicted points columns.
predict_actual_results

Unnamed: 0,MatchID,team_id_home,team_id_away,predicted_home_goals,predicted_away_goals,HomeScore,AwayScore,Actual_HomePoints,Actual_AwayPoints,Pred_HomePoints,Pred_AwayPoints
0,757,2,1,2,2,1,2,0,3,1,1
1,758,28,3,1,1,5,1,3,0,1,1
2,759,27,4,0,4,0,1,0,3,0,3
3,760,26,5,0,1,2,6,0,3,0,3
4,761,25,6,2,3,0,3,0,3,0,3
...,...,...,...,...,...,...,...,...,...,...,...
751,1508,10,19,0,2,0,6,0,3,0,3
752,1509,11,18,3,1,5,2,3,0,3,0
753,1510,12,17,1,0,3,3,1,1,3,0
754,1511,13,16,0,1,5,1,3,0,0,3


In [27]:
# Flag matches whose outcome was predicted correctly: 1 when the home side's
# predicted points equal its actual points, 0 otherwise. Home points alone
# (3 / 1 / 0) identify the outcome, so the away columns are not needed.
# A column-wise comparison replaces the row-by-row apply.
predict_actual_results['Correct Match'] = (
    predict_actual_results['Actual_HomePoints']
    .eq(predict_actual_results['Pred_HomePoints'])
    .astype('int64')
)

In [28]:
# Count how many matches carry each 'Correct Match' flag (1 = outcome matched, 0 = not).
predict_actual_results['Correct Match'].value_counts()

Correct Match
0    433
1    323
Name: count, dtype: int64