In [None]:
import pandas as pd
import numpy as np

# Load dataset (update the path if necessary)
file_path = 'datasets_versions/EPL_dataset_9_20250305.csv'
data = pd.read_csv(file_path)

# League-wide averages for goals
avg_home_scored = data['FTHG'].sum() / 1900
avg_away_scored = data['FTAG'].sum() / 1900
avg_home_conceded = avg_away_scored
avg_away_conceded = avg_home_scored

print(f"Average Home Goals Scored: {avg_home_scored:.2f}")
print(f"Average Away Goals Scored: {avg_away_scored:.2f}")

# Group data by teams for Home and Away statistics
home_stats = data.groupby('HomeTeam').agg(
    HGS=('FTHG', 'sum'),  # Home Goals Scored
    HGC=('FTAG', 'sum')   # Home Goals Conceded
).reset_index()

away_stats = data.groupby('AwayTeam').agg(
    AGS=('FTAG', 'sum'),  # Away Goals Scored
    AGC=('FTHG', 'sum')   # Away Goals Conceded
).reset_index()

team_stats = home_stats.merge(away_stats, left_on='HomeTeam', right_on='AwayTeam', how='outer')
team_stats.rename(columns={'HomeTeam': 'Team'}, inplace=True)

# Calculate Attacking and Defensive Strengths // 13 games Home / 13 games Away x 5 seasons = 95
team_stats['HAS'] = (team_stats['HGS'] / 95) / avg_home_scored  # Home Attacking Strength
team_stats['HDS'] = (team_stats['HGC'] / 95) / avg_home_conceded  # Home Defensive Strength
team_stats['AAS'] = (team_stats['AGS'] / 95) / avg_away_scored  # Away Attacking Strength
team_stats['ADS'] = (team_stats['AGC'] / 95) / avg_away_conceded  # Away Defensive Strength

# Droping any unnecessary columns
team_stats.drop(columns=['AwayTeam'], inplace=True)

# Assigning a unique ID for each team
team_stats.insert(0, 'ID', range(1, len(team_stats) + 1))

# Display the table
print("\n=== Team Strength Table ===")
print(team_stats[['ID', 'Team', 'HAS', 'HDS', 'AAS', 'ADS']].to_string(index=False))

output_path = 'datasets_versions/EPL_dataset_with_team_strength.csv'
team_stats.to_csv(output_path, index=False)
print(f"\nDataset saved as {output_path}")


Average Home Goals Scored: 1.56
Average Away Goals Scored: 1.31

=== Team Strength Table ===
 ID             Team      HAS      HDS      AAS      ADS
  1          Arsenal 1.319865 0.827642 1.245480 0.700337
  2      Aston Villa 1.084175 1.084773 0.916031 0.936027
  3      Bournemouth 0.464646 0.691041 0.498192 0.787879
  4        Brentford 0.579125 0.586581 0.610687 0.632997
  5         Brighton 0.861953 0.964243 0.964243 0.936027
  6          Burnley 0.505051 0.948172 0.610687 0.794613
  7          Chelsea 1.090909 0.811571 1.253515 0.888889
  8   Crystal Palace 0.808081 0.948172 0.795500 1.016835
  9          Everton 0.760943 0.956207 0.763359 1.070707
 10           Fulham 0.478114 0.650864 0.530333 0.579125
 11            Leeds 0.491582 0.771394 0.634793 0.774411
 12        Leicester 0.848485 0.779429 0.980313 0.814815
 13        Liverpool 1.515152 0.634793 1.470470 0.740741
 14            Luton 0.188552 0.297308 0.192849 0.323232
 15         Man City 1.811448 0.626758 1.647248 0.55