### Using the below files for web scraping

https://www.football-data.co.uk/mmz4281/2425/E0.csv \
https://www.football-data.co.uk/mmz4281/2425/E1.csv \
https://www.football-data.co.uk/mmz4281/2425/E2.csv \
https://www.football-data.co.uk/mmz4281/2324/E0.csv \
https://www.football-data.co.uk/mmz4281/2324/E1.csv \
https://www.football-data.co.uk/mmz4281/2324/E2.csv \
https://www.football-data.co.uk/mmz4281/2223/E0.csv \
https://www.football-data.co.uk/mmz4281/2223/E1.csv \
https://www.football-data.co.uk/mmz4281/2223/E2.csv

In [1]:
import pandas as pd
import numpy as np

### Transforming CSV files as dataframes /
#### Total 3 seasons - Total 3 leagues in each seaeson - 1 dataframe per league

In [2]:
starting_year = 22
ending_year = 25

In [3]:
if(starting_year > ending_year):
    starting_year, ending_year = ending_year, starting_year
    
league_dictionary = {"Spanish La Liga": "SP1",
                     "Spanish Segunda Division": "SP2",
                     "German Bundesliga": "D1",
                     "English Premier League": "E0",
                     "English League 1": "E1",
                     "English League 2": "E2"}
premiership_season_data = {} # dictionary containing season-wise data 

In [4]:
for season in range(starting_year, ending_year):
    premiership_data_list = []
    for league in league_dictionary: # iterating through keys
        df = pd.read_csv('https://www.football-data.co.uk/mmz4281/' + str(season) + str(season+1) + '/' + league_dictionary[league] + '.csv')
        df.insert(1, "Season", "20" + str(season))
        df.rename(columns = {"Div": "League"}, inplace = True)
        premiership_data_list.append(df)
    premiership_data = pd.concat(premiership_data_list)
    premiership_season_data["20" + str(season)] = premiership_data

#### Filter out data based on specific criteria

In [5]:
premiership_season_data_list = []

terminate_at = (ending_year - starting_year) # terminate_at will be less than 1 of assigned value
for i in range(0, terminate_at):
    premiership_season_data_SP1_league = premiership_season_data["202" + str(i+2)]
    premiership_season_data_SP1_league = premiership_season_data_SP1_league[premiership_season_data_SP1_league["League"] == "SP1"]
    premiership_season_data_SP1_league = premiership_season_data_SP1_league[premiership_season_data_SP1_league["AvgCAHH"] > 1.85]
    premiership_season_data_list.append(premiership_season_data_SP1_league)

### Summarizing the data from 2022 to 2024

In [6]:
avg_cahh = []
avg_caha = []
max_caha = []
max_cahh = []

for i, season_data in enumerate(premiership_season_data_list):
    avg_cahh.append(season_data.describe().mean().AvgCAHH)
    avg_caha.append(season_data.describe().mean().AvgCAHA)
    max_caha.append(season_data.describe().mean().MaxCAHA)
    max_cahh.append(season_data.describe().mean().MaxCAHH)

#### Making a dataframe

In [7]:
premiership_summary = {"Avg_odds_for_Handicap_Home": avg_cahh,
                       "Avg_odds_for_Handicap_Away": avg_caha,
                       "Maximum_odds_for_Handicap_Home": max_cahh,
                       "Maximum_odds_for_Handicap_Away": max_caha
                      }

premiership_summary_df = pd.DataFrame(premiership_summary)

In [8]:
new_indices_dict = {}

for i in range(0, terminate_at):
    new_indices_dict[i] = "202" + str(i+2)
    
premiership_summary_df.rename(index= new_indices_dict, inplace=True)

In [9]:
premiership_summary_df

Unnamed: 0,Avg_odds_for_Handicap_Home,Avg_odds_for_Handicap_Away,Maximum_odds_for_Handicap_Home,Maximum_odds_for_Handicap_Away
2022,35.124705,35.048344,35.183037,35.108863
2023,35.865351,35.797619,35.951063,35.863067
2024,36.743654,36.676518,36.792623,36.731009
