In [None]:
import pandas as pd
import os
os.chdir(r'D:\Projects\football-odds-analysis')
import plotly.express as px
from utils.save_as_video import plotly_fig_to_video
from utils.preprocess import preprocess_pipeline

# Bookmakers list
# Bet365
# Bet&Win
# Interwetten
# William_Hill
# VC_Bet

# Lookup table from division code to a human-readable league name.
# It is applied further down to the `League` column (the renamed `Div`
# column) to build the `League Name` column shown on the chart axis.
Divisions = {
    'E0': 'English Premier League',
    'E1': 'Championship',
    'E2': 'English League 1',
    'E3': 'English League 2',
    'EC': 'English Conference',
    'D1': 'Bundesliga 1',
    'D2': 'Bundesliga 2',
    'I1': 'Serie A',
    'I2': 'Serie B',
    'SP1': 'La Liga Primera',
    'SP2': 'La Liga Segunda',
    'F1': 'Le Championnat',
    'F2': 'France Division 2',
    'N1': 'Netherlands',
    'B1': 'Belgium',
    'P1': 'Portugal',
    'T1': 'Turkey',
    'G1': 'Greece',
}


def get_avg_margin(bookmaker: pd.DataFrame,
                   company_name: str,
                   col_name: str = 'return_on_game') -> pd.DataFrame:
    """Compute a bookmaker's average margin per division and calendar year.

    Args:
        bookmaker (pd.DataFrame): bookmaker's data; must include the `Div` and
            `Date` columns and the margin column named by `col_name`.
        company_name (str): bookmaker's name, written to the `Bookmaker` column.
        col_name (str, optional): name of the column holding the bookmaker's
            margin. Defaults to 'return_on_game'.

    Returns:
        pd.DataFrame: one row per (`Div`, `Year`) pair with the columns
            `Div`, `Year`, `average_margin` and `Bookmaker`.

    Note:
        `bookmaker` is modified in place: `Date` is converted to datetime and
        a `Year` column is added. This side effect is kept on purpose so that
        code relying on it keeps working.
    """
    bookmaker['Date'] = pd.to_datetime(bookmaker['Date'])
    bookmaker['Year'] = bookmaker['Date'].dt.year

    # Select the margin column before aggregating instead of using
    # `groupby(...).apply(lambda ...)`: the mean is computed by pandas directly
    # (no Python callback per group), the grouping columns are never handed to
    # a user function, and an empty input still yields a well-formed,
    # correctly named (empty) result.
    avg_margin_div_yearly = (
        bookmaker.groupby(['Div', 'Year'])[col_name]
        .mean()
        .reset_index(name='average_margin')
    )
    avg_margin_div_yearly['Bookmaker'] = company_name
    return avg_margin_div_yearly

bookmakers_data = preprocess_pipeline('all_avail_games.csv')

# The 'AVG' entry keeps its margin in a differently named column; every other
# bookmaker uses the 'return_on_game' column.
margin_columns = {'AVG': 'Avg_return_on_game'}

# Build all per-bookmaker summaries first and concatenate them once. Growing a
# DataFrame with pd.concat inside the loop re-copies the accumulated rows on
# every iteration and starts by concatenating an empty frame.
avg_margin_data = pd.concat(
    [
        get_avg_margin(data, name, margin_columns.get(name, 'return_on_game'))
        for name, data in bookmakers_data.items()
    ],
    axis=0,
)

# Keep only the first occurrence of any repeated column label, then switch to
# the presentation names used by the charts below.
avg_margin_data = avg_margin_data.loc[:, ~avg_margin_data.columns.duplicated()]
avg_margin_data.columns = ['League', 'Year', 'Average Margin of Games', 'Bookmaker']
avg_margin_data['League Name'] = avg_margin_data['League'].map(Divisions)

The average margin of bookmakers per game, broken down by year, league and company

In [None]:
# Animated scatter plot: one frame per year, one point per league and
# bookmaker, with a fixed y-range so the frames are directly comparable.
fig = (
    px.scatter(
        avg_margin_data,
        x="League Name",
        y="Average Margin of Games",
        color="Bookmaker",
        animation_frame="Year",
        range_y=[0.01, 0.2],
    )
    .update_traces(marker_size=10)
    .update_layout(autosize=False, width=900, height=500)
)
fig

Is there a significant difference in the margins of different companies across years and leagues when the probability of the game outcome is heavily skewed (HomeTeam win probability or AwayTeam win probability is more than 80%)?