In [1]:
import pandas as pd
import os
# NOTE(review): hard-coded, machine-specific absolute path. The chdir runs
# before the `utils` import below and before the relative CSV path used later,
# so presumably both are resolved against this project root — confirm, and
# consider deriving the root from a configurable location instead.
os.chdir(r'D:\Projects\football-odds-analysis')
from utils.preprocess import preprocess_pipeline

# Bookmakers list
# Bet365
# Bet&Win
# Interwetten
# William_Hill
# VC_Bet


def get_avg_margin(bookmaker: pd.DataFrame, 
                   company_name: str,
                   col_name: str = 'return_on_game') -> pd.DataFrame:
    """Compute a bookmaker's average margin per league and calendar year.

    The input frame is not modified: the year is derived on a narrow working
    copy, so calling this function repeatedly on the same frame is safe.

    Args:
        bookmaker (pd.DataFrame): bookmaker's data; must include `Div`, `Date`
            and the column named by `col_name`.
        company_name (str): bookmaker's name, written to the `Bookmaker` column.
        col_name (str, optional): name of the column holding the per-game
            margin. Defaults to 'return_on_game'.

    Returns:
        pd.DataFrame: one row per (`Div`, `Year`) pair with the columns
        `Div`, `Year`, `average_margin` and `Bookmaker`. An empty input
        yields an empty frame with those same four columns.
    """
    # Keep only what the aggregation needs and attach the year to that copy,
    # leaving the caller's `Date` column and column set untouched.
    games = bookmaker[['Div', col_name]].assign(
        Year=pd.to_datetime(bookmaker['Date']).dt.year
    )
    # Column-selected mean is the built-in equivalent of the per-group lambda
    # and keeps a stable (Div, Year, value) shape even when there are no rows.
    avg_margin_div_yearly = (
        games.groupby(['Div', 'Year'])[col_name]
        .mean()
        .rename('average_margin')
        .reset_index()
    )
    avg_margin_div_yearly['Bookmaker'] = company_name
    return avg_margin_div_yearly

# Per-bookmaker frames, keyed by bookmaker name.
bookmakers_data = preprocess_pipeline('all_avail_games.csv')

# Collect one summary per bookmaker and concatenate once at the end, instead
# of re-concatenating onto a growing (initially empty) frame on every pass.
margin_frames = []
for key in bookmakers_data.keys():
    if key != 'AVG':
        margin_frames.append(get_avg_margin(bookmakers_data[key], key))
    else:
        # The 'AVG' entry keeps its margin under a different column name.
        margin_frames.append(get_avg_margin(bookmakers_data[key], key,
                                            'Avg_return_on_game'))

# Each piece keeps its own 0..n index, exactly as before (no ignore_index).
# Rename by name rather than by position so a column-order change cannot
# silently mislabel the data.
avg_margin_data = pd.concat(margin_frames, axis=0).rename(columns={
    'Div': 'League',
    'average_margin': 'Average Margin of Games',
})
avg_margin_data.head()

Unnamed: 0,League,Year,Average Margin of Games,Bookmaker
0,B1,2005,0.119081,Bet365
1,B1,2006,0.117174,Bet365
2,B1,2007,0.111013,Bet365
3,B1,2008,0.087826,Bet365
4,B1,2009,0.079723,Bet365


In [2]:
import plotly.express as px
# Animated scatter: one frame per year, leagues on the x axis, one colour per
# bookmaker. The update_* methods return the figure, so they can be chained.
fig = (
    px.scatter(
        avg_margin_data,
        x="League",
        y="Average Margin of Games",
        animation_frame="Year",
        color="Bookmaker",
        range_y=[0.01, 0.2],
    )
    .update_traces(marker_size=10)
    .update_layout(autosize=False, width=1200, height=600)
)
fig