In [None]:
import pandas as pd
import os
import numpy as np
import tqdm
import pickle
import plotly.graph_objects as go
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("darkgrid")
os.chdir(r'D:\Projects\football-odds-analysis')
from utils.preprocess import preprocess_odds_results, preprocess_pipeline
from input_variables import COMPANY_BETS, CO_CODES
from utils.bet_payout_engine import get_bet_results

# --- Run configuration ---
# True  -> recompute the per-bin bet records and write 'deciles_returns.pickle'.
# False -> read the previously written pickle instead.
IS_FIRST_RUN = False
# Number of equal-width bins used to split the [0, 1] implied-probability range.
N_CUTS_PROBABILITIES = 4

# --- Data loading ---
# Both preprocessing helpers are fed the same raw games file.
bookmakers_data = preprocess_pipeline('all_avail_games.csv')
odds_results_data = preprocess_odds_results('all_avail_games.csv')
bookmakers_odds, game_results = odds_results_data['Odds'], odds_results_data['Results']

if IS_FIRST_RUN:
    # Slow path: evaluate a unit bet for every (game, bookmaker selection) pair,
    # group the outcomes by implied-probability bin, then cache the raw records.
    bookmakers_deciles = (
        bookmakers_data['Bet365'][['Unique_ID', 'Date', 'Div']]
        .dropna()
        .set_index('Unique_ID', drop=True)
    )
    bookmakers_deciles['Date'] = pd.to_datetime(bookmakers_deciles['Date'])
    bookmakers_deciles['Year'] = bookmakers_deciles['Date'].dt.year

    # N_CUTS_PROBABILITIES equal-width bins over [0, 1]; bin ids are 0..N-1.
    intervals = np.linspace(0, 1, N_CUTS_PROBABILITIES + 1)
    deciles_returns = {d: [] for d in range(N_CUTS_PROBABILITIES)}

    # Bin id of each selection's implied probability (1 / odds).
    # pd.cut leaves NaN where the odds are missing or fall outside the bins.
    for c in COMPANY_BETS:
        bookmakers_deciles[c] = pd.cut(1 / bookmakers_odds[c], intervals, labels=False)

    for game in tqdm.tqdm(bookmakers_odds.index):
        # Year and division depend only on the game, so look them up once.
        year = bookmakers_deciles.loc[game, 'Year']
        div = bookmakers_deciles.loc[game, 'Div']
        for c in COMPANY_BETS:
            decile = bookmakers_deciles.loc[game, c]
            if pd.isna(decile):
                # No usable odds for this selection: there is no bin to file the
                # bet under (indexing the dict with NaN would raise KeyError).
                continue
            # The last character of the column code is the bet type, the rest is
            # the bookmaker code that CO_CODES translates.
            bet = c[-1]
            company = CO_CODES[c[:-1]]
            # NOTE(review): the trailing 1 is presumably a unit stake, so
            # subtracting 1 gives the net return - confirm against
            # utils.bet_payout_engine.get_bet_results.
            net_return = get_bet_results(bookmakers_data, game, bet, company, 1) - 1
            deciles_returns[int(decile)].append([game, year, div, net_return])

    with open('deciles_returns.pickle', 'wb') as handle:
        pickle.dump(deciles_returns, handle, protocol=pickle.HIGHEST_PROTOCOL)
else:
    # Fast path: reuse the records cached by a previous IS_FIRST_RUN=True run.
    # SECURITY: pickle.load can execute arbitrary code; only load a cache file
    # that this notebook produced itself.
    with open('deciles_returns.pickle', 'rb') as handle:
        deciles_returns = pickle.load(handle)

# Turn each bin's list of [game_id, year, div, return] records into a DataFrame
# indexed by game id. The column names are handed to the constructor so that a
# bin which received no bets (empty list) still becomes a correctly labelled
# empty frame; renaming the columns afterwards fails on a zero-column frame.
for key, records in list(deciles_returns.items()):
    decile_df = pd.DataFrame(records, columns=['game_id', 'year', 'div', 'return'])
    decile_df = decile_df.set_index('game_id', drop=True)
    deciles_returns[key] = decile_df


In [None]:
# Tabulate how many bets were filed under each probability bin.
decile_labels = [f'Decile {i}' for i in range(N_CUTS_PROBABILITIES)]
N_Bets = pd.DataFrame(index=decile_labels, columns=['Number of Bets'])
for i, label in enumerate(decile_labels):
    N_Bets.loc[label, 'Number of Bets'] = len(deciles_returns[i])

# Bar chart of the bin sizes; fill and outline share one colour.
bar_colour = 'rgb(1, 36, 92)'
fig = go.Figure(data=[go.Bar(x=N_Bets.index, y=N_Bets['Number of Bets'])])
fig.update_traces(marker_color=bar_colour, marker_line_color=bar_colour, opacity=0.8)
fig.update_layout(title_text='Number of bets in each deciles')
fig.update_xaxes(tickmode='linear')
fig.show()

In [None]:
# Mean return per calendar year for every probability bin, one column per bin.
# The frame is built from a dict of Series so its index is the union of the
# years seen in any bin. Assigning columns one at a time to an empty frame would
# fix the index to the first bin's years and silently drop years that only
# appear in later bins.
yearly_means = {}
for dec, dec_df in deciles_returns.items():
    # Label each column from the key it was computed from, so labels cannot
    # drift from the data if the cached bins differ from N_CUTS_PROBABILITIES.
    # NOTE: 'Avergae' (sic) is kept byte-for-byte in case other cells use the label.
    yearly_means[f'Decile {dec} Avergae Return'] = dec_df.groupby('year')['return'].mean()
decile_year_ret = pd.DataFrame(yearly_means)
fig = px.line(decile_year_ret)
fig