In [1]:
from src.simulation.group_phase import simulate_group_phase
from src.simulation.bracket import get_knockout_games
from src.model import GoalSampler
import pandas as pd
from collections import defaultdict
from tqdm import tqdm

groups = ["A", "B", "C", "D", "E", "F"]
group_games = pd.read_csv(r"data\cleaned\2024_games.csv", sep=";")
group_games = group_games[group_games["group"].notna()]
market_values = pd.read_csv(r"data\cleaned\2024_market_values.csv", sep=";", index_col="Country")["MarketValue"]
goal_sampler = GoalSampler()
stages = ["Round of 16", "Quarterfinals", "Semifinals", "Final"]
results = {stage: defaultdict(int) for stage in stages}
results["Winner"] = defaultdict(int)
results["Finalists"] = defaultdict(int)
results["group_placements"] = {group: defaultdict(int) for group in groups}
# TODO: Add most goals scored by a team
results["most_goals"] = defaultdict(int)

def simulate_tournament(group_games: pd.DataFrame, market_values: pd.Series) -> str:
    global results
    group_results = simulate_group_phase(group_games)

    goals = {team: goals for team, goals in zip(group_results.group_results.index.get_level_values(1), group_results.group_results["goals"])}
    for group in groups:
        placements = group_results.group_results.loc[group].index.tolist()
        results["group_placements"][group]["|".join(placements)] += 1
        
    next_games = get_knockout_games(group_results)

    for stage in stages:
        for team1, team2 in next_games:
            results[stage][team1] += 1
            results[stage][team2] += 1


        round_winners = []
        for team1, team2 in next_games:
            winner, goals_team1, goals_team2 = goal_sampler.get_knockout_stage_winner(team1, team2, market_values[team1], market_values[team2])
            goals[team1] += goals_team1
            goals[team2] += goals_team2
            round_winners.append(winner)
        if len(round_winners) == 1:
            results["Winner"][round_winners[0]] += 1
            finalists = "-".join(sorted(next_games[0]))
            results["Finalists"][finalists] += 1
            results["most_goals"][max(goals, key=goals.get)] += 1
            return
        next_games = [round_winners[i:i+2] for i in range(0, len(round_winners), 2)]

In [2]:
n_runs = 500_000
pbar = tqdm(total=n_runs)

for simulation_run in range(n_runs):
    simulate_tournament(group_games, market_values)
    pbar.update(1)

100%|██████████| 500000/500000 [4:22:19<00:00, 30.81it/s]  

In [4]:
import json

with open("results.json", "w") as f:
    json.dump(results, f)

{'A': defaultdict(int,
             {'Germany|Switzerland|Scotland|Hungary': 52020,
              'Scotland|Germany|Switzerland|Hungary': 21607,
              'Germany|Hungary|Switzerland|Scotland': 43422,
              'Germany|Scotland|Switzerland|Hungary': 48060,
              'Switzerland|Hungary|Germany|Scotland': 11519,
              'Germany|Switzerland|Hungary|Scotland': 48546,
              'Scotland|Germany|Hungary|Switzerland': 19232,
              'Scotland|Switzerland|Germany|Hungary': 12270,
              'Switzerland|Germany|Hungary|Scotland': 24284,
              'Hungary|Scotland|Germany|Switzerland': 8912,
              'Germany|Scotland|Hungary|Switzerland': 42141,
              'Switzerland|Germany|Scotland|Hungary': 25542,
              'Germany|Hungary|Scotland|Switzerland': 40132,
              'Scotland|Hungary|Switzerland|Germany': 5445,
              'Hungary|Switzerland|Germany|Scotland': 10556,
              'Switzerland|Scotland|Hungary|Germany': 6438,
    

In [23]:
import pandas as pd

def get_odds(key: str) -> pd.Series:
    as_series = pd.Series(results[key])
    percentage = as_series / n_runs
    return 1 / percentage

for bet_type in results:
    # if bet_type == "group_placements":
    #     for group_name, group_placements in results[bet_type].items():
    #         odds = get_odds(group)
    #         odds.sort_values().to_excel(f"out/{group}.xlsx")
    # else:
        odds = get_odds(bet_type)
        odds.sort_values().to_excel(f"out/{bet_type}.xlsx")

TypeError: unsupported operand type(s) for /: 'collections.defaultdict' and 'int'

In [8]:
odds.sort_values().to_excel("odds.xlsx")

In [None]:
as_series = pd.Series(results["Winner"])
percentage = (as_series / as_series.sum()).sort_values()

In [None]:
percentage["Turkey"] + percentage["Georgia"] + percentage["Portugal"] + percentage["Czech Republic"]

0.13943

In [16]:
results["Winner"]

defaultdict(int,
            {'Spain': 46857,
             'England': 156165,
             'Germany': 30893,
             'Portugal': 59445,
             'France': 95394,
             'Denmark': 6654,
             'Italy': 19161,
             'Belgium': 13800,
             'Scotland': 2512,
             'Croatia': 4266,
             'Netherlands': 30478,
             'Poland': 2122,
             'Switzerland': 3670,
             'Hungary': 2161,
             'Romania': 1477,
             'Serbia': 4040,
             'Turkey': 4125,
             'Slovakia': 2096,
             'Czech Republic': 2058,
             'Ukraine': 5366,
             'Slovenia': 1639,
             'Georgia': 1788,
             'Austria': 2521,
             'Albania': 1312})

In [19]:
get_odds("Quarterfinals")

Spain             12.779349
Germany           14.838997
Portugal          12.890959
Ukraine           27.782794
Belgium           19.106850
Czech Republic    51.882693
England            9.897462
Switzerland       31.095736
Serbia            34.022863
Turkey            38.387348
France            11.139331
Croatia           30.256042
Poland            49.395522
Netherlands       15.593265
Hungary           39.126692
Italy             16.831971
Romania           48.352392
Scotland          35.810527
Slovenia          51.794686
Austria           45.296522
Albania           50.176873
Slovakia          41.662327
Georgia           55.910431
Denmark           27.346127
dtype: float64

In [18]:
1 / 0.4

2.5

In [20]:
as_series = pd.Series(results["Quarterfinals"])
percentage = as_series / as_series.sum()

In [21]:
as_series

Spain             313005
Germany           269560
Portugal          310295
Ukraine           143974
Belgium           209349
Czech Republic     77097
England           404144
Switzerland       128635
Serbia            117568
Turkey            104201
France            359088
Croatia           132205
Poland             80979
Netherlands       256521
Hungary           102232
Italy             237643
Romania            82726
Scotland          111699
Slovenia           77228
Austria            88307
Albania            79718
Slovakia           96010
Georgia            71543
Denmark           146273
dtype: int64

In [22]:
percentage

Spain             0.078251
Germany           0.067390
Portugal          0.077574
Ukraine           0.035993
Belgium           0.052337
Czech Republic    0.019274
England           0.101036
Switzerland       0.032159
Serbia            0.029392
Turkey            0.026050
France            0.089772
Croatia           0.033051
Poland            0.020245
Netherlands       0.064130
Hungary           0.025558
Italy             0.059411
Romania           0.020681
Scotland          0.027925
Slovenia          0.019307
Austria           0.022077
Albania           0.019929
Slovakia          0.024002
Georgia           0.017886
Denmark           0.036568
dtype: float64