In [1]:
import requests
import math
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup as bs

In [2]:
elo_ratings_url = "http://elofootball.com/"
elo_ratings_page = requests.get(elo_ratings_url)

In [3]:
if elo_ratings_page:
    page_parsed = bs(elo_ratings_page.text, 'html.parser')
    print("HTML parsed succesfully.")
else:
    print("Could not parse the HTML.")

HTML parsed succesfully.


In [4]:
all_trs = page_parsed.find_all("tr")
print("Total tr tags in the HTML:", len(all_trs))

Total tr tags in the HTML: 2025


In [5]:
teams = {}

# Since we cannot identify the table correctly, let's find the tr tags with the correct amount of td tags
for tr in all_trs:
    all_tds = tr.find_all("td")
    
    if all_tds and len(all_tds) == 18:
        teams[all_tds[1].text.strip()] = int(all_tds[8].text)
        
print("Total teams:", len(teams))

Total teams: 50


In [10]:
knockout_games = [
    [
        "Atlético Madrid",
        "Borussia Dortmund",
    ],
    [
        "Paris Saint-Germain",
        "FC Barcelona",
    ],
    [
        "Arsenal FC",
        "Bayern München",
    ],
    [
        "Real Madrid",
        "Manchester City",
    ],
]

print("Quarter finals power ranking by ELO ratings:")

columns = ['Team', 'ELO Rating']

data = []

for game in knockout_games:
    for team in game:
        data.append([team, teams[team]])
        
teams_df = pd.DataFrame(data, columns=columns).sort_values(by='ELO Rating', ascending=False).reset_index(drop=True)
display(teams_df)

Quarter finals power ranking by ELO ratings:


Unnamed: 0,Team,ELO Rating
0,Manchester City,2400
1,Real Madrid,2329
2,Arsenal FC,2241
3,Bayern München,2237
4,Paris Saint-Germain,2221
5,FC Barcelona,2197
6,Borussia Dortmund,2182
7,Atlético Madrid,2134


In [11]:
def get_winning_probability(elo_a, elo_b):
    # ELO difference
    delta_elo = elo_b - elo_a
    
    # Using a logistic function to convert the Team A winning probabilites
    proba_a = 1 / (1 + math.pow(10, delta_elo / 400))
    
    return proba_a

def simulate_game(proba_a):
    if np.random.rand() < proba_a:
        return 'A'  # Team A wins
    else:
        return 'B'  # Team B wins
    
def monte_carlo_simulation(team_a, team_b, num_simulations):
    team_a_wins = 0
    team_b_wins = 0
    
    # Probability based simply on the ELO rating
    elo_proba_a = get_winning_probability(team_a['elo'], team_b['elo'])
    
    for _ in range(num_simulations):
        simulation_result = simulate_game(elo_proba_a)
        if simulation_result == 'A':
            team_a_wins += 1
        else:
            team_b_wins += 1
           
    # Probability based on the simulations
    simulation_proba_a = team_a_wins / num_simulations
    simulation_proba_b = team_b_wins / num_simulations
    
    print(f"\n{team_a['team']} ({team_a['elo']}) won {team_a_wins} out of {num_simulations} simulations ({round(simulation_proba_a, 2)}%)")
    print(f"{team_b['team']} ({team_b['elo']}) won {team_b_wins} out of {num_simulations} simulations ({round(simulation_proba_b, 2)}%)")
    
    if simulation_proba_a > simulation_proba_b:
        predicted_winner = team_a['team']
    else:
        predicted_winner = team_b['team']
    
    print(f"RESULT: {predicted_winner} wins.")
    
    return predicted_winner
    
def simulate_stage(games, num_simulations):
    next_stage = []
    
    for game in games:
        winner = monte_carlo_simulation(
            {
                'team': game[0],
                'elo': teams[game[0]]
            }, 
            {
                'team': game[1],
                'elo': teams[game[1]]
            },
            num_simulations
        )
        
        if next_stage and len(next_stage[-1]) < 2:
            next_stage[-1].append(winner)
        else:
            next_stage.append([winner])
            
    return next_stage
        
while len(knockout_games) > 0 and len(knockout_games[0]) > 1:
    if len(knockout_games) == 4:
        print("\nQUARTER FINALS")
    elif len(knockout_games) == 2:
        print("\nSEMI FINALS")
    else:
        print("\nFINAL")
        
    knockout_games = simulate_stage(knockout_games, 101)


QUARTER FINALS

Atlético Madrid (2134) won 36 out of 101 simulations (0.36%)
Borussia Dortmund (2182) won 65 out of 101 simulations (0.64%)
RESULT: Borussia Dortmund wins.

Paris Saint-Germain (2221) won 50 out of 101 simulations (0.5%)
FC Barcelona (2197) won 51 out of 101 simulations (0.5%)
RESULT: FC Barcelona wins.

Arsenal FC (2241) won 53 out of 101 simulations (0.52%)
Bayern München (2237) won 48 out of 101 simulations (0.48%)
RESULT: Arsenal FC wins.

Real Madrid (2329) won 47 out of 101 simulations (0.47%)
Manchester City (2400) won 54 out of 101 simulations (0.53%)
RESULT: Manchester City wins.

SEMI FINALS

Borussia Dortmund (2182) won 62 out of 101 simulations (0.61%)
FC Barcelona (2197) won 39 out of 101 simulations (0.39%)
RESULT: Borussia Dortmund wins.

Arsenal FC (2241) won 34 out of 101 simulations (0.34%)
Manchester City (2400) won 67 out of 101 simulations (0.66%)
RESULT: Manchester City wins.

FINAL

Borussia Dortmund (2182) won 32 out of 101 simulations (0.32%)
M