In [10]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from termcolor import colored

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
for dirname, _, filenames in os.walk('/kaggle/input'):
    for filename in filenames:
        print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/football-clubs-rankings-world-cup-2025/CAF.csv
/kaggle/input/football-clubs-rankings-world-cup-2025/UEFA.csv
/kaggle/input/football-clubs-rankings-world-cup-2025/Concacaf.csv
/kaggle/input/football-clubs-rankings-world-cup-2025/CONMEBOL.csv
/kaggle/input/football-clubs-rankings-world-cup-2025/qualified_teams.csv
/kaggle/input/football-clubs-rankings-world-cup-2025/confederations_average_placement.csv
/kaggle/input/football-clubs-rankings-world-cup-2025/OFC.csv
/kaggle/input/football-clubs-rankings-world-cup-2025/AFC.csv


# 1. Introduction

In this notebook, I'm hoping to simulate the FIFA Club World Cup 2025 based on the Monte Carlo method on top of the confederations rankings added with the average placement of each confederation in the past intercontinental competitions.

# 2. Data Loading

First of all, we need to load our data. That will be consisted of the each confederation ranking (AFC, CAF, CONMEBOL, Concacaf, OFC and UEFA), a csv that contains the average placement for each confederation in the last 15 previous intercontinental competitions and finally the csv with all of the qualified teams.

In [11]:
def load_csv(file_path: str, limit: bool = True) -> pd.DataFrame:
    df = pd.read_csv(file_path)
    
    if limit:
        display(df.head(3))
    else:
        display(df)

    return df
    
# Loading the avg placement history
print("Average Placement")

confederations_avg_placement = load_csv("/kaggle/input/football-clubs-rankings-world-cup-2025/confederations_average_placement.csv", False)

# Loading the qualified teams
print("Qualified Teams")

qualified_teams = load_csv("/kaggle/input/football-clubs-rankings-world-cup-2025/qualified_teams.csv")

# Loading the confederations rankings
print("Confederation Rankings")
confederations = dict()
confederations["AFC"] = load_csv("/kaggle/input/football-clubs-rankings-world-cup-2025/AFC.csv")
confederations["CAF"] = load_csv("/kaggle/input/football-clubs-rankings-world-cup-2025/CAF.csv")
confederations["CONMEBOL"] = load_csv("/kaggle/input/football-clubs-rankings-world-cup-2025/CONMEBOL.csv")
confederations["Concacaf"] = load_csv("/kaggle/input/football-clubs-rankings-world-cup-2025/Concacaf.csv")
confederations["OFC"] = load_csv("/kaggle/input/football-clubs-rankings-world-cup-2025/OFC.csv")
confederations["UEFA"] = load_csv("/kaggle/input/football-clubs-rankings-world-cup-2025/UEFA.csv")

Average Placement


Unnamed: 0,Confederation,Average_Placement
0,UEFA,1.06
1,CONMEBOL,1.94
2,AFC,3.25
3,CAF,3.81
4,CONCACAF,4.19
5,OFC,4.94


Qualified Teams


Unnamed: 0,Name,Confederation,Country,Group
0,CR Flamengo,CONMEBOL,Brazil,D
1,Al Ahly FC,CAF,Egypt,A
2,Palmeiras,CONMEBOL,Brazil,A


Confederation Rankings


Unnamed: 0,Rk,Team,Tot pts
0,1,Al Hilal,118
1,2,Ulsan HD,81
2,3,Jeonbuk Motors,80


Unnamed: 0,Rk,Team,Tot pts
0,1,Al Ahly FC,140
1,2,Wydad AC,108
2,3,Espérance de Tunisie,100


Unnamed: 0,Rk,Team,Tot pts
0,1,CR Flamengo,141
1,2,Palmeiras,140
2,3,Atlético Mineiro,122


Unnamed: 0,Rk,Team,Tot pts
0,1,CF Monterrey,52
1,3,América,44
2,4,Philadelphia Union,41


Unnamed: 0,Rk,Team,Tot pts
0,1,Auckland City FC,66
1,2,AS Pirae,31
2,3,Rewa,20


Unnamed: 0,Rk,Team,Tot pts
0,1,Manchester City,123
1,2,Real Madrid C.F.,119
2,3,FC Bayern München,108


# 3. Data Engineering

Now that we have our data imported, we need to start thinking about how are we going to define the strength of our qualified teams. Since this is a competition that involves teams from different regions of the world, it's hard to find an exact way of putting into numbers how strong a team is compared to others that play in totally different competitions - That's a hard thing to do even with domestic teams.

To start things off, we are going to use the average placement for each confederation in the past 15 intercontinental competitions as a baseline. To convert that into something that we can actually use, we define a "Raw Factor" as we divide 1 by the average placement of each confederation. Once we do that, we'll need to define a number to be our target of excellence when looking at past competitions average placement, and for that we are going to use the UEFA average placement (1.06). 

After that, we have to normalize our confederations raw factors based on our biggest factor, which is the UEFA. For that, we just need to divide each confederation factor by the UEFA factor, so we can achieve our method of quantifying how strong is a confederation based on the UEFA.

In [12]:
confederations_normalized_factor = dict()

# Raw Factor
confederations_avg_placement["Raw Factor"] = 1 / confederations_avg_placement["Average_Placement"]

# UEFA Raw Factor
uefa_raw_factor = confederations_avg_placement[confederations_avg_placement["Confederation"] == "UEFA"].iloc[0]["Raw Factor"]

# Confederations Normalized Factor
for _, confederation in confederations_avg_placement.iterrows():
    confederations_normalized_factor[confederation.Confederation] = confederations_avg_placement[confederations_avg_placement["Confederation"] == confederation.Confederation].iloc[0]["Raw Factor"] / uefa_raw_factor

print(confederations_normalized_factor)

{'UEFA': 1.0, 'CONMEBOL': 0.5463917525773198, 'AFC': 0.3261538461538462, 'CAF': 0.2782152230971129, 'CONCACAF': 0.25298329355608595, 'OFC': 0.21457489878542513}


With those numbers saved, we just need to multiply the points of each team on their respective rankings by the factor of their confederation. Now we finally have a way of putting into numbers how strong a team actually is in this competition.

In [13]:
qualified_teams["Points"] = 0.0

for idx, team in qualified_teams.iterrows():
    confederation_avg_placement = confederations_avg_placement[confederations_avg_placement["Confederation"] == team.Confederation.upper()].iloc[0]["Average_Placement"]

    confederation_df = confederations[team.Confederation]

    team_points = confederation_df[confederation_df["Team"] == team.Name].iloc[0]["Tot pts"] * confederations_normalized_factor[team.Confederation.upper()]

    # Update the qualified teams dataframe
    qualified_teams.loc[idx, "Points"] = round(team_points ,2)

display(qualified_teams.sort_values(by="Points", ascending=False))

Unnamed: 0,Name,Confederation,Country,Group,Points
3,Manchester City,UEFA,England,G,123.0
4,Real Madrid C.F.,UEFA,Spain,H,119.0
15,FC Bayern München,UEFA,Germany,C,108.0
19,Paris Saint-Germain,UEFA,France,B,85.0
8,Chelsea FC,UEFA,England,D,79.0
21,Borussia Dortmund,UEFA,Germany,F,79.0
0,CR Flamengo,CONMEBOL,Brazil,D,77.04
2,Palmeiras,CONMEBOL,Brazil,A,76.49
22,FC Internazionale Milano,UEFA,Italy,E,76.0
24,FC Porto,UEFA,Portugal,A,68.0


In order to progress to our simulation, we need to split the qualified teams by their group so we can simulate the matches of each group.

In [14]:
groups = {group: data.reset_index(drop=True) for group, data in qualified_teams.groupby("Group")}
print("Group A")
groups["A"]

Group A


Unnamed: 0,Name,Confederation,Country,Group,Points
0,Al Ahly FC,CAF,Egypt,A,38.95
1,Palmeiras,CONMEBOL,Brazil,A,76.49
2,FC Porto,UEFA,Portugal,A,68.0
3,Inter Miami CF,Concacaf,USA,A,2.53


# 4. Predictions

Now we get to the fun stuff. In the code below we define functions to simulate our matches. Note that we are using the Monte Carlo simulation method, so we are going to simulate each match converting the points of both teams into probabilities. With those probabilities, we can generate a randomic number and define a winner for a match if that generated number is within the probability of that team winning against that opponent.

In [15]:
def get_winning_probability(team_points: float, opponent_points: float) -> float:
    if team_points <= 0 and opponent_points <= 0:
        return 0.5  # 0 Division
    if opponent_points <= 0:
        return 1.0
    if team_points <= 0:
        return 0.0

    return team_points / (team_points + opponent_points)

def simulate_game(proba_a):
    if np.random.rand() < proba_a:
        return 'A'  # Team A wins
    else:
        return 'B'  # Team B wins

def monte_carlo_simulation(team_a, team_b, num_simulations):
    team_a_wins = 0
    team_b_wins = 0
    
    # Probability based simply on the points
    team_a_proba = get_winning_probability(team_a['Points'], team_b['Points'])
    
    for _ in range(num_simulations):
        simulation_result = simulate_game(team_a_proba)
        if simulation_result == 'A':
            team_a_wins += 1
        else:
            team_b_wins += 1
           
    # Probability based on the simulations
    simulation_proba_a = team_a_wins / num_simulations
    simulation_proba_b = team_b_wins / num_simulations
    
    if simulation_proba_a > simulation_proba_b:
        print(colored(f"{team_a['Name']} 1 x 0 {team_b['Name']}", "red"))
        predicted_winner = team_a
    else:
        print(f"{team_a['Name']} 0 x 1 {team_b['Name']}")
        predicted_winner = team_b
    
    return predicted_winner

In the code below, we simulate all of the matches of the group stage, sorting our group tables by the number of points and, if there is a tie in points between two or more teams, we are going to prioritize the strongest team based on their normalized points.

In [16]:
group_size = 4
num_simulations = 5

for group_letter, group in groups.items():
    group["Group Points"] = 0

    print('\n')
    print(f"GROUP {group_letter}")

    # Iterate through the teams in a group
    for idx, team in group.iterrows():
        if (idx == group_size - 1):
            break

        # Iterate through the remaining teams of the group in order to simulate the matches
        for i in range(idx + 1, group_size):
            opponent = group.iloc[i]

            winner = monte_carlo_simulation(team, opponent, num_simulations)
            winner_idx = i if winner.Name == opponent.Name else idx

            group.loc[winner_idx, "Group Points"] += 3

    # Sort group by group points and strength points
    groups[group_letter] = group.sort_values(by=["Group Points", "Points"], ascending=[False, False]).reset_index(drop=True)

    print("\nTABLE")

    print(groups[group_letter])



GROUP A
Al Ahly FC 0 x 1 Palmeiras
Al Ahly FC 1 x 0 FC Porto
Al Ahly FC 1 x 0 Inter Miami CF
Palmeiras 1 x 0 FC Porto
Palmeiras 1 x 0 Inter Miami CF
FC Porto 1 x 0 Inter Miami CF

TABLE
             Name Confederation   Country Group  Points  Group Points
0       Palmeiras      CONMEBOL    Brazil     A   76.49             9
1      Al Ahly FC           CAF     Egypt     A   38.95             6
2        FC Porto          UEFA  Portugal     A   68.00             3
3  Inter Miami CF      Concacaf       USA     A    2.53             0


GROUP B
Botafogo 1 x 0 Seattle Sounders FC
Botafogo 0 x 1 Paris Saint-Germain
Botafogo 0 x 1 Atlético de Madrid
Seattle Sounders FC 0 x 1 Paris Saint-Germain
Seattle Sounders FC 0 x 1 Atlético de Madrid
Paris Saint-Germain 1 x 0 Atlético de Madrid

TABLE
                  Name Confederation Country Group  Points  Group Points
0  Paris Saint-Germain          UEFA  France     B   85.00             9
1   Atlético de Madrid          UEFA   Spain     B   67.00 

Finally, we are going to apply the same method to every match in the knockout stages, until we find our winner.

In [21]:
knockout_stages = {
    16: [],
    8: [],
    4: [],
    2: []
}

In [22]:
knockout_stages[16] = [
    (groups["A"].iloc[0], groups["B"].iloc[1]),
    (groups["C"].iloc[0], groups["D"].iloc[1]),
    (groups["E"].iloc[0], groups["F"].iloc[1]),
    (groups["G"].iloc[0], groups["H"].iloc[1]),
    (groups["B"].iloc[0], groups["A"].iloc[1]),
    (groups["D"].iloc[0], groups["C"].iloc[1]),
    (groups["F"].iloc[0], groups["E"].iloc[1]),
    (groups["H"].iloc[0], groups["G"].iloc[1]),
]

current_stage = 16

# Iterate through the stages until we get to the final
while current_stage >= 2:
    print("\nSTAGE:", current_stage)
    
    for game in knockout_stages[current_stage]:
        winner = monte_carlo_simulation(game[0], game[1], num_simulations)

        # Final
        if current_stage == 2:
            print("\nWINNER:", winner.Name.upper())
            break
        # If a match in the next stage is already full or nonexistent
        elif not knockout_stages[current_stage / 2] or len(knockout_stages[current_stage / 2][-1]) == 2:
            knockout_stages[current_stage / 2].append([]) # Create new match in the next stage

        knockout_stages[current_stage / 2][-1].append(winner)

    # Go to the next stage
    current_stage /= 2
        


STAGE: 16
Palmeiras 1 x 0 Atlético de Madrid
SL Benfica 0 x 1 Chelsea FC
FC Internazionale Milano 1 x 0 Fluminense FC
Manchester City 1 x 0 Al Hilal
Paris Saint-Germain 0 x 1 Al Ahly FC
CR Flamengo 0 x 1 FC Bayern München
Borussia Dortmund 1 x 0 CA River Plate
Real Madrid C.F. 1 x 0 Wydad AC

STAGE: 8.0
Palmeiras 0 x 1 Chelsea FC
FC Internazionale Milano 0 x 1 Manchester City
Al Ahly FC 0 x 1 FC Bayern München
Borussia Dortmund 0 x 1 Real Madrid C.F.

STAGE: 4.0
Chelsea FC 0 x 1 Manchester City
FC Bayern München 0 x 1 Real Madrid C.F.

STAGE: 2.0
Manchester City 0 x 1 Real Madrid C.F.

WINNER: REAL MADRID C.F.
