# What are the chances that Swedens wins it's qualification group? 

## Relevant information: 
* Group teams: Sweden, Switzerland, Kosovo, Slovenien
* Each team plays 6 games in total, 2 against each team. 

## Strategy:

* Step 1: Find a method for determining the probability for each match outcome.
Here a simple elo based method could be used.
* Then evaluated by seeing how it performs on historical data
If it doesn’t perform well go back and implement a more advanced method (like poison goal models, xG-data and ML)

Step 2: Run a monte carlo simulation of the group stage and see how many times sweden end on top.


## Sources
* Calculating elo: https://www.kaggle.com/code/thomasstokes/custom-football-elo-rating

## Load data

In [2]:
import kagglehub
import pandas as pd

# Download latest version
path = kagglehub.dataset_download("martj42/international-football-results-from-1872-to-2017")
df = pd.read_csv(path+"/results.csv")

print("Last updated: " + df['date'].max())

Last updated: 2025-06-10


## Calculate elo data

In [48]:
def calculate_team_a_expected_result(a_elo,b_elo):
    return 1 / (1 + pow(10,-(a_elo-b_elo)/600))

def update_elo_draw(a_elo, b_elo, i):
    w = 0.5

    new_a_elo = a_elo + i*(w - calculate_team_a_expected_result(a_elo, b_elo))
    new_b_elo = b_elo + i*(w - calculate_team_a_expected_result(b_elo,a_elo))
    return (new_a_elo, new_b_elo)

def update_elo_win(win_elo, loss_elo, i):
    w_win = 1
    w_loss = 0

    new_win_elo = win_elo + i*(w_win - calculate_team_a_expected_result(win_elo, loss_elo))
    new_loss_elo = loss_elo + i*(w_loss - calculate_team_a_expected_result(loss_elo,win_elo))
    return (new_win_elo, new_loss_elo)

### Add game importance

In [58]:
df['tournament'].str.lower().unique() #just to check all the unique games, printable

friendlies = ['friendly']
nations_leauge = ['uefa nations leauge']
qualifications = ['fifa world cup qualification', 'afc asian cup qualification', 'copa américa qualification',
                   'uefa euro qualification', 'concacaf championship qualification', 'african cup of nations qualification']
confederation_finals = ['copa américa', 'uefa euro', 'african cup of nations', 'concaf championship', 'afc asian cup', 
        'oceania nations cup', 'confederations cup']
world_cup = ['fifa world cup']

#retrieve match type and assign importance of game based on the match partitioning above
def game_importance_score(row):
    tournament = row['tournament'].lower()
    
    if tournament in friendlies:
        return 10

    if tournament in nations_leauge:
        return 15
    
    if tournament in qualifications:
        return 25

    if tournament in confederation_finals:
        return 35
    
    if tournament in world_cup:
        return 60

    
    return 10

df['importance'] = df.apply(game_importance_score, axis = 1)

### Add elo

In [61]:
all_countries = pd.unique(pd.concat([df['home_team'], df['away_team']]))

elo_home = []
elo_away = []

current_elo = {}
for country in all_countries:
    current_elo[country] = 1000


for index,row in df.iterrows():
    home_team = row.home_team
    away_team = row.away_team
    home_score = row.home_score
    away_score = row.away_score
    i = 10
    score_diff = home_score-away_score
    
    home_current_elo = current_elo[home_team]
    away_current_elo = current_elo[away_team]
    

    home_win = (score_diff)>0
    
    if (score_diff)>0:
        (new_home_elo, new_away_elo) = update_elo_win(home_current_elo,away_current_elo,i) 
    elif (score_diff)==0:
        (new_home_elo, new_away_elo) = update_elo_draw(home_current_elo,away_current_elo,i) 
    else:
        (new_away_elo, new_home_elo) = update_elo_win(away_current_elo,home_current_elo,i)

    # Update elo
    current_elo[home_team] = new_home_elo
    current_elo[away_team] = new_away_elo
    elo_home.append(new_home_elo)
    elo_away.append(new_away_elo)

df['home_elo'] = elo_home
df['away_elo'] = elo_away
    

In [62]:
countries = list(current_elo.keys())
elo = list(current_elo.values())

df_current_elo = pd.DataFrame(data={'Country': countries, 'Elo': elo})

# print(countries)
# print(elo)
print(df_current_elo.sort_values('Elo', ascending=False).head(50))

            Country          Elo
35            Spain  1498.810986
31           Brazil  1484.666488
8         Argentina  1469.951097
10           France  1449.058901
1           England  1409.385786
19          Germany  1397.012821
13      Netherlands  1389.067996
49         Portugal  1388.981159
20            Italy  1359.287165
96             Iran  1356.011792
32            Japan  1350.856076
9           Belgium  1333.070806
78         Colombia  1323.133276
106     South Korea  1311.870117
84          Croatia  1311.503140
55           Mexico  1297.993398
51        Australia  1286.845461
137         Morocco  1284.182796
5           Uruguay  1283.090337
36            Egypt  1276.391475
151         Senegal  1273.540299
26          Denmark  1271.104271
4     United States  1268.635471
140     Ivory Coast  1249.258475
157         Algeria  1248.905543
17      Switzerland  1244.642533
23           Russia  1240.649756
12           Jersey  1235.884248
18           Sweden  1226.510336
226       

## Step 1:

### Function for determining match statistics: