In [1]:
import math
import bisect
from tqdm import tqdm
from itertools import accumulate

class Random_Number_Generator():
    """Pure-Python MT19937 (32-bit Mersenne Twister) pseudo-random generator.

    The state is a list of ``n`` 32-bit words (``self.MT``). Numbers are read
    from it one at a time and tempered; once all ``n`` words have been used the
    whole state is regenerated by ``twist()``.

    Args:
        c_seed: Integer seed. Only its low 32 bits are used.
    """

    def __init__(self, c_seed=0):
        # Word size, state length, middle offset and split point.
        # self.r is not read anywhere in this class; the bit masks below are
        # written out literally.
        (self.w, self.n, self.m, self.r) = (32, 624, 397, 31)
        # Constant XOR-ed into a word during twist() when the mixed word is odd
        self.a = 0x9908B0DF
        # Tempering shifts and masks used by extract_number()
        (self.u, self.d) = (11, 0xFFFFFFFF)
        (self.s, self.b) = (7, 0x9D2C5680)
        (self.t, self.c) = (15, 0xEFC60000)
        self.l = 18
        # Multiplier of the seeding recurrence
        self.f = 1812433253
        # Array that stores the state of the generator
        self.MT = [0] * self.n
        # Placeholder position; seed() below sets the real one
        self.index = self.n + 1
        self.lower_mask = 0x7FFFFFFF
        self.upper_mask = 0x80000000
        # Initialise the state from the seed
        self.c_seed = c_seed
        self.seed(c_seed)

    def seed(self, num):
        """Initialise the generator state from an integer seed.

        Args:
            num: Integer seed; it is reduced to its low 32 bits so that every
                state word stays within the 32-bit word size.
        """
        # Mask the seed: an unmasked value wider than 32 bits (or a negative
        # one) would leak its high bits into MT[1] through the right shift.
        self.MT[0] = num & 0xffffffff
        # Force a twist() before the first number is extracted
        self.index = self.n
        for i in range(1, self.n):
            prev = self.MT[i-1]
            self.MT[i] = (self.f * (prev ^ (prev >> (self.w-2))) + i) & 0xffffffff

    def twist(self):
        """Regenerate all n state words and restart reading from word 0."""
        for i in range(self.n):
            # Top bit of the current word joined with the low 31 bits of the next
            x = (self.MT[i] & self.upper_mask) + \
                (self.MT[(i+1) % self.n] & self.lower_mask)
            xA = x >> 1
            if x % 2 != 0:
                xA ^= self.a
            self.MT[i] = self.MT[(i + self.m) % self.n] ^ xA
        self.index = 0

    def extract_number(self):
        """Return the next tempered 32-bit integer.

        twist() is called first whenever all n state words have been consumed.
        """
        if self.index >= self.n:
            self.twist()

        y = self.MT[self.index]
        y ^= (y >> self.u) & self.d
        y ^= (y << self.s) & self.b
        y ^= (y << self.t) & self.c
        y ^= y >> self.l

        self.index += 1
        return y & 0xffffffff

    def random(self):
        """Return a float uniformly distributed in [0, 1)."""
        return self.extract_number() / 4294967296  # which is 2**w

    def choices(self, population, weights=None, k=1):
        """Draw k elements from population with replacement.

        Args:
            population: Sequence to draw from (must support len() and indexing).
            weights: Optional sequence of relative weights, one per element.
                Every element is equally likely when omitted.
            k: Number of draws.

        Returns:
            The single chosen element when k == 1, otherwise a list of the
            chosen elements (an empty list when k < 1).

        Raises:
            ValueError: If the number of weights differs from the population
                size, or if the weights do not sum to a positive value.
            IndexError: If the population is empty and a draw is requested.
        """
        size = len(population)
        if weights is None:
            weights = [1] * size
        elif len(weights) != size:
            raise ValueError("The number of weights must match the population size.")

        if k < 1:
            return []
        if size == 0:
            raise IndexError("Cannot choose from an empty population.")

        cumulative_weights = list(accumulate(weights))
        # Take the total from the cumulative list itself so the two can never
        # disagree; a separately computed sum() may round differently.
        total = cumulative_weights[-1]
        if total <= 0:
            raise ValueError("The total of the weights must be greater than zero.")

        # Bounding the search at the last index keeps the result a valid
        # position even if rounding pushes a draw up to the total.
        last = size - 1
        picked = [
            population[bisect.bisect(cumulative_weights, self.random() * total, 0, last)]
            for _ in range(k)
        ]

        if k == 1:
            return picked[0]
        return picked

In [2]:
import pandas as pd

# Load the FIFA ranking table and map each country name to its ranking points
data = pd.read_excel("fifa-ranking.xlsx")
countries = dict(zip(data["country"], data["points"]))

# The schedule below uses short names for two teams; alias them to the names
# under which they are looked up in the ranking table
for alias, official in (('Iran', 'IR Iran'), ('South Korea', 'Korea Republic')):
    countries[alias] = countries[official]

# Generator instance shared by the match simulation
RNG = Random_Number_Generator(243)

# Group-stage match schedule: group name -> list of (team1, team2) fixtures
matches = {
    'Group A': [
        ('Qatar', 'Ecuador'), ('Senegal', 'Netherlands'),
        ('Qatar', 'Senegal'), ('Netherlands', 'Ecuador'),
        ('Ecuador', 'Senegal'), ('Netherlands', 'Qatar'),
    ],
    'Group B': [
        ('England', 'Iran'), ('USA', 'Wales'),
        ('Wales', 'Iran'), ('England', 'USA'),
        ('Iran', 'USA'), ('Wales', 'England'),
    ],
    'Group C': [
        ('Argentina', 'Saudi Arabia'), ('Mexico', 'Poland'),
        ('Poland', 'Saudi Arabia'), ('Argentina', 'Mexico'),
        ('Poland', 'Argentina'), ('Saudi Arabia', 'Mexico'),
    ],
    'Group D': [
        ('Denmark', 'Tunisia'), ('France', 'Australia'),
        ('Tunisia', 'Australia'), ('France', 'Denmark'),
        ('Tunisia', 'France'), ('Australia', 'Denmark'),
    ],
    'Group E': [
        ('Germany', 'Japan'), ('Spain', 'Costa Rica'),
        ('Japan', 'Costa Rica'), ('Spain', 'Germany'),
        ('Japan', 'Spain'), ('Costa Rica', 'Germany'),
    ],
    'Group F': [
        ('Morocco', 'Croatia'), ('Belgium', 'Canada'),
        ('Belgium', 'Morocco'), ('Croatia', 'Canada'),
        ('Croatia', 'Belgium'), ('Canada', 'Morocco'),
    ],
    'Group G': [
        ('Switzerland', 'Cameroon'), ('Brazil', 'Serbia'),
        ('Cameroon', 'Serbia'), ('Brazil', 'Switzerland'),
        ('Serbia', 'Switzerland'), ('Cameroon', 'Brazil'),
    ],
    'Group H': [
        ('Uruguay', 'South Korea'), ('Portugal', 'Ghana'),
        ('South Korea', 'Ghana'), ('Portugal', 'Uruguay'),
        ('South Korea', 'Portugal'), ('Ghana', 'Uruguay'),
    ],
}

# Simulate a match between two teams (entry point: simulate_match)
def _poisson_weights(rate, max_goals):
    """Return the Poisson(rate) probabilities of 0 .. max_goals-1 goals.

    The distribution is cut off at max_goals and not renormalised; the values
    are only used as relative weights.
    """
    return [math.exp(-rate) * pow(rate, goals) / math.factorial(goals)
            for goals in range(max_goals)]


def _sample_goal_total(rate, n_draws, max_goals):
    """Draw n_draws goal counts from the truncated Poisson and add them up."""
    draws = RNG.choices(range(max_goals), k=n_draws,
                        weights=_poisson_weights(rate, max_goals))
    # RNG.choices hands back a bare value instead of a list for a single draw
    return sum(draws) if isinstance(draws, list) else draws


def simulate_match(team1, team2, n_draws=1000, max_goals=10):
    """Simulate one match and return the winner.

    Each team's scoring rate is the ratio of its ranking points to the
    opponent's. A team's score is the sum of ``n_draws`` samples from a
    Poisson distribution with that rate, truncated to 0 .. max_goals-1.
    With the default of 1000 draws the score is an aggregate over many samples
    rather than a single-match goal count; pass ``n_draws=1`` for one sample.

    Args:
        team1: Name of the first team (a key of ``countries``).
        team2: Name of the second team (a key of ``countries``).
        n_draws: Number of Poisson samples summed per team.
        max_goals: Exclusive upper bound on the goals in a single sample.

    Returns:
        The name of the team with the higher score, or None when both scores
        are equal.
    """
    lambda_1 = countries[team1]/countries[team2]
    lambda_2 = countries[team2]/countries[team1]

    # team1 is always sampled first so the random stream is consumed in a
    # fixed order
    goals1 = _sample_goal_total(lambda_1, n_draws, max_goals)
    goals2 = _sample_goal_total(lambda_2, n_draws, max_goals)

    # Determine the winner of the match
    if goals1 > goals2:
        return team1
    if goals1 < goals2:
        return team2
    return None
    
def conduct_matches(match_schedule):
    """Play every fixture of every group and return each group's top two teams.

    Args:
        match_schedule: Dict mapping a group label to a list of
            (team1, team2) fixtures.

    Returns:
        Dict mapping each group label to a list of at most two team names,
        ordered by points (highest first). A win is worth 3 points and a draw
        1 point to each side. Teams that never earned a point do not appear in
        the standings, so a group with a single decisive fixture yields a
        one-element list.
    """
    group_results = {}
    for group, fixtures in match_schedule.items():
        points = {}
        for team1, team2 in fixtures:
            winner = simulate_match(team1, team2)
            if winner is None:
                awards = [(team1, 1), (team2, 1)]
            else:
                awards = [(winner, 3)]
            for team, gained in awards:
                points[team] = points.get(team, 0) + gained

        # Rank by points; teams level on points keep the order in which they
        # first entered the standings. Only the best two are kept.
        ranking = sorted(points, key=points.get, reverse=True)
        group_results[group] = ranking[:2]

    return group_results
    
    
# Define a function to simulate the entire tournament
def simulate_tournament():
    """Simulate one full tournament and return its outcome.

    The group stage is played from the module-level ``matches`` schedule. Each
    later round is built from the previous round's qualifiers and played with
    conduct_matches(). Neighbouring groups are paired position by position
    (first with first, second with second).

    Returns:
        The name of the team that wins the final, or the string 'Draw' when
        the final produces no single winner.
    """
    # Group stage
    group_stage_results = conduct_matches(matches)

    # First knockout round: pair the qualifiers of neighbouring groups
    group_pairings = (
        ('set1', 'Group A', 'Group B'),
        ('set2', 'Group C', 'Group D'),
        ('set3', 'Group E', 'Group F'),
        ('set4', 'Group G', 'Group H'),
    )
    knock_out_schedule = {
        label: list(zip(group_stage_results[first], group_stage_results[second]))
        for label, first, second in group_pairings
    }
    knock_out_results = conduct_matches(knock_out_schedule)

    # Quarter-finals
    quarters_schedule = {
        'q1': list(zip(knock_out_results['set1'], knock_out_results['set2'])),
        'q2': list(zip(knock_out_results['set3'], knock_out_results['set4'])),
    }
    quarter_results = conduct_matches(quarters_schedule)

    # Semi-finals: the two qualifiers of each quarter bracket meet each other
    semi_schedule = {'s1': [quarter_results['q1']], 's2': [quarter_results['q2']]}
    semi_results = conduct_matches(semi_schedule)

    # Final between the first-listed team of each semi-final
    finalists = (semi_results['s1'][0], semi_results['s2'][0])
    winner = conduct_matches({'final': [finalists]})['final']

    return winner[0] if len(winner) == 1 else 'Draw'

In [3]:
# Monte Carlo run: play the whole tournament n_sim times and tally each outcome
n_sim = 1000
results = {}
progress = tqdm(range(n_sim), desc='Simulating', colour='blue')
for i in progress:
    winner = simulate_tournament()
    if winner in results:
        results[winner] += 1
    else:
        results[winner] = 1

# Report every outcome (in first-seen order) with its share of all runs
for team in results:
    count = results[team]
    print(f'{team}: {count} wins ({count/n_sim*100:.1f}%)')


Simulating: 100%|██████████| 1000/1000 [05:02<00:00,  3.30it/s]

Belgium: 580 wins (58.0%)
France: 336 wins (33.6%)
Brazil: 77 wins (7.7%)
Draw: 7 wins (0.7%)



