In [1]:
import numpy as np 
import pandas as pd
import math
import random
import openpyxl
from functools import partial


In [2]:
# Load the FIFA ranking table from the Excel workbook; the path is relative,
# so the file must sit in the notebook's working directory.
# NOTE(review): the rendered frame shows an 'Unnamed: 0' column next to
# 'rank', 'country' and 'points' — it looks like a row index that was saved
# into the sheet; confirm, and consider dropping it if nothing uses it.
world_cup_data = pd.read_excel('fifa-ranking.xlsx')
# Bare last expression so the notebook renders the frame as the cell output.
world_cup_data

Unnamed: 0,rank,country,points
0,1,Belgium,1780
1,2,France,1755
2,3,Brazil,1743
3,4,England,1670
4,5,Portugal,1662
...,...,...,...
205,206,Sri Lanka,853
206,207,US Virgin Islands,844
207,208,British Virgin Islands,842
208,209,Anguilla,821


In [3]:
# Define the Mersenne Twister random number generator
def mt19937(seed):
    """
    Generate pseudo-random numbers with the 32-bit Mersenne Twister (MT19937).

    Parameters:
        seed : int
            Seed for the generator. Only the low 32 bits are used.

    Returns:
        A generator that yields an endless sequence of Python ints, each in
        the range [0, 2**32).

    Algorithm:
        1. The 624-word state is filled from the seed with the standard
           linear recurrence (multiplier 1812433253).
        2. Whenever all 624 words have been consumed, the state is "twisted":
           each word is recombined with its successor and the word 397
           positions ahead, giving the generator its 2**19937 - 1 period.
        3. Each output word is "tempered" (a fixed sequence of shifts and
           masks) to improve its statistical properties.

    Notes:
        The output matches the reference implementation (for example seed
        5489 starts 3499211612, 581869302, ...). The Mersenne Twister is
        suitable for simulation and statistics but is NOT cryptographically
        secure: its state can be reconstructed from 624 consecutive outputs.

    Example:
        rng = mt19937(1234)
        random_number = next(rng)
    """
    n_words = 624          # size of the state vector
    twist_offset = 397     # distance to the word mixed in during the twist
    word_mask = 0xffffffff
    upper_mask = 0x80000000
    lower_mask = 0x7fffffff

    # Plain Python ints are used (not numpy uint32) so the arithmetic below
    # never overflows or depends on numpy's scalar promotion rules.
    state = [0] * n_words
    state[0] = int(seed) & word_mask
    for i in range(1, n_words):
        prev = state[i - 1]
        state[i] = (1812433253 * (prev ^ (prev >> 30)) + i) & word_mask

    # Start "exhausted" so the first request triggers a twist of the seed state.
    index = n_words
    while True:
        if index >= n_words:
            # Regenerate the whole state in place; later words intentionally
            # read the already-updated earlier words, as in the reference code.
            for k in range(n_words):
                y = (state[k] & upper_mask) | (state[(k + 1) % n_words] & lower_mask)
                twisted = state[(k + twist_offset) % n_words] ^ (y >> 1)
                if y & 1:
                    twisted ^= 0x9908b0df
                state[k] = twisted
            index = 0

        # Temper the current word, then advance to the next one.
        z = state[index]
        index += 1
        z ^= z >> 11
        z ^= (z << 7) & 0x9d2c5680
        z ^= (z << 15) & 0xefc60000
        z ^= z >> 18
        yield z


In [4]:
# Define the Poisson random-variate sampler
def poisson(lambda_, rng):
    """
    Draw one Poisson-distributed integer with mean ``lambda_``.

    Sampling is done by inverse-transform: a uniform number u in [0, 1) is
    taken from ``rng`` and the smallest k with P(X <= k) >= u is returned.

    Parameters:
        lambda_ : float
            Mean of the distribution; must be finite and >= 0.
        rng : iterator
            Yields 32-bit unsigned integers (values in [0, 2**32)); advanced
            with ``next(rng)``. One value is consumed for means up to 500,
            and one extra value per additional 500 of mean.

    Returns:
        int : the sampled count (always 0 when ``lambda_`` is 0).

    Raises:
        ValueError : if ``lambda_`` is negative, NaN or infinite.
    """
    lam = float(lambda_)
    if not math.isfinite(lam) or lam < 0:
        raise ValueError(f"lambda_ must be finite and non-negative, got {lambda_!r}")

    # exp(-lam) underflows to 0.0 for large means, which would leave the CDF
    # stuck at zero. A sum of independent Poisson variables is Poisson with
    # the summed mean, so large means are sampled in chunks and added up.
    chunk = 500.0
    total = 0
    while lam > chunk:
        total += _poisson_inverse_cdf(chunk, rng)
        lam -= chunk
    return total + _poisson_inverse_cdf(lam, rng)


def _poisson_inverse_cdf(lam, rng):
    """Inverse-CDF Poisson draw for a mean small enough that exp(-lam) > 0."""
    # Map the 32-bit integer onto [0, 1).
    u = next(rng) / 4294967296
    pmf = math.exp(-lam)   # P(X = 0)
    cdf = pmf              # P(X <= 0)
    k = 0
    # Walk up the CDF; the comparison must be against the running sum, not
    # the individual probability term.
    while u > cdf:
        k += 1
        pmf *= lam / k     # P(X = k) from P(X = k - 1)
        cdf += pmf
        if pmf == 0.0 and k > lam:
            # The tail has underflowed, so the CDF cannot grow any further;
            # stop rather than loop forever (only reachable if u >= ~1).
            break
    return k

In [5]:
def generate_matches(df, lambda_=1, seed=12345):
    """
    Build a single round-robin fixture list with simulated scores.

    Every unordered pair of rows in ``df`` plays once. Goals for each side
    are drawn with ``poisson`` from a Mersenne Twister stream seeded with
    ``seed``.

    Parameters:
        df : pd.DataFrame
            One row per team, with a 'country' column and a numeric 'points'
            column. Rows are addressed by position, so any index is accepted.
        lambda_ : float, default 1
            Expected goals per match for a team with average points.
        seed : int, default 12345
            Seed passed to ``mt19937``.

    Returns:
        list of (team1, team2, goals1, goals2) tuples, in row order (team1
        always precedes team2 in ``df``). Empty when ``df`` has fewer than
        two rows.

    Raises:
        ValueError : if the mean of 'points' is not positive.
    """
    rng = mt19937(seed)

    countries = df['country'].tolist()
    points = [float(p) for p in df['points']]
    if not countries:
        return []

    mean_points = sum(points) / len(points)
    if not mean_points > 0:
        raise ValueError("mean of the 'points' column must be positive")

    # NOTE(review): raw ranking points (~1700) are not a plausible goal rate,
    # so each team's rate is lambda_ scaled by its points relative to the
    # average team. Confirm this is the intended strength model.
    rates = [lambda_ * p / mean_points for p in points]

    matches = []
    n_teams = len(countries)
    for i in range(n_teams):
        for j in range(i + 1, n_teams):
            # Draw order (team1 first, then team2) fixes how the shared
            # random stream is consumed, keeping results reproducible.
            goals1 = poisson(rates[i], rng)
            goals2 = poisson(rates[j], rng)
            matches.append((countries[i], countries[j], goals1, goals2))
    return matches

In [6]:
def simulate_world_cup(df: pd.DataFrame, num_simulations: int, seed: int) -> dict:
    """
    Simulate a single round-robin tournament many times and count the winners.

    In each simulated tournament every pair of teams in ``df`` meets once.
    Each side's goals are drawn from a Poisson distribution with that team's
    rate; a win earns 3 points, a draw 1 point each. The tournament winner is
    the team with the most points, ties broken by the higher rate and then by
    row order.

    Args:
        df (pd.DataFrame): One row per team. Must contain a 'country' column
            and either a 'lambda' column (expected goals per match) or a
            numeric 'points' column. Rows are addressed by position, so any
            index is accepted. ``df`` is not modified.
        num_simulations (int): The number of times to simulate the tournament.
        seed (int): The seed for the random number generator.

    Returns:
        dict: Maps every country in ``df`` to the number of simulated
        tournaments it won. Empty when ``df`` has no rows.

    Raises:
        KeyError: If 'country' is missing, or both 'lambda' and 'points' are.
        ValueError: If rates are derived from 'points' and their mean is not
            positive, or if any rate is negative or NaN.
    """
    rng = np.random.default_rng(seed)
    teams = df['country'].tolist()
    results = {team: 0 for team in teams}
    n_teams = len(teams)
    if n_teams == 0:
        return results

    if 'lambda' in df.columns:
        rates = df['lambda'].to_numpy(dtype=float)
    else:
        # NOTE(review): without an explicit 'lambda' column the rate is taken
        # as points relative to the average team (so an average side expects
        # one goal per match). Confirm this is the intended strength model.
        points = df['points'].to_numpy(dtype=float)
        mean_points = points.mean()
        if not mean_points > 0:
            raise ValueError("mean of the 'points' column must be positive")
        rates = points / mean_points

    # Positional indices of every fixture (i < j), built once and reused.
    home_idx, away_idx = np.triu_indices(n_teams, k=1)
    home_rates = rates[home_idx]
    away_rates = rates[away_idx]

    for _ in range(num_simulations):
        home_goals = rng.poisson(home_rates)
        away_goals = rng.poisson(away_rates)
        drawn = home_goals == away_goals
        home_pts = np.where(home_goals > away_goals, 3, np.where(drawn, 1, 0))
        away_pts = np.where(away_goals > home_goals, 3, np.where(drawn, 1, 0))

        # add.at accumulates correctly even though each team index repeats.
        table = np.zeros(n_teams, dtype=np.int64)
        np.add.at(table, home_idx, home_pts)
        np.add.at(table, away_idx, away_pts)

        # max() keeps the first maximum, so full ties fall back to row order.
        best = max(range(n_teams), key=lambda t: (table[t], rates[t]))
        results[teams[best]] += 1
    return results

In [None]:
# Build the fixture list from the first five rows of the ranking table only.
matches = generate_matches(world_cup_data.head(5))

In [None]:
# simulate_world_cup takes the team table (a DataFrame with a 'country'
# column), not the fixture list, so pass the same five-row slice of the
# rankings that the fixtures were generated from.
simulate_world_cup(world_cup_data[:5], 1000, 12345)