In [3]:
import pandas as pd
import numpy as np
from scipy.stats import poisson

In [9]:
# Charger les données
df_historical_data = pd.read_csv('clean_fifa_worldcup_matches.csv')
df_fixtures = pd.read_csv('clean_fifa_worldcup_fixture.csv')

In [59]:
historical_data.dtypes

HomeTeam     object
AwayTeam     object
Year          int64
HomeGoals     int64
AwayGoals     int64
dtype: object

In [60]:
df_historical_data['HomeGoals'] = pd.to_numeric(df_historical_data['HomeGoals'], errors='coerce')
df_historical_data['AwayGoals'] = pd.to_numeric(df_historical_data['AwayGoals'], errors='coerce')

In [61]:
# Calculer la moyenne des buts marqués et concédés pour chaque équipe
def calculate_team_strength(data):
    team_stats = pd.DataFrame()
    
    # Buts marqués
    team_stats['goals_scored'] = data.groupby('HomeTeam')['HomeGoals'].mean() + data.groupby('AwayTeam')['AwayGoals'].mean()
    
    # Buts concédés
    team_stats['goals_conceded'] = data.groupby('HomeTeam')['AwayGoals'].mean() + data.groupby('AwayTeam')['HomeGoals'].mean()
    
    return team_stats

In [62]:
df_team_strengths = calculate_team_strength(df_historical_data)

In [63]:
print(df_team_strengths)

Unnamed: 0,goals_scored,goals_conceded
Algeria,1.976190,2.952381
Angola,0.500000,1.500000
Argentina,3.044228,2.731634
Australia,1.666667,3.857143
Austria,3.019048,3.238095
...,...,...
Uruguay,2.886693,2.729908
Wales,2.500000,1.750000
West Germany,4.000000,2.686404
Yugoslavia,3.511111,2.433333


In [50]:
df_team_strengths.to_csv('strenghts.csv', index=False)

In [69]:
def predict_points(home, away):
    # Vérifier si les équipes existent dans les données
    if home in df_team_strengths.index and away in df_team_strengths.index:
        lamb_home = df_team_strengths.at[home, 'goals_scored'] * df_team_strengths.at[away, 'goals_conceded']
        lamb_away = df_team_strengths.at[away, 'goals_scored'] * df_team_strengths.at[home, 'goals_conceded']
    else:
        # Si une équipe n'a pas d'historique, on attribue la moyenne des autres équipes
        avg_goals_scored = df_team_strengths['goals_scored'].mean()
        avg_goals_conceded = df_team_strengths['goals_conceded'].mean()
        
        lamb_home = (df_team_strengths.at[home, 'goals_scored'] if home in df_team_strengths.index else avg_goals_scored) \
                    * (df_team_strengths.at[away, 'goals_conceded'] if away in df_team_strengths.index else avg_goals_conceded)
                    
        lamb_away = (df_team_strengths.at[away, 'goals_scored'] if away in df_team_strengths.index else avg_goals_scored) \
                    * (df_team_strengths.at[home, 'goals_conceded'] if home in df_team_strengths.index else avg_goals_conceded)
    
    # Probabilités pour les résultats
    prob_home, prob_away, prob_draw = 0, 0, 0
    for x in range(0, 11):  # Nombre de buts de l'équipe à domicile
        for y in range(0, 11):  # Nombre de buts de l'équipe à l'extérieur
            p = poisson.pmf(x, lamb_home) * poisson.pmf(y, lamb_away)
            if x == y:
                prob_draw += p
            elif x > y:
                prob_home += p
            else:
                prob_away += p

    # Calcul des points attendus
    points_home = 3 * prob_home + prob_draw
    points_away = 3 * prob_away + prob_draw
    
    return (points_home, points_away)


In [75]:
predict_points('Qatar', 'Ecuador')
# Ecuador ==> true

(0.7521773921056127, 1.9027773661139336)

In [76]:
predict_points('Senegal', 'Netherlands')
# Netherlands ==> true

(0.34902128362955764, 1.575228367702044)

In [77]:
predict_points('Qatar', 'Senegal')
#Senegal ==> true

(0.5794201217227852, 1.506156126921597)

In [78]:
predict_points('Netherlands', 'Ecuador')
#Netherlands => false

(2.118118963803906, 0.447360105302669)

In [79]:
predict_points('Ecuador', 'Senegal')
#égalite => false 

(1.3467554978168956, 1.4004345061216021)

In [80]:
predict_points('Netherlands', 'Qatar')
#Netherlands

(1.084592789972144, 0.08454568379066407)

In [None]:
#Netherlands = 9 points
#Ecuador =  4 points ou 6 points
#Senegal =  4 points ou 3 points
#Qatar = 0 points

#1 Netherlands  => true
#2 Ecuador => false

In [81]:
predict_points('England', 'Iran')
#England ==> true 

(2.541488534538541, 0.13519963857290127)

In [82]:
predict_points('United State', 'Wales')
#Wales 

(0.27255477698229563, 2.030845754830462)

In [83]:
predict_points('Wales', 'Iran')
#Wales ==> false

(2.5979877305509733, 0.13725880262528944)

In [84]:
predict_points('England', 'United State')
#England ==> false

(1.8985847167229961, 0.2755102907343436)

In [87]:
predict_points('Wales', 'England')
#égalité => false

(1.459381249623247, 1.3579712258817107)

In [86]:
predict_points('Iran', 'United State')
#United State => true

(0.6906913470220495, 2.0861654436317654)

In [88]:
#England 7 points = true 
#Wales 7 points  = false
#United State 3 points 
#Iran 0 points


In [89]:
predict_points('Argentina', 'Saudi Arabia')
#Argentina => false

(0.4129045352647607, 0.010301152536280085)

In [90]:
predict_points('Mexico', 'Poland')
#Poland => false

(0.7149898430301787, 1.5743418846045656)

In [91]:
predict_points('Poland', 'Saudi Arabia')
#Poland ==> true

(0.7436092942179612, 0.022773572630819933)

In [92]:
predict_points('Argentina','Mexico')
#Argentina ==> True

(1.444019579137419, 0.5337566692969863)

In [93]:
predict_points('Poland','Argentina')
#Argentina ==> True

(0.9055290142604736, 1.1177933866633882)

In [94]:
predict_points('Saudi Arabia', 'Mexico')
#Mexico ==> true

(0.11023381393741741, 1.4246686284538466)

In [None]:
#Argentina 9 points => true 
#Poland 6 points => true
#Mexico 3 points 
#Saudi Arabia 0 points 

In [95]:
predict_points('Denmark', 'Tunisia')
#Denmark => false

(1.4073668577751097, 0.2823361612537839)

In [101]:
predict_points('France', 'Australia')
#France => true

(0.5009993292559866, 0.015388591101809668)

In [102]:
predict_points('Tunisia', 'Australia')
#Tunisia => false

(1.6712630749870232, 0.7031191235106107)

In [103]:
predict_points('France', 'Denmark')
#France => true

(1.106789557771825, 0.5217658092500898)

In [99]:
predict_points('Australia', 'Denmark')
#Denmark => false

(0.08216411382517512, 1.060526619996513)

In [104]:
predict_points('Tunisia', 'France')
#France => false

(0.08010617191845973, 0.9345959328130188)

In [None]:
#France 9 points => true
#Denmark 6 points => false
#Tunisia 3 ponits
#Austria 0 points

In [105]:
predict_points('Germany', 'Japan')
#Germany => false

(1.194031868362481, 0.08588849435149619)

In [106]:
predict_points('Spain', 'Costa Rica')
#Spain => true

(1.5876933136967417, 0.24152103218612056)

In [107]:
predict_points('Japan', 'Costa Rica')
#égalité => false

(1.4613135375821031, 1.2068748597068597)

In [111]:
predict_points('Spain', 'Germany')
#Germany => false

(0.5358874450549279, 1.223585067140565)

In [112]:
predict_points('Japan', 'Spain')
#Spain => false

(0.33862558309052493, 1.8324803245846433)

In [110]:
predict_points('Costa Rica', 'Germany')
#Germany => true

(0.049943782637445706, 0.8259053421832803)

In [None]:
#Germany 9 points => false
#Spain 6 points => true
#Japan 1 point
#Costa Rica 1 point 

In [113]:
predict_points('Morocco', 'Croatia')
#Croatia => false

(0.3340114555909465, 2.1175888607482882)

In [114]:
predict_points('Belgium', 'Canada')
#Belgium => true

(2.22741285564992, 0.00016905022711809965)

In [115]:
predict_points('Belgium', 'Morocco')
#Belgium => false

(1.9055847876954237, 0.5294297862092473)

In [116]:
predict_points('Croatia', 'Canada')
#Croatia => true

(2.200176913197418, 0.00015614806248443323)

In [117]:
predict_points('Croatia', 'Belgium')
#Croatia => false

(1.2349000126958858, 0.7352864315790354)

In [119]:
predict_points('Canada', 'Morocco')
#Morocco => true

(0.008229747049020023, 2.952290015481692)

In [120]:
#Croatia 9 points
#Belgium 6 points
#Morocco 3 points
#Canada 0 points

In [121]:
predict_points('Switzerland', 'Cameroon')
#Switzerland => true

(1.1993628017570503, 0.17048321528166496)

In [122]:
predict_points('Brazil', 'Serbia')
#Brazil => true

(1.8136611189861047, 0.059760913577215025)

In [123]:
predict_points('Cameroon', 'Serbia')
#Serbia => false

(0.9017413954470088, 1.9310207981764198)

In [124]:
predict_points('Brazil', 'Switzerland')
#Brazil => true

(0.4854876525849038, 0.07576909343277408)

In [125]:
predict_points('Serbia', 'Switzerland')
#Switzerland => true

(0.6427002731565016, 1.9867465634507764)

In [126]:
predict_points('Cameroon', 'Brazil')
#Brazil => false

(0.0036185982163254938, 0.3073854147389852)

In [None]:
#Brazil 9 points => true 
#Switzerland 6 points => true
#Serbia 3 points 
#Cameroon 0 points  

In [127]:
predict_points('Uruguay', 'South Korea')
#Uruguay => false

(0.9297453388046442, 0.14651412488473672)

In [128]:
predict_points('Portugal', 'Ghana')
#Portugal => true

(1.741171271585802, 0.62997189815189)

In [129]:
predict_points('South Korea', 'Ghana')
#Ghana => true

(0.48325753228234886, 1.647559221173319)

In [130]:
predict_points('Portugal', 'Uruguay')
#Portugal => true

(1.1892353495490293, 0.8680944748988114)

In [131]:
predict_points('Ghana', 'Uruguay')
#Uruguay => true 

(0.8002344797904114, 1.5799207619116287)

In [132]:
predict_points('South Korea', 'Portugal')
#Portugal => false

(0.09651510462493783, 0.8927422092895089)

In [None]:
#Portugal 9 points => true 
#Uruguay 6 points => false 
#Ghana 3 points
#South Korea 0 points 

In [None]:
# Ce qui nous donne les 8 rencontres (prédites) suivantes :

# Netherlands – Walves
# Argentina – Denmark
# France – Poland
# England – Ecuador
# Germany – Belgium
# Brazil – Uruguay
# Croatia – Spain
# Portugal – Switzerland

In [136]:
def get_winner(df_fixture_updated):
    for index, row in df_fixture_updated.iterrows():
        home, away = row['home'], row['away']
        points_home, points_away = predict_points(home, away)
        if points_home > points_away:
            winner = home
        else:
            winner = away
        df_fixture_updated.loc[index, 'winner'] = winner
    return df_fixture_updated

In [137]:
fixtures = [
    {'home': 'Netherlands', 'away': 'Walves'},
    {'home': 'Argentina', 'away': 'Denmark'},
    {'home': 'France', 'away': 'Poland'},
    {'home': 'England', 'away': 'Ecuador'},
    {'home': 'Germany', 'away': 'Belgium'},
    {'home': 'Brazil', 'away': 'Uruguay'},
    {'home': 'Croatia', 'away': 'Spain'},
    {'home': 'Portugal', 'away': 'Switzerland'}
]

In [138]:
df_fixture_updated = pd.DataFrame(fixtures)

In [139]:
df_fixture_with_winners = get_winner(df_fixture_updated)

In [141]:
# Afficher les résultats
print(df_fixture_with_winners)

          home         away       winner
0  Netherlands       Walves  Netherlands
1    Argentina      Denmark      Denmark
2       France       Poland       France
3      England      Ecuador      England
4      Germany      Belgium      Germany
5       Brazil      Uruguay       Brazil
6      Croatia        Spain        Spain
7     Portugal  Switzerland     Portugal


In [143]:
# Allemagne – Brasil
# Netherlands  – Denmark
# Spain – Portugal
# England – France

In [148]:
fixtures = [
    {'home': 'Allemagne', 'away': 'Brasil'},
    {'home': 'Netherlands', 'away': 'Denmark'},
    {'home': 'Spain', 'away': 'Portugal'},
    {'home': 'England', 'away': 'France'},
]

In [149]:
df_fixture_updated = pd.DataFrame(fixtures)

In [150]:
df_fixture_with_winners = get_winner(df_fixture_updated)

In [151]:
# Afficher les résultats
print(df_fixture_with_winners)

          home      away       winner
0    Allemagne    Brasil       Brasil
1  Netherlands   Denmark  Netherlands
2        Spain  Portugal        Spain
3      England    France       France


In [None]:
# Netherlands – Brasil
# Pays-Bas – Portugal

In [153]:
fixtures = [
    {'home': 'Netherlands', 'away': 'Brasil'},
    {'home': 'Pays-Bas', 'away': 'Portugal'},
]
#systonic

In [154]:
df_fixture_updated = pd.DataFrame(fixtures)

In [155]:
df_fixture_with_winners = get_winner(df_fixture_updated)

In [156]:
# Afficher les résultats
print(df_fixture_with_winners)

          home      away       winner
0  Netherlands    Brasil  Netherlands
1     Pays-Bas  Portugal     Portugal


In [157]:
predict_points('Netherlands', 'Portugal')

(1.5050102639835456, 0.5824569371618901)

In [None]:
#le gagnant c'est Netherlands