# Betting Support Model (4.0 Beta Version) - La Liga

## Scrapping the data
For now we will be manually copying and pastying the tables on an Excel file. Tried using BeautifulSoup, requests, and Selenium for scrapping the data, but the JSON format in the HTML make things hard.

## Clean and Manipulate Data

In [21]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
import numpy as np

# Load the dataset
df_overall = pd.read_excel('xG_dataset_laliga.xlsx', sheet_name = "Overall")
df_home = pd.read_excel('xG_dataset_laliga.xlsx', sheet_name = "Home")
df_away = pd.read_excel('xG_dataset_laliga.xlsx', sheet_name = "Away")


# Split the values in the xG, xGA, and xPTS columns on the space character and select the first element
df_overall['xG'] = pd.to_numeric(df_overall['xG'].str.split().str[0])
df_overall['xGA'] = pd.to_numeric(df_overall['xGA'].str.split().str[0])
df_overall['xPTS'] = pd.to_numeric(df_overall['xPTS'].str.split().str[0])

df_home['xG'] = pd.to_numeric(df_home['xG'].str.split().str[0])
df_home['xGA'] = pd.to_numeric(df_home['xGA'].str.split().str[0])
df_home['xPTS'] = pd.to_numeric(df_home['xPTS'].str.split().str[0])

df_away['xG'] = pd.to_numeric(df_away['xG'].str.split().str[0])
df_away['xGA'] = pd.to_numeric(df_away['xGA'].str.split().str[0])
df_away['xPTS'] = pd.to_numeric(df_away['xPTS'].str.split().str[0])



In [22]:
# Adding the Ratios

list_metric = ["xG", "xGA", "xPTS"]
list_naming = ["xG per Game", "xGA per Game", "xPTS per Game"]

# Loop for Overall
counter = 0
while counter < len(list_metric):
    df_overall[list_naming[counter]] = df_overall[list_metric[counter]] / df_overall["M"]
    counter += 1

# Adding performance difference    

for i in range(len(df_overall)):
    xG_per_game_diff = df_overall.loc[i, "G"] / df_overall.loc[i, "M"] - df_overall.loc[i, "xG per Game"]
    xGA_per_game_diff = df_overall.loc[i, "xGA per Game"] -  df_overall.loc[i, "GA"] / df_overall.loc[i, "M"]  
    df_overall.loc[i, "xG per Game Diff"] = xG_per_game_diff
    df_overall.loc[i, "xGA per Game Diff"] = xGA_per_game_diff
    
df_overall.head(3)

Unnamed: 0,№,Team,M,W,D,L,G,GA,PTS,xG,xGA,xPTS,xG per Game,xGA per Game,xPTS per Game,xG per Game Diff,xGA per Game Diff
0,1,Barcelona,21,18,2,1,43,7,56,49.48,16.61,45.76,2.35619,0.790952,2.179048,-0.308571,0.457619
1,2,Real Madrid,22,16,3,3,46,17,51,48.14,20.83,47.89,2.188182,0.946818,2.176818,-0.097273,0.174091
2,3,Real Sociedad,22,13,4,5,32,22,43,35.7,20.84,41.25,1.622727,0.947273,1.875,-0.168182,-0.052727


In [23]:
# Loop for Home    
counter = 0
while counter < len(list_metric):
    df_home[list_naming[counter]] = df_home[list_metric[counter]] / df_home["M"]
    counter += 1

for i in range(len(df_home)):
    xG_per_game_diff = df_home.loc[i, "G"] / df_home.loc[i, "M"] - df_home.loc[i, "xG per Game"]
    xGA_per_game_diff = df_home.loc[i, "xGA per Game"] -  df_home.loc[i, "GA"] / df_home.loc[i, "M"]  
    df_home.loc[i, "xG per Game Diff"] = xG_per_game_diff
    df_home.loc[i, "xGA per Game Diff"] = xGA_per_game_diff    
    
df_home.head(3)

Unnamed: 0,№,Team,M,W,D,L,G,GA,PTS,xG,xGA,xPTS,xG per Game,xGA per Game,xPTS per Game,xG per Game Diff,xGA per Game Diff
0,1,Barcelona,10,8,2,0,22,1,26,28.46,6.27,24.6,2.846,0.627,2.46,-0.646,0.527
1,2,Real Madrid,10,7,3,0,22,7,24,24.07,8.25,23.41,2.407,0.825,2.341,-0.207,0.125
2,3,Girona,12,6,3,3,23,17,21,21.51,12.12,22.49,1.7925,1.01,1.874167,0.124167,-0.406667


In [24]:
# Loop for Away 
counter = 0
while counter < len(list_metric):
    df_away[list_naming[counter]] = df_away[list_metric[counter]] / df_away["M"]
    counter += 1

for i in range(len(df_away)):
    xG_per_game_diff = df_away.loc[i, "G"] / df_away.loc[i, "M"] - df_away.loc[i, "xG per Game"]
    xGA_per_game_diff = df_away.loc[i, "xGA per Game"] -  df_away.loc[i, "GA"] / df_away.loc[i, "M"]  
    df_away.loc[i, "xG per Game Diff"] = xG_per_game_diff
    df_away.loc[i, "xGA per Game Diff"] = xGA_per_game_diff     
    
       
df_away.head(3)

Unnamed: 0,№,Team,M,W,D,L,G,GA,PTS,xG,xGA,xPTS,xG per Game,xGA per Game,xPTS per Game,xG per Game Diff,xGA per Game Diff
0,1,Barcelona,11,10,0,1,21,6,30,21.02,10.34,21.16,1.910909,0.94,1.923636,-0.001818,0.394545
1,2,Real Madrid,12,9,0,3,24,10,27,24.07,12.58,24.49,2.005833,1.048333,2.040833,-0.005833,0.215
2,3,Real Sociedad,11,8,1,2,19,10,25,17.34,9.11,20.89,1.576364,0.828182,1.899091,0.150909,-0.080909


In [25]:
# Set display options
pd.set_option("display.max_rows", None)  # Display all rows
pd.set_option("display.max_columns", None)  # Display all columns
pd.set_option("display.width", None)  # Set display width to auto

# Merge dataframes on 'team' column
df_merged_1 = pd.merge(df_home, df_away, on='Team')
df_merged = pd.merge(df_overall, df_merged_1, on="Team")
df_merged

Unnamed: 0,№,Team,M,W,D,L,G,GA,PTS,xG,xGA,xPTS,xG per Game,xGA per Game,xPTS per Game,xG per Game Diff,xGA per Game Diff,№_x,M_x,W_x,D_x,L_x,G_x,GA_x,PTS_x,xG_x,xGA_x,xPTS_x,xG per Game_x,xGA per Game_x,xPTS per Game_x,xG per Game Diff_x,xGA per Game Diff_x,№_y,M_y,W_y,D_y,L_y,G_y,GA_y,PTS_y,xG_y,xGA_y,xPTS_y,xG per Game_y,xGA per Game_y,xPTS per Game_y,xG per Game Diff_y,xGA per Game Diff_y
0,1,Barcelona,21,18,2,1,43,7,56,49.48,16.61,45.76,2.35619,0.790952,2.179048,-0.308571,0.457619,1,10,8,2,0,22,1,26,28.46,6.27,24.6,2.846,0.627,2.46,-0.646,0.527,1,11,10,0,1,21,6,30,21.02,10.34,21.16,1.910909,0.94,1.923636,-0.001818,0.394545
1,2,Real Madrid,22,16,3,3,46,17,51,48.14,20.83,47.89,2.188182,0.946818,2.176818,-0.097273,0.174091,2,10,7,3,0,22,7,24,24.07,8.25,23.41,2.407,0.825,2.341,-0.207,0.125,2,12,9,0,3,24,10,27,24.07,12.58,24.49,2.005833,1.048333,2.040833,-0.005833,0.215
2,3,Real Sociedad,22,13,4,5,32,22,43,35.7,20.84,41.25,1.622727,0.947273,1.875,-0.168182,-0.052727,9,11,5,3,3,13,12,18,18.36,11.73,20.36,1.669091,1.066364,1.850909,-0.487273,-0.024545,3,11,8,1,2,19,10,25,17.34,9.11,20.89,1.576364,0.828182,1.899091,0.150909,-0.080909
3,4,Atletico Madrid,21,11,5,5,30,17,38,33.98,23.46,34.76,1.618095,1.117143,1.655238,-0.189524,0.307619,13,10,4,3,3,15,10,15,17.46,10.96,17.07,1.746,1.096,1.707,-0.246,0.096,4,11,7,2,2,15,7,23,16.52,12.51,17.7,1.501818,1.137273,1.609091,-0.138182,0.500909
4,5,Real Betis,22,11,4,7,29,23,37,34.28,30.49,32.33,1.558182,1.385909,1.469545,-0.24,0.340455,5,11,6,2,3,18,12,20,20.33,14.22,18.03,1.848182,1.292727,1.639091,-0.211818,0.201818,5,11,5,2,4,11,11,17,13.95,16.27,14.3,1.268182,1.479091,1.3,-0.268182,0.479091
5,6,Rayo Vallecano,21,9,6,6,28,23,33,25.63,27.23,27.68,1.220476,1.296667,1.318095,0.112857,0.201429,11,10,5,2,3,15,11,17,14.05,11.44,14.63,1.405,1.144,1.463,0.095,0.044,6,11,4,4,3,13,12,16,11.58,15.79,13.05,1.052727,1.435455,1.186364,0.129091,0.344545
6,7,Athletic Club,21,9,5,7,31,22,32,31.72,18.43,37.03,1.510476,0.877619,1.763333,-0.034286,-0.17,4,11,6,2,3,16,7,20,19.02,7.41,23.41,1.729091,0.673636,2.128182,-0.274545,0.037273,7,10,3,3,4,15,15,12,12.7,11.02,13.62,1.27,1.102,1.362,0.23,-0.398
7,8,Villarreal,22,9,4,9,24,22,31,31.65,31.49,31.31,1.438636,1.431364,1.423182,-0.347727,0.431364,7,10,6,1,3,14,8,19,18.96,9.11,19.57,1.896,0.911,1.957,-0.496,0.111,9,12,3,3,6,10,14,12,12.69,22.37,11.75,1.0575,1.864167,0.979167,-0.224167,0.6975
8,9,Mallorca,22,9,4,9,20,22,31,22.67,24.66,28.67,1.030455,1.120909,1.303182,-0.121364,0.120909,6,11,6,2,3,12,8,20,13.24,9.45,18.11,1.203636,0.859091,1.646364,-0.112727,0.131818,11,11,3,2,6,8,14,11,9.43,15.21,10.56,0.857273,1.382727,0.96,-0.13,0.11
9,10,Osasuna,22,8,6,8,19,21,30,25.25,28.3,27.12,1.147727,1.286364,1.232727,-0.284091,0.331818,10,11,6,0,5,12,11,18,17.05,12.81,17.48,1.55,1.164545,1.589091,-0.459091,0.164545,8,11,2,6,3,7,10,12,8.2,15.49,9.64,0.745455,1.408182,0.876364,-0.109091,0.499091


Tomorrow games include: 

- Elche (home) vs Espanyol (away)
- Rayo Vallecano (home) vs Sevilla (away)
- Atletico Madrid (home) vs Athletic Club (away)
- Barcelona (home) vs Cadiz (away)

In [26]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

### Predictions

To determine the likelihood of the Barcelona vs Cadiz game to have over 2 goals based on the provided data, we can use the Poisson distribution, which is commonly used in soccer to model the number of goals scored in a match.

The Poisson distribution has one parameter, λ, which represents the expected number of goals scored by a team in a match. The probability of a team scoring exactly k goals is given by the following formula:

P(k;λ) = (e^-λ * λ^k) / k!

To calculate the probability of a match having over 2 goals, we need to calculate the probability of both teams scoring 1 or more goals, which is given by:

P(Barcelona > 0; λB) * P(Cadiz > 0; λC)

where P(Barcelona > 0; λB) is the probability of Barcelona scoring at least one goal, which is equal to:

P(Barcelona > 0; λB) = 1 - P(Barcelona = 0; λB)

and P(Barcelona = 0; λB) is the probability of Barcelona not scoring any goals, which is given by:

P(Barcelona = 0; λB) = e^-λB

where λB is the expected number of goals scored by Barcelona in the match. We can estimate λB as the product of Barcelona's overall xG and Cadiz's overall xGA, which gives:

λB = 2.35 * 1.77 = 4.1655

Similarly, we can estimate λC as the product of Cadiz's overall xG and Barcelona's overall xGA, which gives:

λC = 0.91 * 0.79 = 0.719

Using these values, we can calculate the probability of both teams scoring 1 or more goals as:

P(Barcelona > 0; λB) * P(Cadiz > 0; λC) = (1 - e^-λB) * (1 - e^-λC) = 0.925

This means that the probability of the match having over 2 goals is 92.5%.

In [27]:
import numpy as np
from scipy.stats import poisson

# Assuming the main dataframe is named "df_merged"

# Get Barcelona home stats
barcelona_home_stats = df_merged.loc[df_merged['Team'] == 'Barcelona', ['xG per Game_x', 'xGA per Game_x', 'xG per Game Diff_x', 'xGA per Game Diff_x']]
barcelona_home_xG = barcelona_home_stats['xG per Game_x'].values[0]
barcelona_home_xGA = barcelona_home_stats['xGA per Game_x'].values[0]
barcelona_home_xG_diff = barcelona_home_stats['xG per Game Diff_x'].values[0]
barcelona_home_xGA_diff = barcelona_home_stats['xGA per Game Diff_x'].values[0]

# Get Cadiz away stats
cadiz_away_stats = df_merged.loc[df_merged['Team'] == 'Cadiz', ['xG per Game_y', 'xGA per Game_y', 'xG per Game Diff_y', 'xGA per Game Diff_y']]
cadiz_away_xG = cadiz_away_stats['xG per Game_y'].values[0]
cadiz_away_xGA = cadiz_away_stats['xGA per Game_y'].values[0]
cadiz_away_xG_diff = cadiz_away_stats['xG per Game Diff_y'].values[0]
cadiz_away_xGA_diff = cadiz_away_stats['xGA per Game Diff_y'].values[0]


# Calculate lambda for Barcelona scoring at home and Cadiz conceding away
lambda_barcelona = (barcelona_home_xG + barcelona_home_xG_diff) * (cadiz_away_xGA - cadiz_away_xGA_diff)
lambda_cadiz = (cadiz_away_xG + cadiz_away_xG_diff) * (barcelona_home_xGA - barcelona_home_xGA_diff)

# Calculate the probability of Barcelona scoring x goals at home and Cadiz conceding x goals away
prob = 0
for i in range(3):
    for j in range(3):
        prob += poisson.pmf(i, lambda_barcelona) * poisson.pmf(j, lambda_cadiz)
        
# Calculate the probability of over 2 goals
prob_over_2 = 1 - poisson.cdf(2, lambda_barcelona) * poisson.cdf(2, lambda_cadiz)

# Calculate total expected goals
lambda_total = lambda_barcelona + lambda_cadiz

print("The probability of the Barcelona vs Cadiz game ending with over 2 goals is: {:.3f}".format(prob_over_2))
print("Total expected goals anticipated:", lambda_total)

The probability of the Barcelona vs Cadiz game ending with over 2 goals is: 0.756
Total expected goals anticipated: 4.0200000000000005


In [28]:
# Run the simulation 1 times
for n in range(1):
    print(f"Simulation {n+1}:")
    for i in range(3):
        for j in range(3):
            score_prob = poisson.pmf(i, lambda_barcelona) * poisson.pmf(j, lambda_cadiz)
            print(f"Barcelona {i} - {j} Cadiz ({score_prob:.3f})")

Simulation 1:
Barcelona 0 - 0 Cadiz (0.018)
Barcelona 0 - 1 Cadiz (0.001)
Barcelona 0 - 2 Cadiz (0.000)
Barcelona 1 - 0 Cadiz (0.071)
Barcelona 1 - 1 Cadiz (0.004)
Barcelona 1 - 2 Cadiz (0.000)
Barcelona 2 - 0 Cadiz (0.141)
Barcelona 2 - 1 Cadiz (0.008)
Barcelona 2 - 2 Cadiz (0.000)


## Create the Function

In [35]:
def predict_game_result(home_team, away_team, df_merged):
    from scipy.stats import poisson

    # Get home team stats
    home_stats = df_merged.loc[df_merged['Team'] == home_team, ['xG per Game_x', 'xGA per Game_x', 'xG per Game Diff_x', 'xGA per Game Diff_x']]
    home_xG = home_stats['xG per Game_x'].values[0]
    home_xGA = home_stats['xGA per Game_x'].values[0]
    home_xG_diff = home_stats['xG per Game Diff_x'].values[0]
    home_xGA_diff = home_stats['xGA per Game Diff_x'].values[0]

    # Get away team stats
    away_stats = df_merged.loc[df_merged['Team'] == away_team, ['xG per Game_y', 'xGA per Game_y', 'xG per Game Diff_y', 'xGA per Game Diff_y']]
    away_xG = away_stats['xG per Game_y'].values[0]
    away_xGA = away_stats['xGA per Game_y'].values[0]
    away_xG_diff = away_stats['xG per Game Diff_y'].values[0]
    away_xGA_diff = away_stats['xGA per Game Diff_y'].values[0]

    # Calculate lambda for home team scoring at home and away team conceding away
    lambda_home = (home_xG + home_xG_diff) * (away_xGA - away_xGA_diff)
    lambda_away = (away_xG + away_xG_diff) * (home_xGA - home_xGA_diff)

    # Calculate the probability of the home team scoring x goals at home and the away team conceding x goals away
    prob = 0
    for i in range(3):
        for j in range(3):
            prob += poisson.pmf(i, lambda_home) * poisson.pmf(j, lambda_away)
    
    max_score=6
    for n in range(1):
        print(f"Simulation {n+1}:")
        for i in range(max_score+1):
            for j in range(max_score+1):
                score_prob = poisson.pmf(i, lambda_home) * poisson.pmf(j, lambda_away)
                print(f"{home_team} {i} - {j} {away_team} ({score_prob:.3f})")
      
    

    # Calculate the probability of over 2 goals
    prob_over_2 = 1 - poisson.cdf(2, lambda_home) * poisson.cdf(2, lambda_away)

    # Calculate total expected goals
    lambda_total = lambda_home + lambda_away
    

    return {"home_team": home_team, "away_team": away_team, "lambda_home": lambda_home, "lambda_away": lambda_away, 
            "prob_over_2_goals": prob_over_2, "expected_goals": lambda_total}


# Predictions

In [30]:
predict_game_result('Barcelona', 'Cadiz', df_merged)

Simulation 1:
Barcelona 0 - 0 Cadiz (0.018)
Barcelona 0 - 1 Cadiz (0.001)
Barcelona 0 - 2 Cadiz (0.000)
Barcelona 0 - 3 Cadiz (0.000)
Barcelona 0 - 4 Cadiz (0.000)
Barcelona 1 - 0 Cadiz (0.071)
Barcelona 1 - 1 Cadiz (0.004)
Barcelona 1 - 2 Cadiz (0.000)
Barcelona 1 - 3 Cadiz (0.000)
Barcelona 1 - 4 Cadiz (0.000)
Barcelona 2 - 0 Cadiz (0.141)
Barcelona 2 - 1 Cadiz (0.008)
Barcelona 2 - 2 Cadiz (0.000)
Barcelona 2 - 3 Cadiz (0.000)
Barcelona 2 - 4 Cadiz (0.000)
Barcelona 3 - 0 Cadiz (0.186)
Barcelona 3 - 1 Cadiz (0.011)
Barcelona 3 - 2 Cadiz (0.000)
Barcelona 3 - 3 Cadiz (0.000)
Barcelona 3 - 4 Cadiz (0.000)
Barcelona 4 - 0 Cadiz (0.184)
Barcelona 4 - 1 Cadiz (0.011)
Barcelona 4 - 2 Cadiz (0.000)
Barcelona 4 - 3 Cadiz (0.000)
Barcelona 4 - 4 Cadiz (0.000)


{'home_team': 'Barcelona',
 'away_team': 'Cadiz',
 'lambda_home': 3.9600000000000004,
 'lambda_away': 0.059999999999999984,
 'prob_over_2_goals': 0.7559852854220809,
 'expected_goals': 4.0200000000000005}

In [31]:
predict_game_result('Getafe', 'Valencia', df_merged)

Simulation 1:
Getafe 0 - 0 Valencia (0.101)
Getafe 0 - 1 Valencia (0.103)
Getafe 0 - 2 Valencia (0.052)
Getafe 0 - 3 Valencia (0.018)
Getafe 0 - 4 Valencia (0.005)
Getafe 1 - 0 Valencia (0.129)
Getafe 1 - 1 Valencia (0.131)
Getafe 1 - 2 Valencia (0.067)
Getafe 1 - 3 Valencia (0.023)
Getafe 1 - 4 Valencia (0.006)
Getafe 2 - 0 Valencia (0.082)
Getafe 2 - 1 Valencia (0.083)
Getafe 2 - 2 Valencia (0.042)
Getafe 2 - 3 Valencia (0.014)
Getafe 2 - 4 Valencia (0.004)
Getafe 3 - 0 Valencia (0.035)
Getafe 3 - 1 Valencia (0.035)
Getafe 3 - 2 Valencia (0.018)
Getafe 3 - 3 Valencia (0.006)
Getafe 3 - 4 Valencia (0.002)
Getafe 4 - 0 Valencia (0.011)
Getafe 4 - 1 Valencia (0.011)
Getafe 4 - 2 Valencia (0.006)
Getafe 4 - 3 Valencia (0.002)
Getafe 4 - 4 Valencia (0.000)


{'home_team': 'Getafe',
 'away_team': 'Valencia',
 'lambda_home': 1.2727272727272727,
 'lambda_away': 1.0181818181818183,
 'prob_over_2_goals': 0.20889504627016475,
 'expected_goals': 2.290909090909091}

## Entire Matchday Predictions

In [32]:
import pandas as pd

home_teams = ["Girona", "Real Sociedad", "Real Betis", "Mallorca", "Osasuna", "Elche", "Rayo Vallecano", "Atletico Madrid", "Barcelona", "Getafe"]
away_teams = ["Almeria", "Celta Vigo", "Real Valladolid", "Villarreal", "Real Madrid", "Espanyol", "Sevilla", "Athletic Club", "Cadiz", "Valencia"]

# Create a dataframe with the home and away teams for each matchday
df_matchdays = pd.DataFrame({'home_team': home_teams, 'away_team': away_teams})

# Print the dataframe
print(df_matchdays)


         home_team        away_team
0           Girona          Almeria
1    Real Sociedad       Celta Vigo
2       Real Betis  Real Valladolid
3         Mallorca       Villarreal
4          Osasuna      Real Madrid
5            Elche         Espanyol
6   Rayo Vallecano          Sevilla
7  Atletico Madrid    Athletic Club
8        Barcelona            Cadiz
9           Getafe         Valencia


In [36]:
for i, row in df_matchdays.iterrows():
    home_team = row['home_team']
    away_team = row['away_team']
    print(f"Match {i+1}: {home_team} vs {away_team}")
    print(" ")
    result = predict_game_result(home_team, away_team, df_merged)
    print(result)
    print(" ")


Match 1: Girona vs Almeria
 
Simulation 1:
Girona 0 - 0 Almeria (0.005)
Girona 0 - 1 Almeria (0.005)
Girona 0 - 2 Almeria (0.002)
Girona 0 - 3 Almeria (0.001)
Girona 0 - 4 Almeria (0.000)
Girona 0 - 5 Almeria (0.000)
Girona 0 - 6 Almeria (0.000)
Girona 1 - 0 Almeria (0.020)
Girona 1 - 1 Almeria (0.021)
Girona 1 - 2 Almeria (0.011)
Girona 1 - 3 Almeria (0.004)
Girona 1 - 4 Almeria (0.001)
Girona 1 - 5 Almeria (0.000)
Girona 1 - 6 Almeria (0.000)
Girona 2 - 0 Almeria (0.043)
Girona 2 - 1 Almeria (0.045)
Girona 2 - 2 Almeria (0.023)
Girona 2 - 3 Almeria (0.008)
Girona 2 - 4 Almeria (0.002)
Girona 2 - 5 Almeria (0.000)
Girona 2 - 6 Almeria (0.000)
Girona 3 - 0 Almeria (0.063)
Girona 3 - 1 Almeria (0.065)
Girona 3 - 2 Almeria (0.033)
Girona 3 - 3 Almeria (0.011)
Girona 3 - 4 Almeria (0.003)
Girona 3 - 5 Almeria (0.001)
Girona 3 - 6 Almeria (0.000)
Girona 4 - 0 Almeria (0.069)
Girona 4 - 1 Almeria (0.071)
Girona 4 - 2 Almeria (0.036)
Girona 4 - 3 Almeria (0.013)
Girona 4 - 4 Almeria (0.003)


## Export the Output into a Word Document

In [34]:
from docx import Document
from docx.shared import Inches
from datetime import date

# Create a new Word document
doc = Document()

# Add a title with today's date
doc.add_heading(f"La Liga predictions - {date.today().strftime('%B %d, %Y')}", 0)

# Loop through the matchdays and add the predictions to the document
for i, matchday in df_matchdays.iterrows():
    home_team = matchday['home_team']
    away_team = matchday['away_team']
    prediction = predict_game_result(home_team, away_team, df_merged)

    # Add a heading with the match details
    doc.add_heading(f"Match {i+1}: {home_team} vs. {away_team}", level=1)

    # Add the simulation results
    doc.add_paragraph("Simulation Results:")
    for sim in range(3):
        for j in range(3):
            score_prob = poisson.pmf(sim, prediction['lambda_home']) * poisson.pmf(j, prediction['lambda_away'])
            doc.add_paragraph(f"{home_team} {sim} - {j} {away_team} ({score_prob:.3f})")

    # Add the prediction results
    doc.add_paragraph("Prediction Results:")
    doc.add_paragraph(f"Expected goals: {prediction['expected_goals']:.2f}")
    doc.add_paragraph(f"Probability of over 2 goals: {prediction['prob_over_2_goals']:.2f}")

    # Add a page break after each match
    doc.add_page_break()

# Save the document with the current date in the filename
doc.save(f"La Liga matchday_predictions_{date.today().strftime('%Y-%m-%d')}.docx")


Simulation 1:
Girona 0 - 0 Almeria (0.005)
Girona 0 - 1 Almeria (0.005)
Girona 0 - 2 Almeria (0.002)
Girona 0 - 3 Almeria (0.001)
Girona 0 - 4 Almeria (0.000)
Girona 1 - 0 Almeria (0.020)
Girona 1 - 1 Almeria (0.021)
Girona 1 - 2 Almeria (0.011)
Girona 1 - 3 Almeria (0.004)
Girona 1 - 4 Almeria (0.001)
Girona 2 - 0 Almeria (0.043)
Girona 2 - 1 Almeria (0.045)
Girona 2 - 2 Almeria (0.023)
Girona 2 - 3 Almeria (0.008)
Girona 2 - 4 Almeria (0.002)
Girona 3 - 0 Almeria (0.063)
Girona 3 - 1 Almeria (0.065)
Girona 3 - 2 Almeria (0.033)
Girona 3 - 3 Almeria (0.011)
Girona 3 - 4 Almeria (0.003)
Girona 4 - 0 Almeria (0.069)
Girona 4 - 1 Almeria (0.071)
Girona 4 - 2 Almeria (0.036)
Girona 4 - 3 Almeria (0.013)
Girona 4 - 4 Almeria (0.003)
Simulation 1:
Real Sociedad 0 - 0 Celta Vigo (0.043)
Real Sociedad 0 - 1 Celta Vigo (0.043)
Real Sociedad 0 - 2 Celta Vigo (0.021)
Real Sociedad 0 - 3 Celta Vigo (0.007)
Real Sociedad 0 - 4 Celta Vigo (0.002)
Real Sociedad 1 - 0 Celta Vigo (0.093)
Real Sociedad