# Betting Model

## Web Scrapping

<div class="alert alert-block alert-success">

In [7]:
# import libraries
import requests
from bs4 import BeautifulSoup
import json
import pandas as pd
import numpy as np

In [8]:
la_liga_url = 'https://understat.com/league/La_liga/2023'
epl_url = 'https://understat.com/league/EPL/2023'
bundesliga_url = 'https://understat.com/league/Bundesliga/2023'
serie_a_url = 'https://understat.com/league/Serie_A/2023'
ligue1_url = 'https://understat.com/league/Ligue_1/2023'

In [9]:
def scraper(base_url):
    ###### Sending the request to the Web Server #######
    url = base_url
    
    res = requests.get(url)
    soup = BeautifulSoup(res.content, 'lxml')
    scripts = soup.find_all('script')

    strings = scripts[2].string

    ind_start = strings.index("('")+2 # 30, since it is the number of characters from the start of the line to the start of the json data
    ind_end = strings.index("')")
    json_data = strings[ind_start:ind_end]

    json_data = json_data.encode('utf8').decode('unicode_escape')

    #convert string to json format
    data = json.loads(json_data)
    
    ##################### Overall Standings #######################
    team_statistics = {}

    # loop though the outer dictionary (teams)
    for team_id, team_data in data.items():
        team_stats = {
            'matches': 0.0,
            'wins': 0.0,
            'draws': 0.0,
            'losses': 0.0,
            'goals': 0.0,
            'goals_against': 0.0,
            'points': 0.0,
            'xG': 0.0,
            'xGA': 0.0,
            'xPTS': 0.0
        }

        # loop through each game within the same team
        for game in team_data['history']:
            team_stats['matches'] += 1
            team_stats['wins'] += game['wins']
            team_stats['draws'] += game['draws']
            team_stats['losses'] += game['loses']
            team_stats['goals'] += game['scored']
            team_stats['goals_against'] += game['missed']
            team_stats['points'] += game['pts']
            team_stats['xG'] += game['xG']
            team_stats['xGA'] += game['xGA']
            team_stats['xPTS'] += game['xpts']


        # store the team statistics in the dictionary
        team_statistics[team_data['title']] = team_stats
        
        
        # Rename columns
        # This is overall STANDING
        # Still need to do Home and AWAY
        columns = {'matches':'M', 'wins':'W', 'draws':'D', 
                   'losses':'L', 'goals':'G', 'goals_against':'GA', 
                   'points':'PTS'}#, 'xG'xG', 'xGA', 'xPTS'}

        overall_df = pd.DataFrame(team_statistics).T # transpose
        overall_df = overall_df.rename(columns=columns)
        overall_df.reset_index(inplace=True)
        overall_df.rename(columns={'index':'Team'}, inplace=True)
        overall_df.sort_values(by='PTS', ascending=False, inplace=True)
        overall_df.reset_index(inplace=True, drop=True)
        
        
    ##################### Home Standings #######################
    
    team_stats = {}

    # loop the outer dictionary (teams)
    for team_id, team_data in data.items():
        team_stats_h = {
            'matches': 0.0,
            'wins': 0.0,
            'draws': 0.0,
            'losses': 0.0,
            'goals': 0.0,
            'goals_against': 0.0,
            'points': 0.0,
            'xG': 0.0,
            'xGA': 0.0,
            'xPTS': 0.0
        }

        # loop through each game within the same team
        for game in team_data['history']:
            if game['h_a'] == 'h':
                team_stats_h['matches'] += 1
                team_stats_h['wins'] += game['wins']
                team_stats_h['draws'] += game['draws']
                team_stats_h['losses'] += game['loses']
                team_stats_h['goals'] += game['scored']
                team_stats_h['goals_against'] += game['missed']
                team_stats_h['points'] += game['pts']
                team_stats_h['xG'] += game['xG']
                team_stats_h['xGA'] += game['xGA']
                team_stats_h['xPTS'] += game['xpts']


        # store the team statistics in the dictionary
        team_stats[team_data['title']] = team_stats_h

    home_df = pd.DataFrame(team_stats).T # transpose

    columns = {'matches':'M', 'wins':'W', 'draws':'D', 
               'losses':'L', 'goals':'G', 'goals_against':'GA', 
               'points':'PTS'}#, 'xG'xG', 'xGA', 'xPTS'}


    home_df = home_df.rename(columns=columns)
    home_df.reset_index(inplace=True)
    home_df.rename(columns={'index':'Team'}, inplace=True)
    
    
    
    ##################### Away Standings #######################
    
    team_stats = {}

    # loop the outer dictionary (teams)
    for team_id, team_data in data.items():
        team_stats_h = {
            'matches': 0.0,
            'wins': 0.0,
            'draws': 0.0,
            'losses': 0.0,
            'goals': 0.0,
            'goals_against': 0.0,
            'points': 0.0,
            'xG': 0.0,
            'xGA': 0.0,
            'xPTS': 0.0
        }

        # loop through each game within the same team
        for game in team_data['history']:
            if game['h_a'] == 'a':
                team_stats_h['matches'] += 1
                team_stats_h['wins'] += game['wins']
                team_stats_h['draws'] += game['draws']
                team_stats_h['losses'] += game['loses']
                team_stats_h['goals'] += game['scored']
                team_stats_h['goals_against'] += game['missed']
                team_stats_h['points'] += game['pts']
                team_stats_h['xG'] += game['xG']
                team_stats_h['xGA'] += game['xGA']
                team_stats_h['xPTS'] += game['xpts']


        # store the team statistics in the dictionary
        team_stats[team_data['title']] = team_stats_h

    away_df = pd.DataFrame(team_stats).T # transpose

    columns = {'matches':'M', 'wins':'W', 'draws':'D', 
               'losses':'L', 'goals':'G', 'goals_against':'GA', 
               'points':'PTS'}#, 'xG'xG', 'xGA', 'xPTS'}


    away_df = away_df.rename(columns=columns)
    away_df.reset_index(inplace=True)
    away_df.rename(columns={'index':'Team'}, inplace=True)
    
    ###################### Adding Ratios ################
    list_metric = ["xG", "xGA", "xPTS"]
    list_naming = ["xG per Game", "xGA per Game", "xPTS per Game"]
    
    # ----------------------  (overall)
    counter = 0
    while counter < len(list_metric):
        overall_df[list_naming[counter]] = overall_df[list_metric[counter]] / overall_df["M"]
        counter += 1
        
    # adding performance difference  
    for i in range(len(overall_df)):
        xG_per_game_diff = overall_df.loc[i, "G"] / overall_df.loc[i, "M"] - overall_df.loc[i, "xG per Game"]
        xGA_per_game_diff = overall_df.loc[i, "xGA per Game"] -  overall_df.loc[i, "GA"] / overall_df.loc[i, "M"]  
        overall_df.loc[i, "xG per Game Diff"] = xG_per_game_diff
        overall_df.loc[i, "xGA per Game Diff"] = xGA_per_game_diff
        
    # ----------------------  (home)
    counter = 0
    while counter < len(list_metric):
        home_df[list_naming[counter]] = home_df[list_metric[counter]] / home_df["M"]
        counter += 1
    
    for i in range(len(home_df)):
        xG_per_game_diff = home_df.loc[i, "G"] / home_df.loc[i, "M"] - home_df.loc[i, "xG per Game"]
        xGA_per_game_diff = home_df.loc[i, "xGA per Game"] -  home_df.loc[i, "GA"] / home_df.loc[i, "M"]  
        home_df.loc[i, "xG per Game Diff"] = xG_per_game_diff
        home_df.loc[i, "xGA per Game Diff"] = xGA_per_game_diff
        
    # ----------------------  (away)    
    counter = 0
    while counter < len(list_metric):
        away_df[list_naming[counter]] = away_df[list_metric[counter]] / away_df["M"]
        counter += 1
        
    for i in range(len(away_df)):
        xG_per_game_diff = away_df.loc[i, "G"] / away_df.loc[i, "M"] - away_df.loc[i, "xG per Game"]
        xGA_per_game_diff = away_df.loc[i, "xGA per Game"] -  away_df.loc[i, "GA"] / away_df.loc[i, "M"]  
        away_df.loc[i, "xG per Game Diff"] = xG_per_game_diff
        away_df.loc[i, "xGA per Game Diff"] = xGA_per_game_diff
    
    
    '''
    # ----------------------  (overall)
    overall_df['xG per Game'] = overall_df['xG'] / overall_df['M']
    overall_df['xGA per Game'] = overall_df['xGA'] / overall_df['M']
    overall_df['xPTS per Game'] = overall_df['xPTS'] / overall_df['M']
    
    # ----------------------  (home)
    home_df['xG per Game'] = home_df['xG'] / home_df['M']
    home_df['xGA per Game'] = home_df['xGA'] / home_df['M']
    home_df['xPTS per Game'] = home_df['xPTS'] / home_df['M']
    
    # ----------------------  (home)
    away_df['xG per Game'] = away_df['xG'] / away_df['M']
    away_df['xGA per Game'] = away_df['xGA'] / away_df['M']
    away_df['xPTS per Game'] = away_df['xPTS'] / away_df['M']
    '''
    ###################### Merging the Dataframes ################
      
    # Set display options
    pd.set_option("display.max_rows", None)  # Display all rows
    pd.set_option("display.max_columns", None)  # Display all columns
    pd.set_option("display.width", None)  # Set display width to auto
        
    df_merged = pd.merge(home_df, away_df, on='Team')
    df_merged = pd.merge(overall_df, df_merged, on='Team')
    
    return df_merged
    

In [10]:
la_liga_df = scraper(la_liga_url)
epl_df = scraper(epl_url)
bundesliga_df = scraper(bundesliga_url)
seriea_df = scraper(serie_a_url)
ligue1_df = scraper(ligue1_url)

In [11]:
#la_liga_df = scraper(la_liga_url)
epl_df

Unnamed: 0,Team,M,W,D,L,G,GA,PTS,xG,xGA,xPTS,xG per Game,xGA per Game,xPTS per Game,xG per Game Diff,xGA per Game Diff,M_x,W_x,D_x,L_x,G_x,GA_x,PTS_x,xG_x,xGA_x,xPTS_x,xG per Game_x,xGA per Game_x,xPTS per Game_x,xG per Game Diff_x,xGA per Game Diff_x,M_y,W_y,D_y,L_y,G_y,GA_y,PTS_y,xG_y,xGA_y,xPTS_y,xG per Game_y,xGA per Game_y,xPTS per Game_y,xG per Game Diff_y,xGA per Game Diff_y
0,Liverpool,20.0,13.0,6.0,1.0,43.0,18.0,45.0,46.31066,24.879013,38.7915,2.315533,1.243951,1.939575,-0.165533,0.343951,10.0,8.0,2.0,0.0,26.0,8.0,26.0,29.032311,9.631729,23.9909,2.903231,0.963173,2.39909,-0.303231,0.163173,10.0,5.0,4.0,1.0,17.0,10.0,19.0,17.278349,15.247284,14.8006,1.727835,1.524728,1.48006,-0.027835,0.524728
1,Aston Villa,20.0,13.0,3.0,4.0,43.0,27.0,42.0,38.525512,27.820772,35.4246,1.926276,1.391039,1.77123,0.223724,0.041039,10.0,9.0,1.0,0.0,29.0,8.0,28.0,22.709539,9.812138,22.028,2.270954,0.981214,2.2028,0.629046,0.181214,10.0,4.0,2.0,4.0,14.0,19.0,14.0,15.815973,18.008634,13.3966,1.581597,1.800863,1.33966,-0.181597,-0.099137
2,Arsenal,20.0,12.0,4.0,4.0,37.0,20.0,40.0,40.560137,19.714644,39.24,2.028007,0.985732,1.962,-0.178007,-0.014268,10.0,7.0,2.0,1.0,22.0,10.0,23.0,23.241428,9.546847,21.6868,2.324143,0.954685,2.16868,-0.124143,-0.045315,10.0,5.0,2.0,3.0,15.0,10.0,17.0,17.318709,10.167797,17.5532,1.731871,1.01678,1.75532,-0.231871,0.01678
3,Manchester City,19.0,12.0,4.0,3.0,45.0,21.0,40.0,42.78687,18.502703,40.8137,2.251941,0.973826,2.148089,0.116481,-0.131437,9.0,6.0,3.0,0.0,24.0,9.0,21.0,17.290899,7.169729,19.5832,1.921211,0.796637,2.175911,0.745456,-0.203363,10.0,6.0,1.0,3.0,21.0,12.0,19.0,25.495971,11.332974,21.2305,2.549597,1.133297,2.12305,-0.449597,-0.066703
4,Tottenham,20.0,12.0,3.0,5.0,42.0,29.0,39.0,37.680233,36.661317,29.4302,1.884012,1.833066,1.47151,0.215988,0.383066,10.0,7.0,0.0,3.0,20.0,13.0,21.0,21.445518,18.482443,16.2365,2.144552,1.848244,1.62365,-0.144552,0.548244,10.0,5.0,3.0,2.0,22.0,16.0,18.0,16.234715,18.178874,13.1937,1.623472,1.817887,1.31937,0.576528,0.217887
5,West Ham,20.0,10.0,4.0,6.0,33.0,30.0,34.0,29.407577,36.487184,23.8838,1.470379,1.824359,1.19419,0.179621,0.324359,10.0,5.0,3.0,2.0,17.0,10.0,18.0,14.102105,15.234944,13.638,1.410211,1.523494,1.3638,0.289789,0.523494,10.0,5.0,1.0,4.0,16.0,20.0,16.0,15.305472,21.25224,10.2458,1.530547,2.125224,1.02458,0.069453,0.125224
6,Manchester United,20.0,10.0,1.0,9.0,22.0,27.0,31.0,29.298176,35.559991,23.7815,1.464909,1.778,1.189075,-0.364909,0.428,10.0,6.0,0.0,4.0,13.0,16.0,18.0,18.375659,17.952939,14.7023,1.837566,1.795294,1.47023,-0.537566,0.195294,10.0,4.0,1.0,5.0,9.0,11.0,13.0,10.922517,17.607052,9.0792,1.092252,1.760705,0.90792,-0.192252,0.660705
7,Brighton,20.0,8.0,7.0,5.0,38.0,33.0,31.0,36.654884,31.77391,30.499,1.832744,1.588696,1.52495,0.067256,-0.061304,10.0,5.0,4.0,1.0,22.0,14.0,19.0,22.19801,15.581633,18.0344,2.219801,1.558163,1.80344,-0.019801,0.158163,10.0,3.0,3.0,4.0,16.0,19.0,12.0,14.456874,16.192277,12.4646,1.445687,1.619228,1.24646,0.154313,-0.280772
8,Newcastle United,20.0,9.0,2.0,9.0,39.0,29.0,29.0,43.620299,33.921538,34.5864,2.181015,1.696077,1.72932,-0.231015,0.246077,10.0,8.0,0.0,2.0,23.0,7.0,24.0,27.51035,10.370248,23.6653,2.751035,1.037025,2.36653,-0.451035,0.337025,10.0,1.0,2.0,7.0,16.0,22.0,5.0,16.109949,23.55129,10.9211,1.610995,2.355129,1.09211,-0.010995,0.155129
9,Chelsea,20.0,8.0,4.0,8.0,34.0,31.0,28.0,41.962153,30.572709,34.3883,2.098108,1.528635,1.719415,-0.398108,-0.021365,10.0,4.0,3.0,3.0,17.0,14.0,15.0,23.40343,13.036766,20.2835,2.340343,1.303677,2.02835,-0.640343,-0.096323,10.0,4.0,1.0,5.0,17.0,17.0,13.0,18.558723,17.535943,14.1048,1.855872,1.753594,1.41048,-0.155872,0.053594


In [12]:
# send df_merged to the TotalStandings(Cleansed) table

la_liga_df.to_excel('/Users/enzovillafuerte/Desktop/Python_Projects/Football Bettting Model/TotalStandings(Cleansed)/La Liga.xlsx', index=False)
epl_df.to_excel('/Users/enzovillafuerte/Desktop/Python_Projects/Football Bettting Model/TotalStandings(Cleansed)/EPL.xlsx', index=False)
bundesliga_df.to_excel('/Users/enzovillafuerte/Desktop/Python_Projects/Football Bettting Model/TotalStandings(Cleansed)/Bundesliga.xlsx', index=False)
ligue1_df.to_excel('/Users/enzovillafuerte/Desktop/Python_Projects/Football Bettting Model/TotalStandings(Cleansed)/Ligue 1.xlsx', index=False)
seriea_df.to_excel('/Users/enzovillafuerte/Desktop/Python_Projects/Football Bettting Model/TotalStandings(Cleansed)/Serie A.xlsx', index=False)

## Poisson Model
<div class="alert alert-block alert-success">

In [13]:
def predict_game_result(home_team, away_team, df_merged):
    from scipy.stats import poisson

    # getting home team stats
    home_stats = df_merged.loc[df_merged['Team'] == home_team, ['xG per Game_x', 'xGA per Game_x', 'xG per Game Diff_x', 'xGA per Game Diff_x']]
    home_xG = home_stats['xG per Game_x'].values[0]
    home_xGA = home_stats['xGA per Game_x'].values[0]
    home_xG_diff = home_stats['xG per Game Diff_x'].values[0]
    home_xGA_diff = home_stats['xGA per Game Diff_x'].values[0]

    # getting away team stats
    away_stats = df_merged.loc[df_merged['Team'] == away_team, ['xG per Game_y', 'xGA per Game_y', 'xG per Game Diff_y', 'xGA per Game Diff_y']]
    away_xG = away_stats['xG per Game_y'].values[0]
    away_xGA = away_stats['xGA per Game_y'].values[0]
    away_xG_diff = away_stats['xG per Game Diff_y'].values[0]
    away_xGA_diff = away_stats['xGA per Game Diff_y'].values[0]

    # calculating lambda for each team (explanation in One note)
    lambda_home = (home_xG + home_xG_diff) * (away_xGA - away_xGA_diff)
    lambda_away = (away_xG + away_xG_diff) * (home_xGA - home_xGA_diff)

    # calculating the probability of the home team scoring x goals at home and the away team conceding x goals away
    prob = 0
    for i in range(3):
        for j in range(3):
            prob += poisson.pmf(i, lambda_home) * poisson.pmf(j, lambda_away)
    '''        
    max_score=4
    for n in range(1):
        print(f"Simulation {n+1}:")
        for i in range(max_score+1):
            for j in range(max_score+1):
                score_prob = poisson.pmf(i, lambda_home) * poisson.pmf(j, lambda_away)
                print(f"{home_team} {i} - {j} {away_team} ({score_prob:.3f})")  
                '''

    # calculating the probability of the game having over 2 goals
    prob_over_2 = 1 - poisson.cdf(2, lambda_home) * poisson.cdf(2, lambda_away)
    prob_over_1 = 1 - poisson.cdf(1, lambda_home) * poisson.cdf(1, lambda_away)
    prob_over_3 = 1 - poisson.cdf(3, lambda_home) * poisson.cdf(3, lambda_away)
    
    # calculating probability of each team having +1.5 goals
    prob_over_2_home = 1 - poisson.cdf(2, lambda_home)
    prob_over_2_away = 1 - poisson.cdf(2, lambda_away)
    
    # calculating probability of both teams scoring
    #goal_home = poisson.pmf(0, lambda_home)
    #goal_away = poisson.pmf(0, lambda_away)
    #both_score = 1 - (goal_home * goal_away)

    #calculate total xG
    lambda_total = lambda_home + lambda_away
    

    return {"home_team": home_team, "away_team": away_team, "lambda_home": lambda_home, "lambda_away": lambda_away, 
            "prob_over_2_goals": prob_over_2, "prob_over_1_goal": prob_over_1, "prob_over_3_goals": prob_over_3, 
            "h_+1.5": prob_over_2_home, "a_+1.5": prob_over_2_away, "expected_goals": lambda_total}


In [14]:
def predict_game_winner(home_team, away_team, df_merged):
    from scipy.stats import poisson
    
    # getting home team stats
    home_stats = df_merged.loc[df_merged['Team'] == home_team, ['xG per Game_x', 'xGA per Game_x', 'xG per Game Diff_x', 'xGA per Game Diff_x']]
    home_xG = home_stats['xG per Game_x'].values[0]
    home_xGA = home_stats['xGA per Game_x'].values[0]
    home_xG_diff = home_stats['xG per Game Diff_x'].values[0]
    home_xGA_diff = home_stats['xGA per Game Diff_x'].values[0]

    # getting away team stats
    away_stats = df_merged.loc[df_merged['Team'] == away_team, ['xG per Game_y', 'xGA per Game_y', 'xG per Game Diff_y', 'xGA per Game Diff_y']]
    away_xG = away_stats['xG per Game_y'].values[0]
    away_xGA = away_stats['xGA per Game_y'].values[0]
    away_xG_diff = away_stats['xG per Game Diff_y'].values[0]
    away_xGA_diff = away_stats['xGA per Game Diff_y'].values[0]

    # calculating lambda for home team scoring at home and away team conceding away
    lambda_home = (home_xG + home_xG_diff) * (away_xGA - away_xGA_diff)
    lambda_away = (away_xG + away_xG_diff) * (home_xGA - home_xGA_diff)

    # calculate the probability of the home team scoring x goals at home and the away team conceding x goals away
    home_probs = poisson.pmf(range(8), lambda_home)
    away_probs = poisson.pmf(range(8), lambda_away)

    # calculate probabilities of different scorlines
    score_prob = np.outer(home_probs, away_probs)
    max_score = score_prob.shape[0] - 1

    # calculating probabilities of each outcome (win, draw, lose)
    home_win_prob = np.sum(np.tril(score_prob, -1))
    draw_prob = np.sum(np.diag(score_prob))
    away_win_prob = np.sum(np.triu(score_prob, 1))
    
    # calculate the total sum of probabilities
    total_prob = home_win_prob + draw_prob + away_win_prob

    # normalize probabilities
    home_win_prob /= total_prob
    draw_prob /= total_prob
    away_win_prob /= total_prob
    
    # return results as a dictionary
    return {
        'home_team': home_team,
        'away_team': away_team,
        'home_win_prob': home_win_prob,
        'draw_prob': draw_prob,
        'away_win_prob': away_win_prob
    }

### Premier League
<div class="alert alert-block alert-info">

In [42]:
import pandas as pd


df_matchdays = pd.read_csv('EPL-fixtures.csv')
df_matchdays.head(10)
# We will adjust the df manually to only contain the next matchday games
# normally there are 10 games index[9]

df_matchdays = df_matchdays.loc[0:9]
df_matchdays

Unnamed: 0,home_team,away_team
0,Chelsea,Fulham
1,Newcastle United,Manchester City
2,Everton,Aston Villa
3,Manchester United,Tottenham
4,Burnley,Luton
5,Arsenal,Crystal Palace
6,Brentford,Nottingham Forest
7,Sheffield United,West Ham
8,Bournemouth,Liverpool
9,Brighton,Wolverhampton Wanderers


#### O/U Predictions

In [16]:
# creating an empty list to store the predictions for each game
predictions = []

for i, row in df_matchdays.iterrows():
    home_team = row['home_team']
    away_team = row['away_team']
    #print(f"Match {i+1}: {home_team} vs {away_team}")
    #print(" ")
    result = predict_game_result(home_team, away_team, epl_df)
    predictions.append({
        'home_team': home_team,
        'away_team': away_team,
        '+1.5(%)' : result['prob_over_1_goal'],
        '+2.5(%)': result['prob_over_2_goals'],
        '+3.5(%)': result['prob_over_3_goals'],
        'H+1.5(%)': result['h_+1.5'],
        'A+1.5(%)': result['a_+1.5'],
        #'AA(%)': result['AA'],
        'xG': result['expected_goals']
        
    })
    #print(result)
    #print(" ")

df_predictions = pd.DataFrame(predictions)    
df_predictions

Unnamed: 0,home_team,away_team,+1.5(%),+2.5(%),+3.5(%),H+1.5(%),A+1.5(%),xG
0,Chelsea,Fulham,0.936921,0.782068,0.566445,0.748411,0.133777,5.17
1,Newcastle United,Manchester City,0.864849,0.608939,0.342581,0.520957,0.183661,4.23
2,Everton,Aston Villa,0.809101,0.502601,0.235304,0.347668,0.237507,3.77
3,Manchester United,Tottenham,0.94852,0.792247,0.551567,0.344966,0.682836,5.6
4,Burnley,Luton,0.911037,0.690494,0.412062,0.383239,0.498175,4.888889
5,Arsenal,Crystal Palace,0.889136,0.676367,0.434601,0.640574,0.099584,4.4
6,Brentford,Nottingham Forest,0.932036,0.751779,0.502341,0.66026,0.269379,5.2
7,Sheffield United,West Ham,0.957764,0.822374,0.601149,0.323324,0.737503,5.84


In [51]:
print(f"{predict_game_result('Brentford', 'Wolverhampton Wanderers', epl_df)} \n\n\n , {predict_game_result('Tottenham', 'Burnley', epl_df)}\n\n\n, {predict_game_result('Arsenal', 'Liverpool', epl_df)}")
      
      

{'home_team': 'Brentford', 'away_team': 'Wolverhampton Wanderers', 'lambda_home': 2.8899999999999997, 'lambda_away': 2.52, 'prob_over_2_goals': 0.7585143560437291, 'prob_over_1_goal': 0.9387707249140496, 'prob_over_3_goals': 0.49389322181991346, 'h_+1.5': 0.5517194902651474, 'a_+1.5': 0.46130684089053076, 'expected_goals': 5.41} 


 , {'home_team': 'Tottenham', 'away_team': 'Burnley', 'lambda_home': 3.4, 'lambda_away': 1.3, 'prob_over_2_goals': 0.7088046987843963, 'prob_over_1_goal': 0.9079557957308383, 'prob_over_3_goals': 0.4657055947783355, 'h_+1.5': 0.6602601118038803, 'a_+1.5': 0.1428875109080303, 'expected_goals': 4.7}


, {'home_team': 'Arsenal', 'away_team': 'Liverpool', 'lambda_home': 2.2, 'lambda_away': 1.7, 'prob_over_2_goals': 0.5284666970611315, 'prob_over_1_goal': 0.8251098851082501, 'prob_over_3_goals': 0.25700256660393017, 'h_+1.5': 0.37728625000368377, 'a_+1.5': 0.24277679280141484, 'expected_goals': 3.9000000000000004}


#### H2H Predictions

In [17]:
# creating an empty list to store the predictions for each game
predictions_h2h = []

for i, row in df_matchdays.iterrows():
    home_team = row['home_team']
    away_team = row['away_team']

    
    #print(f"Match {i+1}: {home_team} vs {away_team}")
    #print(" ")
    result_h2h = predict_game_winner(home_team, away_team, epl_df)
    predictions_h2h.append({
        'home_team': home_team,
        'away_team': away_team,
        'Home (%)': result_h2h['home_win_prob'],
        'Draw (%)': result_h2h['draw_prob'],
        'Away (%)': result_h2h['away_win_prob'],
    })
    #print(result_h2h)
    #print(" ")

df_predictions_h2h = pd.DataFrame(predictions_h2h) 
df_predictions_h2h

Unnamed: 0,home_team,away_team,Home (%),Draw (%),Away (%)
0,Chelsea,Fulham,0.824824,0.098739,0.076437
1,Newcastle United,Manchester City,0.643616,0.169868,0.186515
2,Everton,Aston Villa,0.475338,0.210166,0.314496
3,Manchester United,Tottenham,0.206259,0.150092,0.643649
4,Burnley,Luton,0.331805,0.183948,0.484247
5,Arsenal,Crystal Palace,0.789922,0.120279,0.0898
6,Brentford,Nottingham Forest,0.676683,0.146881,0.176436
7,Sheffield United,West Ham,0.167227,0.13487,0.697903


In [52]:
print(f"{predict_game_winner('Brentford', 'Wolverhampton Wanderers', epl_df)} \n\n\n , {predict_game_winner('Tottenham', 'Burnley', epl_df)}\n\n\n, {predict_game_winner('Arsenal', 'Liverpool', epl_df)}")



{'home_team': 'Brentford', 'away_team': 'Wolverhampton Wanderers', 'home_win_prob': 0.47170292302076816, 'draw_prob': 0.1764792485728599, 'away_win_prob': 0.3518178284063719} 


 , {'home_team': 'Tottenham', 'away_team': 'Burnley', 'home_win_prob': 0.7659779504620666, 'draw_prob': 0.12567756199906938, 'away_win_prob': 0.108344487538864}


, {'home_team': 'Arsenal', 'away_team': 'Liverpool', 'home_win_prob': 0.493892377572036, 'draw_prob': 0.20464845135669057, 'away_win_prob': 0.30145917107127335}


#### Exporting Output

In [18]:
from openpyxl import load_workbook
import xlwings as xw

'''
GOALS
'''
# load the excel file and select the worksheet
wb = xw.Book('PREDICTIONS_OUTPUT_GOALS.xlsx')
ws = wb.sheets('EPL')

# export the previous dataframe starting from cell A1
ws.range('A1').options(index=False).value = df_predictions

wb.save()
wb.close()

'''
H2H
'''

# load the excel file and select the worksheet
wb_h2h = xw.Book('PREDICTIONS_OUTPUT_XH2H.xlsx')
ws_h2h = wb_h2h.sheets('EPL')

# export the previous dataframe starting from cell A1
ws_h2h.range('A1').options(index=False).value = df_predictions_h2h

wb_h2h.save()
wb_h2h.close()

### La Liga
<div class="alert alert-block alert-info">

In [19]:
import pandas as pd

df_matchdays = pd.read_csv('LaLiga-fixtures.csv')
df_matchdays.head(10)
# We will adjust the df manually to only contain the next matchday games
# normally there are 10 games index[9]

df_matchdays = df_matchdays.loc[0:10]
df_matchdays

Unnamed: 0,home_team,away_team
0,Granada,Cadiz
1,Celta Vigo,Real Betis
2,Real Madrid,Mallorca
3,Girona,Atletico Madrid
4,Osasuna,Almeria
5,Sevilla,Athletic Club
6,Las Palmas,Barcelona
7,Sevilla,Alaves
8,Las Palmas,Villarreal
9,Mallorca,Celta Vigo


#### O/U Predictions

In [20]:
# creating an empty list to store the predictions for each game
predictions = []

for i, row in df_matchdays.iterrows():
    home_team = row['home_team']
    away_team = row['away_team']
    #print(f"Match {i+1}: {home_team} vs {away_team}")
    #print(" ")
    result = predict_game_result(home_team, away_team, la_liga_df)
    predictions.append({
        'home_team': home_team,
        'away_team': away_team,
        '+1.5(%)' : result['prob_over_1_goal'],
        '+2.5(%)': result['prob_over_2_goals'],
        '+3.5(%)': result['prob_over_3_goals'],
        'H+1.5(%)': result['h_+1.5'],
        'A+1.5(%)': result['a_+1.5'],
        'xG': result['expected_goals']
        
    })
    #print(result)
    #print(" ")

df_predictions = pd.DataFrame(predictions)    
df_predictions

Unnamed: 0,home_team,away_team,+1.5(%),+2.5(%),+3.5(%),H+1.5(%),A+1.5(%),xG
0,Granada,Cadiz,0.794232,0.487037,0.229503,0.383239,0.168295,3.62963
1,Celta Vigo,Real Betis,0.517621,0.19432,0.053148,0.087239,0.117316,2.222222
2,Real Madrid,Mallorca,0.938442,0.814573,0.636815,0.811866,0.014388,4.875
3,Girona,Atletico Madrid,0.850926,0.571839,0.294328,0.412781,0.270867,4.138889
4,Osasuna,Almeria,0.806288,0.509516,0.2516,0.422531,0.150631,3.703704
5,Sevilla,Athletic Club,0.701965,0.360895,0.136317,0.177342,0.223122,3.069444
6,Las Palmas,Barcelona,0.569529,0.237234,0.073307,0.168015,0.083198,2.421875
7,Sevilla,Alaves,0.665406,0.354086,0.152375,0.329338,0.036901,2.744444
8,Las Palmas,Villarreal,0.684203,0.378632,0.170775,0.357115,0.033469,2.819444
9,Mallorca,Celta Vigo,0.616303,0.280333,0.095834,0.21155,0.087239,2.617284


#### H2H Predictions

In [21]:
# creating an empty list to store the predictions for each game
predictions_h2h = []

for i, row in df_matchdays.iterrows():
    home_team = row['home_team']
    away_team = row['away_team']

    
    #print(f"Match {i+1}: {home_team} vs {away_team}")
    #print(" ")
    result_h2h = predict_game_winner(home_team, away_team, la_liga_df)
    predictions_h2h.append({
        'home_team': home_team,
        'away_team': away_team,
        'Home (%)': result_h2h['home_win_prob'],
        'Draw (%)': result_h2h['draw_prob'],
        'Away (%)': result_h2h['away_win_prob'],
    })
    #print(result_h2h)
    #print(" ")

df_predictions_h2h = pd.DataFrame(predictions_h2h) 
df_predictions_h2h

Unnamed: 0,home_team,away_team,Home (%),Draw (%),Away (%)
0,Granada,Cadiz,0.558856,0.202354,0.238791
1,Celta Vigo,Real Betis,0.318644,0.288541,0.392814
2,Real Madrid,Mallorca,0.947994,0.039013,0.012993
3,Girona,Atletico Madrid,0.499546,0.197986,0.302467
4,Osasuna,Almeria,0.603493,0.191057,0.20545
5,Sevilla,Athletic Club,0.341349,0.238921,0.41973
6,Las Palmas,Barcelona,0.459368,0.268672,0.27196
7,Sevilla,Alaves,0.67797,0.198786,0.123244
8,Las Palmas,Villarreal,0.704544,0.18626,0.109196
9,Mallorca,Celta Vigo,0.499435,0.25161,0.248954


#### Exporting Output

In [22]:
'''
GOALS
'''
# load the excel file and select the worksheet
wb = xw.Book('PREDICTIONS_OUTPUT_GOALS.xlsx')
ws = wb.sheets('La Liga')

# export the previous dataframe starting from cell A1
ws.range('A1').options(index=False).value = df_predictions

wb.save()
wb.close()

'''
H2H
'''

# load the excel file and select the worksheet
wb_h2h = xw.Book('PREDICTIONS_OUTPUT_XH2H.xlsx')
ws_h2h = wb_h2h.sheets('La Liga')

# export the previous dataframe starting from cell A1
ws_h2h.range('A1').options(index=False).value = df_predictions_h2h

wb_h2h.save()
wb_h2h.close()

### Bundesliga
<div class="alert alert-block alert-info">

In [23]:
import pandas as pd

df_matchdays = pd.read_csv('Bundesliga-fixtures.csv')
df_matchdays.head(10)
# We will adjust the df manually to only contain the next matchday games
# normally there are 10 games index[9]

df_matchdays = df_matchdays.loc[0:8]
df_matchdays

Unnamed: 0,home_team,away_team
0,Bayern Munich,Hoffenheim
1,RasenBallsport Leipzig,Eintracht Frankfurt
2,Freiburg,Union Berlin
3,Mainz 05,Wolfsburg
4,FC Cologne,FC Heidenheim
5,Augsburg,Bayer Leverkusen
6,Darmstadt,Borussia Dortmund
7,Bochum,Werder Bremen
8,Borussia M.Gladbach,VfB Stuttgart


#### O/U Predictions

In [24]:
# creating an empty list to store the predictions for each game
predictions = []

for i, row in df_matchdays.iterrows():
    home_team = row['home_team']
    away_team = row['away_team']
    #print(f"Match {i+1}: {home_team} vs {away_team}")
    #print(" ")
    result = predict_game_result(home_team, away_team, bundesliga_df)
    predictions.append({
        'home_team': home_team,
        'away_team': away_team,
        '+1.5(%)' : result['prob_over_1_goal'],
        '+2.5(%)': result['prob_over_2_goals'],
        '+3.5(%)': result['prob_over_3_goals'],
        'H+1.5(%)': result['h_+1.5'],
        'A+1.5(%)': result['a_+1.5'],
        'xG': result['expected_goals']
        
    })
    #print(result)
    #print(" ")

df_predictions = pd.DataFrame(predictions)    
df_predictions

Unnamed: 0,home_team,away_team,+1.5(%),+2.5(%),+3.5(%),H+1.5(%),A+1.5(%),xG
0,Bayern Munich,Hoffenheim,0.995846,0.975512,0.922034,0.969556,0.195649,8.482143
1,RasenBallsport Leipzig,Eintracht Frankfurt,0.955748,0.841374,0.66488,0.826422,0.086138,5.53125
2,Freiburg,Union Berlin,0.961713,0.85164,0.673619,0.827566,0.13961,5.795918
3,Mainz 05,Wolfsburg,0.744529,0.411173,0.167531,0.218489,0.246553,3.321429
4,FC Cologne,FC Heidenheim,0.752247,0.424719,0.1798,0.303998,0.173448,3.357143
5,Augsburg,Bayer Leverkusen,0.963393,0.846514,0.650609,0.246553,0.796289,5.964286
6,Darmstadt,Borussia Dortmund,0.961926,0.83658,0.625694,0.313658,0.761897,5.964286
7,Bochum,Werder Bremen,0.944469,0.795624,0.578232,0.74969,0.183509,5.387755
8,Borussia M.Gladbach,VfB Stuttgart,0.925766,0.725291,0.452506,0.527134,0.419055,5.142857


#### H2H Predictions

In [25]:
# creating an empty list to store the predictions for each game
predictions_h2h = []

for i, row in df_matchdays.iterrows():
    home_team = row['home_team']
    away_team = row['away_team']

    
    #print(f"Match {i+1}: {home_team} vs {away_team}")
    #print(" ")
    result_h2h = predict_game_winner(home_team, away_team, bundesliga_df)
    predictions_h2h.append({
        'home_team': home_team,
        'away_team': away_team,
        'Home (%)': result_h2h['home_win_prob'],
        'Draw (%)': result_h2h['draw_prob'],
        'Away (%)': result_h2h['away_win_prob'],
    })
    #print(result_h2h)
    #print(" ")

df_predictions_h2h = pd.DataFrame(predictions_h2h) 
df_predictions_h2h

Unnamed: 0,home_team,away_team,Home (%),Draw (%),Away (%)
0,Bayern Munich,Hoffenheim,0.939889,0.036504,0.023606
1,RasenBallsport Leipzig,Eintracht Frankfurt,0.897528,0.064078,0.038394
2,Freiburg,Union Berlin,0.866937,0.07703,0.056033
3,Mainz 05,Wolfsburg,0.36294,0.229239,0.407821
4,FC Cologne,FC Heidenheim,0.492671,0.221267,0.286062
5,Augsburg,Bayer Leverkusen,0.106215,0.10733,0.786455
6,Darmstadt,Borussia Dortmund,0.150636,0.127334,0.72203
7,Bochum,Werder Bremen,0.793068,0.109207,0.097726
8,Borussia M.Gladbach,VfB Stuttgart,0.481515,0.179875,0.33861


#### Exporting Output

In [26]:
from openpyxl import load_workbook
import xlwings as xw

'''
GOALS
'''
# load the excel file and select the worksheet
wb = xw.Book('PREDICTIONS_OUTPUT_GOALS.xlsx')
ws = wb.sheets('Bundesliga')

# export the previous dataframe starting from cell A1
ws.range('A1').options(index=False).value = df_predictions

wb.save()
wb.close()

'''
H2H
'''

# load the excel file and select the worksheet
wb_h2h = xw.Book('PREDICTIONS_OUTPUT_XH2H.xlsx')
ws_h2h = wb_h2h.sheets('Bundesliga')

# export the previous dataframe starting from cell A1
ws_h2h.range('A1').options(index=False).value = df_predictions_h2h

wb_h2h.save()
wb_h2h.close()

### Serie A
<div class="alert alert-block alert-info">

In [27]:
import pandas as pd

df_matchdays = pd.read_csv('SerieA-fixtures.csv')
df_matchdays.head(10)
# We will adjust the df manually to only contain the next matchday games
# normally there are 10 games index[9]

df_matchdays = df_matchdays.loc[0:9]
df_matchdays

Unnamed: 0,home_team,away_team
0,Bologna,Genoa
1,Inter,Verona
2,Frosinone,Monza
3,Lecce,Cagliari
4,Sassuolo,Fiorentina
5,Empoli,AC Milan
6,Torino,Napoli
7,Udinese,Lazio
8,Salernitana,Juventus
9,Roma,Atalanta


#### O/U Predictions

In [28]:
# creating an empty list to store the predictions for each game
predictions = []

for i, row in df_matchdays.iterrows():
    home_team = row['home_team']
    away_team = row['away_team']
    #print(f"Match {i+1}: {home_team} vs {away_team}")
    #print(" ")
    result = predict_game_result(home_team, away_team, seriea_df)
    predictions.append({
        'home_team': home_team,
        'away_team': away_team,
        '+1.5(%)' : result['prob_over_1_goal'],
        '+2.5(%)': result['prob_over_2_goals'],
        '+3.5(%)': result['prob_over_3_goals'],
        'H+1.5(%)': result['h_+1.5'],
        'A+1.5(%)': result['a_+1.5'],
        'xG': result['expected_goals']
        
    })
    #print(result)
    #print(" ")

df_predictions = pd.DataFrame(predictions)    
df_predictions

Unnamed: 0,home_team,away_team,+1.5(%),+2.5(%),+3.5(%),H+1.5(%),A+1.5(%),xG
0,Bologna,Genoa,0.593875,0.306993,0.130157,0.303283,0.005324,2.271605
1,Inter,Verona,0.861849,0.663862,0.443587,0.661687,0.006431,3.777778
2,Frosinone,Monza,0.728841,0.420886,0.195562,0.383239,0.06104,3.111111
3,Lecce,Cagliari,0.69242,0.389927,0.179604,0.369995,0.031638,2.851852
4,Sassuolo,Fiorentina,0.803044,0.498277,0.236654,0.186519,0.383239,3.703704
5,Empoli,AC Milan,0.845603,0.596957,0.350999,0.078045,0.562839,3.925926
6,Torino,Napoli,0.481277,0.171195,0.044995,0.06104,0.117316,2.074074
7,Udinese,Lazio,0.772546,0.447791,0.192396,0.263436,0.250291,3.506173
8,Salernitana,Juventus,0.850406,0.618895,0.382782,0.044281,0.601237,3.888889
9,Roma,Atalanta,0.891506,0.68465,0.44721,0.65451,0.087239,4.407407


#### H2H Predictions

In [29]:
# creating an empty list to store the predictions for each game
predictions_h2h = []

for i, row in df_matchdays.iterrows():
    home_team = row['home_team']
    away_team = row['away_team']

    
    #print(f"Match {i+1}: {home_team} vs {away_team}")
    #print(" ")
    result_h2h = predict_game_winner(home_team, away_team, seriea_df)
    predictions_h2h.append({
        'home_team': home_team,
        'away_team': away_team,
        'Home (%)': result_h2h['home_win_prob'],
        'Draw (%)': result_h2h['draw_prob'],
        'Away (%)': result_h2h['away_win_prob'],
    })
    #print(result_h2h)
    #print(" ")

df_predictions_h2h = pd.DataFrame(predictions_h2h) 
df_predictions_h2h

Unnamed: 0,home_team,away_team,Home (%),Draw (%),Away (%)
0,Bologna,Genoa,0.758186,0.184287,0.057528
1,Inter,Verona,0.918163,0.063717,0.01812
2,Frosinone,Monza,0.675712,0.187221,0.137067
3,Lecce,Cagliari,0.717033,0.180328,0.102639
4,Sassuolo,Fiorentina,0.253775,0.203092,0.543133
5,Empoli,AC Milan,0.097222,0.13606,0.766717
6,Torino,Napoli,0.275239,0.297312,0.427449
7,Udinese,Lazio,0.398721,0.222731,0.378549
8,Salernitana,Juventus,0.062006,0.110965,0.827029
9,Roma,Atalanta,0.808706,0.112586,0.078708


#### Exporting Output

In [30]:
from openpyxl import load_workbook
import xlwings as xw

'''
GOALS
'''
# load the excel file and select the worksheet
wb = xw.Book('PREDICTIONS_OUTPUT_GOALS.xlsx')
ws = wb.sheets('Serie A')

# export the previous dataframe starting from cell A1
ws.range('A1').options(index=False).value = df_predictions

wb.save()
wb.close()

'''
H2H
'''

# load the excel file and select the worksheet
wb_h2h = xw.Book('PREDICTIONS_OUTPUT_XH2H.xlsx')
ws_h2h = wb_h2h.sheets('Serie A')

# export the previous dataframe starting from cell A1
ws_h2h.range('A1').options(index=False).value = df_predictions_h2h

wb_h2h.save()
wb_h2h.close()

### Ligue 1
<div class="alert alert-block alert-info">

In [31]:
import pandas as pd

df_matchdays = pd.read_csv('Ligue1-fixtures.csv')
df_matchdays.head(10)
# We will adjust the df manually to only contain the next matchday games
# normally there are 10 games index[9]

df_matchdays = df_matchdays.loc[0:8]
df_matchdays

Unnamed: 0,home_team,away_team
0,Marseille,Strasbourg
1,Monaco,Reims
2,Rennes,Nice
3,Lille,Lorient
4,Metz,Toulouse
5,Nantes,Clermont Foot
6,Brest,Montpellier
7,Le Havre,Lyon
8,Lens,Paris Saint Germain


#### O/U Predictions

In [32]:
# creating an empty list to store the predictions for each game
predictions = []

for i, row in df_matchdays.iterrows():
    home_team = row['home_team']
    away_team = row['away_team']
    #print(f"Match {i+1}: {home_team} vs {away_team}")
    #print(" ")
    result = predict_game_result(home_team, away_team, ligue1_df)
    predictions.append({
        'home_team': home_team,
        'away_team': away_team,
        '+1.5(%)' : result['prob_over_1_goal'],
        '+2.5(%)': result['prob_over_2_goals'],
        '+3.5(%)': result['prob_over_3_goals'],
        'H+1.5(%)': result['h_+1.5'],
        'A+1.5(%)': result['a_+1.5'],
        'xG': result['expected_goals']
        
    })
    #print(result)
    #print(" ")

df_predictions = pd.DataFrame(predictions)    
df_predictions

Unnamed: 0,home_team,away_team,+1.5(%),+2.5(%),+3.5(%),H+1.5(%),A+1.5(%),xG
0,Marseille,Strasbourg,0.779741,0.541348,0.317841,0.540909,0.000955,3.03125
1,Monaco,Reims,0.83113,0.581021,0.341042,0.557883,0.052334,3.75
2,Rennes,Nice,0.742229,0.40871,0.166278,0.25603,0.205222,3.305556
3,Lille,Lorient,0.827763,0.601544,0.374437,0.597482,0.01009,3.53125
4,Metz,Toulouse,0.473483,0.178809,0.053123,0.157184,0.025657,1.986111
5,Nantes,Clermont Foot,0.78402,0.482599,0.234761,0.416444,0.113366,3.513889
6,Brest,Montpellier,0.809495,0.527583,0.277975,0.476065,0.098328,3.671875
7,Le Havre,Lyon,0.528341,0.223457,0.075262,0.203008,0.025657,2.171875
8,Lens,Paris Saint Germain,0.672024,0.329458,0.11885,0.145776,0.215028,2.90625


#### H2H Predictions

In [33]:
# creating an empty list to store the predictions for each game
predictions_h2h = []

for i, row in df_matchdays.iterrows():
    home_team = row['home_team']
    away_team = row['away_team']

    
    #print(f"Match {i+1}: {home_team} vs {away_team}")
    #print(" ")
    result_h2h = predict_game_winner(home_team, away_team, ligue1_df)
    predictions_h2h.append({
        'home_team': home_team,
        'away_team': away_team,
        'Home (%)': result_h2h['home_win_prob'],
        'Draw (%)': result_h2h['draw_prob'],
        'Away (%)': result_h2h['away_win_prob'],
    })
    #print(result_h2h)
    #print(" ")

df_predictions_h2h = pd.DataFrame(predictions_h2h) 
df_predictions_h2h

Unnamed: 0,home_team,away_team,Home (%),Draw (%),Away (%)
0,Marseille,Strasbourg,0.908809,0.07832,0.012871
1,Monaco,Reims,0.794189,0.127669,0.078143
2,Rennes,Nice,0.426229,0.229097,0.344674
3,Lille,Lorient,0.885312,0.085689,0.028999
4,Metz,Toulouse,0.547579,0.281295,0.171125
5,Nantes,Clermont Foot,0.635515,0.18796,0.176525
6,Brest,Montpellier,0.691746,0.167506,0.140748
7,Le Havre,Lyon,0.597203,0.253867,0.14893
8,Lens,Paris Saint Germain,0.315118,0.244836,0.440046


#### Exporting Output

In [34]:
from openpyxl import load_workbook
import xlwings as xw

'''
GOALS
'''
# load the excel file and select the worksheet
wb = xw.Book('PREDICTIONS_OUTPUT_GOALS.xlsx')
ws = wb.sheets('Ligue 1')

# export the previous dataframe starting from cell A1
ws.range('A1').options(index=False).value = df_predictions

wb.save()
wb.close()

'''
H2H
'''

# load the excel file and select the worksheet
wb_h2h = xw.Book('PREDICTIONS_OUTPUT_XH2H.xlsx')
ws_h2h = wb_h2h.sheets('Ligue 1')

# export the previous dataframe starting from cell A1
ws_h2h.range('A1').options(index=False).value = df_predictions_h2h

wb_h2h.save()
wb_h2h.close()

## Unification of Data 
#### Remember to run Eredivisie first
<div class="alert alert-block alert-info">

In [35]:
la_liga = pd.read_excel('PREDICTIONS_OUTPUT_GOALS.xlsx', sheet_name='La Liga')
la_liga['League'] = 'La Liga'

ligue_1 = pd.read_excel('PREDICTIONS_OUTPUT_GOALS.xlsx', sheet_name='Ligue 1')
ligue_1['League'] = 'Ligue 1'

epl = pd.read_excel('PREDICTIONS_OUTPUT_GOALS.xlsx', sheet_name='EPL')
epl['League'] = 'EPL'

seriea = pd.read_excel('PREDICTIONS_OUTPUT_GOALS.xlsx', sheet_name='Serie A')
seriea['League'] = 'Serie A'

bundesliga = pd.read_excel('PREDICTIONS_OUTPUT_GOALS.xlsx', sheet_name='Bundesliga')
bundesliga['League'] = 'Bundesliga'

eredivisie = pd.read_excel('PREDICTIONS_OUTPUT_GOALS.xlsx', sheet_name='Eredivisie')
eredivisie['League'] = 'Eredivisie'

frames = [la_liga, ligue_1, epl, seriea, bundesliga, eredivisie]
final_df = pd.concat(frames, ignore_index=True)

final_df = final_df.dropna()

#final_df = final_df.drop([30,29])

final_df.to_csv('Z_Tableau_dataset.csv')

In [36]:
final_df

Unnamed: 0,home_team,away_team,+1.5(%),+2.5(%),+3.5(%),H+1.5(%),A+1.5(%),xG,League
0,Granada,Cadiz,0.794232,0.487037,0.229503,0.383239,0.168295,3.62963,La Liga
1,Celta Vigo,Real Betis,0.517621,0.19432,0.053148,0.087239,0.117316,2.222222,La Liga
2,Real Madrid,Mallorca,0.938442,0.814573,0.636815,0.811866,0.014388,4.875,La Liga
3,Girona,Atletico Madrid,0.850926,0.571839,0.294328,0.412781,0.270867,4.138889,La Liga
4,Osasuna,Almeria,0.806288,0.509516,0.2516,0.422531,0.150631,3.703704,La Liga
5,Sevilla,Athletic Club,0.701965,0.360895,0.136317,0.177342,0.223122,3.069444,La Liga
6,Las Palmas,Barcelona,0.569529,0.237234,0.073307,0.168015,0.083198,2.421875,La Liga
7,Sevilla,Alaves,0.665406,0.354086,0.152375,0.329338,0.036901,2.744444,La Liga
8,Las Palmas,Villarreal,0.684203,0.378632,0.170775,0.357115,0.033469,2.819444,La Liga
9,Mallorca,Celta Vigo,0.616303,0.280333,0.095834,0.21155,0.087239,2.617284,La Liga
