# Poisson Distribution Predictions

This is a sampled version depicting how the computations were conducted. To preview the actual dataset, please scroll to the bottom of the page.

Avoid running this script if your database instance is not setup.

In [1]:
import pandas as pd
import datetime
import math
from itertools import product

In [2]:
# Flask Configuration

from flask import Flask
from sqlalchemy import create_engine
from flask_mysqldb import MySQL

app = Flask(__name__)

app.config['MYSQL_HOST'] = 'localhost'
app.config['MYSQL_USER'] = 'root'
app.config['MYSQL_PASSWORD'] = ''
app.config['MYSQL_DB'] = 'betstats'

mysql = MySQL(app)

mysql_alchemy = create_engine(f'mysql+pymysql://root:@localhost/betstats')

## Getting The Info

In [3]:
with app.app_context():
    cur = mysql.connection.cursor()
    stats_table = cur.execute("""SELECT
                                      f.fixture_id,
                                      l.league_round AS max_league_round,
                                      l.max_fixture_date AS max_lr_fixture_date,
                                      f.teams_home_id,
                                      f.teams_away_id,
                                      ph.home_attack_ft,
                                      pa.away_defence_ft,
                                      pa.away_attack_ft,
                                      ph.home_defence_ft,
                                      l.avg_goals_for_average_home_ft,
                                      l.avg_goals_for_average_away_ft,
                                      ph.home_attack_1h,
                                      pa.away_defence_1h,
                                      pa.away_attack_1h,
                                      ph.home_defence_1h,
                                      l.avg_goals_for_average_home_1h,
                                      l.avg_goals_for_average_away_1h,
                                      ph.home_attack_2h,
                                      pa.away_defence_2h,
                                      pa.away_attack_2h,
                                      ph.home_defence_2h,
                                      l.avg_goals_for_average_home_2h,
                                      l.avg_goals_for_average_away_2h
                                    FROM
                                      `football_fixtures` AS f
                                    LEFT JOIN
                                      `power_table_statistics_filtered` AS ph
                                    ON
                                      f.league_season = ph.league_season
                                      AND f.league_id = ph.league_id
                                      AND f.teams_home_id = ph.team_id
                                    LEFT JOIN
                                      `power_table_statistics_filtered` AS pa
                                    ON
                                      f.league_season = pa.league_season
                                      AND f.league_id = pa.league_id
                                      AND f.teams_away_id = pa.team_id
                                    LEFT JOIN
                                      `league_goals_statistics_filtered` AS l
                                    ON
                                      f.league_season = l.league_season
                                      AND f.league_id = l.league_id
                                    WHERE
                                      f.league_id = 39 AND
                                      f.fixture_id IN (
                                      SELECT
                                        fixture_id
                                      FROM
                                        `football_teams_next_fixture_id`
                                      GROUP BY
                                        fixture_id)""")

    if stats_table > 0:
        stats_table_details = cur.fetchall()
        
        # Retrieving column info
        num_fields = len(cur.description)
        column_names = [i[0] for i in cur.description]
        
        # Changing the SQL result into a PD DF
        stats_table_df = pd.DataFrame(stats_table_details)
        stats_table_df.columns = column_names

display(stats_table_df)

Unnamed: 0,fixture_id,max_league_round,max_lr_fixture_date,teams_home_id,teams_away_id,home_attack_ft,away_defence_ft,away_attack_ft,home_defence_ft,avg_goals_for_average_home_ft,...,away_attack_1h,home_defence_1h,avg_goals_for_average_home_1h,avg_goals_for_average_away_1h,home_attack_2h,away_defence_2h,away_attack_2h,home_defence_2h,avg_goals_for_average_home_2h,avg_goals_for_average_away_2h
0,710819,36.0,2022-05-08 15:30:00,49,46,1.337,1.335,1.009,0.915,1.496,...,1.021,0.51,0.69,0.576,1.46,1.312,0.999,1.241,0.806,0.707
1,710916,36.0,2022-05-08 15:30:00,66,52,1.086,0.982,0.917,1.214,1.496,...,0.818,1.302,0.69,0.576,1.164,0.729,0.999,1.143,0.806,0.707
2,710917,36.0,2022-05-08 15:30:00,45,55,0.865,1.224,0.996,0.915,1.496,...,0.868,0.918,0.69,0.576,1.167,1.102,1.1,0.91,0.806,0.707
3,710918,36.0,2022-05-08 15:30:00,63,51,0.708,0.742,0.952,1.554,1.496,...,0.578,1.431,0.69,0.576,0.73,0.757,1.257,1.655,0.806,0.707
4,710920,36.0,2022-05-08 15:30:00,34,42,0.891,1.021,1.192,1.166,1.496,...,1.736,1.158,0.69,0.576,0.69,0.948,0.748,1.172,0.806,0.707
5,710921,36.0,2022-05-08 15:30:00,41,40,0.817,0.589,1.88,0.949,1.496,...,2.144,1.061,0.69,0.576,0.483,0.511,1.665,0.859,0.806,0.707
6,710922,36.0,2022-05-08 15:30:00,47,44,1.337,1.021,0.687,0.869,1.496,...,0.715,0.818,0.69,0.576,1.387,0.948,0.666,0.91,0.806,0.707
7,710923,36.0,2022-05-08 15:30:00,38,46,0.629,1.335,1.009,1.874,1.496,...,1.021,1.634,0.69,0.576,0.511,1.312,0.999,2.069,0.806,0.707
8,710924,36.0,2022-05-08 15:30:00,48,50,1.151,0.314,1.559,1.036,1.496,...,1.634,1.061,0.69,0.576,1.378,0.437,1.498,1.015,0.806,0.707
9,710925,36.0,2022-05-08 15:30:00,39,71,0.708,1.453,0.458,0.869,1.496,...,0.307,0.918,0.69,0.576,0.438,1.53,0.583,0.827,0.806,0.707


## Generating a New DF

This will be saved as a table later on in phpMyAdmin.

In [4]:
betstats_generated_dt = datetime.datetime.now()

print(betstats_generated_dt)

2022-05-13 11:47:07.531714


In [5]:
computed_df = stats_table_df
computed_df['betstats_generated_dt'] = betstats_generated_dt

computed_df['exp_home_goals_ft'] = round(pd.to_numeric(computed_df['home_attack_ft'] * computed_df['away_defence_ft'] * computed_df['avg_goals_for_average_home_ft']), 3)
computed_df['exp_away_goals_ft'] = round(pd.to_numeric(computed_df['away_attack_ft'] * computed_df['home_defence_ft'] * computed_df['avg_goals_for_average_away_ft']), 3)
computed_df['exp_home_goals_1h'] = round(pd.to_numeric(computed_df['home_attack_1h'] * computed_df['away_defence_1h'] * computed_df['avg_goals_for_average_home_1h']), 3)
computed_df['exp_away_goals_1h'] = round(pd.to_numeric(computed_df['away_attack_1h'] * computed_df['home_defence_1h'] * computed_df['avg_goals_for_average_away_1h']), 3)
computed_df['exp_home_goals_2h'] = round(pd.to_numeric(computed_df['home_attack_2h'] * computed_df['away_defence_2h'] * computed_df['avg_goals_for_average_home_2h']), 3)
computed_df['exp_away_goals_2h'] = round(pd.to_numeric(computed_df['away_attack_2h'] * computed_df['home_defence_2h'] * computed_df['avg_goals_for_average_away_2h']), 3)

columns_to_keep = ['betstats_generated_dt', 'fixture_id',
                   'max_league_round', 'max_lr_fixture_date',
                   'exp_home_goals_ft', 'exp_away_goals_ft',
                   'exp_home_goals_1h', 'exp_away_goals_1h',
                   'exp_home_goals_2h', 'exp_away_goals_2h']

computed_df = computed_df[columns_to_keep]

display(computed_df)

Unnamed: 0,betstats_generated_dt,fixture_id,max_league_round,max_lr_fixture_date,exp_home_goals_ft,exp_away_goals_ft,exp_home_goals_1h,exp_away_goals_1h,exp_home_goals_2h,exp_away_goals_2h
0,2022-05-13 11:47:07.531714,710819,36.0,2022-05-08 15:30:00,2.67,1.185,1.122,0.3,1.544,0.877
1,2022-05-13 11:47:07.531714,710916,36.0,2022-05-08 15:30:00,1.595,1.428,0.878,0.613,0.684,0.807
2,2022-05-13 11:47:07.531714,710917,36.0,2022-05-08 15:30:00,1.584,1.169,0.483,0.459,1.037,0.708
3,2022-05-13 11:47:07.531714,710918,36.0,2022-05-08 15:30:00,0.786,1.898,0.341,0.476,0.445,1.471
4,2022-05-13 11:47:07.531714,710920,36.0,2022-05-08 15:30:00,1.361,1.783,0.862,1.158,0.527,0.62
5,2022-05-13 11:47:07.531714,710921,36.0,2022-05-08 15:30:00,0.72,2.289,0.568,1.31,0.199,1.011
6,2022-05-13 11:47:07.531714,710922,36.0,2022-05-08 15:30:00,2.042,0.766,0.976,0.337,1.06,0.428
7,2022-05-13 11:47:07.531714,710923,36.0,2022-05-08 15:30:00,1.256,2.426,0.721,0.961,0.54,1.461
8,2022-05-13 11:47:07.531714,710924,36.0,2022-05-08 15:30:00,0.541,2.072,0.105,0.999,0.485,1.075
9,2022-05-13 11:47:07.531714,710925,36.0,2022-05-08 15:30:00,1.539,0.511,0.961,0.162,0.54,0.341


In [6]:
for i in range(0, len(computed_df)):
    # Generating expected home and away goals.    
    home_dict_ft = {}
    home_dict_1h = {}
    home_dict_2h = {}
    away_dict_ft = {}
    away_dict_1h = {}
    away_dict_2h = {}
      
    exp_home_goals_ft = computed_df.iloc[i]['exp_home_goals_ft']
    exp_away_goals_ft = computed_df.iloc[i]['exp_away_goals_ft']
    exp_home_goals_1h = computed_df.iloc[i]['exp_home_goals_1h']
    exp_away_goals_1h = computed_df.iloc[i]['exp_away_goals_1h']
    exp_home_goals_2h = computed_df.iloc[i]['exp_home_goals_2h']
    exp_away_goals_2h = computed_df.iloc[i]['exp_away_goals_2h']
    
    for goals in range(0, 10):
        if goals is 0:            
            home_dict_ft[goals] = (math.exp(-exp_home_goals_ft) * (exp_home_goals_ft ** goals)) / 1
            away_dict_ft[goals] = (math.exp(-exp_away_goals_ft) * (exp_away_goals_ft ** goals)) / 1
            home_dict_1h[goals] = (math.exp(-exp_home_goals_1h) * (exp_home_goals_1h ** goals)) / 1
            away_dict_1h[goals] = (math.exp(-exp_away_goals_1h) * (exp_away_goals_1h ** goals)) / 1
            home_dict_2h[goals] = (math.exp(-exp_home_goals_2h) * (exp_home_goals_2h ** goals)) / 1
            away_dict_2h[goals] = (math.exp(-exp_away_goals_2h) * (exp_away_goals_2h ** goals)) / 1
        else:
            home_dict_ft[goals] = (math.exp(-exp_home_goals_ft) * (exp_home_goals_ft ** goals)) / math.factorial(goals)
            away_dict_ft[goals] = (math.exp(-exp_away_goals_ft) * (exp_away_goals_ft ** goals)) / math.factorial(goals)
            home_dict_1h[goals] = (math.exp(-exp_home_goals_1h) * (exp_home_goals_1h ** goals)) / math.factorial(goals)
            away_dict_1h[goals] = (math.exp(-exp_away_goals_1h) * (exp_away_goals_1h ** goals)) / math.factorial(goals)
            home_dict_2h[goals] = (math.exp(-exp_home_goals_2h) * (exp_home_goals_2h ** goals)) / math.factorial(goals)
            away_dict_2h[goals] = (math.exp(-exp_away_goals_2h) * (exp_away_goals_2h ** goals)) / math.factorial(goals)
            
    computed_dict_ft = {}
    computed_dict_1h = {}
    computed_dict_2h = {}
    
    # The product is the same for 'ft', '1h' and '2h', so we can loop once.
    for x, y in product(home_dict_ft, away_dict_ft):
        computed_dict_ft.update({(x, y): (home_dict_ft[x] * away_dict_ft[y])})
        computed_dict_1h.update({(x, y): (home_dict_1h[x] * away_dict_1h[y])})
        computed_dict_2h.update({(x, y): (home_dict_2h[x] * away_dict_2h[y])})
        
    # Creating a temporary DataFrame and filling-in the values accordingly.
    column_names = index_no = [i for i in range(0, 10)]
    
    # Full-Time
    score_pred_ft = pd.DataFrame(columns=column_names, index=index_no)
    
    home_win_prob_ft = 0
    draw_prob_ft = 0
    away_win_prob_ft = 0
    
    for key, value in computed_dict_ft.items():
        score_pred_ft.at[key[0], key[1]] = value
        
        if key[0] is key[1]:
            draw_prob_ft += value
        elif key[0] > key[1]:
            home_win_prob_ft += value
        else:
            away_win_prob_ft += value
    
    home_win_prob_ft = round(home_win_prob_ft, 2)
    draw_prob_ft = round(draw_prob_ft, 2)
    away_win_prob_ft = round(away_win_prob_ft, 2)
    
    sum_prob_ft = home_win_prob_ft + draw_prob_ft + away_win_prob_ft
    
    if sum_prob_ft != 1:
        diff_ft = 1 - sum_prob_ft
        draw_prob_ft += diff_ft
        draw_prob_ft = round(draw_prob_ft, 2)
        
    sorted_computed_dict_ft = dict(sorted(computed_dict_ft.items(), key=lambda item:item[1], reverse=True))
    most_likely_scores_ft = list(sorted_computed_dict_ft.items())[:3]
    
    score_prob_ft_01 = f'{most_likely_scores_ft[0][0][0]} - {most_likely_scores_ft[0][0][1]} ({round(most_likely_scores_ft[0][1] * 100)}%)'
    score_prob_ft_02 = f'{most_likely_scores_ft[1][0][0]} - {most_likely_scores_ft[1][0][1]} ({round(most_likely_scores_ft[1][1] * 100)}%)'
    score_prob_ft_03 = f'{most_likely_scores_ft[2][0][0]} - {most_likely_scores_ft[2][0][1]} ({round(most_likely_scores_ft[2][1] * 100)}%)'
    
    
    # First Half
    score_pred_1h = pd.DataFrame(columns=column_names, index=index_no)
    
    home_win_prob_1h = 0
    draw_prob_1h = 0
    away_win_prob_1h = 0
    
    for key, value in computed_dict_1h.items():
        score_pred_1h.at[key[0], key[1]] = value
        
        if key[0] is key[1]:
            draw_prob_1h += value
        elif key[0] > key[1]:
            home_win_prob_1h += value
        else:
            away_win_prob_1h += value
    
    home_win_prob_1h = round(home_win_prob_1h, 2)
    draw_prob_1h = round(draw_prob_1h, 2)
    away_win_prob_1h = round(away_win_prob_1h, 2)
    
    sum_prob_1h = home_win_prob_1h + draw_prob_1h + away_win_prob_1h
    
    sorted_computed_dict_1h = dict(sorted(computed_dict_1h.items(), key=lambda item:item[1], reverse=True))
    most_likely_scores_1h = list(sorted_computed_dict_1h.items())[:3]
    
    score_prob_1h_01 = f'{most_likely_scores_1h[0][0][0]} - {most_likely_scores_1h[0][0][1]} ({round(most_likely_scores_1h[0][1] * 100)}%)'
    score_prob_1h_02 = f'{most_likely_scores_1h[1][0][0]} - {most_likely_scores_1h[1][0][1]} ({round(most_likely_scores_1h[1][1] * 100)}%)'
    score_prob_1h_03 = f'{most_likely_scores_1h[2][0][0]} - {most_likely_scores_1h[2][0][1]} ({round(most_likely_scores_1h[2][1] * 100)}%)'
    
    
    # Second Half
    score_pred_2h = pd.DataFrame(columns=column_names, index=index_no)
    
    home_win_prob_2h = 0
    draw_prob_2h = 0
    away_win_prob_2h = 0
    
    for key, value in computed_dict_2h.items():
        score_pred_2h.at[key[0], key[1]] = value
        
        if key[0] is key[1]:
            draw_prob_2h += value
        elif key[0] > key[1]:
            home_win_prob_2h += value
        else:
            away_win_prob_2h += value
    
    home_win_prob_2h = round(home_win_prob_2h, 2)
    draw_prob_2h = round(draw_prob_2h, 2)
    away_win_prob_2h = round(away_win_prob_2h, 2)
    
    sum_prob_2h = home_win_prob_2h + draw_prob_2h + away_win_prob_2h
    
    if sum_prob_2h != 1:
        diff_2h = 1 - sum_prob_2h
        draw_prob_2h += diff_2h
        draw_prob_2h = round(draw_prob_2h, 2)
        
    sorted_computed_dict_2h = dict(sorted(computed_dict_2h.items(), key=lambda item:item[1], reverse=True))
    most_likely_scores_2h = list(sorted_computed_dict_2h.items())[:3]
    
    score_prob_2h_01 = f'{most_likely_scores_2h[0][0][0]} - {most_likely_scores_2h[0][0][1]} ({round(most_likely_scores_2h[0][1] * 100)}%)'
    score_prob_2h_02 = f'{most_likely_scores_2h[1][0][0]} - {most_likely_scores_2h[1][0][1]} ({round(most_likely_scores_2h[1][1] * 100)}%)'
    score_prob_2h_03 = f'{most_likely_scores_2h[2][0][0]} - {most_likely_scores_2h[2][0][1]} ({round(most_likely_scores_2h[2][1] * 100)}%)'
    
    
    # Assigning the newly generated values in the respective DataFrame cells.
    # 'i' is equal to the iteration value (since we're looping the rows in the DF)
    computed_df.at[i, 'score_prob_dict_ft'] = str(computed_dict_ft)
    computed_df.at[i, 'result_prob_ft_home'] = home_win_prob_ft
    computed_df.at[i, 'result_prob_ft_draw'] = draw_prob_ft
    computed_df.at[i, 'result_prob_ft_away'] = away_win_prob_ft
    computed_df.at[i, 'score_prob_ft_01'] = score_prob_ft_01
    computed_df.at[i, 'score_prob_ft_02'] = score_prob_ft_02
    computed_df.at[i, 'score_prob_ft_03'] = score_prob_ft_03
    
    computed_df.at[i, 'score_prob_dict_1h'] = str(computed_dict_1h)
    computed_df.at[i, 'result_prob_1h_home'] = home_win_prob_1h
    computed_df.at[i, 'result_prob_1h_draw'] = draw_prob_1h
    computed_df.at[i, 'result_prob_1h_away'] = away_win_prob_1h
    computed_df.at[i, 'score_prob_1h_01'] = score_prob_1h_01
    computed_df.at[i, 'score_prob_1h_02'] = score_prob_1h_02
    computed_df.at[i, 'score_prob_1h_03'] = score_prob_1h_03
    
    computed_df.at[i, 'score_prob_dict_2h'] = str(computed_dict_2h)
    computed_df.at[i, 'result_prob_2h_home'] = home_win_prob_2h
    computed_df.at[i, 'result_prob_2h_draw'] = draw_prob_2h
    computed_df.at[i, 'result_prob_2h_away'] = away_win_prob_2h
    computed_df.at[i, 'score_prob_2h_01'] = score_prob_2h_01
    computed_df.at[i, 'score_prob_2h_02'] = score_prob_2h_02
    computed_df.at[i, 'score_prob_2h_03'] = score_prob_2h_03

# Replacing single quotes with double - otherwise it will be invalid JSON.
computed_df = computed_df.replace({'\'': '"'}, regex=True)
    
display(computed_df)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self.obj[key] = _infer_fill_value(value)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  isetter(loc, value)


Unnamed: 0,betstats_generated_dt,fixture_id,max_league_round,max_lr_fixture_date,exp_home_goals_ft,exp_away_goals_ft,exp_home_goals_1h,exp_away_goals_1h,exp_home_goals_2h,exp_away_goals_2h,...,score_prob_1h_01,score_prob_1h_02,score_prob_1h_03,score_prob_dict_2h,result_prob_2h_home,result_prob_2h_draw,result_prob_2h_away,score_prob_2h_01,score_prob_2h_02,score_prob_2h_03
0,2022-05-13 11:47:07.531714,710819,36.0,2022-05-08 15:30:00,2.67,1.185,1.122,0.3,1.544,0.877,...,1 - 0 (27%),0 - 0 (24%),2 - 0 (15%),"{(0, 0): 0.08883274028791911, (0, 1): 0.077906...",0.53,0.26,0.21,1 - 0 (14%),1 - 1 (12%),2 - 0 (11%)
1,2022-05-13 11:47:07.531714,710916,36.0,2022-05-08 15:30:00,1.595,1.428,0.878,0.613,0.684,0.807,...,0 - 0 (23%),1 - 0 (20%),0 - 1 (14%),"{(0, 0): 0.22514739553267432, (0, 1): 0.181693...",0.28,0.37,0.35,0 - 0 (23%),0 - 1 (18%),1 - 0 (15%)
2,2022-05-13 11:47:07.531714,710917,36.0,2022-05-08 15:30:00,1.584,1.169,0.483,0.459,1.037,0.708,...,0 - 0 (39%),1 - 0 (19%),0 - 1 (18%),"{(0, 0): 0.17464498896681088, (0, 1): 0.123648...",0.43,0.32,0.25,1 - 0 (18%),0 - 0 (17%),1 - 1 (13%)
3,2022-05-13 11:47:07.531714,710918,36.0,2022-05-08 15:30:00,0.786,1.898,0.341,0.476,0.445,1.471,...,0 - 0 (44%),0 - 1 (21%),1 - 0 (15%),"{(0, 0): 0.14719456439994122, (0, 1): 0.216523...",0.11,0.26,0.63,0 - 1 (22%),0 - 2 (16%),0 - 0 (15%)
4,2022-05-13 11:47:07.531714,710920,36.0,2022-05-08 15:30:00,1.361,1.783,0.862,1.158,0.527,0.62,...,0 - 1 (15%),0 - 0 (13%),1 - 1 (13%),"{(0, 0): 0.31758810597858733, (0, 1): 0.196904...",0.25,0.44,0.31,0 - 0 (32%),0 - 1 (20%),1 - 0 (17%)
5,2022-05-13 11:47:07.531714,710921,36.0,2022-05-08 15:30:00,0.72,2.289,0.568,1.31,0.199,1.011,...,0 - 1 (20%),0 - 0 (15%),0 - 2 (13%),"{(0, 0): 0.2981972794298874, (0, 1): 0.3014774...",0.07,0.36,0.57,0 - 1 (30%),0 - 0 (30%),0 - 2 (15%)
6,2022-05-13 11:47:07.531714,710922,36.0,2022-05-08 15:30:00,2.042,0.766,0.976,0.337,1.06,0.428,...,0 - 0 (27%),1 - 0 (26%),2 - 0 (13%),"{(0, 0): 0.22582385189647586, (0, 1): 0.096652...",0.51,0.34,0.15,1 - 0 (24%),0 - 0 (23%),2 - 0 (13%)
7,2022-05-13 11:47:07.531714,710923,36.0,2022-05-08 15:30:00,1.256,2.426,0.721,0.961,0.54,1.461,...,0 - 0 (19%),0 - 1 (18%),1 - 0 (13%),"{(0, 0): 0.13520001559846745, (0, 1): 0.197527...",0.14,0.26,0.6,0 - 1 (20%),0 - 2 (14%),0 - 0 (14%)
8,2022-05-13 11:47:07.531714,710924,36.0,2022-05-08 15:30:00,0.541,2.072,0.105,0.999,0.485,1.075,...,0 - 0 (33%),0 - 1 (33%),0 - 2 (17%),"{(0, 0): 0.21013607120076477, (0, 1): 0.225896...",0.17,0.33,0.5,0 - 1 (23%),0 - 0 (21%),0 - 2 (12%)
9,2022-05-13 11:47:07.531714,710925,36.0,2022-05-08 15:30:00,1.539,0.511,0.961,0.162,0.54,0.341,...,0 - 0 (33%),1 - 0 (31%),2 - 0 (15%),"{(0, 0): 0.4143683360922424, (0, 1): 0.1412996...",0.32,0.5,0.18,0 - 0 (41%),1 - 0 (22%),0 - 1 (14%)


## Entire Dataset

In [7]:
entire_dataset = pd.read_csv('football_dataset_poisson_predictions.csv')

entire_dataset = entire_dataset.dropna()

display(entire_dataset)

Unnamed: 0,betstats_generated_dt,fixture_id,league_avg_join,max_lr_fixture_date,exp_home_goals_ft,exp_away_goals_ft,exp_home_goals_1h,exp_away_goals_1h,exp_home_goals_2h,exp_away_goals_2h,...,result_prob_1h_away,score_prob_1h_01,score_prob_1h_02,score_prob_1h_03,result_prob_2h_home,result_prob_2h_draw,result_prob_2h_away,score_prob_2h_01,score_prob_2h_02,score_prob_2h_03
0,24:57.7,115,5,00:00.0,1.304,4.160,1.935,2.370,0.500,1.238,...,0.48,1 - 2 (7%),2 - 2 (7%),1 - 1 (6%),0.15,0.30,0.55,0 - 1 (22%),0 - 0 (18%),0 - 2 (13%)
1,24:57.7,116,5,00:00.0,0.489,1.733,0.322,1.441,0.989,1.238,...,0.66,0 - 1 (25%),0 - 2 (18%),0 - 0 (17%),0.29,0.29,0.42,0 - 1 (13%),1 - 1 (13%),0 - 0 (11%)
2,24:57.7,117,5,00:00.0,0.326,1.560,0.500,0.395,0.330,1.238,...,0.22,0 - 0 (41%),1 - 0 (20%),0 - 1 (16%),0.10,0.30,0.60,0 - 1 (26%),0 - 0 (21%),0 - 2 (16%)
3,24:57.7,118,5,00:00.0,1.003,0.694,0.508,0.395,0.495,1.546,...,0.22,0 - 0 (41%),1 - 0 (21%),0 - 1 (16%),0.12,0.25,0.63,0 - 1 (20%),0 - 2 (16%),0 - 0 (13%)
4,24:57.7,119,5,00:00.0,3.423,0.693,2.582,1.580,0.990,0.833,...,0.22,2 - 1 (8%),3 - 1 (7%),2 - 2 (6%),0.38,0.33,0.29,0 - 0 (16%),1 - 0 (16%),0 - 1 (13%)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28045,24:57.7,710808,24,00:00.0,0.931,0.716,0.654,0.474,0.322,0.269,...,0.23,0 - 0 (32%),1 - 0 (21%),0 - 1 (15%),0.22,0.60,0.18,0 - 0 (55%),1 - 0 (18%),0 - 1 (15%)
28046,24:57.7,710812,25,00:00.0,2.763,0.598,1.285,0.119,1.473,0.420,...,0.03,1 - 0 (32%),0 - 0 (25%),2 - 0 (20%),0.64,0.26,0.10,1 - 0 (22%),2 - 0 (16%),0 - 0 (15%)
28047,24:57.7,717741,31,00:00.0,0.815,1.585,0.367,0.168,0.447,1.756,...,0.11,0 - 0 (59%),1 - 0 (21%),0 - 1 (10%),0.09,0.21,0.70,0 - 1 (19%),0 - 2 (17%),0 - 0 (11%)
28048,24:57.7,717742,32,00:00.0,1.362,1.190,1.135,0.344,0.387,0.853,...,0.11,1 - 0 (26%),0 - 0 (23%),2 - 0 (15%),0.16,0.39,0.45,0 - 0 (29%),0 - 1 (25%),1 - 0 (11%)


## Notes

Results and visualisations regarding this mathematical benchmark model are provided in the write-up. Additional visualisations can be accessed by clicking on this [link](https://docs.google.com/spreadsheets/d/1i-WSEkhrl4q6TLL68K2F5h8F2f2bGpYCKbM4Kk3aXQY/edit).

