In [6]:
import pandas as pd
import numpy as np

import SimulationEngine
import PLScraper
import manipulate

from pandasql import sqldf
import datetime

In [7]:
lastseason_fpath="data/2022-23 PL/2023_matchday_results.csv"
lastseason=pd.read_csv(lastseason_fpath)

oldcols=['fixture.date','teams.home.name','goals.home','teams.away.name','goals.away']
newcols=['Datetime','HomeTeam','HomeGoals','AwayTeam','AwayGoals']
lastseason=manipulate.rename_cols(lastseason,oldcols,newcols)
lastseason=lastseason[newcols]
lastseason['Status']='FT'

In [8]:
thisseason=PLScraper.scrape_pl_results()

In [9]:
query='''
    WITH FINAL AS
    (
     SELECT Datetime, HomeTeam, HomeGoals, AwayTeam, AwayGoals, Status
    FROM lastseason
    
    UNION ALL
    
    SELECT Datetime, HomeTeam, HomeGoals, AwayTeam, AwayGoals, Status
    FROM thisseason
    WHERE
    Datetime > (SELECT MAX(Datetime) FROM lastseason)
    AND Status='FT'
    )
    SELECT *
    FROM FINAL
    ORDER BY Datetime DESC;
    '''

results_df=sqldf(query)

results_df=manipulate.df_strings_to_numbers(results_df,['HomeGoals','AwayGoals'])

In [10]:
all_teams=pd.unique(results_df.HomeTeam)
print(len(all_teams))
print(sorted(all_teams))

27
['Arsenal', 'Aston Villa', 'Bournemouth', 'Brentford', 'Brighton', 'Brighton and Hove Albion', 'Burnley', 'Chelsea', 'Crystal Palace', 'Everton', 'Fulham', 'Leeds', 'Leicester', 'Liverpool', 'Manchester City', 'Manchester United', 'Newcastle', 'Newcastle United', 'Nottingham Forest', 'Sheffield United', 'Southampton', 'Tottenham', 'Tottenham Hotspur', 'West Ham', 'West Ham United', 'Wolverhampton Wanderers', 'Wolves']


In [11]:
print(sorted(pd.unique(lastseason.HomeTeam)))

['Arsenal', 'Aston Villa', 'Bournemouth', 'Brentford', 'Brighton', 'Chelsea', 'Crystal Palace', 'Everton', 'Fulham', 'Leeds', 'Leicester', 'Liverpool', 'Manchester City', 'Manchester United', 'Newcastle', 'Nottingham Forest', 'Southampton', 'Tottenham', 'West Ham', 'Wolves']


In [12]:
prevnames=['Brighton','Newcastle','Tottenham','West Ham']
newnames=['Brighton and Hove Albion', 'Newcastle United', 'Tottenham Hotspur', 'West Ham United']

results_df=manipulate.df_rename_vals(results_df,['HomeTeam','AwayTeam'],prevnames,newnames)


In [13]:
all_teams=pd.unique(results_df.HomeTeam)
print(len(all_teams))
print(sorted(all_teams))

23
['Arsenal', 'Aston Villa', 'Bournemouth', 'Brentford', 'Brighton and Hove Albion', 'Burnley', 'Chelsea', 'Crystal Palace', 'Everton', 'Fulham', 'Leeds', 'Leicester', 'Liverpool', 'Manchester City', 'Manchester United', 'Newcastle United', 'Nottingham Forest', 'Sheffield United', 'Southampton', 'Tottenham Hotspur', 'West Ham United', 'Wolverhampton Wanderers', 'Wolves']


In [14]:
fixture_df=PLScraper.scrape_pl_fixtures()
sim_until_date=datetime.datetime.now()+datetime.timedelta(days=7)
fixture_df=fixture_df[fixture_df.Datetime<sim_until_date]

In [22]:
predictions_this_weekend=SimulationEngine.predict_multiple_results(fixture_df,results_df)

derived_cols=['OddsHome','OddsAway','OddsDraw']
source_cols=['ProbHomeWin','ProbAwayWin','ProbDraw']

for i in range(len(derived_cols)):
    predictions_this_weekend[derived_cols]=predictions_this_weekend[source_cols].apply(lambda x: 1/x)

In [23]:
predictions_this_weekend

Unnamed: 0,Datetime,HomeTeam,AwayTeam,ProbHomeWin,ProbAwayWin,ProbDraw,FullSimulatedResults,OddsHome,OddsAway,OddsDraw
0,2023-09-01 20:00:00,Luton Town,West Ham United,0.137,0.721,0.142,,7.29927,1.386963,7.042254
1,2023-09-02 12:30:00,Sheffield United,Everton,0.336,0.468,0.196,,2.97619,2.136752,5.102041
2,2023-09-02 15:00:00,Brentford,Bournemouth,0.716,0.131,0.153,,1.396648,7.633588,6.535948
3,2023-09-02 15:00:00,Burnley,Tottenham Hotspur,0.175,0.693,0.132,,5.714286,1.443001,7.575758
4,2023-09-02 15:00:00,Chelsea,Nottingham Forest,0.337,0.473,0.19,,2.967359,2.114165,5.263158
5,2023-09-02 15:00:00,Manchester City,Fulham,0.609,0.206,0.185,,1.642036,4.854369,5.405405
6,2023-09-02 17:30:00,Brighton and Hove Albion,Newcastle United,0.315,0.499,0.186,,3.174603,2.004008,5.376344
7,2023-09-03 14:00:00,Crystal Palace,Wolverhampton Wanderers,0.574,0.221,0.205,,1.74216,4.524887,4.878049
8,2023-09-03 14:00:00,Liverpool,Aston Villa,0.471,0.331,0.198,,2.123142,3.021148,5.050505
9,2023-09-03 16:30:00,Arsenal,Manchester United,0.374,0.41,0.216,,2.673797,2.439024,4.62963
