In [1]:
import pandas as pd
import numpy as np

import SimulationEngine
import PLScraper
import manipulate

from pandasql import sqldf
import datetime

In [2]:
# Load last season's results from CSV and reshape them to the column layout
# used by the rest of the notebook: Datetime, HomeTeam, HomeGoals, AwayTeam,
# AwayGoals, Status.
lastseason_fpath="data/2022-23 PL/2023_matchday_results.csv"

# Column names as they appear in the CSV, and the names they are renamed to
# (paired by position).
oldcols=['fixture.date','teams.home.name','goals.home','teams.away.name','goals.away']
newcols=['Datetime','HomeTeam','HomeGoals','AwayTeam','AwayGoals']

# manipulate.rename_cols is project-local; presumably it renames each entry of
# oldcols to the entry of newcols at the same position -- TODO confirm.
# Building the final frame in one chain and adding Status through .assign()
# yields a fresh DataFrame, so the column is never written onto a column-subset
# of another frame (the pattern that can raise pandas' SettingWithCopyWarning).
lastseason=(
    manipulate.rename_cols(pd.read_csv(lastseason_fpath),oldcols,newcols)
    [newcols]
    # Every row from the file is tagged 'FT', the same Status value the
    # combined-results query filters this season's rows on.
    .assign(Status='FT')
)

In [3]:
# Fetch the current season's results through the project-local PLScraper module.
# The combined-results query later reads the columns Datetime, HomeTeam,
# HomeGoals, AwayTeam, AwayGoals and Status from this frame.
thisseason=PLScraper.scrape_pl_results()

In [4]:
# Stack both seasons into one results table:
#   * every row of `lastseason`, plus
#   * rows of `thisseason` whose Datetime is later than the latest Datetime in
#     `lastseason` and whose Status is 'FT',
# ordered by Datetime, newest first.
# NOTE(review): the Datetime comparison is evaluated by the SQL engine, so it is
# only meaningful if both frames store Datetime in the same format -- confirm.
query='''
    WITH FINAL AS
    (
     SELECT Datetime, HomeTeam, HomeGoals, AwayTeam, AwayGoals, Status
    FROM lastseason
    
    UNION ALL
    
    SELECT Datetime, HomeTeam, HomeGoals, AwayTeam, AwayGoals, Status
    FROM thisseason
    WHERE
    Datetime > (SELECT MAX(Datetime) FROM lastseason)
    AND Status='FT'
    )
    SELECT *
    FROM FINAL
    ORDER BY Datetime DESC;
    '''

# No environment is passed to sqldf, so the table names `lastseason` and
# `thisseason` in the query are expected to resolve to the notebook variables
# of the same name.
results_df=sqldf(query)

# Disabled step: conversion of the goal columns from strings to numbers.
#results_df=manipulate.df_strings_to_numbers(results_df,['HomeGoals','AwayGoals'])

In [5]:
# Sanity check on team naming: how many distinct HomeTeam values does the
# combined results table contain, and what are they (alphabetically)?
all_teams = results_df['HomeTeam'].unique()
print(len(all_teams), sorted(all_teams), sep='\n')

28
['Arsenal', 'Aston Villa', 'Bournemouth', 'Brentford', 'Brighton', 'Brighton and Hove Albion', 'Burnley', 'Chelsea', 'Crystal Palace', 'Everton', 'Fulham', 'Leeds', 'Leicester', 'Liverpool', 'Luton Town', 'Manchester City', 'Manchester United', 'Newcastle', 'Newcastle United', 'Nottingham Forest', 'Sheffield United', 'Southampton', 'Tottenham', 'Tottenham Hotspur', 'West Ham', 'West Ham United', 'Wolverhampton Wanderers', 'Wolves']


In [6]:
# Alphabetical list of the distinct HomeTeam names found in last season's data,
# for comparison against the names in the combined table.
lastseason_home_teams = sorted(lastseason['HomeTeam'].unique())
print(lastseason_home_teams)

['Arsenal', 'Aston Villa', 'Bournemouth', 'Brentford', 'Brighton', 'Chelsea', 'Crystal Palace', 'Everton', 'Fulham', 'Leeds', 'Leicester', 'Liverpool', 'Manchester City', 'Manchester United', 'Newcastle', 'Nottingham Forest', 'Southampton', 'Tottenham', 'West Ham', 'Wolves']


In [7]:
# Five clubs appear under two spellings in the combined table: a short form and
# a long form. Map each short form to its long form so every club has one name.
team_name_map = {
    'Brighton': 'Brighton and Hove Albion',
    'Newcastle': 'Newcastle United',
    'Tottenham': 'Tottenham Hotspur',
    'West Ham': 'West Ham United',
    'Wolves': 'Wolverhampton Wanderers',
}
# The helper takes two parallel lists, so split the mapping (dicts keep
# insertion order, so the pairs stay aligned).
prevnames = list(team_name_map.keys())
newnames = list(team_name_map.values())

# Apply the replacement to both team columns of the results table.
results_df = manipulate.df_rename_vals(
    results_df,
    ['HomeTeam', 'AwayTeam'],
    prevnames,
    newnames,
)


In [8]:
# Re-check after the rename: count and list the distinct HomeTeam values again
# to confirm the duplicate spellings have collapsed.
all_teams = results_df['HomeTeam'].unique()
print(len(all_teams), sorted(all_teams), sep='\n')

23
['Arsenal', 'Aston Villa', 'Bournemouth', 'Brentford', 'Brighton and Hove Albion', 'Burnley', 'Chelsea', 'Crystal Palace', 'Everton', 'Fulham', 'Leeds', 'Leicester', 'Liverpool', 'Luton Town', 'Manchester City', 'Manchester United', 'Newcastle United', 'Nottingham Forest', 'Sheffield United', 'Southampton', 'Tottenham Hotspur', 'West Ham United', 'Wolverhampton Wanderers']


In [19]:
# Scrape the fixture list, then keep only fixtures whose Datetime is earlier
# than seven days from the moment this cell runs.
# Rows with missing values are not dropped here (an earlier dropna is disabled).
scraped_fixtures = PLScraper.scrape_pl_fixtures()

sim_until_date = datetime.datetime.now() + datetime.timedelta(days=7)
before_cutoff = scraped_fixtures.Datetime < sim_until_date
fixture_df = scraped_fixtures[before_cutoff]

In [28]:
# Run the project-local simulation engine on the selected fixtures, using the
# combined results table as its input data.
predictions_this_weekend=SimulationEngine.predict_multiple_results(fixture_df,results_df)

# Each odds column is the reciprocal of the probability column at the same
# position in source_cols (odds = 1 / probability).
derived_cols=['OddsHome','OddsAway','OddsDraw']
source_cols=['ProbHomeWin','ProbAwayWin','ProbDraw']

# One explicit assignment per (odds, probability) pair. The previous loop
# ignored its index and re-assigned all three columns on every pass, doing the
# same work three times and relying on positional column matching.
for odds_col,prob_col in zip(derived_cols,source_cols):
    predictions_this_weekend[odds_col]=1/predictions_this_weekend[prob_col]

# Remove the FullSimulatedResults column before the table is displayed;
# rebinding instead of inplace=True keeps the step free of hidden mutation.
predictions_this_weekend=predictions_this_weekend.drop(columns=['FullSimulatedResults'])

In [29]:
# Display the prediction table: one row per fixture with the win/draw
# probabilities and the odds columns derived from them.
predictions_this_weekend

Unnamed: 0,Datetime,HomeTeam,AwayTeam,ProbHomeWin,ProbAwayWin,ProbDraw,OddsHome,OddsAway,OddsDraw
0,2023-09-23 15:00:00,Crystal Palace,Fulham,0.461,0.348,0.191,2.169197,2.873563,5.235602
1,2023-09-23 15:00:00,Luton Town,Wolverhampton Wanderers,0.376,0.406,0.218,2.659574,2.463054,4.587156
2,2023-09-23 15:00:00,Manchester City,Nottingham Forest,0.582,0.221,0.197,1.718213,4.524887,5.076142
3,2023-09-23 17:30:00,Brentford,Everton,0.559,0.223,0.218,1.788909,4.484305,4.587156
4,2023-09-23 20:00:00,Burnley,Manchester United,0.181,0.674,0.145,5.524862,1.48368,6.896552
5,2023-09-24 14:00:00,Arsenal,Tottenham Hotspur,0.433,0.386,0.181,2.309469,2.590674,5.524862
6,2023-09-24 14:00:00,Brighton and Hove Albion,Bournemouth,0.6,0.215,0.185,1.666667,4.651163,5.405405
7,2023-09-24 14:00:00,Chelsea,Aston Villa,0.373,0.421,0.206,2.680965,2.375297,4.854369
8,2023-09-24 14:00:00,Liverpool,West Ham United,0.601,0.23,0.169,1.663894,4.347826,5.91716
9,2023-09-24 16:30:00,Sheffield United,Newcastle United,0.265,0.543,0.192,3.773585,1.841621,5.208333
