# FPL Data Generator

This notebook scrapes played matches, then generates the required CSV files for the accompanying Tableau dashboards.

In [1]:
#Standard data manipulations
import pandas as pd
import numpy as np

from datetime import date
import sqlite3
import pickle

#Import custom functions
from PythonFunctions.apicallers import *
from PythonFunctions.sqlfunctions import *
from PythonFunctions.matchscrapers import *

#Suppress warnings from showing
#NOTE(review): this silences every warning category for the rest of the session -
#consider narrowing the filter so that genuine problems are not hidden
import warnings
warnings.filterwarnings('ignore')

#Connect to the SQL database
#(the path is relative, so the notebook must be run from the directory that contains Data/)
conn = sqlite3.connect('Data/20_21fpl.db')
#Instantiate a cursor; both `c` and `conn` are handed to the updater functions in later cells
c = conn.cursor()

Work out which matches we ought to have data for but which aren't yet present in the database.

In [2]:
#IDs of the matches that should have data but are not yet stored in the database
to_run = suggested_match_ids()
#Show the list so the scrape results in the next cell can be checked against it
print(to_run)

[58919, 58920, 58921, 58922, 58923, 58925]


Update data for these matches.

In [3]:
#Scrape each match in to_run and commit its shot, player and team details to the database;
#a match that fails to scrape is reported in the output and the run carries on with the next ID
CoreDataUpdater(to_run, c, conn)


FAILURE: Match 58919 failed to scrape - may not have been played yet


FAILURE: Match 58920 failed to scrape - may not have been played yet


FAILURE: Match 58921 failed to scrape - may not have been played yet

Stored match 0.0 (ID: 58922)
Match 58922 object successfully instantiated
Match 58922 shot detail dataframe successfully created
Match 58922 player detail dataframe successfully created
Match 58922 team detail dataframe successfully created
Match 58922 SQL entries committed

FAILURE: Match 58923 failed to scrape - may not have been played yet

Stored match 0.0 (ID: 58925)
Match 58925 object successfully instantiated
Match 58925 shot detail dataframe successfully created
Match 58925 player detail dataframe successfully created
Match 58925 team detail dataframe successfully created
Match 58925 SQL entries committed


Sweep up matches that the updater might have missed.

In [4]:
#Second pass over matches the updater did not store (it prints the IDs it attempts);
#presumably these are all matches still missing from the database - TODO confirm in matchscrapers
MatchSweeper(c, conn)

Attempting to scrape [58900, 58919, 58920, 58921, 58923]

FAILURE: Match 58900 failed to scrape - may not have been played yet


FAILURE: Match 58919 failed to scrape - may not have been played yet


FAILURE: Match 58920 failed to scrape - may not have been played yet

Stored match 0.0 (ID: 58921)
Match 58921 object successfully instantiated
Match 58921 shot detail dataframe successfully created
Match 58921 player detail dataframe successfully created
Match 58921 team detail dataframe successfully created
Match 58921 SQL entries committed
Stored match 0.0 (ID: 58923)
Match 58923 object successfully instantiated
Match 58923 shot detail dataframe successfully created
Match 58923 player detail dataframe successfully created
Match 58923 team detail dataframe successfully created
Match 58923 SQL entries committed


# CSV Creation

In [5]:
#Bring in the XG functions now - this import is deliberately placed after the scraping cells so that the new goal data is already known
from PythonFunctions.xg import *

In [8]:
#Player-level dataframe with the XG stats: join the player match details to the API stats
#on (MatchID, Player), then discard the TableIndex and Minutes columns.
#pd.merge defaults to an inner join, so only rows present in both frames are kept.
df_pm = (
    pd.merge(df_pm_generator(), api_stat_generator(), on=['MatchID', 'Player'])
    .drop(columns=['TableIndex', 'Minutes'])
)

#Team-level dataframe
df_tm = df_tm_generator()

In [9]:
#Preview the last 10 rows of the player dataframe as a sanity check before export
df_pm.tail(10)

Unnamed: 0,MatchID,Player,GameWeek,ForTeam,AgainstTeam,RelativeStrength,Goals,ShotsOnTarget,ShotsInBox,CloseShots,...,XGI,Date,BPS,CleanSheet,MinutesPlayed,NetTransfersIn,Points,Price,Saves,SelectedBy
0,58898,Mesut Ozil,1,Arsenal,Fulham,2,0,0,0,0,...,0.000000,2020-09-12,0,0.0,0.0,0,0,7.0,0.0,76656
1,58898,Sokratis,1,Arsenal,Fulham,2,0,0,0,0,...,0.000000,2020-09-12,0,0.0,0.0,0,0,5.0,0.0,12184
2,58898,David Luiz,1,Arsenal,Fulham,2,0,0,0,0,...,0.000000,2020-09-12,0,0.0,0.0,0,0,5.5,0.0,69564
3,58898,Pierre-Emerick Aubameyang,1,Arsenal,Fulham,2,1,2,2,2,...,0.680175,2020-09-12,19,1.0,90.0,0,7,12.0,0.0,2823465
4,58898,Cedric Soares,1,Arsenal,Fulham,2,0,0,0,0,...,0.000000,2020-09-12,0,0.0,0.0,0,0,5.0,0.0,35774
5,58898,Alexandre Lacazette,1,Arsenal,Fulham,2,1,1,1,1,...,0.925862,2020-09-12,29,1.0,86.0,0,7,8.5,0.0,196064
6,58898,Shkodran Mustafi,1,Arsenal,Fulham,2,0,0,0,0,...,0.000000,2020-09-12,0,0.0,0.0,0,0,5.0,0.0,899
7,58898,Bernd Leno,1,Arsenal,Fulham,2,0,0,0,0,...,0.000000,2020-09-12,29,1.0,90.0,0,7,5.0,2.0,400285
8,58898,Granit Xhaka,1,Arsenal,Fulham,2,0,0,1,0,...,0.485239,2020-09-12,11,1.0,77.0,0,3,5.5,0.0,42909
9,58898,Pablo Mari,1,Arsenal,Fulham,2,0,0,0,0,...,0.000000,2020-09-12,0,0.0,0.0,0,0,4.5,0.0,769


In [10]:
#Preview the last 10 rows of the team dataframe as a sanity check before export
df_tm.tail(10)

Unnamed: 0,MatchID,ForTeam,AgainstTeam,RelativeStrength,GameWeek,Home,Possession,Goals,ShotsOnTarget,TotalShots,...,ShotsLeft,ShotsRight,ShotsInBoxConceded,CloseShotsConceded,HeadersConceded,ShotsCentreConceded,ShotsLeftConceded,ShotsRightConceded,XG,XGC
0,58898,Fulham,Arsenal,-2,1,Home,45.6,0,2,5,...,0,1,4,5,1,1,2,2,0.348293,2.160662
1,58898,Arsenal,Fulham,2,1,Away,54.4,3,6,13,...,2,2,1,3,0,0,0,1,2.160662,0.348293
2,58897,Crystal Palace,Southampton,0,1,Home,29.4,1,3,5,...,0,0,6,5,3,5,1,1,1.341855,1.797248
3,58897,Southampton,Crystal Palace,0,1,Away,70.6,0,5,9,...,1,1,5,5,2,5,0,0,1.797248,1.341855
4,58899,Liverpool,Leeds United,3,1,Home,48.8,4,6,22,...,1,4,4,4,1,3,0,1,3.193136,1.174123
5,58899,Leeds United,Liverpool,-3,1,Away,51.2,3,3,6,...,0,1,14,4,2,9,1,4,1.174123,3.193136
6,58901,Tottenham Hotspur,Everton,1,1,Home,51.6,0,5,9,...,1,2,9,8,2,3,5,2,1.897535,1.651706
7,58901,Everton,Tottenham Hotspur,-1,1,Away,48.4,1,4,15,...,5,2,8,5,4,5,1,2,1.651706,1.897535
8,58902,West Bromwich Albion,Leicester City,-2,1,Home,35.8,0,1,7,...,1,1,8,6,1,2,4,2,0.694157,2.077885
9,58902,Leicester City,West Bromwich Albion,2,1,Away,64.2,3,7,13,...,4,2,3,2,0,1,1,1,2.077885,0.694157


In [14]:
#Write the fixtures, player and team CSV files to Outputs/
#NOTE(review): df_matches is not assigned in any visible cell - presumably it is exported by one
#of the star imports or was created in a since-deleted cell; confirm this runs on a fresh kernel
df_matches.drop('TableIndex', axis=1).to_csv('Outputs/fixtures.csv')
#to_csv is left at its default index=True, so the dataframe index is written as the first column
df_pm.to_csv('Outputs/player_data.csv')
df_tm.to_csv('Outputs/team_data.csv')