In [1]:
#Standard data manipulations
import pandas as pd
import numpy as np

#SQL
import sqlite3
#Including custom functions, stored elsewhere in the repo
from PythonFunctions.sqlfunctions import *

#Webscraping libraries
import requests
from splinter import Browser
from bs4 import BeautifulSoup
import time
from datetime import datetime

#For text manipulation
import unicodedata

import pickle

# API CALLER

In [2]:
#We have a list of team names as they appear in the site commentary...
#NOTE: the order of this list matters. team_basic_df_generator assigns it
#positionally as the 'CommentName' column, and player_basic_df_generator
#looks names up with teams[team - 1], where 'team' is the value from the API.
teams = ['Arsenal', 'Aston Villa', 'Brighton and Hove Albion',
         'Burnley', 'Chelsea', 'Crystal Palace', 'Everton', 'Fulham', 'Leicester City',
         'Leeds', 'Liverpool', 'Manchester City', 'Manchester United', 'Newcastle United',
         'Sheffield United', 'Southampton', 'Tottenham Hotspur', 'West Brom',
         'West Ham United', 'Wolverhampton Wanderers']

def team_basic_df_generator():
    '''
    Generates a basic table of teams from the FPL API.

    Calls the 'bootstrap-static' endpoint and builds one row per entry
    in the returned 'teams' list.

    Returns a dataframe indexed by 'TeamID' with the columns
    'Team', 'ShortName', 'Strength' and 'CommentName'.

    Raises a ValueError (from pandas) if the number of teams returned by
    the API differs from the length of the module-level `teams` list.
    '''

    #Make the API call
    url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
    results = requests.get(url).json()

    #Collect every team as a plain record first, and build the dataframe
    #once. Growing a dataframe with pd.concat inside the loop is quadratic
    #and starts from an empty frame, which forces object dtypes.
    records = [{'TeamID': team_json['id'],
                'Team': team_json['name'],
                'ShortName': team_json['short_name'],
                'Strength': team_json['strength']}
               for team_json in results['teams']]

    df_teams = pd.DataFrame(records,
                            columns = ['TeamID','Team','ShortName','Strength'])

    #We set the TeamID as the index for the dataframe
    df_teams = df_teams.set_index('TeamID', drop=True)

    #We also add the commentary names to the teams dataframe.
    #This is positional, so it relies on the API returning the teams
    #in the same order as the `teams` list.
    df_teams['CommentName'] = teams

    return df_teams
    

#We should create a function that will remove accents from player names
#e.g. "à" should become "a", and so forth. This will help with commentary
#recognition later...
def remove_accents(input_str):
    '''
    Returns input_str with accents stripped, e.g. "à" becomes "a".

    The string is decomposed with NFKD normalisation, and every
    combining character produced by that decomposition is dropped.
    '''
    decomposed = unicodedata.normalize('NFKD', input_str)
    base_characters = filter(lambda ch: unicodedata.combining(ch) == 0,
                             decomposed)
    return u"".join(base_characters)


#We need to amend some full names in cases where player goes by single name
#e.g. 'Sokratis'
def single_name_update(name):
    '''
    Collapses a full name that is made of a repeated name.

    - 'Sokratis Sokratis' becomes 'Sokratis' (first two words identical:
      the first word is dropped)
    - 'David Luiz David Luiz' becomes 'David Luiz' (four words, where the
      first pair equals the second pair)

    Any other name, including an empty string or a single word,
    is returned unchanged.
    '''
    split = name.split()

    #Fewer than two words: there is nothing that could be repeated.
    #(Without this guard split[1] raises an IndexError.)
    if len(split) < 2:
        return name

    #For players who have the same name twice e.g. 'Sokratis'
    if split[0] == split[1]:
        return ' '.join(split[1:])

    #For players who have two names twice e.g. 'David Luiz'
    if len(split) == 4 and split[:2] == split[2:]:
        return ' '.join(split[2:])

    return name


#Create a dictionary where the keys are the names built for the df_players
#dataframe, and the corresponding values are the names mentioned in match
#commentary.
#NOTE: this must sit at module level (column 0). When indented, Python
#parses it as part of the preceding function body, after its return
#statement, so the name is never defined for player_basic_df_generator.
player_name_key = {
    'Fabio Henrique Fabinho':'Fabinho',
    'Maximillian Aarons':'Max Aarons',
    'Alex Chamberlain':'Alex Oxlade-Chamberlain',
    'Johann Berg Gudmundsson':'Johann Gudmundsson',
    'Cedric':'Cedric Soares',
    'Andre Filipe Andre Gomes':'Andre Gomes',
    'Benjamin Chilwell':'Ben Chilwell',
    'Ricardo Domingos Pereira':'Ricardo Pereira',
    'Rui Pedro Patricio':'Rui Patricio',
    'Ruben Diogo Neves':'Ruben Neves',
    'Jonathan Jonny':'Jonny',
    'Joao Filipe Iria Moutinho':'Joao Moutinho',
    'Ruben Goncalo Vinagre':'Ruben Vinagre',
    'Mahmoud Ahmed Trezeguet':'Trezeguet',
    'Ezri Konsa':'Ezri Konsa Ngoyo',
    'Jose Ignacio Jota':'Jota',
    'Francisco Kiko Femenia':'Kiko Femenia',
    'Mathew Ryan':'Mat Ryan',
    'Solomon March':'Solly March',
    'Heurelho da Silva Gomes':'Heurelho Gomes',
    'Gabriel Fernando Jesus':'Gabriel Jesus',
    'Roberto':'Roberto Jimenez',
    'Javier Chicharito':'Chicharito',
    'Joao Pedro Cavaco Cancelo':'Joao Cancelo',
    'Bernardo Mota Bernardo Silva':'Bernardo Silva',
    'Kepa':'Kepa Arrizabalaga',
    'Jorge Luiz Jorginho':'Jorginho',
    'Robert Kenedy Kenedy':'Kenedy',
    'Joelinton Cassio Joelinton':'Joelinton',
    'Joseph Willock':'Joe Willock',
    'Daniel Ceballos':'Dani Ceballos',
    'Gabriel Teodoro Martinelli':'Gabriel Martinelli',
    'Fernando Fernandinho':'Fernandinho',
    'Sung-yueng Ki Sung-yueng':'Ki Sung-yueng',
    'Frederic Guilbert':'Frederic Guilbert',
    'Jose Angel Angelino':'Angelino',
    'Heung-Min Son':'Son Heung-Min',
    'Bamidele Alli':'Dele Alli',
    'Frederico Fred':'Fred',
    'Eric Garcia':'Eric Garcia',
    'Arnaut Danjuma':'Arnaut Danjuma Groeneveld',
    'Addji Keaninkin Marc-Israel Guehi':'Marc Guehi',
    'Jose Diogo Dalot':'Diogo Dalot',
    'Bernard Ashley-Seal':'Benny Ashley-Seal',
    'Max Kilman':'Maximilian Kilman',
    'Ayotomiwa Dele-Bashiru':'Tom Dele-Bashiru',
    'Rhu-endly Cuco Martina':'Cuco Martina',
    'Abd-Al-Ali Morakinyo Olaposi Koiki':'Ali Koiki',
    'Goncalo Bento Cardoso':'Goncalo Cardoso',
    'Edward Nketiah':'Eddie Nketiah',
    'Jose Reina':'Pepe Reina',
    'Bruno Andre Jordao':'Bruno Jordao',
    'Bruno Miguel Fernandes':'Bruno Fernandes'
}


def player_basic_df_generator():

    '''
    Creates a basic table of players from the FPL API.

    Calls the 'bootstrap-static' endpoint and builds one row per entry
    in the returned 'elements' list.

    Returns a dataframe indexed by 'PlayerID' with the columns
    'PlayerCode', 'FirstName', 'WebName', 'Team', 'Position' and
    'CommentName'. 'Team' and 'Position' are translated from the numeric
    API values to strings, and 'CommentName' is the name to look for in
    match commentary.
    '''
    #Make the API call
    url = 'https://fantasy.premierleague.com/api/bootstrap-static/'
    results = requests.get(url).json()

    #Bring in the 'elements' list from the json file
    #This is the part containing player information
    players_json = results['elements']

    #Collect every player as a plain record, then build the dataframe once.
    #Concatenating a one-row dataframe per player is quadratic and starts
    #from an empty frame, which forces object dtypes.
    records = [{'PlayerCode': player['code'],
                'PlayerID': player['id'],
                'FirstName': remove_accents(player['first_name']),
                'WebName': remove_accents(player['web_name']),
                'Team': player['team'],
                'Position': player['element_type']}
               for player in players_json]

    df_players = pd.DataFrame(records,
                              columns = ['PlayerCode','PlayerID','FirstName',
                                         'WebName','Team','Position'])

    #We set the playerID as the index for the dataframe
    df_players = df_players.set_index('PlayerID', drop=True)

    positions = ['GKP','DEF','MID','FWD']

    #The API values are used as 1-based positions in these lists
    df_players['Team'] = df_players['Team'].map(lambda x: teams[x-1])
    df_players['Position'] = df_players['Position'].map(lambda x: positions[x-1])
    df_players['CommentName'] = df_players['FirstName'].map(str) + ' ' + df_players['WebName'].map(str)

    #Correct the comment names where players go by a single name
    df_players['CommentName'] = df_players['CommentName'].map(single_name_update)

    #Apply the manual corrections one key at a time, in dictionary order
    for api_name, comment_name in player_name_key.items():
        df_players.loc[df_players['CommentName']==api_name,'CommentName'] = comment_name

    return df_players


def PlayerAPI(player):
    '''
    Requests the FPL 'element-summary' endpoint for one player.

    `player` is substituted into the endpoint URL as-is. The decoded
    JSON body of the response is returned.
    '''
    endpoint = f'https://fantasy.premierleague.com/api/element-summary/{player}/'
    response = requests.get(endpoint)
    return response.json()


def PlayerHistory(player):
    '''
    Returns the 'history' entry of the player's element-summary JSON,
    as fetched by PlayerAPI(player).
    '''
    return PlayerAPI(player)['history']


def PlayerAPIStats(player):
    '''
    Builds a dataframe with one row per entry in the player's history
    (see PlayerHistory), using these columns:
    - PlayerID (int(player), repeated on every row)
    - GameWeek ('round'; rounds above 38 are shifted down by 9)
    - Date (first ten characters of 'kickoff_time')
    - Points ('total_points')
    - Price ('value' divided by 10)
    - MinutesPlayed ('minutes')
    - CleanSheet ('clean_sheets')
    - Saves ('saves')
    - BPS ('bps')
    - SelectedBy ('selected')
    - NetTransfersIn ('transfers_balance')
    '''
    games = PlayerHistory(player)

    #NOTE(review): the -9 shift presumably maps extra rounds of an
    #extended season back onto gameweeks 1-38 - confirm.
    stats = {
        'PlayerID': [int(player) for _ in games],
        'GameWeek': [game['round']-9 if game['round'] > 38 else game['round']
                     for game in games],
        'Date': [game['kickoff_time'][:10] for game in games],
        'Points': [game['total_points'] for game in games],
        'Price': [game['value']/10 for game in games],
        'MinutesPlayed': [game['minutes'] for game in games],
        'CleanSheet': [game['clean_sheets'] for game in games],
        'Saves': [game['saves'] for game in games],
        'BPS': [game['bps'] for game in games],
        'SelectedBy': [game['selected'] for game in games],
        'NetTransfersIn': [game['transfers_balance'] for game in games],
    }

    return pd.DataFrame(stats)


def PlayersAPIStats(players, gameweeks=None):
    '''
    Takes a list of playerIDs and (optionally) a list of gameweeks, and
    outputs one dataframe of the players' statistics derived from the
    FPL API for those weeks (see PlayerAPIStats for the columns).

    players: list of player ids; each one is fetched exactly once.
    gameweeks: gameweeks to keep. If None, gameweeks 1 up to the latest
        gameweek in the FIRST player's history are kept; if that player
        has no history, no rows are filtered out.

    Returns a dataframe with a fresh 0..n-1 index. An empty `players`
    list gives an empty dataframe with the basic columns.
    '''
    cols = ['PlayerID', 'GameWeek', 'Date', 'Points', 'Price',
            'BPS', 'SelectedBy', 'NetTransfersIn']

    def numeric_or_unchanged(column):
        #Same result as the deprecated pd.to_numeric(errors='ignore'):
        #columns that cannot be parsed (e.g. 'Date') are left as they are
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    #Fetch every player once (this is one API call per player)
    frames = [PlayerAPIStats(i) for i in players]

    if not frames:
        return pd.DataFrame(columns=cols)

    #If no gameweeks specified, then use all gameweeks
    #found in the first player's history
    if gameweeks is None:
        first_weeks = frames[0]['GameWeek']
        if len(first_weeks) > 0:
            gameweeks = list(range(1, first_weeks.max()+1))

    #Combine in a single concat; empty histories are left out so they
    #cannot influence the resulting dtypes
    populated = [frame for frame in frames if len(frame) > 0] or frames[:1]
    df_temp = pd.concat(populated)

    #Keep the basic columns first, followed by any remaining ones
    ordered = cols + [c for c in df_temp.columns if c not in cols]
    df_temp = df_temp.reindex(columns=ordered)

    #Remove rows we don't want
    if gameweeks is not None:
        df_temp = df_temp.loc[df_temp['GameWeek'].isin(gameweeks)]

    #Make everything numeric where possible
    df_temp = df_temp.apply(numeric_or_unchanged)

    #Reset the index
    df_temp = df_temp.reset_index(drop=True)

    return df_temp

# SCRAPER

In [3]:
def get_match_commentary_html(match, threshold = 'Lineups are announced and players are warming up.',
                              max_scrolls = None):
    
    '''
    Takes a 5-digit integer, referring to the code of the match on premier league website.
    
    Returns the full HTML (as a BeautifulSoup object), having scrolled
    until the last 'innerContent' div on the page has `threshold` as its text.
    
    threshold: text of the commentary entry that marks the end of the page.
    max_scrolls: optional cap on the number of scroll attempts. The default
        (None) keeps scrolling for as long as it takes; if the threshold
        text never appears that is forever, so pass a number to get a
        RuntimeError instead.
    
    Uses the module-level splinter `browser`.
    '''
    
    #Create the url for the match
    match_url = f'https://www.premierleague.com/match/{match}'

    #Send the splinter browser to the URL
    browser.visit(match_url)
    
    #Text of the last commentary entry currently on the page. This lets
    #us identify when the scrolling has reached the bottom of the page
    first_content = ''
    soup = None
    scrolls = 0

    #So long as the last entry isn't the threshold text, keep scrolling down
    while first_content != threshold:
        if max_scrolls is not None and scrolls >= max_scrolls:
            raise RuntimeError(
                f'Commentary for match {match} did not reach the threshold '
                f'text after {max_scrolls} scrolls')
        
        browser.execute_script("window.scrollTo(10000, document.body.scrollHeight);")
        soup = BeautifulSoup(browser.html, 'html.parser')
        innerContent = soup.findAll('div',class_="innerContent")
        #This condition just makes sure we don't get an error
        #while no commentary has loaded yet
        if len(innerContent) > 0:
            first_content = innerContent[-1].get_text()
        browser.execute_script("window.scrollTo(100, -document.body.scrollHeight);")
        scrolls += 1
    
    #The loop body never runs when threshold is an empty string,
    #so parse the page as it stands
    if soup is None:
        soup = BeautifulSoup(browser.html, 'html.parser')
    
    #Return the full HTML soup for analysis
    return soup


def get_commentary(match, threshold = 'Lineups are announced and players are warming up.'):
    
    '''
    Takes a 5-digit integer, referring to the code of the match on premier league website.
    
    Returns a list of strings of the form '<minute>. <event text>', one per
    'innerContent' div on the commentary page, with accents removed.
    The minute comes from the 'cardMeta' div at the same position, with
    spaces and apostrophes stripped out.
    '''
    soup = get_match_commentary_html(match, threshold = threshold)
    
    #Text of every commentary entry, in page order
    event_texts = [node.get_text()
                   for node in soup.findAll('div',class_='innerContent')]
    
    #Matching minute labels, e.g. "45 +2'" becomes "45+2"
    minute_texts = []
    for node in soup.findAll('div',class_='cardMeta'):
        minute_texts.append(node.get_text().replace(' ','').replace("'",""))
    
    #Pair each event with the minute label at the same position
    commentary = []
    for position, event_text in enumerate(event_texts):
        commentary.append(remove_accents(f"{minute_texts[position]}. {event_text}"))
    
    return commentary



def get_match_lineup_html(match):
    
    '''
    Visits the page of the given match with the module-level `browser`,
    clicks the 'matchCentreSquadLabelContainer' list item (the line ups
    tab), and returns the resulting page HTML as a BeautifulSoup object.
    '''
    
    lineup_url = f'https://www.premierleague.com/match/{match}'
    browser.visit(lineup_url)
    
    #The line ups are only present in the HTML once the tab has been clicked
    lineup_tab = browser.find_by_tag('li[class="matchCentreSquadLabelContainer"]')
    lineup_tab.click()
    
    page_source = browser.html
    return BeautifulSoup(page_source, 'html.parser')



def lineup_clean(player):
    '''
    Cuts a line up entry down to the player's name, by dropping everything
    from the first occurrence of each event label onwards. The labels are
    applied one after another, in the order listed.
    '''
    event_labels = ('Yellow Card', 'Red Card', 'Substitution',
                    'Goal', 'Pen.', 'Own ')
    
    cleaned = player
    for label in event_labels:
        #Keep only the text in front of the label (all of it, if absent)
        cleaned = cleaned.partition(label)[0]
        
    return cleaned



def get_match_players(match):
  
    '''
    Returns a dictionary of players who were either starters or substitutes
    for the home or away team in the match.
    
    Keys: 'HomeTeam', 'HomeStarts', 'HomeSubs', 'AwayTeam', 'AwayStarts'
    and 'AwaySubs'. The team entries are the long team names; the other
    entries are lists of player names with accents removed.
    '''
    soup = get_match_lineup_html(match)
    
    #get the team names (home first, then away); query the page only once
    long_names = soup.findAll('span',class_='long')
    home_long = long_names[0].get_text()
    away_long = long_names[1].get_text()
   
    #get the player names, removing accents
    #NOTE(review): assumes the first 20 'name' spans are not line up
    #entries - confirm against the page layout
    players = [remove_accents(lineup_clean(i.get_text()))
               for i in soup.findAll('span',class_='name')[20:]]
    
    #sort these into sub-groups
    #NOTE(review): assumes 11 starters followed by 7 substitutes for the
    #home side, then the same for the away side - confirm
    home_starts = players[:11]
    home_subs = players[11:18]
    away_starts = players[18:29]
    away_subs = players[29:]
    
    return {'HomeTeam': home_long,
            'HomeStarts': home_starts,
            'HomeSubs': home_subs,
            'AwayTeam': away_long,
            'AwayStarts': away_starts,
            'AwaySubs': away_subs}



def get_match_stats_html(match):
    
    '''
    Visits the page of the given match with the module-level `browser`,
    clicks the list item whose data-tab-index is "2", and returns the
    resulting page HTML as a BeautifulSoup object.
    
    NOTE(review): tab index 2 is presumably the 'Stats' tab, going by how
    get_match_stats reads the result - confirm against the page.
    '''
    
    stats_url = f'https://www.premierleague.com/match/{match}'
    browser.visit(stats_url)
    
    #The tab has to be clicked for its content to be loaded into the HTML
    stats_tab = browser.find_by_tag('li[data-tab-index="2"]')
    stats_tab.click()
    
    page_source = browser.html
    return BeautifulSoup(page_source, 'html.parser')



def get_match_stats(match):
    
    '''
    Takes a match ID and returns a dictionary with three entries:
    - 'GameWeek': text taken from the first 'current' div (its
      second-to-last line, minus the first three characters)
    - 'HomeStats' / 'AwayStats': dictionaries of stat name to value, read
      from the table cells in groups of three (home value, stat name,
      away value), plus a 'Goals' entry taken from the full time score.
    
    All values are left as the strings found on the page.
    '''
    soup = get_match_stats_html(match)
    
    #Full time score, e.g. '2-1', split into home and away goals
    score_text = soup.findAll('div',class_='score fullTime')[0].get_text()
    goals = score_text.split('-')
    
    #Flat list of every table cell on the page
    cells = [cell.get_text() for cell in soup.findAll('td')]
    complete_rows = int(len(cells)/3)

    home_stats = {}
    away_stats = {}
    for row in range(complete_rows):
        home_value, stat_name, away_value = cells[3*row : 3*row + 3]
        home_stats[stat_name] = home_value
        away_stats[stat_name] = away_value
    
    home_stats['Goals'] = goals[0]
    away_stats['Goals'] = goals[1]
    
    current_lines = soup.findAll('div',class_='current')[0].get_text().split('\n')
    gameweek = current_lines[-2][3:]

    return {'GameWeek':gameweek,
            'HomeStats':home_stats,
            'AwayStats':away_stats}



def get_full_match_info(match):
    
    '''
    Gathers everything scraped for a given match into one dictionary:
    - 'MatchID': the match code passed in
    - 'GameWeek': the gameweek from get_match_stats, as an integer
    - 'Players': the line ups from get_match_players
    - 'Events': the commentary from get_commentary
    - 'Stats': the 'HomeStats' and 'AwayStats' from get_match_stats
    '''
    #The stats page gives us both the gameweek and the team stats
    scraped_stats = get_match_stats(match)
    
    team_stats = {side: scraped_stats[side]
                  for side in ('HomeStats', 'AwayStats')}
    
    #Convert the gameweek before the remaining pages are visited
    gameweek_number = int(scraped_stats['GameWeek'])
    
    lineups = get_match_players(match)
    commentary = get_commentary(match)
    
    return {'MatchID':match,
            'GameWeek':gameweek_number,
            'Players':lineups,
            'Events':commentary,
            'Stats':team_stats}

# FIXTURES

In [4]:
#Firstly create a browser for splinter to go through
# executable_path = {"executable_path": "/Users/Callum/Downloads/geckodriver"}
# browser = Browser("firefox", **executable_path, headless=False)

def fixture_detail(match):
    '''
    Takes a match code and returns a single row dataframe showing
    the match id, the gameweek, the date, and the home and away teams
    (columns 'MatchID', 'GameWeek', 'Date', 'HomeTeam', 'AwayTeam').
    
    'Date' is formatted as YYYY-MM-DD, or 'TBC' when the page has no
    'matchDate' div or its text is not in the expected format.
    
    Uses the module-level splinter `browser`, and waits two seconds
    for the page to load.
    '''
    
    #Send the splinter instance of the URL
    match_url = f'https://www.premierleague.com/match/{match}'
    browser.visit(match_url)
    #Let it load!
    time.sleep(2)

    #Get the HTML
    html = BeautifulSoup(browser.html, 'html.parser')
    
    #Get the date of the match
    try:
        date_html = html.findAll('div',class_='matchDate')[0].get_text()
        date = datetime.strptime(date_html, '%a %d %b %Y').strftime("%Y-%m-%d")
    except (IndexError, ValueError):
        #IndexError: no 'matchDate' div on the page
        #ValueError: the text is not a date in the expected format
        date = 'TBC'
    
    #Get the gameweek
    gameweek = html.findAll('div',class_='long')[0].get_text().replace('Matchweek ','')
    
    #Get the home and away teams
    home = html.findAll('div',class_='team home')[0].findAll(
        'span',class_='long')[0].get_text()
    away = html.findAll('div',class_='team away')[0].findAll(
        'span',class_='long')[0].get_text()
    
    columns = ['MatchID','GameWeek','Date','HomeTeam','AwayTeam']
    
    return pd.DataFrame({'MatchID':[match],
            'GameWeek':[gameweek],
            'Date':[date],
            'HomeTeam':[home],
            'AwayTeam':[away]}, columns = columns)


def fixture_details(matches):
    '''
    Takes a list of match codes and returns a dataframe showing
    the match id, the gameweek, the date, and the home and away teams
    (one row per match, see fixture_detail).
    
    This is best-effort: a match whose page cannot be fetched or parsed
    is left out of the result rather than stopping the whole run.
    '''
    
    columns = ['MatchID','GameWeek','Date','HomeTeam','AwayTeam']
    
    #Collect the single-row dataframes, and combine them once at the end
    rows = []
    for i in matches:
        try:
            rows.append(fixture_detail(i))
        except Exception:
            #Skip this match. Exception (rather than a bare except) is
            #caught so that interrupting the kernel still stops the loop
            continue
    
    if not rows:
        return pd.DataFrame(columns=columns)
        
    return pd.concat(rows).reset_index(drop=True)

In [5]:
# #Populate a fixtures dataframe
# df_fixtures = fixture_details(range(58896, 59276))

# #Create a separate dataframe, which has a row for each individual team
# df_matches1 = df_fixtures[['MatchID','GameWeek','Date','HomeTeam','AwayTeam']]
# df_matches2 = df_fixtures[['MatchID','GameWeek','Date','AwayTeam','HomeTeam']]

# df_matches1['Home'] = ['Home' for i in range(len(df_matches1))]
# df_matches1.columns = ['MatchID','GameWeek','Date','Team','Opposition','Home']

# df_matches2['Home'] = ['Away' for i in range(len(df_matches2))]
# df_matches2.columns = ['MatchID','GameWeek','Date','Team','Opposition','Home']

# df_matches = pd.concat([df_matches1, df_matches2])
# df_matches.sort_values(['GameWeek','Date','MatchID','Home'], inplace=True)

# df_matches.reset_index(drop=True, inplace=True)


# #Pickle both of these
# with open('Data/df_fixtures.pickle', 'wb') as f:
#     pickle.dump(df_fixtures, f, pickle.HIGHEST_PROTOCOL)
    
# with open('Data/df_matches.pickle', 'wb') as f:
#     pickle.dump(df_matches, f, pickle.HIGHEST_PROTOCOL)    

# MATCH CLASS

In [6]:
#Note - we have some players who are referred to differently in the
#line up, vs the commentary. We need to correct this in the match
#line ups now to save us some bother later on...

def ProblemChildReplacer(text):
    '''
    Takes a string (text) and swaps every occurrence of a known
    problem name (as written in the line ups) for the spelling
    used in the commentary. Other text is returned untouched.
    '''
    #line up spelling -> commentary spelling
    corrections = (('Joseph Willock', 'Joe Willock'),
                   ('Ki Sung-Yueng', 'Ki Sung-yueng'))
    
    corrected = text
    for lineup_name, comment_name in corrections:
        corrected = corrected.replace(lineup_name, comment_name)
            
    return corrected
    

class Match(object):
    '''
    Wraps one match dictionary (as built by get_full_match_info).
    
    Holds the match id and gameweek, the team names, the line ups
    (with problem names corrected by ProblemChildReplacer), the team
    stats, the full list of commentary events, and sub-lists of those
    events grouped by the phrases they contain.
    '''
    
    def __init__(self, match):
        #Record of match ID
        self.match_id = int(match['MatchID'])
        self.game_week = int(match['GameWeek'])
        
        #Team names for a given match
        self.home_team = match['Players']['HomeTeam']
        self.away_team = match['Players']['AwayTeam']
        self.teams = [self.home_team, self.away_team]
        
        #Player lists for a given match
        self.home_starts = [ProblemChildReplacer(i)
                            for i in match['Players']['HomeStarts']]
        self.home_subs = [ProblemChildReplacer(i)
                            for i in match['Players']['HomeSubs']]
        self.away_starts = [ProblemChildReplacer(i)
                            for i in match['Players']['AwayStarts']]
        self.away_subs = [ProblemChildReplacer(i)
                            for i in match['Players']['AwaySubs']]
        
        self.starts = self.home_starts + self.away_starts
        self.subs = self.home_subs + self.away_subs
        
        self.home_players = self.home_starts + self.home_subs
        self.away_players = self.away_starts + self.away_subs
                
        self.players = self.home_players + self.away_players
        
        
        #Stats for a given match
        self.stats = match['Stats']
        self.home_stats = match['Stats']['HomeStats']
        self.away_stats = match['Stats']['AwayStats']    
        
        #Events for a given match
        self.events = [ProblemChildReplacer(i) for i in match['Events']]

        #Shots
        self.goals = self._events_with('Goal!Goal!')
        self.shots_missed = self._events_with('Attempt missed')
        self.shots_blocked = self._events_with('Attempt blocked')
        self.shots_saved = self._events_with('Attempt saved', 'Penalty saved')
        self.woodwork = self._events_with(' hits the ')
        self.own_goals = self._events_with('Own Goal')
        
        #Penalties
        self.pens_awarded = self._events_with('foul in the penalty area')
        self.pens_saved = self._events_with('Penalty saved')
        
        #Assists
        self.assists = self._events_with('Assisted by ')
        
        #Fouls
        self.fouls = self._events_with('Card!', '. Foul by')
                
        #Corners
        self.corners = self._events_with('. Corner,')
        
        #Substitutions
        self.substitutions = self._events_with('SubstitutionSubstitution')
    
    
    def _events_with(self, *phrases):
        '''
        Returns the events (in their original order) whose text
        contains at least one of the given phrases.
        '''
        return [event for event in self.events
                if any(phrase in event for phrase in phrases)]
    
    
    def player_minutes(self):

        '''
        Returns a dataframe of minutes played by
        each player in the match (columns 'Player',
        'GameWeek' and 'Minutes'; starters first, then subs).
        
        Minutes are worked out as follows, where stoppage time
        substitutions ('90+2') count as the minute before the '+':
        - starter, never substituted: 90
        - substitute, never used: 0
        - player going off in minute t: t
        - player coming on in minute t: 91 - t
        - player coming on in minute t and going off again in
          minute u: u - t
        The result does not depend on the order of the events.
        '''

        #Create an initial dataframe, assuming 90 minutes for starts,
        #and 0 minutes for subs
        starts = pd.DataFrame({'Player':self.starts,
                               'GameWeek':[self.game_week] * len(self.starts),
                               'Minutes':[90] * len(self.starts)})
        subs = pd.DataFrame({'Player':self.subs,
                             'GameWeek':[self.game_week] * len(self.subs),
                             'Minutes':[0] * len(self.subs)})
        df = pd.concat([starts,subs])

        #First pass: record the minute each player came on / went off.
        #Doing this before touching the dataframe is what makes the result
        #independent of whether the events are listed oldest or newest first
        came_on = {}
        went_off = {}
        for event in self.substitutions:

            #Split the event string by 'full stop'
            event_split = event.split('. ')

            #Account for when substitutions happen in stoppage time,
            #then convert the minute to an integer
            minute = int(event_split[0].split('+')[0])

            #The last sentence reads '<on> replaces <off>.'
            player_split = event_split[-1].split(' replaces ')
            came_on[player_split[0]] = minute
            #Drop the trailing full stop from the name
            went_off[player_split[1][:-1]] = minute

        #Second pass: update the dataframe with the new, known minutes
        for player, minute in went_off.items():
            df.loc[df['Player'] == player, 'Minutes'] = minute

        for player, minute in came_on.items():
            if player in went_off:
                #Came on and was later taken off again
                played = went_off[player] - minute
            else:
                played = 91 - minute
            df.loc[df['Player'] == player, 'Minutes'] = played

        return df
    
    
    
def get_matches(matches, verbose=False):
    
    '''
    Takes a list of match codes, then gets their data.
    Returns a list of dictionaries encompassing the match data
    (one get_full_match_info result per code, in the same order).
    
    verbose: when true, prints a progress line after each match,
        e.g. 'Stored match 3/10 (ID: 58898)'.
    '''
    
    #Initiate an empty list to store them in
    match_list = []
    
    #Iterate through the match codes...
    for n, i in enumerate(matches, start=1):
        #Get the match information and store it in the list
        match_list.append(get_full_match_info(i))
                
        if verbose:
            #Show the position as 'n/total' (not the result of dividing them)
            print(f'Stored match {n}/{len(matches)} (ID: {i})')
    
    return match_list


def match_objects(matches):
    
    '''
    Instantiates a Match object from each match dictionary in the
    given list, and returns the objects as a list in the same order.
    '''
    
    built = []
    for match_dict in matches:
        built.append(Match(match_dict))
    
    return built

# Event Object

In [7]:
class Event(object):
    '''
    Base class for one line of match commentary.
    
    event_text: the commentary string, passed through str()
    time: everything in the string before its first full stop
          (the whole string if it has no full stop)
    '''
    
    def __init__(self, event_string):
        self.event_text = str(event_string)
        #partition keeps only the text in front of the first '.'
        timestamp, _, _ = event_string.partition('.')
        self.time = timestamp
        

#Declare possible options for shots
#NOTE: the event classes keep the FIRST entry of a list that is found in
#the event text. An entry that contains another entry (e.g. 'a difficult
#angle and long range' contains 'a difficult angle') must therefore be
#listed before the shorter one, or it can never be selected.
shot_types = ['right footed shot', 'left footed shot', 'header', 'an attempt']

shot_positions = ['a difficult angle and long range', 'a difficult angle',
                  'long range', 'outside the box', 'the box',
                  'the six yard box', 'very close range', 'penalty', 'Penalty']

shot_sides = ['the centre', 'the left', 'the right']

miss_positions = ['high and wide to the left', 'high and wide to the right',
                  'to the left', 'to the right', 'the top left corner',
                  'the top right corner', 'just a bit too high', 'too high']

miss_situations = ['following a set piece situation', 'from a direct free kick',
                  'following a corner']

save_positions = ['top left corner', 'top centre of the goal', 'top right corner',
                  'bottom left corner', 'centre of the goal', 'bottom right corner']

woodworks = ['bar','hits the left post','hits the right post']

goal_positions = ['top left corner', 'high centre of the goal', 'top right corner',
                   'bottom left corner', 'centre of the goal', 'bottom right corner']

goal_situations = ['converts the penalty','from a free kick']


class Shot(Event):
    '''
    A commentary event describing a shot.
    
    shot_type: first entry of shot_types found in the text
    shot_position: 'long range' when the text mentions 'more than' or
        'from a free kick'; otherwise the first entry of shot_positions
        found in the text, in lower case
    shot_side: first entry of shot_sides found in the relevant part of
        the text, or 'N/A' when no side applies or none is found
    '''
    
    @staticmethod
    def _first_present(options, text):
        #First option (in list order) contained in the text.
        #Raises an IndexError when none of them is.
        return [option for option in options if option in text][0]
    
    def __init__(self, event_string):
        super().__init__(event_string)
        self.shot_type = self._first_present(shot_types, event_string)
        
        long_range_hint = (('more than' in event_string) or
                           ('from a free kick' in event_string))
        if long_range_hint:
            self.shot_position = 'long range'
        else:
            self.shot_position = self._first_present(
                shot_positions, event_string).lower()
        
        #Start from 'N/A', and only replace it if a side can be read
        self.shot_side = 'N/A'
        
        if 'very close range' not in event_string:
            #Only look for the side in the first part of the text, so that
            #we don't confuse 'left side'/'right side' with the
            #miss/save positions, etc.
            if 'difficult angle' in event_string:
                if 'is saved' in event_string:
                    separator = 'is saved'
                else:
                    separator = 'to the'
            else:
                separator = 'box'
            side_text = event_string.split(separator)[0]
            
            no_side_phrases = ('Penalty missed!', 'converts the penalty',
                               'outside the box', 'more than 35 yards')
            side_applies = not any(phrase in event_string
                                   for phrase in no_side_phrases)
            
            if side_applies:
                sides_found = [side for side in shot_sides if side in side_text]
                if sides_found:
                    self.shot_side = sides_found[0]
    
    def player(self, players):
        #The shooter is named before the 'Assisted' part of the text
        before_assist = self.event_text.split('Assisted')[0]
        return self._first_present(players, before_assist)
    
    def assisted_by(self, players):
        parts = self.event_text.split('Assisted')
        if len(parts) <= 1:
            return 'N/A'
        return self._first_present(players, parts[1])
    
    def for_team(self, teams):
        return self._first_present(teams, self.event_text)

    def against_team(self, teams):
        #The first team that is not the one named in the text
        named_team = self._first_present(teams, self.event_text)
        return [team for team in teams if team not in [named_team]][0]
    
class ShotMissed(Shot):
    '''
    A shot that missed the target.
    
    miss_position: first entry of miss_positions found in the text
    close: 'close' if that word appears in the text, else 'not close'
    miss_situation: first entry of miss_situations found in the text,
        or 'N/A' if there is none
    '''
    def __init__(self, event_string):
        super().__init__(event_string)
        
        positions_found = [pos for pos in miss_positions if pos in event_string]
        self.miss_position = positions_found[0]
        
        self.close = 'close' if 'close' in event_string else 'not close'
        
        situations_found = [sit for sit in miss_situations if sit in event_string]
        self.miss_situation = situations_found[0] if situations_found else 'N/A'

class ShotSaved(Shot):
    '''
    A shot that was saved.
    
    save_position: first entry of save_positions found in the text
    '''
    def __init__(self, event_string):
        super().__init__(event_string)
        positions_found = list(filter(lambda pos: pos in event_string,
                                      save_positions))
        self.save_position = positions_found[0]

class ShotBlocked(Shot):
    '''
    A shot that was blocked. It has no attributes beyond those of Shot,
    so the constructor is simply inherited.
    '''
        
class Woodwork(Shot):
    '''
    A shot that hit the woodwork.
    
    woodwork_type: first entry of woodworks found in the text
    '''
    def __init__(self, event_string):
        super().__init__(event_string)
        types_found = list(filter(lambda kind: kind in event_string, woodworks))
        self.woodwork_type = types_found[0]
                
class Goal(Shot):
    '''
    A shot that resulted in a goal.
    
    goal_position: first entry of goal_positions found in the text
    goal_situation: first entry of goal_situations found in the text,
        or 'non-dead ball' if there is none
    '''
    def __init__(self, event_string):
        super().__init__(event_string)
        
        positions_found = [pos for pos in goal_positions if pos in event_string]
        self.goal_position = positions_found[0]

        situations_found = [sit for sit in goal_situations if sit in event_string]
        self.goal_situation = (situations_found[0] if situations_found
                               else 'non-dead ball')
            
    #Goal events are structured differently, so the teams are read
    #from the third full stop separated part of the text only
    def for_team(self, teams):
        team_part = self.event_text.split('.')[2]
        named = [team for team in teams if team in team_part]
        return named[0]
    
    def against_team(self, teams):
        team_part = self.event_text.split('.')[2]
        not_named = [team for team in teams if team not in team_part]
        return not_named[0]

class OwnGoal(Event):
    '''
    A commentary event describing an own goal.
    '''
    
    def player(self, players):
        #First of the given players whose name appears in the event text
        named = list(filter(lambda name: name in self.event_text, players))
        return named[0]
    
        
#Declare possible outcomes and types for assists.
#The Assist class keeps the first entry of each list
#that is found in the event text.
assist_outcomes = ['Attempt saved', 'Attempt blocked', 'Attempt missed', 'Goal!',
                   'hits the bar', 'hits the left post', 'hits the right post']

assist_types = ['following a set piece situation',
                'following a corner', 'following a fast break']


class Assist(Event):
    '''
    A commentary event that credits an assist.
    
    outcome: first entry of assist_outcomes found in the text
    assist_type: first entry of assist_types found in the text,
        or 'N/A' if there is none
    '''
    
    def __init__(self, event_string):
        super().__init__(event_string)
        self.outcome = list(filter(lambda x: x in event_string, assist_outcomes))[0]
        
        #Look the types up in assist_types (the list defined above);
        #there is no name 'assist_type' to iterate over
        types_found = [i for i in assist_types if i in event_string]
        if len(types_found) > 0:
            self.assist_type = types_found[0]
        else:
            self.assist_type = 'N/A'
    
    def player(self, players):
        #The assisting player is named after 'Assisted' in the text
        text = self.event_text.split('Assisted')[1]        
        return [i for i in players if i in text][0]


#Card and offence phrases looked for in foul commentary. The Foul class
#keeps the first entry of each list that is found in the event text.
cards = ['Second yellow','Yellow','Red']
foul_offences = ['handball','bad foul']
    
class Foul(Event):
    '''
    A foul event.

    In addition to whatever Event.__init__ sets up, stores:
    - card: the first entry of cards found in the event string,
      or 'none' if no card is mentioned
    - offence: the first entry of foul_offences found in the event
      string, or 'foul' if none is mentioned
    '''

    def __init__(self, event_string):
        super().__init__(event_string)

        card_hits = [card for card in cards if card in event_string]
        self.card = card_hits[0] if card_hits else 'none'

        offence_hits = [offence for offence in foul_offences if offence in event_string]
        self.offence = offence_hits[0] if offence_hits else 'foul'

    def player(self, players):
        '''
        Returns the first name in players that appears in the event text
        (IndexError if none of them does)
        '''
        named = list(filter(lambda name: name in self.event_text, players))
        return named[0]

    
class Penalty(Event):
    '''
    A penalty event. Adds no attributes of its own: construction is
    passed straight through to Event.__init__.
    '''
    
    def __init__(self, event_string):
        super().__init__(event_string)

# Shot Tables

In [8]:
def strength(team):
    '''
    Looks up the strength of a team, as defined by FPL, from df_teams.
    The team is identified by its commentary name (the CommentName column),
    and exactly one row is expected to match.
    '''

    is_team = df_teams['CommentName'] == team
    rating = df_teams.loc[is_team, 'Strength'].item()

    return int(rating)



def match_shots_missed(self):
    
    '''
    Takes in a Match object, and reuturns a dataframe of missed shots
    '''
    
    #extract required information from the match object
    match_id = self.match_id
    game_week = self.game_week
    match_players = self.players
    match_teams = self.teams    
    #create event objects out of the strings in the shots_missed attribute
    shots_missed = [ShotMissed(i) for i in self.shots_missed]
    
    #declare all the columns 
    cols = ['MatchID','GameWeek','Player','ForTeam','AgainstTeam','Time','ShotOutcome',
            'AssistedBy','ShotType','ShotPosition','ShotSide','MissPosition',
            'Close','MissSituation']
    
    #create an empty dataframe with the desired columns
    df_temp = pd.DataFrame(columns=cols)
    
    #go through and extract the different statistics for each event
    match = [match_id for i in shots_missed]
    gameweek = [game_week for i in shots_missed]
    players = [i.player(match_players) for i in shots_missed]
    forteams = [i.for_team(match_teams) for i in shots_missed]
    againstteams = [i.against_team(match_teams) for i in shots_missed]
    time = [i.time for i in shots_missed]
    outcome = ['Miss' for i in shots_missed]
    assisted = [i.assisted_by(match_players) for i in shots_missed]
    shottype = [i.shot_type for i in shots_missed]
    position = [i.shot_position for i in shots_missed]
    side = [i.shot_side for i in shots_missed]
    misspos = [i.miss_position for i in shots_missed]
    close = [i.close for i in shots_missed]
    misssit = [i.miss_situation for i in shots_missed]

    new_df = pd.DataFrame({'MatchID':match,
                           'GameWeek':gameweek,
                           'Player':players,
                           'ForTeam':forteams,
                           'AgainstTeam':againstteams,
                           'Time':time,
                           'ShotOutcome':outcome,
                           'AssistedBy':assisted,
                           'ShotType':shottype,
                           'ShotPosition':position,
                           'ShotSide':side,
                           'MissPosition':misspos,
                           'Close':close,
                           'MissSituation':misssit})

    df_temp = pd.concat([df_temp,new_df])
    
    df_temp['RelativeStrength'] = df_temp['ForTeam'].map(
        lambda x: strength(x)) - df_temp['AgainstTeam'].map(
        lambda x: strength(x))
    
    return df_temp

#Set the above function as a method for the Match class, so that it
#can be called on a match object as match.shots_missed_table()
Match.shots_missed_table = match_shots_missed



def match_shots_saved(self):
    
    '''
    Takes in a Match object, and reuturns a dataframe of saved shots
    '''
    
    #extract required information from the match object
    match_id = self.match_id
    game_week = self.game_week
    match_players = self.players
    match_teams = self.teams
    
    #create event objects out of the strings in the shots_saved attribute
    shots_saved = [ShotSaved(i) for i in self.shots_saved]
    
    #declare all the columns 
    cols = ['MatchID','GameWeek','Player','ForTeam','AgainstTeam','Time',
            'ShotOutcome','AssistedBy','ShotType','ShotPosition',
            'ShotSide','SavePosition','Close']
    
    #create an empty dataframe with the desired columns
    df_temp = pd.DataFrame(columns=cols)
    
    #go through and extract the different statistics for each event
    match = [match_id for i in shots_saved]
    gameweek = [game_week for i in shots_saved]    
    players = [i.player(match_players) for i in shots_saved]
    forteams = [i.for_team(match_teams) for i in shots_saved]
    againstteams = [i.against_team(match_teams) for i in shots_saved]
    time = [i.time for i in shots_saved]
    outcome = ['Saved' for i in shots_saved]
    assisted = [i.assisted_by(match_players) for i in shots_saved]
    shottype = [i.shot_type for i in shots_saved]
    position = [i.shot_position for i in shots_saved]
    side = [i.shot_side for i in shots_saved]
    savepos = [i.save_position for i in shots_saved]
    close = ['close' for i in shots_saved]


    new_df = pd.DataFrame({'MatchID':match,
                           'GameWeek':gameweek,
                           'Player':players,
                           'ForTeam':forteams,
                           'AgainstTeam':againstteams,
                           'Time':time,
                           'ShotOutcome':outcome,
                           'AssistedBy':assisted,
                           'ShotType':shottype,
                           'ShotPosition':position,
                           'ShotSide':side,
                           'SavePosition':savepos,
                           'Close':close})

    df_temp = pd.concat([df_temp,new_df])
    
    df_temp['RelativeStrength'] = df_temp['ForTeam'].map(
        lambda x: strength(x)) - df_temp['AgainstTeam'].map(
        lambda x: strength(x))
    
    return df_temp

#Set the above function as a method for the Match class, so that it
#can be called on a match object as match.shots_saved_table()
Match.shots_saved_table = match_shots_saved




def match_shots_blocked(self):
    
    '''
    Takes in a Match object, and reuturns a dataframe of blocked shots
    '''
    
    #extract required information from the match object
    match_id = self.match_id
    game_week = self.game_week
    match_players = self.players
    match_teams = self.teams
    
    #create event objects out of the strings in the shots_saved attribute
    shots_blocked = [ShotBlocked(i) for i in self.shots_blocked]
    
    #declare all the columns 
    cols = ['MatchID','GameWeek','Player','ForTeam','AgainstTeam','Time',
            'ShotOutcome','AssistedBy','ShotType','ShotPosition',
            'ShotSide','Close']
    
    #create an empty dataframe with the desired columns
    df_temp = pd.DataFrame(columns=cols)
    
    #go through and extract the different statistics for each event
    match = [match_id for i in shots_blocked]
    gameweek = [game_week for i in shots_blocked]    
    players = [i.player(match_players) for i in shots_blocked]
    forteams = [i.for_team(match_teams) for i in shots_blocked]
    againstteams = [i.against_team(match_teams) for i in shots_blocked]
    time = [i.time for i in shots_blocked]
    outcome = ['Blocked' for i in shots_blocked]
    assisted = [i.assisted_by(match_players) for i in shots_blocked]
    shottype = [i.shot_type for i in shots_blocked]
    position = [i.shot_position for i in shots_blocked]
    side = [i.shot_side for i in shots_blocked]
    close = ['not close' for i in shots_blocked]

    new_df = pd.DataFrame({'MatchID':match,
                           'GameWeek':gameweek,
                           'Player':players,
                           'ForTeam':forteams,
                           'AgainstTeam':againstteams,
                           'Time':time,
                           'ShotOutcome':outcome,
                           'AssistedBy':assisted,
                           'ShotType':shottype,
                           'ShotPosition':position,
                           'ShotSide':side,
                           'Close':close})

    df_temp = pd.concat([df_temp,new_df])
    
    df_temp['RelativeStrength'] = df_temp['ForTeam'].map(
        lambda x: strength(x)) - df_temp['AgainstTeam'].map(
        lambda x: strength(x))
    
    return df_temp

#Set the above function as a method for the Match class, so that it
#can be called on a match object as match.shots_blocked_table()
Match.shots_blocked_table = match_shots_blocked




def match_woodwork(self):
    
    '''
    Takes in a Match object, and returns a dataframe
    of shots that hit the post or bar
    (one row per entry in the match's woodwork attribute).

    The WoodworkType column holds each event's woodwork_type and the
    ShotType column holds its shot_type.
    '''
    
    #extract required information from the match object
    match_id = self.match_id
    game_week = self.game_week
    match_players = self.players
    match_teams = self.teams
    
    #create event objects out of the strings in the woodwork attribute
    woodwork = [Woodwork(i) for i in self.woodwork]
    
    #declare all the columns 
    cols = ['MatchID','GameWeek','Player','ForTeam','AgainstTeam','Time',
            'ShotOutcome','WoodworkType','AssistedBy','ShotType',
            'ShotPosition','ShotSide','Close']
    
    #create an empty dataframe with the desired columns
    df_temp = pd.DataFrame(columns=cols)
    
    #go through and extract the different statistics for each event
    match = [match_id for i in woodwork]
    gameweek = [game_week for i in woodwork]
    players = [i.player(match_players) for i in woodwork]
    forteams = [i.for_team(match_teams) for i in woodwork]
    againstteams = [i.against_team(match_teams) for i in woodwork]
    event_times = [i.time for i in woodwork]
    outcome = ['Woodwork' for i in woodwork]
    #Kept in its own list: this used to share a name with the shot type
    #list below, so the WoodworkType column ended up holding shot types
    woodworktype = [i.woodwork_type for i in woodwork]
    assisted = [i.assisted_by(match_players) for i in woodwork]
    shottype = [i.shot_type for i in woodwork]
    position = [i.shot_position for i in woodwork]
    side = [i.shot_side for i in woodwork]
    #woodwork shots are always recorded as 'close'
    close = ['close' for i in woodwork]

    new_df = pd.DataFrame({'MatchID':match,
                           'GameWeek':gameweek,
                           'Player':players,
                           'ForTeam':forteams,
                           'AgainstTeam':againstteams,
                           'Time':event_times,
                           'ShotOutcome':outcome,
                           'WoodworkType':woodworktype,
                           'AssistedBy':assisted,
                           'ShotType':shottype,
                           'ShotPosition':position,
                           'ShotSide':side,
                           'Close':close})

    df_temp = pd.concat([df_temp,new_df])
    
    #relative strength = strength of the shooting team minus the opponent's
    df_temp['RelativeStrength'] = df_temp['ForTeam'].map(
        lambda x: strength(x)) - df_temp['AgainstTeam'].map(
        lambda x: strength(x))    
    
    return df_temp

#Set the above function as a method for the Match class, so that it
#can be called on a match object as match.woodwork_table()
Match.woodwork_table = match_woodwork



def match_goals(self):
    
    '''
    Takes in a Match object, and reuturns a dataframe of goals
    '''
    
    #extract required information from the match object
    match_id = self.match_id
    game_week = self.game_week
    match_players = self.players
    match_teams = self.teams
    
    #create event objects out of the strings in the shots_missed attribute
    goals = [Goal(i) for i in self.goals]
    
    #declare all the columns 
    cols = ['MatchID','GameWeek','Player','ForTeam','AgainstTeam','Time','ShotOutcome',
            'AssistedBy','ShotType','ShotPosition','ShotSide','GoalPosition',
            'Close','GoalSituation']
    
    #create an empty dataframe with the desired columns
    df_temp = pd.DataFrame(columns=cols)
    
    #go through and extract the different statistics for each event
    match = [match_id for i in goals]
    gameweek = [game_week for i in goals]
    players = [i.player(match_players) for i in goals]
    forteams = [i.for_team(match_teams) for i in goals]
    againstteams = [i.against_team(match_teams) for i in goals]
    time = [i.time for i in goals]
    outcome = ['Goal' for i in goals]
    assisted = [i.assisted_by(match_players) for i in goals]
    shottype = [i.shot_type for i in goals]
    position = [i.shot_position for i in goals]
    side = [i.shot_side for i in goals]
    close = ['close' for i in goals]
    goalpos = [i.goal_position for i in goals]
    goalsit = [i.goal_situation for i in goals]

    new_df = pd.DataFrame({'MatchID':match,
                           'GameWeek':gameweek,
                           'Player':players,
                           'ForTeam':forteams,
                           'AgainstTeam':againstteams,
                           'Time':time,
                           'ShotOutcome':outcome,
                           'AssistedBy':assisted,
                           'ShotType':shottype,
                           'ShotPosition':position,
                           'ShotSide':side,
                           'GoalPosition':goalpos,
                           'Close':close,
                           'GoalSituation':goalsit})

    df_temp = pd.concat([df_temp,new_df])
    
    df_temp['RelativeStrength'] = df_temp['ForTeam'].map(
        lambda x: strength(x)) - df_temp['AgainstTeam'].map(
        lambda x: strength(x))
    
    return df_temp

#Set the above function as a method for the Match class, so that it
#can be called on a match object as match.goals_table()
Match.goals_table = match_goals




def match_shots(self):

    '''
    Takes a match object and returns a collated dataframe of all of its
    shots, keeping only the columns shared by every shot table.
    Rows are ordered goals, woodwork, blocked, saved, missed.
    '''

    #The columns shared by all of the per-outcome shot tables
    columns = ['MatchID','GameWeek','Player',
               'ForTeam','AgainstTeam','RelativeStrength',
               'Time','ShotOutcome','AssistedBy','ShotType',
               'ShotPosition','ShotSide','Close']

    #Build the table for each shot outcome
    goals = self.goals_table()
    missed = self.shots_missed_table()
    blocked = self.shots_blocked_table()
    saved = self.shots_saved_table()
    woodwork = self.woodwork_table()

    #Start from an empty frame and stack the shared columns of each table
    df_temp = pd.DataFrame(columns = columns)
    for table in (goals, woodwork, blocked, saved, missed):
        df_temp = pd.concat([df_temp, table[columns]])

    #Renumber the rows from zero
    return df_temp.reset_index(drop=True)

#Set the above function as a method for the Match class, so that it
#can be called on a match object as match.shots_table()
Match.shots_table = match_shots



def combine_shot_tables(matches):
    
    '''
    Takes a list of match objects, and outputs a concatenated
    dataframe of all the shots taken in those matches.

    Each match must provide a shots_table() method. The rows are
    renumbered from zero, and every column that can be parsed as
    numbers is converted to a numeric dtype; other columns are left
    untouched. An empty list gives an empty dataframe with the
    standard shot columns.
    '''

    columns = ['MatchID','GameWeek','Player',
               'ForTeam','AgainstTeam','RelativeStrength',
               'Time','ShotOutcome','AssistedBy','ShotType',
               'ShotPosition','ShotSide','Close']

    def _numeric_where_possible(column):
        #Same result as pd.to_numeric(errors='ignore'), which newer
        #pandas versions deprecate: unparseable columns come back unchanged
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    #Collect every match's shot table behind an empty frame that fixes the
    #columns, then concatenate once rather than growing the frame per match
    frames = [pd.DataFrame(columns = columns)]
    frames.extend(match.shots_table() for match in matches)

    df_temp = pd.concat(frames).reset_index(drop=True)

    #Make everything numeric
    df_temp = df_temp.apply(_numeric_where_possible)
    
    return df_temp


#And using the function to build the shot table for every match...
#NOTE(review): match_list is not defined in the visible part of this
#notebook, and the recorded output of this cell is a NameError for it -
#it has to be created by an earlier cell before this line can run
df_shots = combine_shot_tables(match_list)


def shot_filter(player=None, event='shot', gameweeks=None,
                shot_outcomes=None, shot_positions=None,
                side=None, shot_type=None, close=None, team=None,
                df = None):
    
    '''
    Filters a shot dataframe according to inputs.

    Any filter left as None matches every row. Apart from event, each
    filter accepts either a single value or a list-like of values.
    
    Parameters:
    - player (str): player to keep; matched against the 'Player' column
      when event='shot' and against 'AssistedBy' when event='assist'
    - event (str): 'shot' or 'assist'
    - gameweeks (list): accepted 'GameWeek' values
    - shot_outcomes (list): accepted 'ShotOutcome' values
    - shot_positions (list): accepted 'ShotPosition' values
    - side (str or list): accepted 'ShotSide' values
    - shot_type (str or list): accepted 'ShotType' values
    - close (str): accepted 'Close' value ('close' or 'not close')
    - team (str): accepted 'ForTeam' value
    - df (DataFrame): the shot table to filter; when None, the
      module-level df_shots is looked up at call time

    Returns the filtered dataframe.
    Raises ValueError if event is neither 'shot' nor 'assist'.
    '''

    #Fail loudly on an unknown event instead of silently returning None
    if event not in ('shot', 'assist'):
        raise ValueError(f"event must be 'shot' or 'assist', not {event!r}")

    #Resolve the default table at call time, so the function can be defined
    #before df_shots exists and always sees its current value
    if df is None:
        df = df_shots

    def _accepted(value, frame, column):
        #None -> every value present in the column;
        #a single value -> wrapped in a list; list-likes pass through
        if value is None:
            return frame[column].unique()
        if pd.api.types.is_list_like(value):
            return list(value)
        return [value]
    
    #Extract all possible outcomes for each column
    #(always from the frame being filtered, never from the global table)
    gameweeks = _accepted(gameweeks, df, 'GameWeek')
    shot_outcomes = _accepted(shot_outcomes, df, 'ShotOutcome')
    shot_positions = _accepted(shot_positions, df, 'ShotPosition')
    shot_type = _accepted(shot_type, df, 'ShotType')
    side = _accepted(side, df, 'ShotSide')
    team = _accepted(team, df, 'ForTeam')

    if close is None:
        close = ['close','not close']
    else:
        close = _accepted(close, df, 'Close')
    
    #Now perform a big loc to filter as required
    filtered = df.loc[(df['GameWeek'].isin(gameweeks))
                      & (df['ShotOutcome'].isin(shot_outcomes))
                      & (df['ShotPosition'].isin(shot_positions))
                      & (df['ShotType'].isin(shot_type))
                      & (df['Close'].isin(close))
                      & (df['ForTeam'].isin(team))
                      & (df['ShotSide'].isin(side))]
    
    #With no player given, the accepted names are the shooters that
    #survived the filter above (for both 'shot' and 'assist' events)
    player = _accepted(player, filtered, 'Player')
    
    #Filter on either shots or assists, depending on input
    if event == 'shot':
        return filtered.loc[filtered['Player'].isin(player)]
    return filtered.loc[filtered['AssistedBy'].isin(player)]


def goals_in_week(player, event='shot', gameweek=None, df = df_shots):

    '''
    Counts the goals that a given player scored (event='shot')
    or assisted (event='assist') in the gameweek
    '''

    #Keep only the rows for this player and gameweek whose outcome is 'Goal'
    goal_rows = shot_filter(player=player, event=event,
                            gameweeks=[gameweek],
                            shot_outcomes=['Goal'], df=df)

    return len(goal_rows)



def shots_in_week(player, event='shot', side=None, gameweek=None, df = df_shots):

    '''
    Counts all the shots that a given player hit (event='shot')
    or assisted (event='assist') in the gameweek, optionally
    restricted to a single side of the pitch
    '''

    #A given side is passed on as a one-element list; None means any side
    side_filter = [side] if side != None else side

    matching = shot_filter(player=player, event=event,
                           gameweeks=[gameweek],
                           side=side_filter, df=df)

    return len(matching)



def headers_in_week(player, event='shot', gameweek=None, df = df_shots):

    '''
    Counts the headers that a given player hit (event='shot')
    or assisted (event='assist') in the gameweek
    '''

    #Keep only the rows for this player and gameweek with shot type 'header'
    header_rows = shot_filter(player=player, event=event,
                              gameweeks=[gameweek],
                              shot_type='header', df=df)

    return len(header_rows)



def shots_close_in_week(player, event='shot', gameweek=None, df = df_shots):

    '''
    Counts the close shots that a given player hit (event='shot')
    or assisted (event='assist') in the gameweek
    '''

    #Keep only the rows for this player and gameweek flagged as 'close'
    close_rows = shot_filter(player=player, event=event,
                             gameweeks=[gameweek],
                             close='close', df=df)

    return len(close_rows)



def shots_on_target_in_week(player, event='shot', gameweek=None, df = df_shots):

    '''
    Counts the shots on target that a given player hit (event='shot')
    or assisted (event='assist') in the gameweek
    '''

    #A shot counts as on target when its outcome is 'Saved' or 'Goal'
    on_target_outcomes = ['Saved','Goal']

    on_target_rows = shot_filter(player=player, event=event,
                                 gameweeks=[gameweek],
                                 shot_outcomes=on_target_outcomes, df=df)

    return len(on_target_rows)



def shots_in_box_in_week(player, event='shot', gameweek=None, df = df_shots):

    '''
    Counts the shots in the box that a given player hit (event='shot')
    or assisted (event='assist') in the gameweek
    '''

    #Shot positions that count as being inside the box
    box_positions = ['the box', 'the six yard box']

    box_rows = shot_filter(player=player, event=event,
                           gameweeks=[gameweek],
                           shot_positions=box_positions, df=df)

    return len(box_rows)


def player_gameweek_row(player, gameweek=None, df=df_shots, verbose=True):

    '''
    Creates an aggregated, one-row view of a player's
    attacking performance in that gameweek.

    The player's team comes from df_players (matched on CommentName).
    The opponent and relative strength are the most common values among
    that team's shots in the gameweek, so an error is raised when exactly
    one such value cannot be found (e.g. the team has no shots recorded).
    '''
    if verbose==True:
        print(f'{player}, gameweek {gameweek}')

    for_team = df_players.loc[df_players['CommentName']==player, 'Team'].item()

    #All of the team's shots in the gameweek, used for both fixture lookups
    team_shots = shot_filter(team=for_team, gameweeks=[gameweek], df=df)
    against_team = team_shots['AgainstTeam'].mode().item()
    relative_strength = team_shots['RelativeStrength'].mode().item()

    def tally(event):
        #The nine counts for one event kind, in output column order
        shared = dict(event=event, gameweek=gameweek, df=df)
        return [goals_in_week(player, **shared),
                shots_on_target_in_week(player, **shared),
                shots_in_box_in_week(player, **shared),
                shots_close_in_week(player, **shared),
                shots_in_week(player, **shared),
                headers_in_week(player, **shared),
                shots_in_week(player, side='the centre', **shared),
                shots_in_week(player, side='the left', **shared),
                shots_in_week(player, side='the right', **shared)]

    shot_columns = ['Goals','ShotsOnTarget','ShotsInBox','CloseShots',
                    'TotalShots','Headers','ShotsCentre','ShotsLeft','ShotsRight']
    assist_columns = ['GoalAssists','ShotOnTargetCreated','ShotInBoxCreated',
                      'CloseShotCreated','TotalShotCreated','HeadersCreated',
                      'CreatedCentre','CreatedLeft','CreatedRight']

    #Assemble the single row: identifiers first, then shots, then assists
    row = {'GameWeek': [gameweek],
           'Player': [player],
           'ForTeam': [for_team],
           'AgainstTeam': [against_team],
           'RelativeStrength': [relative_strength]}

    for column, count in zip(shot_columns, tally('shot')):
        row[column] = [count]

    for column, count in zip(assist_columns, tally('assist')):
        row[column] = [count]

    return pd.DataFrame(row)




def player_gameweeks(player, gameweeks=None, df = df_shots, verbose=True):
    
    '''
    Returns a dataframe showing a gameweek
    by gameweek performance of a given player.

    Parameters:
    - player (str): the player's commentary name
    - gameweeks (list): gameweeks to include; when None, every gameweek
      present in df is used, in ascending order
    - df (DataFrame): the shot table to aggregate
    - verbose (bool): passed on to player_gameweek_row

    Any error raised by player_gameweek_row for a gameweek is not
    caught here.
    '''
    
    #Declare the required columns
    cols = ['GameWeek','Player','ForTeam','AgainstTeam','RelativeStrength',
            'Goals','ShotsOnTarget','ShotsInBox','CloseShots',
            'TotalShots','Headers','ShotsCentre','ShotsLeft','ShotsRight',
            'GoalAssists','ShotOnTargetCreated','ShotInBoxCreated',
            'CloseShotCreated','TotalShotCreated','HeadersCreated',
            'CreatedCentre','CreatedLeft','CreatedRight']

    #The default used to be iterated directly, which raised a TypeError;
    #None now means every gameweek in the shot table
    if gameweeks is None:
        gameweeks = sorted(df['GameWeek'].unique())
    
    #Collect one row per gameweek behind an empty frame that fixes the
    #columns, then concatenate them in one go
    rows = [pd.DataFrame(columns = cols)]
    for i in gameweeks:
        rows.append(player_gameweek_row(player, gameweek=i, df = df, verbose=verbose))
        
    return pd.concat(rows)



def players_gameweeks(players, gameweeks=None, df = df_shots, verbose=True):
    
    '''
    Returns a dataframe showing a gameweek
    by gameweek performance of a given list of players.

    Parameters:
    - players (list): the players' commentary names
    - gameweeks (list): gameweeks to include; when None, every gameweek
      present in df is used, in ascending order
    - df (DataFrame): the shot table to aggregate
    - verbose (bool): passed on to player_gameweeks

    Rows are renumbered from zero, and every column that can be parsed
    as numbers is converted to a numeric dtype.
    '''
    
    #Declare the required columns
    cols = ['GameWeek','Player','ForTeam','AgainstTeam','RelativeStrength',
            'Goals','ShotsOnTarget','ShotsInBox','CloseShots',
            'TotalShots','Headers','ShotsCentre','ShotsLeft','ShotsRight',
            'GoalAssists','ShotOnTargetCreated','ShotInBoxCreated',
            'CloseShotCreated','TotalShotCreated','HeadersCreated',
            'CreatedCentre','CreatedLeft','CreatedRight']

    def _numeric_where_possible(column):
        #Same result as pd.to_numeric(errors='ignore'), which newer
        #pandas versions deprecate: unparseable columns come back unchanged
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column

    #Resolve the default here, so that a None is never passed downstream
    if gameweeks is None:
        gameweeks = sorted(df['GameWeek'].unique())
    
    #Collect each player's rows behind an empty frame that fixes the
    #columns, then concatenate them in one go
    frames = [pd.DataFrame(columns = cols)]
    for i in players:
        frames.append(player_gameweeks(i, gameweeks=gameweeks, df=df, verbose=verbose))
        
    df_temp = pd.concat(frames).reset_index(drop=True)
    
    #Make everything numeric
    df_temp = df_temp.apply(_numeric_where_possible)
    
    return df_temp



def df_player_games_creator(match_list):
    '''
    Takes a list of match objects, and creates a stump dataframe of players
    in each game week, with the number of minutes that they played.

    Each match must provide a player_minutes() method returning a
    dataframe with Player, GameWeek and Minutes columns. Every player seen
    in any match gets a row for every gameweek seen in any match: the
    combinations with no record are appended after the recorded rows with
    Minutes set to 0. Rows are renumbered from zero.
    '''
    #Gather the minutes of every match behind an empty frame with the right
    #columns, and concatenate them in one go
    frames = [pd.DataFrame(columns=['Player','GameWeek','Minutes'])]
    frames.extend(match.player_minutes() for match in match_list)
    df_games = pd.concat(frames)

    #Now ensure that we have a row for each player in each game week.
    #The recorded (player, gameweek) pairs go in a set, so each combination
    #is checked without rescanning the whole dataframe
    recorded = set(zip(df_games['Player'], df_games['GameWeek']))

    #Walk the unique players, then the unique gameweeks, in order of
    #first appearance, and note every combination with no record
    missing = [(player, gameweek)
               for player in df_games['Player'].unique()
               for gameweek in df_games['GameWeek'].unique()
               if (player, gameweek) not in recorded]

    #Append all of the missing combinations at once, with no minutes played
    if missing:
        filler = pd.DataFrame(missing, columns=['Player','GameWeek'])
        filler['Minutes'] = 0
        df_games = pd.concat([df_games, filler])

    return df_games.reset_index(drop=True)

    

def df_player_games_extender(df, shot_df, verbose=True):
    
    '''
    Takes the player_games dataframe and calculates the required
    columns for shots and assists summary
    
    df=df_player_games, shot_df=df_shots

    Parameters:
    - df (DataFrame): one row per player and gameweek, with Player,
      GameWeek and Minutes columns
    - shot_df (DataFrame): the shot table handed to player_gameweek_row
    - verbose (bool): passed on to player_gameweek_row; also reports
      the rows that had to be skipped

    A row whose statistics cannot be built is left out of the result.
    Only ordinary errors are skipped: KeyboardInterrupt and SystemExit
    propagate, so a long run can still be stopped.
    '''
    
    #Declare the columns of the output dataframe
    cols = ['Player','GameWeek','Minutes','ForTeam','AgainstTeam',
            'RelativeStrength','Goals','ShotsOnTarget','ShotsInBox','CloseShots',
            'TotalShots','Headers','ShotsCentre','ShotsLeft','ShotsRight',
            'GoalAssists','ShotOnTargetCreated','ShotInBoxCreated',
            'CloseShotCreated','TotalShotCreated','HeadersCreated',
            'CreatedCentre','CreatedLeft','CreatedRight']

    def _numeric_where_possible(column):
        #Same result as pd.to_numeric(errors='ignore'), which newer
        #pandas versions deprecate: unparseable columns come back unchanged
        try:
            return pd.to_numeric(column)
        except (ValueError, TypeError):
            return column
    
    #Completed rows are collected behind an empty frame that fixes the
    #columns, and concatenated once at the end
    rows = [pd.DataFrame(columns = cols)]
    
    #Iterate through rows of the player games dataframe
    for i in range(len(df)):
        #Include a try/except, in case teams missed gameweeks
        #e.g. Liverpool in gameweek 18
        try:
            #Create a row stub from the input dataframe
            row_stub = df.iloc[[i]].reset_index(drop=True)
            #Create the new stats that we'll attach to this stub
            #(the first two columns, GameWeek and Player, are already in the stub)
            player = df.iloc[i]['Player']
            gameweek = df.iloc[i]['GameWeek']
            new_row = player_gameweek_row(player, gameweek, df=shot_df,
                                         verbose=verbose).iloc[:,2:]
            #Join the new data to the stub
            rows.append(pd.concat([row_stub, new_row],axis=1))
        except Exception as error:
            #Best effort: skip the row, but say so when asked to be verbose
            if verbose==True:
                print(f'Skipped row {i}: {error!r}')
    
    df_temp = pd.concat(rows).reset_index(drop=True)
    
    #Make everything numeric
    df_temp = df_temp.apply(_numeric_where_possible)
    df_temp = df_temp[cols]
    
    return df_temp

NameError: name 'match_list' is not defined

# Generalised Functionality

In [12]:
#For the player detail dataframe, we will need to
#build out a function that deals with irregular game weeks

def PlayerBasicsGenerator(matches):
    '''
    Takes a list of match objects, then creates a dataframe of
    all the players associated with the teams that played in those matches
    and how long they played in each match (includes non-matchday-squad players)
    '''

    df_temp = pd.DataFrame(columns=['Player','GameWeek','Minutes'])

    for match in matches:
        #Minutes recorded for the players that featured in the match
        minutes = match.player_minutes()

        #Every player belonging to either team, under the name 'Player'
        in_match_teams = df_players['Team'].isin(match.teams)
        squad = df_players.loc[in_match_teams, ['CommentName']]
        squad = squad.rename(columns={'CommentName': 'Player'})

        #Outer join, so players with no recorded minutes are kept too
        merged_df = squad.merge(minutes, how='outer')

        #Stamp the gameweek on every row, and give the players that
        #had no record zero minutes (as integers)
        merged_df['GameWeek'] = match.game_week
        merged_df['Minutes'] = merged_df['Minutes'].fillna(0).astype('int64')

        #Stack this match's rows under the previous ones
        df_temp = pd.concat([df_temp, merged_df])

    return df_temp



def CoreDataUpdater(matches, cursor, connection, verbose=True):
    '''
    Takes a list of match codes, then updates the following SQL tables:
        - ShotsDetail
        - PlayerMatchesDetail
        - TeamMatchesDetail
    The function does this on a match by match basis: each match is scraped,
    uploaded and committed before the next one starts. If anything other than
    the team table fails for a match, the uncommitted uploads for that match
    are rolled back so they cannot be committed alongside a later match.

    Parameters
    ----------
    matches : list of match codes, or a single match code
    cursor : database cursor handed to populate_sql_from_dataframe
    connection : database connection - must provide commit() and rollback()
    verbose : when truthy, prints progress after each stage

    Returns
    -------
    None. Failures are reported with print() and the loop moves on to the
    next match; KeyboardInterrupt / SystemExit are re-raised after rollback.
    '''

    #Check that we have a list of matches
    if not isinstance(matches, list):
        matches = [matches]

    #Iterate through the matches one at a time
    for match_code in matches:
        #Have everything in a try/except in case match has not been played yet
        try:
            #Scrape the webpage for that match and create a match object
            match_scrape = get_matches([match_code], verbose=verbose)[0]
            match_object = Match(match_scrape)
            if verbose:
                print(f'Match {match_code} object successfully instantiated')

            #Create the shot detail dataframe for that match
            temp_shot_detail_df = combine_shot_tables([match_object])
            if verbose:
                print(f'Match {match_code} shot detail dataframe successfully created')

            #Upload to sql...
            populate_sql_from_dataframe(temp_shot_detail_df,
                                        'ShotsDetail', cursor)

            #Create the player matches detail dataframe
            #Firstly, we need to create a simple player games table for the match
            temp_player_detail_df = PlayerBasicsGenerator([match_object])

            #Then we use the extender to populate
            temp_player_detail_df = df_player_games_extender(temp_player_detail_df,
                                                             shot_df=temp_shot_detail_df,
                                                             verbose=False)
            if verbose:
                print(f'Match {match_code} player detail dataframe successfully created')

            #Upload to sql
            populate_sql_from_dataframe(temp_player_detail_df,
                                        'PlayerMatchesDetail', cursor)

            #Try to create the team dataframe - this can fail if browser
            #doesn't 'click' on the stats tab properly. A failure here is
            #tolerated: the shot and player data are still committed.
            try:
                #Create team dataframe
                temp_team_detail_df = team_table_extended(match_object,
                                                          df_ref=temp_player_detail_df)
                if verbose:
                    print(f'Match {match_code} team detail dataframe successfully created')
                #Upload to sql
                populate_sql_from_dataframe(temp_team_detail_df,
                                            'TeamMatchesDetail', cursor)
            except Exception as team_error:
                print(f'\nFAILURE: Match {match_code} team detail dataframe NOT created '
                      f'({team_error!r})\n')

            #Commit sql changes
            connection.commit()
            if verbose:
                print(f'Match {match_code} SQL entries committed')
        except Exception as error:
            #Discard any partial uploads for this match, otherwise they would
            #be committed together with the next successful match
            connection.rollback()
            print(f'\nFAILURE: Match {match_code} failed to scrape - may not have been '
                  f'played yet ({error!r})\n')
        except BaseException:
            #Ctrl-C / interpreter exit: drop partial uploads, then let it through
            connection.rollback()
            raise