In [1]:
import pandas as pd

In [2]:
# Turn off pandas' chained-assignment warning for the whole notebook session.
pd.set_option('mode.chained_assignment', None)
# Path prefix for every data file; the trailing slash matters because file
# paths are built from it by plain string concatenation.
data_path = '../data/'

## Create Goal Difference

The idea here is to compute each team's cumulative goal difference (GD) after every game so that it can be plotted later. I will be plotting the GD of the Premier League "big six" (Manchester United, Liverpool, Arsenal, Chelsea, Manchester City and Tottenham).

In [3]:
# Read the 2022-23 season results table into a DataFrame
results_csv = data_path + 'epl_results_2022-23.csv'
results = pd.read_csv(results_csv)

In [4]:
def calculate_goal_diff(results: pd.DataFrame, team: str) -> pd.DataFrame:
    """
    Compute a team's running (cumulative) goal difference after each match.

    Every row of ``results`` in which ``team`` appears as ``HomeTeam`` or
    ``AwayTeam`` counts as one match. The per-match goal difference is taken
    from the team's point of view (``FTHG - FTAG`` at home, ``FTAG - FTHG``
    away) and then accumulated in row order.

    Parameters
    ----------
    results: pd.DataFrame
        Match table with the columns ``HomeTeam``, ``AwayTeam``, ``FTHG`` and
        ``FTAG``. It is only read, never modified. Rows are presumably in
        chronological order - the running total simply follows row order,
        so confirm this against the data source.
    team: str
        Team name exactly as it appears in ``HomeTeam`` / ``AwayTeam``.

    Returns
    -------
    gd: pd.DataFrame
        One row per match played by ``team`` with a fresh 0..n-1 index and
        two columns: ``MatchDay`` (0-based position of the match among the
        team's matches) and ``GD`` (cumulative goal difference up to and
        including that match). Empty if the team has no matches.
    """

    # all matches played by the team, home or away
    played = results.loc[(results['AwayTeam'] == team) | (results['HomeTeam'] == team)]

    # Goal margin from the home side's perspective; flip the sign on the rows
    # where the team was the visitor. This derives a new Series instead of
    # writing a column into a slice of the caller's frame.
    home_margin = played['FTHG'] - played['FTAG']
    per_match = home_margin.where(played['AwayTeam'] != team, -home_margin)

    # Running total after each match. drop=True discards the original row
    # labels directly, so this also works when ``results`` has a named index.
    gd = per_match.cumsum().reset_index(drop=True).to_frame('GD')
    gd.insert(0, 'MatchDay', range(len(gd)))

    return gd

In [5]:
# Export one cumulative goal-difference CSV per Premier League "big six" club
big_six = [
    'Man United',
    'Man City',
    'Liverpool',
    'Arsenal',
    'Tottenham',
    'Chelsea',
]

for team in big_six:
    # output file is named after the team, e.g. "<team>_goal_diff.csv"
    output_csv = data_path + team + '_goal_diff.csv'
    gd = calculate_goal_diff(results, team)
    gd.to_csv(output_csv, index=False)