# Premier League Points Accumulation - Bar Chart Race
By ***Ahmad Zaenun Faiz***

This is a data visualization built with Python that shows how each club's league points accumulated over the 2019/20 season of the Premier League, the top tier of English football. I use the Matplotlib, Pandas and NumPy libraries to make this visualization.

### Reference: 
* Bar Chart Race Tutorial: https://www.dunderdata.com/blog/create-a-bar-chart-race-animation-in-python-with-matplotlib
* Data source: James P. Curley (2016). engsoccerdata: English Soccer Data 1871-2016. R package version 0.1.5. https://github.com/jalapic/engsoccerdata
* Python modules used: Pandas, Matplotlib, Bar Chart Race

In [1]:
import pandas as pd
import bar_chart_race as bcr

## Data Wrangling

In [19]:
# Location of the English football match-results CSV (one row per match).
csv_url = (
    'https://raw.githubusercontent.com/ahmadzfaiz/python-data-visual/main/'
    'data/1.%20English%20Football%20Match/england.csv'
)

# low_memory=False makes pandas read the file in a single pass rather than
# in chunks, so each column's dtype is inferred from the whole column.
fc = pd.read_csv(csv_url, low_memory=False)

fc

Unnamed: 0,Date,Season,home,visitor,hgoal,vgoal,division,tier,totgoal,goaldif,result
0,1888-09-08,1888,Bolton Wanderers,Derby County,3,6,1,1,9,-3,A
1,1888-09-08,1888,Everton,Accrington F.C.,2,1,1,1,3,1,H
2,1888-09-08,1888,Preston North End,Burnley,5,2,1,1,7,3,H
3,1888-09-08,1888,Stoke City,West Bromwich Albion,0,2,1,1,2,-2,A
4,1888-09-08,1888,Wolverhampton Wanderers,Aston Villa,1,1,1,1,2,0,D
...,...,...,...,...,...,...,...,...,...,...,...
199879,3/7/2020,2019,Plymouth Argyle,Macclesfield,3,0,4,4,3,3,H
199880,3/7/2020,2019,Salford City,Bradford City,2,0,4,4,2,2,H
199881,3/7/2020,2019,Swindon Town,Forest Green Rovers,0,2,4,4,2,-2,A
199882,3/7/2020,2019,Walsall,Exeter City,3,1,4,4,4,2,H


In [20]:
# Select top-flight (tier 1) matches of the 2019 season once, then reuse the
# mask for both the home-side and the visiting-side view of each match.
top_flight_2019 = (fc['tier'] == 1) & (fc['Season'] == 2019)

# .copy() gives each frame its own data. Later cells add columns to `home`
# and `away`; without the copy those writes target a slice of `fc` and
# pandas emits SettingWithCopyWarning.
home = fc.loc[top_flight_2019, ['tier', 'Date', 'Season', 'home', 'result']].copy()
away = fc.loc[top_flight_2019, ['tier', 'Date', 'Season', 'visitor', 'result']].copy()

home

Unnamed: 0,tier,Date,Season,home,result
198112,1,8/9/2019,2019,Liverpool,H
198113,1,8/10/2019,2019,West Ham United,A
198114,1,8/10/2019,2019,AFC Bournemouth,D
198115,1,8/10/2019,2019,Burnley,H
198116,1,8/10/2019,2019,Crystal Palace,D
...,...,...,...,...,...
198487,1,7/26/2020,2019,Leicester City,A
198488,1,7/26/2020,2019,Manchester City,H
198489,1,7/26/2020,2019,Newcastle United,A
198490,1,7/26/2020,2019,Southampton,H


In [21]:
# Points the home side earns for each result code
# ('H' = home win, 'D' = draw, 'A' = away win).
home_points = {'H': 3, 'D': 1, 'A': 0}

# map() builds the whole column in one step as a numeric column, instead of
# seeding it with None (object dtype) and filling it by three masked writes.
# assign() returns a new frame, so nothing is written into a slice of `fc`.
# Any result code missing from the mapping becomes NaN.
home = home.assign(point=home['result'].map(home_points))

home

Unnamed: 0,tier,Date,Season,home,result,point
198112,1,8/9/2019,2019,Liverpool,H,3
198113,1,8/10/2019,2019,West Ham United,A,0
198114,1,8/10/2019,2019,AFC Bournemouth,D,1
198115,1,8/10/2019,2019,Burnley,H,3
198116,1,8/10/2019,2019,Crystal Palace,D,1
...,...,...,...,...,...,...
198487,1,7/26/2020,2019,Leicester City,A,0
198488,1,7/26/2020,2019,Manchester City,H,3
198489,1,7/26/2020,2019,Newcastle United,A,0
198490,1,7/26/2020,2019,Southampton,H,3


In [22]:
# Points the visiting side earns for each result code
# ('H' = home win, 'D' = draw, 'A' = away win).
away_points = {'H': 0, 'D': 1, 'A': 3}

# map() builds the whole column in one step as a numeric column, instead of
# seeding it with None (object dtype) and filling it by three masked writes.
# assign() returns a new frame, so nothing is written into a slice of `fc`.
# Any result code missing from the mapping becomes NaN.
away = away.assign(point=away['result'].map(away_points))

away

Unnamed: 0,tier,Date,Season,visitor,result,point
198112,1,8/9/2019,2019,Norwich City,H,0
198113,1,8/10/2019,2019,Manchester City,A,3
198114,1,8/10/2019,2019,Sheffield United,D,1
198115,1,8/10/2019,2019,Southampton,H,0
198116,1,8/10/2019,2019,Everton,D,1
...,...,...,...,...,...,...
198487,1,7/26/2020,2019,Manchester United,A,3
198488,1,7/26/2020,2019,Norwich City,H,0
198489,1,7/26/2020,2019,Liverpool,A,3
198490,1,7/26/2020,2019,Sheffield United,H,0


In [26]:
# Give both frames the same schema (Date, Club, point) so they can be stacked.
# rename() returns a new frame, so the team column is relabelled directly
# rather than duplicated onto a (possibly sliced) frame and then discarded.
home = home.rename(columns={'home': 'Club'})[['Date', 'Club', 'point']]
away = away.rename(columns={'visitor': 'Club'})[['Date', 'Club', 'point']]

away

Unnamed: 0,Date,Club,point
198112,8/9/2019,Norwich City,0
198113,8/10/2019,Manchester City,3
198114,8/10/2019,Sheffield United,1
198115,8/10/2019,Southampton,0
198116,8/10/2019,Everton,1
...,...,...,...
198487,7/26/2020,Manchester United,3
198488,7/26/2020,Norwich City,0
198489,7/26/2020,Liverpool,3
198490,7/26/2020,Sheffield United,0


In [27]:
# Stack the home-side rows on top of the away-side rows: every match then
# contributes two rows, one per participating club. The original row labels
# are kept, so each match label appears twice.
frames = [home, away]
dcc = pd.concat(objs=frames, axis=0)

dcc

Unnamed: 0,Date,Club,point
198112,8/9/2019,Liverpool,3
198113,8/10/2019,West Ham United,0
198114,8/10/2019,AFC Bournemouth,1
198115,8/10/2019,Burnley,3
198116,8/10/2019,Crystal Palace,1
...,...,...,...
198487,7/26/2020,Manchester United,3
198488,7/26/2020,Norwich City,0
198489,7/26/2020,Liverpool,3
198490,7/26/2020,Sheffield United,0


In [28]:
# Pivot to one row per match date and one column per club. Each cell holds
# the points that club earned on that date, or NaN when it did not play.
# aggfunc='first' takes the first value per (date, club) cell; presumably
# a club never plays twice on the same date, so nothing is lost.
ctb = pd.crosstab(
    index=dcc['Date'],
    columns=dcc['Club'],
    values=dcc['point'],
    aggfunc='first',
)

In [29]:
# The crosstab index holds the match dates as strings; keep them in a
# 'Timeline' column and replace the index with a plain 0..n-1 range.
ctb = ctb.assign(Timeline=ctb.index).reset_index(drop=True)

# Parse the date strings once and split them into calendar components.
timeline = pd.DatetimeIndex(ctb['Timeline'])
ctb['day'] = timeline.day
ctb['month'] = timeline.month
ctb['year'] = timeline.year

# Reassemble a real datetime column from the components, then order rows
# chronologically (the string dates do not sort in calendar order).
ctb['Date'] = pd.to_datetime(ctb[['year', 'month', 'day']])
ctb = ctb.sort_values('Date').reset_index(drop=True)

In [30]:
# Drop the helper columns used to build 'Date'; only the per-club point
# columns and the parsed 'Date' column remain.
ctb = ctb.drop(columns=['Timeline', 'day', 'month', 'year'])

ctb

Club,AFC Bournemouth,Arsenal,Aston Villa,Brighton & Hove Albion,Burnley,Chelsea,Crystal Palace,Everton,Leicester City,Liverpool,...,Manchester United,Newcastle United,Norwich City,Sheffield United,Southampton,Tottenham Hotspur,Watford,West Ham United,Wolverhampton Wanderers,Date
0,,,,,,,,,,3.0,...,,,0.0,,,,,,,2019-08-09
1,1.0,,0.0,3.0,3.0,,1.0,1.0,,,...,,,,1.0,0.0,3.0,0.0,0.0,,2019-08-10
2,,3.0,,,,0.0,,,1.0,,...,3.0,0.0,,,,,,,1.0,2019-08-11
3,3.0,3.0,0.0,1.0,0.0,,,3.0,,3.0,...,,0.0,3.0,,0.0,1.0,0.0,1.0,,2019-08-17
4,,,,,,1.0,0.0,,1.0,,...,,,,3.0,,,,,,2019-08-18
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
110,0.0,,,,,,,,0.0,,...,,,,,3.0,3.0,,,,2020-07-19
111,,,,1.0,,,0.0,3.0,,,...,,1.0,,0.0,,,,,3.0,2020-07-20
112,,0.0,3.0,,,,,,,,...,,,,,,,0.0,,,2020-07-21
113,,,,,,0.0,,,,3.0,...,1.0,,,,,,,1.0,,2020-07-22


In [31]:
# Build the running points total per club, indexed by match date.
# A club earns 0 points on a date it did not play, so NaN becomes 0 before
# the cumulative sum. The chain returns new frames at every step, so `ctb`
# is left untouched (previously `df = ctb` was only an alias, and the
# in-place fillna silently modified `ctb` as well).
df = ctb.set_index('Date').fillna(0).cumsum()

df

Club,AFC Bournemouth,Arsenal,Aston Villa,Brighton & Hove Albion,Burnley,Chelsea,Crystal Palace,Everton,Leicester City,Liverpool,Manchester City,Manchester United,Newcastle United,Norwich City,Sheffield United,Southampton,Tottenham Hotspur,Watford,West Ham United,Wolverhampton Wanderers
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1
2019-08-09,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2019-08-10,1.0,0.0,0.0,3.0,3.0,0.0,1.0,1.0,0.0,3.0,3.0,0.0,0.0,0.0,1.0,0.0,3.0,0.0,0.0,0.0
2019-08-11,1.0,3.0,0.0,3.0,3.0,0.0,1.0,1.0,1.0,3.0,3.0,3.0,0.0,0.0,1.0,0.0,3.0,0.0,0.0,1.0
2019-08-17,4.0,6.0,0.0,4.0,3.0,0.0,1.0,4.0,1.0,6.0,4.0,3.0,0.0,3.0,1.0,0.0,4.0,0.0,1.0,1.0
2019-08-18,4.0,6.0,0.0,4.0,3.0,1.0,1.0,4.0,2.0,6.0,4.0,3.0,0.0,3.0,4.0,0.0,4.0,0.0,1.0,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2020-07-19,31.0,53.0,31.0,37.0,54.0,63.0,42.0,46.0,62.0,93.0,75.0,62.0,43.0,21.0,54.0,49.0,58.0,34.0,37.0,56.0
2020-07-20,31.0,53.0,31.0,38.0,54.0,63.0,42.0,49.0,62.0,93.0,75.0,62.0,44.0,21.0,54.0,49.0,58.0,34.0,37.0,59.0
2020-07-21,31.0,53.0,34.0,38.0,54.0,63.0,42.0,49.0,62.0,93.0,78.0,62.0,44.0,21.0,54.0,49.0,58.0,34.0,37.0,59.0
2020-07-22,31.0,53.0,34.0,38.0,54.0,63.0,42.0,49.0,62.0,96.0,78.0,63.0,44.0,21.0,54.0,49.0,58.0,34.0,38.0,59.0


## Bar Chart Race

In [33]:
def summary(values, ranks):
    """Return the keyword arguments for the chart's credit/caption text.

    Used as the ``period_summary_func`` callback of ``bar_chart_race``.
    ``values`` and ``ranks`` are accepted to satisfy the callback signature
    but are not used: the same caption is returned on every call.

    Returns:
        dict with the keys ``x``, ``y``, ``s``, ``ha`` and ``size``
        (presumably forwarded by bar_chart_race to a Matplotlib text
        call; confirm against the bar_chart_race documentation).
    """
    credit = (
        '©2022 Ahmad Zaenun Faiz | Data source: James P. Curley (2016)\n'
        ' engsoccerdata: English Soccer Data 1871-2016'
    )
    return dict(x=.99, y=.02, s=credit, ha='right', size=4)

# Render the animated bar chart race from the cumulative-points table:
# one bar per club (column of `df`), one period per match date (row).
chart = bcr.bar_chart_race(
    df=df,
    title='Posisi Klasemen Premier League Season 19/20',
    figsize=(5, 5),
    cmap='Set1',
    filter_column_colors=True,
    fixed_max=True,
    steps_per_period=20,
    period_fmt='%d %B %Y',
    period_summary_func=summary,
)

chart

  df_values.iloc[:, 0] = df_values.iloc[:, 0].fillna(method='ffill')
  ax.set_yticklabels(self.df_values.columns)
  ax.set_xticklabels([max_val] * len(ax.get_xticks()))
