# English football: How the table evolved each week of the 2020-21 season

### Import Python tools and Jupyter configuration

In [1]:
%load_ext lab_black

In [2]:
import pandas as pd
import requests
import altair as alt
import altair_latimes as lat
import matplotlib.pyplot as plt
import pageviewapi
import datetime as dt
import requests
from bs4 import BeautifulSoup

In [3]:
pd.options.display.max_columns = 100
pd.options.display.max_rows = 1000
alt.data_transformers.disable_max_rows()
pd.options.display.max_colwidth = None

In [4]:
today = dt.datetime.today().strftime("%Y%m%d")

### Get all the tables and combine them into a dataframe

In [5]:
dfs = []
tables = []

for r in range(1, 39):
    tables.append(
        pd.read_html(
            "https://www.worldfootball.net/schedule/eng-premier-league-2020-2021-spieltag/"
            + str(r)
        )[3]
    )

for t in tables:
    dfs.append(pd.DataFrame(t))

In [6]:
df = pd.concat(dfs)

### Clean up the dataframe

In [7]:
df.drop(["Team"], axis=1, inplace=True)

In [8]:
df.rename(
    columns={
        "#": "place",
        "Team.1": "club",
        "M.": "match_number",
        "W": "wins",
        "D": "draws",
        "L": "losses",
        "goals": "goals",
        "Dif.": "differential",
        "Pt.": "points",
    },
    inplace=True,
)

### Split goals columns into two

In [9]:
df[["goals_for", "goals_against"]] = df["goals"].str.split(":", expand=True)

In [10]:
df.drop(["goals"], axis=1, inplace=True)

In [11]:
df["match_number"] = df["match_number"].astype(str)

In [12]:
df["goals_for"] = df["goals_for"].astype(str)
df["goals_against"] = df["goals_against"].astype(int)

In [13]:
df.head(20)

Unnamed: 0,place,club,match_number,wins,draws,losses,differential,points,goals_for,goals_against
0,1.0,Arsenal FC,1,1,0,0,3,3,3,0
1,,Leicester City,1,1,0,0,3,3,3,0
2,3.0,Chelsea FC,1,1,0,0,2,3,3,1
3,4.0,Manchester City,1,1,0,0,2,3,2,0
4,,Newcastle United,1,1,0,0,2,3,2,0
5,,Wolverhampton Wanderers,1,1,0,0,2,3,2,0
6,7.0,Liverpool FC,1,1,0,0,1,3,4,3
7,8.0,Crystal Palace,1,1,0,0,1,3,1,0
8,,Everton FC,1,1,0,0,1,3,1,0
9,,Manchester United,1,1,0,0,1,3,1,0


In [14]:
df["place"] = df["place"].fillna(method="ffill")

In [15]:
df.head()

Unnamed: 0,place,club,match_number,wins,draws,losses,differential,points,goals_for,goals_against
0,1.0,Arsenal FC,1,1,0,0,3,3,3,0
1,1.0,Leicester City,1,1,0,0,3,3,3,0
2,3.0,Chelsea FC,1,1,0,0,2,3,3,1
3,4.0,Manchester City,1,1,0,0,2,3,2,0
4,4.0,Newcastle United,1,1,0,0,2,3,2,0


---

### One club

In [16]:
df[df["club"] == "Tottenham Hotspur"].head()

Unnamed: 0,place,club,match_number,wins,draws,losses,differential,points,goals_for,goals_against
13,12.0,Tottenham Hotspur,1,0,0,1,-1,0,0,1
6,7.0,Tottenham Hotspur,2,1,0,1,2,3,5,3
9,10.0,Tottenham Hotspur,3,1,1,1,2,4,6,4
5,6.0,Tottenham Hotspur,4,2,1,1,7,7,12,5
8,9.0,Tottenham Hotspur,5,2,2,1,7,8,15,8


### Chart them all

In [17]:
alt.Chart(df).mark_line(interpolate="step-after", size=2).encode(
    x=alt.X(
        "match_number:Q",
        title="Week",
        scale=alt.Scale(domain=(0, 38), zero=False),
        axis=alt.Axis(tickCount=5, grid=False),
    ),
    y=alt.Y(
        "place:Q",
        title="Place",
        scale=alt.Scale(domain=(20, 2), zero=False),
        axis=alt.Axis(
            format="",
            tickSize=0,
            domainOpacity=0,
            tickCount=5,
            offset=4,
            gridWidth=0.6,
            gridColor="#dddddd",
        ),
    ),
    facet=alt.Facet("club", title="", columns=5),
).properties(
    width=150,
    height=100,
    title="2020-21: Club standing in Premier League table, by week",
).configure_view(
    strokeOpacity=0
).configure_axis(
    labelFontSize=13
)

In [18]:
alt.Chart(df[df["club"] == "Manchester City"]).mark_line(
    interpolate="step-after", size=2
).encode(
    x=alt.X(
        "match_number:Q",
        title="Game",
        scale=alt.Scale(domain=(0, 38), zero=False),
        axis=alt.Axis(tickCount=5, grid=False),
    ),
    y=alt.Y(
        "place:Q",
        title="Place",
        scale=alt.Scale(domain=(20, 2), zero=False),
        axis=alt.Axis(
            format="",
            tickSize=0,
            domainOpacity=0,
            tickCount=5,
            offset=4,
            gridWidth=0.6,
            gridColor="#dddddd",
        ),
    ),
).properties(
    width=600,
    height=300,
    title="2020-21: Manchester City standing in Premier League table, by week",
).configure_view(
    strokeOpacity=0
).configure_axis(
    labelFontSize=13
)

---

### Exports

In [19]:
df.to_csv("output/premier-league-weekly-standings.csv", index=False)

In [20]:
df.to_json("output/premier-league-weekly-standings.json", indent=2, orient="records")