In [162]:
import pandas as pd
import urllib.request

from plotly.subplots import make_subplots
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio



In [131]:
# Shared styling constants for the notebook's figures.
background_color = '#f7f8f3'  # used by plot_xgd for both the paper and plot-area background
sec_color = '#BBC4C2'  # used by plot_xgd as the grid-line colour
# NOTE(review): absolute, machine-specific path — it will not resolve on another machine.
font_path = "C:/Users/jerry/Documents/Personal/09 Repositories/fonts/Roboto/Roboto-Black.ttf"
# NOTE(review): `font_manager` is not imported in any cell visible here (presumably
# matplotlib.font_manager — confirm), so on a fresh kernel this line raises NameError
# and `logo_path` below is never assigned. `font_props` is not referenced by the
# other visible cells.
font_props = font_manager.FontProperties(fname=font_path)
# NOTE(review): absolute, machine-specific path; not referenced by the other visible cells.
logo_path = "C:/Users/jerry/Documents/Personal/09 Repositories/logos/"

In [147]:
# FBref Premier League scores-and-fixtures page. The schedule table is picked out
# by its HTML id, which encodes the season (2024-2025) and must be updated when
# the season changes.
url = 'https://fbref.com/en/comps/9/schedule/Premier-League-Scores-and-Fixtures'
# Reuse `url` rather than repeating the literal so the two can never drift apart.
df = pd.read_html(url, attrs={'id': 'sched_2024-2025_9_1'})[0]

In [144]:
# Preview the scraped schedule table (one row per fixture; unplayed fixtures have
# no Score/xG values).
# NOTE(review): this cell's execution count (144) is lower than the load cell's
# (147), so the output shown below may come from an earlier run of the loader.
df

Unnamed: 0,Wk,Day,Date,Time,Home,xG,Score,xG.1,Away,Attendance,Venue,Referee,Match Report,Notes
0,1.0,Fri,2024-08-16,20:00,Manchester Utd,2.4,1–0,0.4,Fulham,73297.0,Old Trafford,Robert Jones,Match Report,
1,1.0,Sat,2024-08-17,12:30,Ipswich Town,0.5,0–2,2.6,Liverpool,30014.0,Portman Road Stadium,Tim Robinson,Match Report,
2,1.0,Sat,2024-08-17,15:00,Newcastle Utd,0.3,1–0,1.8,Southampton,52196.0,St James' Park,Craig Pawson,Match Report,
3,1.0,Sat,2024-08-17,15:00,Nott'ham Forest,1.3,1–1,1.2,Bournemouth,29763.0,The City Ground,Michael Oliver,Match Report,
4,1.0,Sat,2024-08-17,15:00,Everton,0.5,0–3,1.4,Brighton,39217.0,Goodison Park,Simon Hooper,Match Report,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
412,38.0,Sun,2025-05-25,16:00,Fulham,,,,Manchester City,,Craven Cottage,,Head-to-Head,
413,38.0,Sun,2025-05-25,16:00,Nott'ham Forest,,,,Chelsea,,The City Ground,,Head-to-Head,
414,38.0,Sun,2025-05-25,16:00,Manchester Utd,,,,Aston Villa,,Old Trafford,,Head-to-Head,
415,38.0,Sun,2025-05-25,16:00,Wolves,,,,Brentford,,Molineux Stadium,,Head-to-Head,


In [164]:
def _load_xg_by_fixture(url, table_id):
    """Scrape the schedule table and return summed xG per (Home, Away) pairing.

    Rows without a 'Score' (unplayed fixtures) are discarded. The result has the
    columns 'Home', 'Away', 'xG_for' (the home side's xG) and 'xG_against'
    (the away side's xG).
    """
    schedule = pd.read_html(url, attrs={'id': table_id})[0]

    # Chaining dropna/rename yields a fresh frame, so nothing is written back
    # into a filtered view of the scraped table.
    played = (
        schedule
        .dropna(subset=['Score'])
        .rename(columns={'xG': 'xG_home', 'xG.1': 'xG_away'})
    )

    return (
        played
        .groupby(['Home', 'Away'])
        .agg(xG_for=('xG_home', 'sum'), xG_against=('xG_away', 'sum'))
        .reset_index()
    )


def _team_xg_points(xgd_df, team):
    """Return every fixture involving `team`, expressed from that team's perspective.

    The result has the columns 'opponent', 'team_xg' (xG created by `team`) and
    'opp_xg' (xG conceded by `team`), regardless of whether it played home or away.
    """
    games = xgd_df.loc[(xgd_df['Home'] == team) | (xgd_df['Away'] == team)]
    at_home = games['Home'] == team

    # In xgd_df, 'xG_for' belongs to the home side and 'xG_against' to the away
    # side, so the two columns swap roles when `team` is the away side.
    return pd.DataFrame({
        'opponent': games['Away'].where(at_home, games['Home']),
        'team_xg': games['xG_for'].where(at_home, games['xG_against']),
        'opp_xg': games['xG_against'].where(at_home, games['xG_for']),
    })


def plot_xgd(url, table_id='sched_2024-2025_9_1', output_path='plot_xgd.png'):
    """Plot xG for vs. xG against for every team as a grid of small multiples.

    Each subplot shows one marker per played fixture of that team (x = xG
    against, y = xG for) plus a dashed x = y parity line. Markers are red when
    the team conceded more xG than it created and green otherwise. The figure
    is displayed and then written to `output_path` as a PNG.

    Relies on the notebook-level globals `background_color` and `sec_color`.

    Parameters
    ----------
    url : str
        Address of the page holding the schedule table.
    table_id : str, optional
        HTML id of the schedule table to read. Defaults to the id that was
        previously hard-coded.
    output_path : str, optional
        Destination of the exported PNG. Defaults to the previously
        hard-coded file name.

    Raises
    ------
    ValueError
        If the table contains no fixture with a score.
    """
    xgd_df = _load_xg_by_fixture(url, table_id)
    if xgd_df.empty:
        raise ValueError(f"No played fixtures found in table '{table_id}' at {url}")

    # Take teams from both columns so a side that has only played away so far
    # still gets a subplot.
    teams = sorted(set(xgd_df['Home']) | set(xgd_df['Away']))

    # Grid sized from the number of teams (4x5 for a 20-team league); each
    # subplot keeps its own pair of axes.
    n_cols = 5
    n_rows = -(-len(teams) // n_cols)  # ceiling division
    fig = make_subplots(
        rows=n_rows, cols=n_cols, subplot_titles=teams,
        vertical_spacing=0.1, horizontal_spacing=0.05
    )

    for idx, team in enumerate(teams):
        row, col = (idx // n_cols) + 1, (idx % n_cols) + 1
        points = _team_xg_points(xgd_df, team)

        # Red when the team conceded more xG than it created, green otherwise
        colors = [
            '#ED2461' if conceded > created else '#398170'
            for created, conceded in zip(points['team_xg'], points['opp_xg'])
        ]
        # Hover text is built from the team's own perspective, so "xG For" is
        # always the plotted team's xG for home and away fixtures alike.
        hover = [
            f"Opponent: {opponent}<br>xG For: {created}<br>xG Against: {conceded}"
            for opponent, created, conceded in zip(
                points['opponent'], points['team_xg'], points['opp_xg']
            )
        ]

        # One trace per team (rather than one per match) carrying all markers
        fig.add_trace(
            go.Scatter(
                x=points['opp_xg'],
                y=points['team_xg'],
                mode='markers',
                marker=dict(color=colors, size=12),
                text=hover,
                hoverinfo='text',
                showlegend=False
            ),
            row=row, col=col
        )

        # Reference line for xG parity (x = y)
        fig.add_trace(
            go.Scatter(x=[0, 5], y=[0, 5], mode='lines', line=dict(color='#747474', dash='dash'), showlegend=False),
            row=row, col=col
        )

    # General layout settings for the plot
    fig.update_layout(
        height=900,
        width=1200,
        title={
            'text': 'How have each Premier League team performed in both boxes?',
            'font': dict(size=18, color='black', family='Arial Black'),
        },
        plot_bgcolor=background_color,
        paper_bgcolor=background_color
    )

    # Subtitle, placed as an annotation just below the title
    fig.add_annotation(
        text='Expected goals (xG) for and against per game | Premier League, 2024-25',
        xref='paper', yref='paper',
        x=-0.02, y=1.06,
        showarrow=False,
        font=dict(size=16, color='gray', family='Raleway'),
        align='center'
    )

    # Same ticks and grid styling on every subplot axis, however many there are
    fig.update_xaxes(tickvals=[0, 1, 2, 3, 4, 5], gridcolor=sec_color, griddash='dot')
    fig.update_yaxes(tickvals=[0, 1, 2, 3, 4, 5], gridcolor=sec_color, griddash='dot')

    # Figure-wide X and Y axis labels
    fig.add_annotation(
        text='xG Against',
        xref='paper', yref='paper',
        x=0.5, y=-0.07,
        showarrow=False,
        font=dict(size=16, color='black', family='Raleway'),
        align='center'
    )
    fig.add_annotation(
        text='xG For',
        xref='paper', yref='paper',
        x=-0.06, y=0.5,
        showarrow=False,
        font=dict(size=16, color='black', family='Raleway'),
        textangle=-90,
        align='center'
    )

    # Display the final plot, then export it as a PNG
    fig.show()
    fig.write_image(output_path, format="png", width=1600, height=900, scale=2)


In [165]:
plot_xgd(url)