In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [None]:
## Scrape FBref "Scores and Fixtures" pages and collect every team performance
## that ended with zero goals scored (club leagues, Champions League, World Cup,
## Euro, Copa America), then rank them by expected goals (xG).

# NOTE(review): this cell issues 41 page requests back-to-back (30 league pages,
# 6 Champions League, 2 World Cup, 1 Euro, 2 Copa America). Confirm the site's
# current rate limits and add a delay or an on-disk cache if requests start failing.

# URL template shared by every schedule page requested below.
SCHEDULE_URL = (
    "https://fbref.com/en/comps/{comp_id}/{season}/schedule/"
    "{season}-{slug}-Scores-and-Fixtures"
)

# Two integers separated by an en dash, e.g. "2–1". Searching for this pattern
# (instead of splitting on the dash and slicing single characters) also handles
# annotated scores such as "(4) 1–1 (3)" and double-digit scores such as "10–0".
SCORE_PATTERN = r"(\d+)–(\d+)"

# Club seasons are identified by the year in which they end: 2018 -> "2017-2018".
CLUB_SEASON_END_YEARS = range(2018, 2024)

# (competition id, URL slug) of the five domestic leagues, in the order their
# schedules are stacked for each season.
TOP_FIVE_LEAGUES = [
    (9, "Premier-League"),
    (12, "La-Liga"),
    (11, "Serie-A"),
    (20, "Bundesliga"),
    (13, "Ligue-1"),
]


def club_season(end_year: int) -> str:
    """Return the season label used in club URLs, e.g. 2018 -> "2017-2018"."""
    return f"{end_year - 1}-{end_year}"


def fetch_schedule(comp_id: int, slug: str, season) -> pd.DataFrame:
    """Download one schedule page and return the first HTML table on it.

    Parameters
    ----------
    comp_id : competition id used in the URL path.
    slug : competition name as it appears in the URL (e.g. "Serie-A").
    season : season label ("2017-2018") or tournament year (2018).
    """
    url = SCHEDULE_URL.format(comp_id=comp_id, season=season, slug=slug)
    return pd.read_html(url)[0]


def zero_goal_rows(schedule: pd.DataFrame, season) -> pd.DataFrame:
    """Return one row per team that scored no goals, sorted by xG descending.

    Parameters
    ----------
    schedule : table with at least the columns Score, xG, xG.1, Home and Away.
    season : value written unchanged into the ``Season`` column.

    Returns
    -------
    DataFrame with columns Team, xG, Goals, Result, Opponent, Season. Every
    match contributes a home-team row and an away-team row; only rows with
    ``Goals == 0`` are kept.

    Raises
    ------
    The integer conversion fails if a remaining row's score does not contain
    ``SCORE_PATTERN``; the float conversion fails if an xG value is not numeric.
    """
    # Keep only rows that have both a score and an xG value.
    played = schedule.dropna(subset=["Score", "xG"])
    if "Notes" in played.columns:
        # Drop rows whose Notes cell is literally "Notes" (presumably header
        # rows repeated inside the table - TODO confirm). Applied to every
        # competition so all of them share one code path.
        played = played[played["Notes"] != "Notes"]
    played = played.reset_index(drop=True)

    result = played["Score"].astype("string")
    goals = result.str.extract(SCORE_PATTERN).astype(int)

    # (team column, opponent column, the team's xG column, goals column)
    sides = [
        ("Home", "Away", "xG", 0),
        ("Away", "Home", "xG.1", 1),
    ]
    long_form = pd.concat(
        [
            pd.DataFrame(
                {
                    "Team": played[team],
                    # Cast here so sorting is numeric even when the scraped
                    # column arrives as text.
                    "xG": played[xg].astype(float),
                    "Goals": goals[goal_col],
                    "Result": result,
                    "Opponent": played[opponent],
                    "Season": season,
                }
            )
            for team, opponent, xg, goal_col in sides
        ],
        ignore_index=True,
    )

    scoreless = long_form[long_form["Goals"] == 0]
    return scoreless.sort_values(by="xG", ascending=False)


## Top five european leagues

league_frames = []
for end_year in CLUB_SEASON_END_YEARS:
    season = club_season(end_year)
    season_schedule = pd.concat(
        [fetch_schedule(comp_id, slug, season) for comp_id, slug in TOP_FIVE_LEAGUES],
        ignore_index=True,
    )
    league_frames.append(zero_goal_rows(season_schedule, str(end_year)))

data = pd.concat(league_frames)

## Champions League

data_cl = pd.concat(
    [
        zero_goal_rows(
            fetch_schedule(8, "Champions-League", club_season(end_year)),
            str(end_year),
        )
        for end_year in CLUB_SEASON_END_YEARS
    ]
)

## World cups

world_cups = [2018, 2022]

data_wc = pd.concat(
    [zero_goal_rows(fetch_schedule(1, "FIFA-World-Cup", cup), cup) for cup in world_cups]
)

## Euro

euro_cups = [2021]

data_ec = pd.concat(
    [zero_goal_rows(fetch_schedule(676, "European-Championship", cup), cup) for cup in euro_cups]
)

## Copa America

copa_americas = [2019, 2021]

data_copa = pd.concat(
    [zero_goal_rows(fetch_schedule(685, "Copa-America", cup), cup) for cup in copa_americas]
)

## Combine and rank (xG is already float for every competition)

club_df = pd.concat([data, data_cl]).sort_values(by="xG", ascending=False)

international_df = pd.concat([data_wc, data_ec, data_copa]).sort_values(by="xG", ascending=False)

total_df = pd.concat([club_df, international_df]).sort_values(by="xG", ascending=False)
top_20 = total_df.head(20).reset_index(drop=True)

In [None]:
# Horizontal bar chart of the 20 highest-xG team performances with no goals scored.
# Reads `top_20` (columns used: xG, Team, Opponent, Result, Season) and writes a PNG.

BACKGROUND_COLOR = '#f4f2e5'
LEGEND_X = 4.5  # x position (in xG units) of the annotation block

fig, ax = plt.subplots(figsize=(15, 15), dpi=200, facecolor=BACKGROUND_COLOR)
ax.set_facecolor(BACKGROUND_COLOR)

ax.grid(True, c="grey", ls=":")

ax.spines['top'].set_visible(False)
ax.spines['bottom'].set_visible(True)
ax.spines['left'].set_visible(True)

ax.set_xlabel("xG", fontsize=15)
ax.set_title("Biggest xG Of A Single Team With No Goals Scored - from 2017", fontsize=20)

# The y axis carries no ticks or labels: each bar is labelled inside the plot.
ax.tick_params(left=False, right=False, labelleft=False)

# One bar per row; the bar position is the row's rank in top_20.
for rank, row in enumerate(top_20.itertuples(index=False)):
    ax.barh(rank, row.xG, color="lightblue", ec="black")
    ax.text(0.1, rank + 0.05, row.Team + " vs ", fontsize=10)
    ax.text(0.1, rank - 0.25, f"{row.Opponent} - {row.Result} - {row.Season}", fontsize=10)
    ax.text(row.xG + 0.1, rank - 0.1, str(row.xG), color='black', fontweight="bold", zorder=5)

# NOTE(review): hard-coded translucent red overlay on row 15 (width 3) -
# presumably highlights one specific entry; confirm it still lines up after
# the data is refreshed.
ax.barh(15, 3, color="red", alpha=0.5)

# Annotation block: (y position, text, extra text properties).
# The Champions League is listed because its matches are part of the plotted data.
legend_lines = [
    (19.2, "Competitions Included", {"fontsize": 12, "color": "red", "alpha": 0.8}),
    (18.8, "Club Competitions:", {"fontsize": 10}),
    (18.4, "EPL, Serie A, La Liga, ", {}),
    (18.1, "Bundesliga, Ligue1, ", {}),
    (17.8, "Champions League", {}),
    (17.4, "National Competitions:", {"fontsize": 10}),
    (17.0, "World Cup, Euro, ", {}),
    (16.7, "Copa America", {}),
    (16.2, "Data source: FBRef", {"fontsize": 12}),
]
for y, label, text_kwargs in legend_lines:
    ax.text(LEGEND_X, y, label, **text_kwargs)

fig.savefig('biggest-xg-with-no-goals-scored.png')
plt.show()