In [2]:
import pandas as pd
import os
import plotly.express as px

In [16]:
# Function that fetches the data from the website and produces the csv files
def createPremierLeagueDataForWeek():
    """Scrape the Premier League table from fbref and save it as a weekly CSV.

    The first table on the page is reduced to the columns in ``prem_columns``.
    The "Top Team Scorer" column (expected to look like ``"Name - Goals"``) is
    replaced by two columns: ``Top_Team_Scorer_Name`` and
    ``Top_Team_Scorer_Num_Goals``.

    The CSV is written to ``data/prem_league_week<MP>.csv`` only when every
    squad has played the same number of matches (one distinct ``MP`` value).

    Returns:
        bool: True if the CSV was written, False if the squads' ``MP`` values
        differ (nothing is written in that case).
    """
    # URL for the data we want to retrieve
    premier_league_url = "https://fbref.com/en/comps/9/Premier-League-Stats"

    # Only want to keep the first dataframe and the columns indicated below
    prem_league_df = pd.read_html(premier_league_url)[0]
    prem_columns = ["Rk", "Squad", "MP", "W", "L", "GF", "GA", "GD", "Pts", "Pts/MP", "Last 5", "Top Team Scorer"]
    # .copy() so the column assignments below act on an independent frame,
    # not on a slice of the scraped table.
    prem_df = prem_league_df[prem_columns].copy()

    def split_top_scorer(cell):
        """Return ``(name, goals)`` parsed from one "Top Team Scorer" cell."""
        if not isinstance(cell, str):
            # Missing value (e.g. NaN): nothing to parse.
            return None, None
        # Split on the LAST hyphen only, so hyphenated names such as
        # "Son Heung-min - 9" keep their hyphen.
        name, sep, goals = cell.rpartition("-")
        if not sep:
            return cell.strip(), None
        try:
            return name.strip(), int(goals.strip())
        except ValueError:
            # Text after the last hyphen is not a goal count; keep the whole
            # cell as the name and leave the goals missing.
            return cell.strip(), None

    parsed = [split_top_scorer(cell) for cell in prem_df["Top Team Scorer"]]
    prem_df["Top_Team_Scorer_Name"] = [name for name, _ in parsed]
    # Nullable integer dtype keeps whole-number goals even if a row is missing.
    prem_df["Top_Team_Scorer_Num_Goals"] = pd.array([goals for _, goals in parsed], dtype="Int64")
    prem_df = prem_df.drop(columns="Top Team Scorer")

    # Create the csv file for the indicated week of matchplay
    if prem_df["MP"].nunique() == 1:
        folder_path = 'data'
        os.makedirs(folder_path, exist_ok=True)

        file_path = os.path.join(folder_path, "prem_league_week" + str(prem_df["MP"].iloc[0]) + ".csv")
        prem_df.to_csv(file_path, index=False)
        print("Successfully created data")
        return True
    else:
        # Squads have played different numbers of matches, so there is no
        # single "week" to label the file with.
        print("Error occured")
        return False


In [17]:
# Fetch the league table from the web and, if every squad has played the same
# number of matches, write it to data/prem_league_week<MP>.csv.
createPremierLeagueDataForWeek()

URLError: <urlopen error [Errno 11001] getaddrinfo failed>

In [19]:
def createPremierLeaguePointsDashboard(week_num):
    """Show and save a bar chart of total points per squad for one match week.

    Args:
        week_num: Match week to plot. The function reads
            ``./data/prem_league_week<week_num>.csv`` and uses its ``Squad``
            and ``Pts`` columns.

    Side effects:
        Displays the figure and writes it to
        ``./dashboards/PremierLeagueTable.html``, creating the ``dashboards``
        directory if it does not exist yet.
    """
    path = "./data/prem_league_week" + str(week_num) + ".csv"
    df = pd.read_csv(path)

    fig = px.bar(
        df,
        x="Squad",
        y="Pts",
        title="Total Points : EPL 2023 - 2024",
        color="Pts",
        text="Pts",
        color_continuous_scale="oranges",
        height=600,
    )
    fig.show()

    # write_html does not create missing parent folders, so make sure the
    # output directory exists (mirrors what the data step does for ./data).
    os.makedirs("./dashboards", exist_ok=True)
    fig.write_html("./dashboards/PremierLeagueTable.html")


In [20]:
# Build the points chart from ./data/prem_league_week14.csv (that file must already exist).
createPremierLeaguePointsDashboard(14)

In [22]:
def createPremLeagueTopScorerDashboard(week_num):
    """Show and save a bar chart of each squad's top-scorer share of team goals.

    For every row of ``./data/prem_league_week<week_num>.csv`` the share is
    ``Top_Team_Scorer_Num_Goals / GF`` rounded to two decimals. It is a
    fraction (e.g. 0.4), not a value scaled to 0-100. Squads with ``GF == 0``
    or missing values get a share of 0 instead of inf/NaN.

    Args:
        week_num: Match week to plot. The CSV must contain the columns
            ``Squad``, ``GF``, ``Top_Team_Scorer_Name`` and
            ``Top_Team_Scorer_Num_Goals``.

    Side effects:
        Displays the figure and writes it to
        ``./dashboards/TopScorerDashboard.html``, creating the ``dashboards``
        directory if it does not exist yet.
    """
    path = "./data/prem_league_week" + str(week_num) + ".csv"
    df = pd.read_csv(path)

    # Mask zero goals-for before dividing so a goalless squad yields NaN
    # (then 0) rather than a division-by-zero inf.
    goals_for = df["GF"].where(df["GF"] != 0)
    share = (df["Top_Team_Scorer_Num_Goals"] / goals_for).round(2).fillna(0)

    # Build the plotting frame straight from the columns; no string-key
    # round trip, so names containing any character are kept intact.
    player_percentage_df = pd.DataFrame(
        {
            'player': df["Top_Team_Scorer_Name"],
            'team': df["Squad"],
            'percentage': share,
        }
    )

    fig = px.bar(
        player_percentage_df,
        x="team",
        y="percentage",
        title="Percentage of Goals of Leading Goal Scorers on every Premier League Team",
        color="percentage",
        text="player",
        color_continuous_scale="earth",
        height=600,
    )
    fig.show()

    # write_html does not create missing parent folders.
    os.makedirs("./dashboards", exist_ok=True)
    fig.write_html("./dashboards/TopScorerDashboard.html")
    
# Build the top-scorer chart from ./data/prem_league_week14.csv (that file must already exist).
createPremLeagueTopScorerDashboard(14)