In [5]:
import pandas as pd
import os
import plotly.express as px

In [5]:
# Function that fetches the data from the website and produces the csv files
def createPremierLeagueDataForWeek():
    # URL for the data we want to retrieve
    premier_league_url = "https://fbref.com/en/comps/9/Premier-League-Stats"

    # Only want to keep the first dataframe and the columns indicated below
    prem_league_df = pd.read_html(premier_league_url)[0]
    prem_columns = ["Rk", "Squad", "MP", "W", "L", "GF", "GA", "GD", "Pts", "Pts/MP", "Last 5", "Top Team Scorer"]
    prem_df = prem_league_df[prem_columns]

    # Function that helps split the column "Top Team Scorer" into two separate columns
    def split_column(row):
        values = row['Top Team Scorer'].split('-')
        if(len(values)==2):
            return pd.Series({'Top_Team_Scorer_Name': values[0], 'Top_Team_Scorer_Num_Goals': values[1]})
        else:
            return pd.Series({'Top_Team_Scorer_Name': values[0] + values[1], 'Top_Team_Scorer_Num_Goals': values[2]})
    prem_df = pd.concat([prem_df, prem_df.apply(split_column, axis=1)], axis=1)
    prem_df = prem_df.loc[:, prem_df.columns!='Top Team Scorer']
    prem_df['Top_Team_Scorer_Num_Goals'] = prem_df['Top_Team_Scorer_Num_Goals'].astype('int')

    # Create the csv file for the indicated week of matchplay
    if(prem_df["MP"].nunique() == 1):
        folder_path = 'data'
        os.makedirs(folder_path, exist_ok=True)

        file_path = os.path.join(folder_path, "prem_league_week" + str(prem_df["MP"].iloc[0]) + ".csv")
        prem_df.to_csv(file_path, index=False)
        print("Successfully created data")
        return True
    else:
        print("Error occured")
        return False

In [6]:
createPremierLeagueDataForWeek()

Successfully created data


True

In [3]:
week13_df = pd.read_csv("./data/prem_league_week13.csv")
week13_df.head()


Unnamed: 0,Rk,Squad,MP,W,L,GF,GA,GD,Pts,Pts/MP,Last 5,Top_Team_Scorer_Name,Top_Team_Scorer_Num_Goals
0,1,Arsenal,13,9,1,27,10,17,30,2.31,D W L W W,Eddie Nketiah,5
1,2,Manchester City,13,9,2,33,13,20,29,2.23,W W W D D,Erling Haaland,14
2,3,Liverpool,13,8,1,28,11,17,28,2.15,W W D W D,Mohamed Salah,10
3,4,Aston Villa,13,9,3,31,18,13,28,2.15,W W L W W,Ollie Watkins,7
4,5,Tottenham,13,8,3,25,17,8,26,2.0,W W L L L,Son Heungmin,8


In [4]:
d = {}
for i in range(len(week13_df)):
    d[week13_df.loc[i, "Top_Team_Scorer_Name"]] = round((week13_df.loc[i, "Top_Team_Scorer_Num_Goals"])/(week13_df.loc[i, "GF"]), 2)
d

{'Eddie Nketiah ': 0.19,
 'Erling Haaland ': 0.42,
 'Mohamed Salah ': 0.36,
 'Ollie Watkins ': 0.23,
 'Son Heungmin ': 0.32,
 'Bruno Fernandes, Scott McTominay ': 0.19,
 'Callum Wilson, Alexander Isak ': 0.23,
 'Evan Ferguson ': 0.21,
 'Jarrod Bowen ': 0.35,
 'Nicolas Jackson ': 0.27,
 'Bryan Mbeumo ': 0.32,
 'Hwang Heechan ': 0.39,
 'Odsonne Édouard ': 0.38,
 'Willian ': 0.23,
 'Taiwo Awoniyi ': 0.25,
 'Dominic Solanke ': 0.43,
 'Carlton Morris ': 0.25,
 'Oliver McBurnie, Gustavo Hamer... ': 0.18,
 'Abdoulaye Doucouré ': 0.29,
 'Lyle Foster ': 0.3}

In [8]:
fig = px.bar(week13_df, 
             x=week13_df.Squad, 
             y=week13_df.Pts,
             title="Total Points : EPL 2023 - 2024",
             color=week13_df.Pts,
             text=week13_df.Pts,
             color_continuous_scale="oranges",
             height=600
            )
fig.show()

In [9]:
from dash import Dash, dcc, html, Input, Output
app = Dash(__name__)

ModuleNotFoundError: No module named 'dash'