In [7]:
import pandas as pd
import os

In [19]:
# Function that fetches the data from the website and produces the csv files
def createPremierLeagueDataForWeek():
    # URL for the data we want to retrieve
    premier_league_url = "https://fbref.com/en/comps/9/Premier-League-Stats"

    # Only want to keep the first dataframe and the columns indicated below
    prem_league_df = pd.read_html(premier_league_url)[0]
    prem_columns = ["Rk", "Squad", "MP", "W", "L", "GF", "GA", "GD", "Pts", "Pts/MP", "Last 5", "Top Team Scorer"]
    prem_df = prem_league_df[prem_columns]

    # Function that helps split the column "Top Team Scorer" into two separate columns
    def split_column(row):
        values = row['Top Team Scorer'].split('-')
        if(len(values)==2):
            return pd.Series({'Top_Team_Scorer_Name': values[0], 'Top_Team_Scorer_Num_Goals': values[1]})
        else:
            return pd.Series({'Top_Team_Scorer_Name': values[0] + values[1], 'Top_Team_Scorer_Num_Goals': values[2]})
    prem_df = pd.concat([prem_df, prem_df.apply(split_column, axis=1)], axis=1)
    prem_df = prem_df.loc[:, prem_df.columns!='Top Team Scorer']
    prem_df['Top_Team_Scorer_Num_Goals'] = prem_df['Top_Team_Scorer_Num_Goals'].astype('int')

    # Create the csv file for the indicated week of matchplay
    if(prem_df["MP"].nunique() == 1):
        folder_path = 'data'
        os.makedirs(folder_path, exist_ok=True)

        file_path = os.path.join(folder_path, "prem_league_week" + str(prem_df["MP"].iloc[0]) + ".csv")
        prem_df.to_csv(file_path, index=False)
        print("Successfully created data")
        return True
    else:
        print("Error occured")
        return False

In [20]:
createPremierLeagueDataForWeek()

Successfully created data


True

In [21]:
week12_df = pd.read_csv("./data/prem_league_week12.csv")
week12_df.head()


Unnamed: 0,Rk,Squad,MP,W,L,GF,GA,GD,Pts,Pts/MP,Last 5,Top_Team_Scorer_Name,Top_Team_Scorer_Num_Goals
0,1,Manchester City,12,9,2,32,12,20,28,2.33,L W W W D,Erling Haaland,13
1,2,Liverpool,12,8,1,27,10,17,27,2.25,D W W D W,Mohamed Salah,10
2,3,Arsenal,12,8,1,26,10,16,27,2.25,W D W L W,Eddie Nketiah,5
3,4,Tottenham,12,8,2,24,15,9,26,2.17,W W W L L,Son Heungmin,8
4,5,Aston Villa,12,8,3,29,17,12,25,2.08,D W W L W,Ollie Watkins,6


In [22]:
d = {}
for i in range(len(week12_df)):
    d[week12_df.loc[i, "Top_Team_Scorer_Name"]] = round((week12_df.loc[i, "Top_Team_Scorer_Num_Goals"])/(week12_df.loc[i, "GF"]), 2)
d

{'Erling Haaland ': 0.41,
 'Mohamed Salah ': 0.37,
 'Eddie Nketiah ': 0.19,
 'Son Heungmin ': 0.33,
 'Ollie Watkins ': 0.21,
 'Bruno Fernandes, Scott McTominay ': 0.23,
 'Callum Wilson ': 0.26,
 'Evan Ferguson ': 0.2,
 'Jarrod Bowen ': 0.38,
 'Nicolas Jackson ': 0.29,
 'Bryan Mbeumo ': 0.32,
 'Hwang Heechan ': 0.38,
 'Odsonne Édouard ': 0.42,
 'Taiwo Awoniyi ': 0.29,
 'Bobby Reid, João Palhinha ': 0.2,
 'Dominic Solanke ': 0.55,
 'Carlton Morris ': 0.3,
 'Gustavo Hamer, Cameron Archer ': 0.2,
 'Abdoulaye Doucouré ': 0.29,
 'Lyle Foster ': 0.33}