In [2]:
import pandas as pd
import os
import plotly.express as px

In [2]:
# Function that fetches the data from the website and produces the csv files
def createPremierLeagueDataForWeek():
    """Download the current Premier League table and save it as a weekly CSV.

    The first table found on the FBref Premier League page is read and trimmed
    to the columns listed in ``prem_columns``. The "Top Team Scorer" column is
    replaced by two columns, ``Top_Team_Scorer_Name`` and
    ``Top_Team_Scorer_Num_Goals`` (see ``_split_top_scorer``).

    The CSV is only written when every squad has the same "MP" value, because
    that value is used as the week number in the file name.

    Returns:
        bool: True after ``data/prem_league_week<MP>.csv`` has been written,
        False (nothing written) when the "MP" column does not hold exactly one
        distinct value.
    """
    # URL for the data we want to retrieve
    premier_league_url = "https://fbref.com/en/comps/9/Premier-League-Stats"
    prem_columns = ["Rk", "Squad", "MP", "W", "L", "GF", "GA", "GD", "Pts", "Pts/MP", "Last 5", "Top Team Scorer"]

    # Only the first table on the page is kept
    prem_league_df = pd.read_html(premier_league_url)[0]

    # Build a brand-new frame (no assignment into a slice of the scraped table):
    # the kept columns minus the combined scorer text, followed by the two
    # columns derived from it.
    scorer_df = _split_top_scorer(prem_league_df["Top Team Scorer"])
    prem_df = pd.concat(
        [prem_league_df[prem_columns].drop(columns="Top Team Scorer"), scorer_df],
        axis=1,
    )

    # The week number is taken from "MP", so it must be the same for all squads
    matches_played = prem_df["MP"]
    if matches_played.nunique() != 1:
        print("Error occured")
        return False

    # Create the csv file for the indicated week of matchplay
    folder_path = 'data'
    os.makedirs(folder_path, exist_ok=True)

    file_path = os.path.join(folder_path, "prem_league_week" + str(matches_played.iloc[0]) + ".csv")
    prem_df.to_csv(file_path, index=False)
    print("Successfully created data")
    return True


def _split_top_scorer(top_scorer):
    """Split "<name> - <goals>" strings into separate name and goals columns.

    The split happens at the last hyphen that is followed only by digits, so
    hyphens inside the name part are preserved. Surrounding whitespace is
    removed from the name. Values that do not match the pattern (including
    missing values) produce missing entries in both columns.

    Args:
        top_scorer (pd.Series): string column holding the combined text.

    Returns:
        pd.DataFrame: columns ``Top_Team_Scorer_Name`` (str) and
        ``Top_Team_Scorer_Num_Goals`` (nullable integer), indexed like the input.
    """
    pattern = (
        r"^\s*(?P<Top_Team_Scorer_Name>.*?)"
        r"\s*-\s*(?P<Top_Team_Scorer_Num_Goals>\d+)\s*$"
    )
    parts = top_scorer.str.extract(pattern)
    # "Int64" (capital I) keeps integer values even if some rows are missing
    parts["Top_Team_Scorer_Num_Goals"] = pd.to_numeric(
        parts["Top_Team_Scorer_Num_Goals"]
    ).astype("Int64")
    return parts


In [3]:
# Scrape the current standings and write the weekly CSV under ./data;
# the function prints a status line and returns True on success, False otherwise.
createPremierLeagueDataForWeek()

Successfully created data


True

In [11]:
def createPremierLeaguePointsDashboard(week_num):
    """Plot total points per squad for one saved week and export it as HTML.

    Reads ``./data/prem_league_week<week_num>.csv``, draws a bar chart of
    "Pts" per "Squad", and overlays dashed horizontal lines at the points
    totals found in rows 3, 4 and 17 (0-based) of the file, labelled as the
    Champions League, Europa League and relegation cut-offs.
    NOTE(review): those row positions only correspond to 4th/5th/18th place if
    the CSV rows are ordered by league rank -- confirm for hand-edited files.

    The figure is shown inline and then written to
    ``./dashboards/PremierLeagueTable.html``; the folder is created if needed.

    Args:
        week_num: week number used to build the CSV file name (converted with
            ``str``).
    """
    path = "./data/prem_league_week" + str(week_num) + ".csv"
    df = pd.read_csv(path)

    fig = px.bar(
        df,
        x="Squad",
        y="Pts",
        title="Total Points : EPL 2023 - 2024",
        color="Pts",
        text="Pts",
        color_continuous_scale="oranges",
        height=600,
    )
    fig.update_layout(
        xaxis_title="Premier League Squad",
        yaxis_title="Points"
    )

    # (0-based row position, line colour, annotation) for each cut-off line
    cutoff_lines = [
        (3, "green", "UEFA Champions League Qualification"),
        (4, "orange", "Europa League Qualification"),
        (17, "red", "Relegation"),
    ]
    for row_pos, line_color, label in cutoff_lines:
        # Skip a cut-off the table is too short to contain instead of raising
        if row_pos >= len(df):
            continue
        fig.add_hline(
            y=df["Pts"].iloc[row_pos],
            line_width=3,
            line_dash="dash",
            line_color=line_color,
            annotation_text=label,
            annotation_position="bottom right",
        )

    fig.show()

    # write_html does not create missing parent folders
    os.makedirs("./dashboards", exist_ok=True)
    fig.write_html("./dashboards/PremierLeagueTable.html")


In [12]:
# Build the points dashboard for week 15; reads ./data/prem_league_week15.csv.
createPremierLeaguePointsDashboard(15)

In [6]:
def createPremLeagueTopScorerDashboard(week_num):
    """Plot each squad's top scorer's share of the team's goals and export it.

    Reads ``./data/prem_league_week<week_num>.csv`` and, for every row,
    computes ``Top_Team_Scorer_Num_Goals / GF`` rounded to two decimals. The
    result is a fraction (e.g. 0.39), not a value scaled to 0-100. One bar is
    drawn per squad, labelled with the scorer's name.

    The figure is shown inline and then written to
    ``./dashboards/TopScorerDashboard.html``; the folder is created if needed.

    Args:
        week_num: week number used to build the CSV file name (converted with
            ``str``).
    """
    path = "./data/prem_league_week" + str(week_num) + ".csv"
    df = pd.read_csv(path)

    # Column-wise arithmetic: no need to pack name and squad into one string
    # key and split it back, which mangled labels containing the separator.
    player_percentage_df = pd.DataFrame(
        {
            'player': df["Top_Team_Scorer_Name"],
            'team': df["Squad"],
            'percentage': (df["Top_Team_Scorer_Num_Goals"] / df["GF"]).round(2),
        }
    )

    fig = px.bar(
        player_percentage_df,
        x="team",
        y="percentage",
        title="Percentage of Goals of Leading Goal Scorers on every Premier League Team",
        color="percentage",
        text="player",
        color_continuous_scale="earth",
        height=600,
    )
    fig.update_layout(
        xaxis_title="Premier League Squad",
        yaxis_title="Share of Team Goals"
    )
    fig.show()

    # write_html does not create missing parent folders
    os.makedirs("./dashboards", exist_ok=True)
    fig.write_html("./dashboards/TopScorerDashboard.html")
    
# Build the top-scorer dashboard for week 15; reads ./data/prem_league_week15.csv.
createPremLeagueTopScorerDashboard(15)

In [12]:
# Collect rank, squad and points from every saved weekly CSV (one frame per week).
# Only files named prem_league_week<digits>.csv are read, so stray entries in
# ./data (checkpoint folders, other exports) are ignored, and the week label is
# parsed from the name instead of a fixed slice so single-digit weeks work too.
data_dir = "./data"
file_prefix = "prem_league_week"
file_suffix = ".csv"

weekly_files = []
for file in os.listdir(data_dir):
    week_label = file[len(file_prefix):-len(file_suffix)]
    if file.startswith(file_prefix) and file.endswith(file_suffix) and week_label.isdigit():
        weekly_files.append((int(week_label), file))

# os.listdir order is arbitrary; sort numerically so df_lst is in week order
df_lst = []
for week_num, file in sorted(weekly_files):
    df = pd.read_csv(os.path.join(data_dir, file))
    df["Week"] = str(week_num)  # kept as text, e.g. "12"
    df_lst.append(df[["Rk", "Squad", "Pts", "Week"]])

# Preview the earliest week that was found
df_lst[0]

Unnamed: 0,Rk,Squad,Pts,Week
0,1,Manchester City,28,12
1,2,Liverpool,27,12
2,3,Arsenal,27,12
3,4,Tottenham,26,12
4,5,Aston Villa,25,12
5,6,Manchester Utd,21,12
6,7,Newcastle Utd,20,12
7,8,Brighton,19,12
8,9,West Ham,17,12
9,10,Chelsea,16,12


In [6]:
import plotly.express as px
import pandas as pd

# Small hand-written sample: one row per year
data = dict(
    year=[2015, 2016, 2017, 2018, 2019],
    lifeexp=[75, 74, 72, 70, 69],
    state=["kerala", "punjab", "karnataka", "andhra", "odisha"],
    ratio=[74, 73.9, 71.5, 69.8, 69],
)
df = pd.DataFrame(data)

# Base figure: life expectancy against year as a line
fig = px.line(df, x="year", y="lifeexp")

# Second trace on the same axes: ratio against year
fig.add_scatter(x=df["year"], y=df["ratio"])

# Render the combined figure
fig.show()