# In [1]:
# ------------------------------------ Install required packages --------------------------
# install.packages("tidyverse")
# install.packages("RColorBrewer")
# install.packages("plotly")
# devtools::install_github("JaseZiv/worldfootballR")

# ------------------ Loading necessary libraries and data preparations --------------------
suppressPackageStartupMessages({
    library(rvest)
    library(tidyverse)
    library(stringr)
    library(worldfootballR)
})
# -----------------------------------------------------------------------------------------

# FBref profile pages of the strikers included in the comparison
player_urls <- c(
    "https://fbref.com/en/players/8e92be30/Alexander-Isak",
    "https://fbref.com/en/players/1f44ac21/Erling-Haaland",
    "https://fbref.com/en/players/21a66f6a/Harry-Kane",
    "https://fbref.com/en/players/8652a85c/Lois-Openda",
    "https://fbref.com/en/players/42fd9c7f/Kylian-Mbappe",
    "https://fbref.com/en/players/8d78e732/Robert-Lewandowski",
    "https://fbref.com/en/players/f7036e1c/Lautaro-Martinez",
    "https://fbref.com/en/players/6f8cd6d0/Marcus-Thuram",
    "https://fbref.com/en/players/79443529/Dusan-Vlahovic",
    "https://fbref.com/en/players/ce50fd99/Jonathan-David",
    "https://fbref.com/en/players/4d5a9185/Viktor-Gyokeres",
    "https://fbref.com/en/players/dc44b03c/Samu-Omorodion"
)

# Leagues and cups of interest, spelled exactly as they are matched against
# the `Comp` column further down. Ligue 1 is included because of Kylian Mbappé.
comps <- c(
    "1. Premier League",
    "1. Bundesliga",
    "1. La Liga",
    "1. Serie A",
    "1. Ligue 1",
    "1. Primeira Liga",
    "1. Champions Lg",
    "2. Europa Lg"
)

# Seasons of interest, matched against the `Season` column (format "YYYY-YYYY")
seasons <- c(
    "2024-2025",
    "2023-2024"
)

# Seconds to wait before every FBref request, to avoid HTTP 429 (rate limit)
# errors. The wait is applied exactly once per request.
request_pause_secs <- 15

# Columns that identify a row in every stat table; used as the join key
join_keys <- c("player_name", "Season", "Squad", "Comp")

# Columns to keep from each stat table, keyed by the `stat_type` value that is
# passed to fb_player_season_stats(). The order of the entries determines the
# column order of the combined table.
stat_columns <- list(
    standard = c(
        "player_name", "Season", "Squad", "Age", "Comp", "Mins_Per_90_Time",
        "Gls", "G_minus_PK", "Ast", "xG_Expected", "npxG_Expected", "xAG_Expected",
        "PrgC_Progression", "PrgP_Progression", "PrgR_Progression",
        "Gls_Per_Minutes", "G_minus_PK_Per_Minutes", "Ast_Per_Minutes",
        "xG_Per_Minutes", "npxG_Per_Minutes", "xAG_Per_Minutes"
    ),
    shooting = c(join_keys, "Sh_Standard", "Sh_per_90_Standard", "SoT_per_90_Standard"),
    gca = c(join_keys, "SCA90_SCA", "GCA90_GCA"),
    possession = c(
        join_keys, "Succ_percent_Take_Ons", "Final_Third_Carries", "CPA_Carries", "Dis_Carries"
    ),
    misc = c(join_keys, "Won_percent_Aerial_Duels", "Off")
)

# Fetch one stat table for one player and reduce it to the rows and columns
# of interest.
#
# player_url: URL of the player's FBref page.
# stat_type:  stat table to request (forwarded to fb_player_season_stats()).
# columns:    character vector of columns to keep; a missing column is an error.
# comps:      competitions to keep (matched against `Comp`).
# seasons:    seasons to keep (matched against `Season`).
#
# Returns the filtered data frame. Errors raised by the request or by a
# missing column propagate to the caller.
fetch_player_stats <- function(player_url, stat_type, columns, comps, seasons) {
    Sys.sleep(request_pause_secs)
    fb_player_season_stats(player_url, stat_type = stat_type) %>%
        filter(Comp %in% comps & Season %in% seasons) %>%
        select(all_of(columns))
}

# One slot per player; slots of players whose download failed stay NULL
all_player_data <- vector("list", length(player_urls))

# Download and combine all stat tables, one player at a time
for (i in seq_along(player_urls)) {
    player_url <- player_urls[i]

    # A failed request (e.g. HTTP 429) must not throw away the players that
    # were already downloaded: report the failure and move on to the next one.
    combined_data <- tryCatch(
        {
            stat_tables <- lapply(names(stat_columns), function(stat_type) {
                fetch_player_stats(
                    player_url, stat_type, stat_columns[[stat_type]], comps, seasons
                )
            })
            # Full-join all five tables on the shared key columns
            Reduce(function(x, y) full_join(x, y, by = join_keys), stat_tables)
        },
        error = function(e) {
            warning(
                "Skipping ", player_url, ": ", conditionMessage(e),
                call. = FALSE
            )
            NULL
        }
    )

    # Assigning NULL with `[[<-` would delete the slot, so only store real data
    if (!is.null(combined_data)) {
        all_player_data[[i]] <- combined_data
    }
}

# Drop the slots of players that could not be downloaded
all_player_data <- Filter(Negate(is.null), all_player_data)

if (length(all_player_data) == 0) {
    stop("No player data could be downloaded; see the warnings above.", call. = FALSE)
}

# Stack the per-player tables into one data set, give the columns readable
# names and put the most recent season first.
all_player_data <- all_player_data %>%
    bind_rows() %>%
    rename(
        # Identifiers
        Player = player_name,
        Competition = Comp,
        # Standard stats
        MinutesPer90 = Mins_Per_90_Time,
        Goals = Gls,
        npGoals = G_minus_PK,
        Assists = Ast,
        xG = xG_Expected,
        npxG = npxG_Expected,
        xAG = xAG_Expected,
        PrgC = PrgC_Progression,
        PrgP = PrgP_Progression,
        PrgR = PrgR_Progression,
        GoalsPer90 = Gls_Per_Minutes,
        npGoalsPer90 = G_minus_PK_Per_Minutes,
        AssistsPer90 = Ast_Per_Minutes,
        xGPer90 = xG_Per_Minutes,
        npxGPer90 = npxG_Per_Minutes,
        xAGPer90 = xAG_Per_Minutes,
        # Shooting
        Shots = Sh_Standard,
        ShotsPer90 = Sh_per_90_Standard,
        SoTPer90 = SoT_per_90_Standard,
        # Shot and goal creation
        SCAPer90 = SCA90_SCA,
        GCAPer90 = GCA90_GCA,
        # Possession
        SuccessfulTakeOnsPercent = Succ_percent_Take_Ons,
        FinalThirdCarries = Final_Third_Carries,
        CarriesPenaltyArea = CPA_Carries,
        Dispossessed = Dis_Carries,
        # Miscellaneous
        AerialDuelsWonPercent = Won_percent_Aerial_Duels,
        Offsides = Off
    ) %>%
    # Season has the format "YYYY-YYYY", so sort on its first four characters
    # (the start year) as a number, newest first
    arrange(desc(as.numeric(substr(Season, 1, 4))))

# Save the result. write.csv() keeps its default row.names = TRUE here, so the
# file starts with an unnamed column of row numbers.
write.csv(all_player_data, "playerstatdf.csv", fileEncoding = "UTF-8")