In [None]:
library(tidyverse)
library(tibble)
library(repr)
library(tidymodels)
library(GGally)
library(ISLR)
# repr display option: show at most 6 rows when a table is rendered.
options(repr.matrix.max.rows = 6)
# Execute the helper script cleanup.R (its contents are not visible here).
source("cleanup.R")

In [None]:
# Load the raw player data from players.csv and display it.
players <- "players.csv" |>
  read_csv()

players

In [None]:
# Encode the experience label as a number (Beginner = 1 ... Pro = 5).
# Labels not listed in the lookup, and missing labels, become NA.
experience_scores <- c(
  Beginner = 1,
  Amateur = 2,
  Regular = 3,
  Veteran = 4,
  Pro = 5
)

clean_players <- mutate(
  players,
  experience_num = unname(experience_scores[as.character(experience)])
)

clean_players

In [None]:
# Split the cleaned data into 80% training / 20% testing, stratified on the
# outcome played_hours.
#
# initial_split() assigns rows at random, so the seed is fixed first: without
# it the split (and every model, tuned k and error estimate derived from it)
# changes on each run and the results cannot be reproduced.
set.seed(2024)

player_split <- initial_split(
  clean_players,
  prop = 0.80,
  strata = played_hours
)
player_training <- training(player_split)
player_testing <- testing(player_split)

In [None]:
# Linear regression of played_hours on experience_num and Age.

# Preprocessing: only declares the outcome and the two predictors.
# NOTE(review): the recipe is declared against clean_players (all rows), while
# the workflow below is fitted on player_training only -- confirm intended.
player_recipe <- recipe(
  played_hours ~ experience_num + Age,
  data = clean_players
)

# Model specification: linear regression using the "lm" engine.
player_spec <- linear_reg() |>
  set_mode("regression") |>
  set_engine("lm")

# Combine recipe and model, then fit on the training set.
player_fit <- workflow() |>
  add_model(player_spec) |>
  add_recipe(player_recipe) |>
  fit(data = player_training)

# Predictions on the training set, placed alongside the training columns.
player_preds <- bind_cols(
  predict(player_fit, player_training),
  player_training
)

player_preds

In [None]:
# Evaluate the linear model on the held-out test set.
player_test_results <- metrics(
  bind_cols(predict(player_fit, player_testing), player_testing),
  truth = played_hours,
  estimate = .pred
)

# Extract the "rmse" estimate from the metrics table as a plain number.
player_rmspe <- player_test_results |>
  filter(.metric == "rmse") |>
  pull(.estimate)

player_rmspe

In [None]:
# K-nearest-neighbours regression of played_hours on experience_num, with the
# number of neighbours left as a tuning parameter.

# Preprocessing: scale, then center, every predictor.
player_recipe_2 <- recipe(played_hours ~ experience_num, data = clean_players) |>
  step_scale(all_predictors()) |>
  step_center(all_predictors())

# Model specification: unweighted ("rectangular") kNN via the "kknn" engine.
player_spec_2 <- nearest_neighbor(
  weight_func = "rectangular",
  neighbors = tune()
) |>
  set_mode("regression") |>
  set_engine("kknn")

# 5-fold cross-validation folds of the training set, stratified on the outcome.
player_vfold <- vfold_cv(player_training, v = 5, strata = played_hours)

# Workflow bundling the recipe with the tunable model.
player_workflow_2 <- workflow() |>
  add_model(player_spec_2) |>
  add_recipe(player_recipe_2)

player_workflow_2


In [None]:
# Tune the kNN workflow over the candidate grid using the cross-validation
# folds, then collect the resampled metrics.
# NOTE(review): gridvals is not defined in the visible part of this notebook;
# presumably it comes from cleanup.R or an earlier cell -- confirm it exists
# and holds the candidate `neighbors` values.
player_results <- collect_metrics(
  tune_grid(player_workflow_2, resamples = player_vfold, grid = gridvals)
)

player_results

In [None]:
# Pick the tuning result with the lowest cross-validated RMSE.
#
# The row is chosen on `mean` (the RMSE averaged over the folds), not on
# `std_err`: the smallest standard error only identifies the most stable
# estimate, not the most accurate model.
# with_ties = FALSE guarantees exactly one row, so that pulling `neighbors`
# from this result yields a single k.
player_min <- player_results |>
  filter(.metric == "rmse") |>
  slice_min(mean, n = 1, with_ties = FALSE)

player_min

In [None]:
# Refit kNN regression with the selected number of neighbours and evaluate it
# on the test set.

# Number of neighbours taken from the selected tuning row.
k_min <- player_min[["neighbors"]]

# Same model family as during tuning, now with k fixed.
player_best_spec <- nearest_neighbor(
  weight_func = "rectangular",
  neighbors = k_min
) |>
  set_mode("regression") |>
  set_engine("kknn")

# Fit the final workflow on the training set.
player_best_fit <- workflow() |>
  add_model(player_best_spec) |>
  add_recipe(player_recipe_2) |>
  fit(data = player_training)

# Test-set predictions placed alongside the test columns.
player_summary <- bind_cols(
  predict(player_best_fit, player_testing),
  player_testing
)

# Regression metrics comparing the predictions with the observed played_hours.
player_summary_2 <- metrics(
  player_summary,
  truth = played_hours,
  estimate = .pred
)

player_summary
player_summary_2