In [1]:
library(tidyverse)
library(repr)
library(tidymodels)

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.5.1     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors
── [1mAttaching packages[22m ────────────────────────────────────── tidymodels 1.1.1 ──

[32m✔[39m [34mbroom       [39m 1.0.6     [32m✔[39m [34mrsample     [39

In [6]:
players <- read_csv("https://raw.githubusercontent.com/crmnwn06/minecraft-data/refs/heads/main/players.csv" )
head(players)

[1mRows: [22m[34m196[39m [1mColumns: [22m[34m7[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (4): experience, hashedEmail, name, gender
[32mdbl[39m (2): played_hours, Age
[33mlgl[39m (1): subscribe

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


experience,subscribe,hashedEmail,played_hours,name,gender,Age
<chr>,<lgl>,<chr>,<dbl>,<chr>,<chr>,<dbl>
Pro,True,f6daba428a5e19a3d47574858c13550499be23603422e6a0ee9728f8b53e192d,30.3,Morgan,Male,9
Veteran,True,f3c813577c458ba0dfef80996f8f32c93b6e8af1fa939732842f2312358a88e9,3.8,Christian,Male,17
Veteran,False,b674dd7ee0d24096d1c019615ce4d12b20fcbff12d79d3c5a9d2118eb7ccbb28,0.0,Blake,Male,17
Amateur,True,23fe711e0e3b77f1da7aa221ab1192afe21648d47d2b4fa7a5a659ff443a0eb5,0.7,Flora,Female,21
Regular,True,7dc01f10bf20671ecfccdac23812b1b415acd42c2147cb0af4d48fcce2420f3e,0.1,Kylie,Male,21
Amateur,True,f58aad5996a435f16b0284a3b267f973f9af99e7a89bee0430055a44fa92f977,0.0,Adrian,Female,17


In [41]:
clean_player <- players |> 
select(subscribe,played_hours,Age) |>
mutate(subscribe=as_factor(subscribe)) |>
na.omit()
head(clean_player)

subscribe,played_hours,Age
<fct>,<dbl>,<dbl>
True,30.3,9
True,3.8,17
False,0.0,17
True,0.7,21
True,0.1,21
True,0.0,17


In [20]:
set.seed(2025) 
player_split <- initial_split(clean_player, prop = 0.8, strata = subscribe)
player_train <- training(player_split)
player_test <- testing(player_split)

In [21]:
spec <- nearest_neighbor(weight_func = "rectangular", neighbors = tune()) |>
  set_engine("kknn") |>
  set_mode("classification")

In [22]:
recipe <- recipe(subscribe ~ ., data = player_train) |>
  step_scale(all_predictors()) |>
  step_center(all_predictors())

In [45]:
k_vals <- tibble(neighbors = seq(from = 1, to = 30, by = 1))

In [46]:
player_fold <- vfold_cv(player_train, v = 5, strata = subscribe)

In [47]:
knn_results <- workflow() |>
  add_recipe(recipe) |>
  add_model(spec) |>
  tune_grid(resamples = player_fold, grid = k_vals) |>
  collect_metrics()

accuracies <- knn_results |>
  filter(.metric == "accuracy")

accuracies
#19 is best K 

neighbors,.metric,.estimator,mean,n,std_err,.config
<dbl>,<chr>,<chr>,<dbl>,<int>,<dbl>,<chr>
1,accuracy,binary,0.4872984,5,0.021885027,Preprocessor1_Model01
2,accuracy,binary,0.4872984,5,0.021885027,Preprocessor1_Model02
3,accuracy,binary,0.5385081,5,0.029613802,Preprocessor1_Model03
4,accuracy,binary,0.5185081,5,0.034894319,Preprocessor1_Model04
5,accuracy,binary,0.583078,5,0.032368318,Preprocessor1_Model05
6,accuracy,binary,0.583078,5,0.032368318,Preprocessor1_Model06
7,accuracy,binary,0.5903763,5,0.024304465,Preprocessor1_Model07
8,accuracy,binary,0.5837097,5,0.022180205,Preprocessor1_Model08
9,accuracy,binary,0.5837097,5,0.022180205,Preprocessor1_Model09
10,accuracy,binary,0.5703763,5,0.023688333,Preprocessor1_Model10
