In [16]:
library(tidyverse)
library(tidymodels)
library(repr)
library(ggplot2)
library(lubridate)
library(forcats)
library(tidyr)

In [46]:
# Load the player-level table from the project's data directory.
players <- read_csv(file = "data/players.csv")

[1mRows: [22m[34m196[39m [1mColumns: [22m[34m7[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (4): experience, hashedEmail, name, gender
[32mdbl[39m (2): played_hours, Age
[33mlgl[39m (1): subscribe

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [47]:
# Keep only the rows in which every one of the listed columns is present
# (a row is dropped as soon as any of them is NA).
players_cleaned <- players |>
  drop_na(
    experience, subscribe, hashedEmail, played_hours,
    name, gender, Age
  )

In [67]:
# Number of rows per hashedEmail in the cleaned players table.
# NOTE(review): this counts rows of players_cleaned, not rows of a sessions
# table, and the printed head shows 1 for every row -- confirm whether the
# session counts were meant to come from a separate sessions dataset.
player_sessions <- players_cleaned |>
  count(hashedEmail, name = "number_of_sessions")

# Attach the per-player count, discard rows without one, keep the modelling
# columns, and turn the two categorical columns into factors.
player_combined <- players_cleaned |>
  left_join(player_sessions, by = "hashedEmail") |>
  filter(!is.na(number_of_sessions)) |>
  select(experience, subscribe, played_hours, number_of_sessions) |>
  mutate(across(c(experience, subscribe), as.factor))

head(player_combined)

experience,subscribe,played_hours,number_of_sessions
<fct>,<fct>,<dbl>,<int>
Pro,True,30.3,1
Veteran,True,3.8,1
Veteran,False,0.0,1
Amateur,True,0.7,1
Regular,True,0.1,1
Amateur,True,0.0,1


In [78]:
# K-nearest-neighbours classifier: 5 neighbours, unweighted ("rectangular")
# votes, fitted with the kknn engine.
knn_spec <- nearest_neighbor(weight_func = "rectangular", neighbors = 5) |>
  set_engine("kknn") |>
  set_mode("classification")

# Untrained preprocessing steps: scale, then center, played_hours.
# Kept separate from the prepped recipe below because a workflow has to be
# given an untrained recipe.
player_recipe_untrained <- recipe(
  subscribe ~ experience + played_hours + number_of_sessions,
  data = player_combined
) |>
  step_scale(played_hours) |>
  step_center(played_hours)

# Fit the model through a workflow so the recipe is actually applied to the
# data the classifier sees (previously the model was fitted on the raw
# columns and the recipe was only used to produce scaled_player).
knn_fit <- workflow() |>
  add_recipe(player_recipe_untrained) |>
  add_model(knn_spec) |>
  fit(data = player_combined)

# Trained recipe and the preprocessed table, kept for inspection.
player_recipe <- prep(player_recipe_untrained)
scaled_player <- bake(player_recipe, player_combined)
scaled_player

experience,played_hours,number_of_sessions,subscribe
<fct>,<dbl>,<int>,<fct>
Pro,0.85603962,1,TRUE
Veteran,-0.07385234,1,TRUE
Veteran,-0.20719534,1,FALSE
Amateur,-0.18263216,1,TRUE
Regular,-0.20368631,1,TRUE
Amateur,-0.20719534,1,TRUE
Regular,-0.20719534,1,TRUE
Amateur,-0.20719534,1,FALSE
Amateur,-0.20368631,1,TRUE
Veteran,-0.20719534,1,TRUE
