In [1]:
library(tidyverse)
library(repr)
library(tidymodels)


── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.5.1     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors
── [1mAttaching packages[22m ────────────────────────────────────── tidymodels 1.1.1 ──

[32m✔[39m [34mbroom       [39m 1.0.6     [32m✔[39m [34mrsample     [39

In [5]:
# Load the raw players table from disk, then store gender as a factor so it
# can later serve as a classification label.
players_data <- read_csv("players.csv")
players_data <- mutate(players_data, gender = as_factor(gender))

[1mRows: [22m[34m196[39m [1mColumns: [22m[34m9[39m
[36m──[39m [1mColumn specification[22m [36m────────────────────────────────────────────────────────[39m
[1mDelimiter:[22m ","
[31mchr[39m (4): experience, hashedEmail, name, gender
[32mdbl[39m (2): played_hours, age
[33mlgl[39m (3): subscribe, individualId, organizationName

[36mℹ[39m Use `spec()` to retrieve the full column specification for this data.
[36mℹ[39m Specify the column types or set `show_col_types = FALSE` to quiet this message.


In [None]:
# Fit and evaluate a K-nearest-neighbours classifier that predicts a player's
# gender from their played hours and age.

# Fix the RNG seed so the random train/test split (and therefore the reported
# accuracy) is reproducible from run to run.
set.seed(9456)

options(repr.plot.width = 10, repr.plot.height = 6)

# Keep only players who logged some play time and only the columns the model
# uses. Rows with missing values are dropped as a precaution: the neighbour
# search needs complete predictor values, and incomplete rows would otherwise
# be discarded silently or misalign the predictions.
players <- players_data |>
  filter(played_hours != 0) |>
  select(played_hours, age, gender) |>
  drop_na() |>
  arrange(desc(age))

# Create the train/test split (75% training), stratified on the label so both
# partitions have a similar gender distribution.
players_split <- initial_split(players, prop = 0.75, strata = gender)
players_train <- training(players_split)
players_test <- testing(players_split)

# Preprocess the data: gender is the outcome; played_hours and age are the
# predictors. Both are scaled and centred because KNN is distance-based and
# would otherwise be dominated by the variable with the larger range.
players_recipe <- recipe(gender ~ played_hours + age, data = players_train) |>
  step_scale(all_predictors()) |>
  step_center(all_predictors())

# Train the classifier: unweighted ("rectangular") vote among 5 neighbours.
knn_spec <- nearest_neighbor(weight_func = "rectangular", neighbors = 5) |>
  set_engine("kknn") |>
  set_mode("classification")

knn_fit <- workflow() |>
  add_recipe(players_recipe) |>
  add_model(knn_spec) |>
  fit(players_train)

# Make predictions on the held-out test set. Scoring the training rows would
# overstate performance, since the model has already seen them.
gender_predicted <- predict(knn_fit, players_test) |>
  bind_cols(players_test)

# Compute the accuracy of the test-set predictions.
acc <- gender_predicted |>
  metrics(truth = gender, estimate = .pred_class) |>
  filter(.metric == "accuracy") |>
  pull(.estimate)
acc
