In [1]:
library(repr)
library(tidyverse)
library(tidymodels)
library(recipe)
options(repr.matrix.max.rows = 10)
source("cleanup.R")

── [1mAttaching core tidyverse packages[22m ──────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mggplot2  [39m 3.5.1     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtidyr    [39m 1.3.1
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()
[36mℹ[39m Use the conflicted package ([3m[34m<http://conflicted.r-lib.org/>[39m[23m) to force all conflicts to become errors
── [1mAttaching packages[22m ────────────────────────────────────── tidymodels 1.1.1 ──

[32m✔[39m [34mbroom       [39m 1.0.6     [32m✔[39m [34mrsample     [39

ERROR: Error in library(recipe): there is no package called ‘recipe’


In [None]:
players<-read_csv("players.csv")
players

In [None]:
players<-players|>
select(experience, subscribe, played_hours)
players

In [None]:
options(repr.plot.width = 10, repr.plot.height = 10) 
player_experience_plot<-players|>
ggplot(aes(x=experience, fill=subscribe))+
geom_bar(position="fill")+
labs(x="Player Experience Level", y="Proportion", fill="Subscribed")+
scale_fill_manual(values = c("TRUE" = "purple", "FALSE" = "pink")) +
ggtitle("Player Experience Level vs. Subscribers (Figure 1)")+
theme(plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),  
axis.title = element_text(size = 14),                             
axis.text = element_text(size = 13))
player_experience_plot

In [None]:
options(repr.plot.width = 10, repr.plot.height = 10) 
hours_played_plot<-players|>
ggplot(aes(x=played_hours, fill=subscribe))+
geom_histogram(position="dodge", binwidth=30)+
labs(x="Hours Played", y="Proportion", fill="Subscribed")+
scale_fill_manual(values = c("TRUE" = "purple", "FALSE" = "pink")) +
ggtitle("Hours Played vs. Subscribers (Figure 2)")+
theme(plot.title = element_text(hjust = 0.5, size = 18, face = "bold"),  
axis.title = element_text(size = 14),                             
axis.text = element_text(size = 13))
hours_played_plot

In [None]:
players <- players|>
mutate(subscribe = factor(subscribe, levels = c("FALSE", "TRUE")))
players

In [None]:
data_split<-initial_split(players, prop = 0.8, strata = subscribe)
data_split
training_data<-training(data_split)
training_data
testing_data<-testing(data_split)
testing_data

In [None]:
subscribe_recipe<-recipe(subscribe~experience+played_hours, data=training_data)|>
step_scale(all_predictors())|>
step_center(all_predictors())
subscribe_recipe

In [None]:
subscribe_spec<-nearest_neighbor(weight_func="rectangular", neighbors=tune())|>
set_engine("kknn")|>
set_mode("classification")
subscribe_spec

In [None]:
subscribe_workflow<-workflow()|>
add_recipe(subscribe_recipe)|>
add_model(subscribe_spec)|>
fit(data=training_data)
subscribe_workflow

In [None]:
subscribe_folds<-vfold_cv(training_data, v = 5, strata = subscribe)
subscribe_folds

In [None]:
subscribe_resample_fit<-workflow()|>
add_recipe(fruit_recipe)|>
add_model(knn_spec)|>
fit_resamples(resamples=fruit_vfold)
fruit_resample_fit