In [None]:
# Packages attached for this notebook. The order is left as-is because later
# attachments can mask functions from earlier ones.
library(tidyverse)
library(repr)
library(readxl)
library(tidymodels)
# Run the project-local script cleanup.R; what it defines is not visible here.
source("cleanup.R")
# Limit how many rows are shown when a table is displayed in notebook output.
options(repr.matrix.max.rows = 6)

## Title

## Methods and Results

In [None]:
# Load the player data, keep only the columns used in the analysis, and drop
# rows where Age or played_hours is missing.
player_data <- read_csv("players.csv") |>
  select(subscribe, played_hours, Age) |>
  filter(!is.na(Age), !is.na(played_hours))

# initial_split() assigns rows to the two partitions at random. Fixing the
# seed makes the split, and everything computed from it, reproducible.
set.seed(2024)

# Hold out a quarter of the rows for testing, stratifying on subscribe so both
# partitions keep a similar share of subscribers.
player_split <- initial_split(player_data, prop = 3 / 4, strata = subscribe)
player_train <- training(player_split)
player_test <- testing(player_split)

In [None]:
# Overview of the cleaned data: row count, column count, then per-column
# summary statistics.

player_data |> nrow()
player_data |> ncol()

player_data |> summary()

**Summary Table of Variables in `player_data`**

|Variable Name|Data Type|Description/Meaning|Summary Statistics/Values|
|:-------------:|:---------:|:-------------------:|:-------------------------:|
|subscribe| logical | Whether the player is subscribed to the magazine | TRUE = 142, FALSE = 52|
|played_hours| numeric | Total hours played by each player | Mean = 5.95, Median = 0.10, Min = 0.00, Max = 223.10| 
|Age| numeric | Player's age in years | Mean = 21.14, Median = 19.00, Min = 9.00, Max = 58.00|

Total number of columns: 3 <br>
Total number of rows: 194

In [None]:
# Exploratory plot: histogram of player ages using 5-year bins.

options(repr.plot.width = 14, repr.plot.height = 8)

age_histogram <- ggplot(data = player_data, mapping = aes(x = Age)) +
  geom_histogram(binwidth = 5) +
  ggtitle("Distribution of Player Age") +
  labs(
    x = "Age of Players (Years)",
    y = "Number of Players"
  )

age_histogram

In [None]:
# Exploratory plot: histogram of total hours played using 30-hour bins.
options(repr.plot.width = 14, repr.plot.height = 8)

played_hours_histogram <- ggplot(
  data = player_data,
  mapping = aes(x = played_hours)
) +
  geom_histogram(binwidth = 30) +
  ggtitle("Distribution of Hours Played") +
  labs(
    x = "Hours Played (hrs)",
    y = "Number of Players"
  )

played_hours_histogram

In [None]:
# Exploratory plot: hours played against age, one point per player, coloured
# by the subscribe column.
Hours_vs_age <- ggplot(
  data = player_data,
  mapping = aes(x = Age, y = played_hours, color = subscribe)
) +
  geom_point() +
  ggtitle("Age of Players vs Hours played and if Subscribed") +
  labs(
    x = "Age of Players (yrs)",
    y = "Hours played (hrs)",
    color = "Subscribed or Not"
  )

Hours_vs_age