# TITLE #

Members:

In [None]:
#Loading needed libraries
library(tidyverse)
library(dplyr)

In [None]:
#Reading in .csv files
# Limit how many rows are shown when a data frame is displayed.
options(repr.matrix.max.rows = 6)

# Store each dataset under a name so later cells can use it: the summary
# statistics code reads `players_csv`, which would otherwise not exist.
players_csv <- read_csv('https://raw.githubusercontent.com/Yamasaur/DSCI-100-Group-Project-Final-Report-Group-10-/refs/heads/main/players.csv')
sessions_csv <- read_csv('https://raw.githubusercontent.com/Yamasaur/DSCI-100-Group-Project-Final-Report-Group-10-/refs/heads/main/sessions.csv')

# Display both datasets, as this cell did before the results were assigned.
players_csv
sessions_csv

# Intro #

Description of each variable is listed in the following table.

|Variable:|Data type:|Possible Values:|Description:|
|:----|:----|:----|:----|
| experience | character (chr) | Beginner, Amateur, Regular, Veteran, Pro | The self-declared skill level of the player |
| subscribe | logical (lgl) | TRUE, FALSE | Whether or not the player is subscribed |
| hashedEmail | character (chr) | N/A | A unique sequence of numbers and letters providing a secure way to represent a player's e-mail |
| played_hours | double (dbl) | N/A | The number of hours a player has put into the server |
| name | character (chr) | N/A | The name of the player |
| gender | character (chr) | Male, Female, Non-binary, Two-Spirited, Prefer not to say, Other, Agender | Gender of the player |
| Age | double (dbl) | N/A | Age of the player |

* **Number of observations?**
  
    196, determined as there are 196 rows in the dataset

* **Number of Variables?**

    7, determined as there are 7 columns

* **Summary Statistics:** (Code/calculations in the following cell)

<br>

Distribution of experience levels:

|Value:|Percentage:|Count:|
|:----|:----|:----|
| Beginner | 17.86% | (35 players) |
| Amateur | 32.14% | (63 players) |
| Regular | 18.37% | (36 players) |
| Veteran | 24.49% | (48 players) |
| Pro | 7.14% | (14 players) |

<br>Distribution of subscribed and unsubscribed players:

|Status:|Percentage:|Count:|
|:----|:----|:----|
| Subscribed | 73.47% | (144 players) |
| Unsubscribed | 26.53% | (52 players) |

<br>Lowest, Median, Mean, and Highest Playtime (hours):

|Lowest:|Median:|Mean:|Highest:|
|:----|:----|:----|:----|
| 0 | 0.1 | 5.85 | 233.1 |

<br>Distribution of Gender:

|Gender:|Percentage:|Count:|
|:----|:----|:----|
| Male | 63.27% | (124 players) |
| Female | 18.88% | (37 players) |
| Non-binary | 7.65% | (15 players) |
| Two-Spirited | 3.06% | (6 players) |
| Other | 0.51% | (1 player) |
| Agender | 1.02% | (2 players) |
| Prefer not to say | 5.61% | (11 players) |

<br>Lowest, Median, Mean, and Highest age (years):

|Lowest:|Median:|Mean:|Highest:|
|:----|:----|:----|:----|
| 9 | 19 | 21.14 | 58 |

<br>

# Method and Results #

In [None]:
#Summary Statistics: (Please run the cells at the very top before proceeding! Also, to see an output, uncomment the matching line at the end of its code section, i.e. to see the value of num_Beginner,
#                     uncomment the line #num_Beginner at the end of its respective code.)

#--------------- Distribution of experience levels: ---------------#

# For each experience level this section stores the number of players who
# reported it (num_*) and that count as a percentage of all players
# (num_*_perc, rounded to 2 decimal places).

# Percentages are taken out of the actual number of rows in players_csv rather
# than a hard-coded 196, so they stay correct if the dataset changes.
total_players <- nrow(players_csv)

#number of Beginner
num_Beginner <- players_csv |>
    filter(experience == "Beginner") |>
    nrow()
num_Beginner_perc <- round(num_Beginner / total_players * 100, 2)
#num_Beginner
#num_Beginner_perc

#number of Amateur
num_Amateur <- players_csv |>
    filter(experience == "Amateur") |>
    nrow()
num_Amateur_perc <- round(num_Amateur / total_players * 100, 2)
#num_Amateur
#num_Amateur_perc

#number of Regular
num_Regular <- players_csv |>
    filter(experience == "Regular") |>
    nrow()
num_Regular_perc <- round(num_Regular / total_players * 100, 2)
#num_Regular
#num_Regular_perc

#number of Veteran
num_Veteran <- players_csv |>
    filter(experience == "Veteran") |>
    nrow()
num_Veteran_perc <- round(num_Veteran / total_players * 100, 2)
#num_Veteran
#num_Veteran_perc

#number of Pro
num_Pro <- players_csv |>
    filter(experience == "Pro") |>
    nrow()
num_Pro_perc <- round(num_Pro / total_players * 100, 2)
#num_Pro
#num_Pro_perc

#------ Distribution of subscribed and unsubscribed players: ------#

# Stores the number of subscribed / unsubscribed players (num_sub, num_unsub)
# and each count as a percentage of all players, rounded to 2 decimal places.

# Percentages are taken out of the actual number of rows in players_csv rather
# than a hard-coded 196, so they stay correct if the dataset changes.
total_players <- nrow(players_csv)

#number of subscribed
num_sub <- players_csv |>
    filter(subscribe == TRUE) |>
    nrow()
num_sub_perc <- round(num_sub / total_players * 100, 2)
#num_sub
#num_sub_perc

#number unsubscribed
num_unsub <- players_csv |>
    filter(subscribe == FALSE) |>
    nrow()
num_unsub_perc <- round(num_unsub / total_players * 100, 2)
#num_unsub
#num_unsub_perc

#----------- Lowest, Median, Mean, and Highest Playtime: ----------#

# Each statistic is computed on the played_hours column with missing values
# skipped; the median and mean are rounded to 2 decimal places.

#lowest playtime
low_pt <- players_csv |>
    pull(played_hours) |>
    min(na.rm = TRUE)
#low_pt

#median playtime
med_pt <- players_csv |>
    pull(played_hours) |>
    median(na.rm = TRUE) |>
    round(2)
#med_pt

#mean playtime
mean_pt <- players_csv |>
    pull(played_hours) |>
    mean(na.rm = TRUE) |>
    round(2)
#mean_pt

#highest playtime
high_pt <- players_csv |>
    pull(played_hours) |>
    max(na.rm = TRUE)
#high_pt

#-------------------- Distribution of Gender: ---------------------#

# For each gender value this section stores the number of players who
# reported it (num_*) and that count as a percentage of all players
# (num_*_perc, rounded to 2 decimal places).

# Percentages are taken out of the actual number of rows in players_csv rather
# than a hard-coded 196, so they stay correct if the dataset changes.
total_players <- nrow(players_csv)

#number of male
num_male <- players_csv |>
    filter(gender == "Male") |>
    nrow()
num_male_perc <- round(num_male / total_players * 100, 2)
#num_male
#num_male_perc

#number of female
num_female <- players_csv |>
    filter(gender == "Female") |>
    nrow()
num_female_perc <- round(num_female / total_players * 100, 2)
#num_female
#num_female_perc

#number of Non-binary
num_nb <- players_csv |>
    filter(gender == "Non-binary") |>
    nrow()
num_nb_perc <- round(num_nb / total_players * 100, 2)
#num_nb
#num_nb_perc

#number of Two-Spirited
num_ts <- players_csv |>
    filter(gender == "Two-Spirited") |>
    nrow()
num_ts_perc <- round(num_ts / total_players * 100, 2)
#num_ts
#num_ts_perc

#number of Other
num_oth <- players_csv |>
    filter(gender == "Other") |>
    nrow()
num_oth_perc <- round(num_oth / total_players * 100, 2)
#num_oth
#num_oth_perc

#number of Agender
num_agn <- players_csv |>
    filter(gender == "Agender") |>
    nrow()
num_agn_perc <- round(num_agn / total_players * 100, 2)
#num_agn
#num_agn_perc

#number prefer not to say
num_pns <- players_csv |>
    filter(gender == "Prefer not to say") |>
    nrow()
num_pns_perc <- round(num_pns / total_players * 100, 2)
#num_pns
#num_pns_perc

#------------- Lowest, Median, Mean, and Highest Age: -------------#

# Each statistic is computed on the Age column with missing values skipped;
# the median and mean are rounded to 2 decimal places.

#lowest age
low_age <- players_csv |>
    pull(Age) |>
    min(na.rm = TRUE)
#low_age

#median age
med_age <- players_csv |>
    pull(Age) |>
    median(na.rm = TRUE) |>
    round(2)
#med_age

#mean age
mean_age <- players_csv |>
    pull(Age) |>
    mean(na.rm = TRUE) |>
    round(2)
#mean_age

#highest age
high_age <- players_csv |>
    pull(Age) |>
    max(na.rm = TRUE)
#high_age

# Discussion #

# References #