## Data Visualization

In [None]:
# import library
library(tidyverse)
library(ggplot2)
library(stringr)



In [None]:
# helper functions

# Element-wise test for the literal string 'True'.
# Returns TRUE where x equals 'True', FALSE for any other non-missing value,
# and NA where x is NA.
to_bool <- function(x) {
  x == "True"
}


In [None]:
# set global variables

# Bin width passed to geom_histogram() and geom_freqpoly() for the
# continuous deciding-factor plots.
BIN_WIDTH <- 2.0


In [None]:
# Read the raw CSV; text columns are kept as character instead of factors.
raw_df <- read.csv(
  file = "data/mds-retention_2019-04-04.csv",
  stringsAsFactors = FALSE
)

#print(head(raw_df))


In [None]:
# data cleaning
# Drop the second data row as irrelevant.
raw_df <- raw_df[-2, ]

# Keep only the question columns, i.e. columns whose name contains "Q"
# followed by a digit.
raw_df <- raw_df %>% select(matches("Q[0-9]"))

# The first remaining row is used as the column names; it is then removed
# so that only answer rows are left.
questions <- unname(unlist(raw_df[1, ]))

raw_df <- raw_df[-1, ]
names(raw_df) <- questions

# Deciding factors: the first five question columns.
d_factors <- raw_df[, 1:5]

# Convert columns 2-5 (hours) from character to numeric.
# lapply() always yields one vector per column, whereas sapply() changes its
# return shape (matrix / vector / list) with the number of rows.
d_factors[, 2:5] <- lapply(d_factors[, 2:5], as.numeric)

# Retention questions: every column after the deciding factors.
retentions <- raw_df[, 6:ncol(raw_df)]
# Convert the character answers to logical via to_bool().
retentions <- retentions %>%
  mutate_all(to_bool)



In [None]:
# Save clean data: deciding factors and retention answers side by side.
clean_df <- cbind(d_factors, retentions)
cat("\n========>saving clean data to result\n\n")
write.csv(clean_df, file = "result/clean_data.csv")


In [None]:
# Bar charts for the deciding factors other than columns 4 and 5:
# the columns are stacked into question/answer pairs and each question
# gets its own facet with free scales.
discrete_d_factor_plt <- ggplot(
  data = gather(d_factors[, -c(4, 5)], key = "questions", value = "answers"),
  mapping = aes(x = answers)
) +
  geom_bar(stat = "count") +
  facet_wrap(~questions, scales = "free", ncol = 1)

ggsave(
  filename = "discrete_deciding_factors.png",
  plot = discrete_d_factor_plt,
  path = "img"
)


# Histograms (bin width BIN_WIDTH) for deciding-factor columns 4 and 5,
# one facet per question.
continuois_d_factors_hist <- ggplot(
  data = gather(d_factors[, 4:5], key = "questions", value = "answers"),
  mapping = aes(x = answers)
) +
  geom_histogram(binwidth = BIN_WIDTH) +
  facet_wrap(~questions, scales = "free", ncol = 1)

ggsave(
  filename = "continuous_deciding_factors_hist.png",
  plot = continuois_d_factors_hist,
  path = "img"
)


# Frequency polygons (bin width BIN_WIDTH) for deciding-factor columns 4 and 5,
# one facet per question.
continuois_d_factors_preq <- ggplot(
  data = gather(d_factors[, 4:5], key = "questions", value = "answers"),
  mapping = aes(x = answers)
) +
  geom_freqpoly(binwidth = BIN_WIDTH) +
  facet_wrap(~questions, scales = "free", ncol = 1)

ggsave(
  filename = "continuous_deciding_factors_freqp.png",
  plot = continuois_d_factors_preq,
  path = "img"
)


In [None]:
# plot the retention questions
#summary(retentions)

# Convert every retention column to character so the answers are plotted as
# discrete categories. Assigning through `retentions[]` keeps the data frame
# and its column names for any number of rows; sapply() would return a plain
# named vector for a single-row input, which as.data.frame() then turns into
# one column and the per-question facets are lost.
retentions[] <- lapply(retentions, as.character)

# Stack the columns into question/answer pairs, wrap long question text to
# 30 characters for the facet labels, and draw one bar chart per question.
retentions_plot <- retentions %>%
  gather(key = "questions", value = "answers") %>%
  mutate(questions = str_wrap(questions, width = 30)) %>%
  ggplot(aes(x = answers)) +
  geom_bar(stat = "count") +
  facet_wrap(~questions, scales = "free", ncol = 3)

ggsave(
  filename = "retentions.png",
  plot = retentions_plot,
  path = "img"
)