Skip to content

Latest commit

 

History

History
321 lines (252 loc) · 9.59 KB

turns_etc.md

File metadata and controls

321 lines (252 loc) · 9.59 KB

KDEs, GMMs, and turns

isd_plots <- ggplot(data = filter(all_isds, sim %in% c(NA, 1, 10, 20, 30, 40, 50, 60, 70, 80, 90)), aes(x = wgt)) +
  geom_density() +
  theme_bw() +
  facet_grid(cols = vars(sim), rows = vars(source), scales = "free_y", drop = T) +
  xlim(0, 300) +
  ggtitle("ISDs")
isd_plots
## Warning: Removed 258 rows containing non-finite values (stat_density).

kde_plots <- ggplot(data = filter(all_kdes, sim %in% c(NA, 1, 10, 20, 30, 40, 50, 60, 70, 80, 90)), aes(x = wgt, y = density)) +
  geom_point() +
  theme_bw() +
  facet_grid(cols = vars(sim), rows = vars(source), scales = "free_y", drop = T) +
  xlim(0, 300) +
  ggtitle("KDEs")
kde_plots
## Warning: Removed 1610 rows containing missing values (geom_point).

gmm_plots <- ggplot(data = filter(all_gmms, sim %in% c(NA, 1, 10, 20, 30, 40, 50, 60, 70, 80, 90)), aes(x = wgt, y = density)) +
  geom_point() +
  theme_bw() +
  facet_grid(cols = vars(sim), rows = vars(source), scales = "free_y", drop = T) +
  xlim(0, 300) +
  ggtitle("GMMss")
gmm_plots
## Warning: Removed 1610 rows containing missing values (geom_point).

find_most_density <- function(density_vector, cutoff = .9) {
  
  density_df <- data.frame(initial_order = 1:length(density_vector),
                           density = density_vector)
  
  density_df <- density_df %>%
    arrange(desc(density)) %>%
    mutate(density_index = row_number())
  
  running_sum <- 0
  for(i in 1:nrow(density_df)) {
    running_sum <- running_sum + density_df$density[i]
    if(running_sum >= cutoff) {
      break
    }
  }
  
  density_df$in_most <- (density_df$density_index <= i)
  
  density_df <- density_df %>%
    arrange(initial_order)
  
  return(density_df$in_most)
}

all_gmms <- all_gmms %>%
  group_by(sim, source) %>%
  mutate(in_most = find_most_density(density, cutoff = .9)) %>%
  ungroup()

all_kdes <- all_kdes %>%
  group_by(sim, source) %>%
  mutate(in_most = find_most_density(density, cutoff = .9)) %>%
  ungroup()
kde_plots <- ggplot(data = filter(all_kdes, sim %in% c(NA, 1, 10, 20, 30, 40, 50, 60, 70, 80, 90)), aes(x = wgt, y = density, color = in_most)) +
  geom_point() +
  theme_bw() +
  facet_grid(cols = vars(sim), rows = vars(source), scales = "free_y", drop = T) +
  xlim(0, 300) +
  ggtitle("KDEs")+
  scale_color_viridis_d(end = .6)
kde_plots
## Warning: Removed 1610 rows containing missing values (geom_point).

gmm_plots <- ggplot(data = filter(all_gmms, sim %in% c(NA, 1, 10, 20, 30, 40, 50, 60, 70, 80, 90)), aes(x = wgt, y = density, color = in_most)) +
  geom_point() +
  theme_bw() +
  facet_grid(cols = vars(sim), rows = vars(source), scales = "free_y", drop = T) +
  xlim(0, 300) +
  ggtitle("GMMs") +
  scale_color_viridis_d(end = .6)
gmm_plots
## Warning: Removed 1610 rows containing missing values (geom_point).

all_kdes <- all_kdes %>%
  mutate(smoother = "kde") 
all_gmms <- all_gmms %>% 
  mutate(smoother = "gmm")

all <- bind_rows(all_kdes, all_gmms)


summarize_chunks <- function(in_most_df) {
  
  nchunks <- 0
  in_most_df$chunk <- NA
  
  for(i in 1:nrow(in_most_df)) {
    if(i == 1) {
      if(in_most_df$in_most[i]) {
        nchunks <- nchunks + 1
      }
    } else if(all(in_most_df$in_most[i], in_most_df$in_most[i - 1] == FALSE)) {
      nchunks <- nchunks + 1
    }
    
    if(all(nchunks > 0, in_most_df$in_most[i])) {
    in_most_df$chunk[i] <- nchunks
    }
  }
  
  min_in_most <- min(in_most_df[ which(in_most_df$in_most), "density"])
  max_in_most <- max(in_most_df[ which(in_most_df$in_most), "density"])
  
  edges_ratio <- max_in_most/min_in_most
  

  in_most_evenness <- in_most_df %>%
    filter(in_most) %>%
    group_by(chunk) %>%
    summarize(chunk_sum = sum(density)) %>%
    ungroup()
  
  chunk_dom <- vegan::diversity(in_most_evenness$chunk_sum, index = "simpson")
  
  
  in_most_evenness_cutoff <- in_most_evenness %>%
    filter(chunk_sum / sum(in_most_evenness$chunk_sum) >= .1)
  
  cutoff_chunk_dom = vegan::diversity(in_most_evenness_cutoff$chunk_sum, index = "simpson")
  
  nchunks_cutoff <- nrow(in_most_evenness_cutoff)
  
  output <- data.frame(sim = in_most_df$sim[1], source = in_most_df$source[1], smoother = in_most_df$smoother[1], nchunks = nchunks, edges_ratio = edges_ratio, chunk_dominance = chunk_dom, cutoff_dom = cutoff_chunk_dom, nchunks_cutoff = nchunks_cutoff, stringsAsFactors = F)
  
  return(output)
}
# 
# add_chunk <- function(in_most_df) {
#   
#   nchunks <- 0
#   in_most_df$chunk <- NA
#   
#   for(i in 2:nrow(in_most_df)) {
#     if(all(in_most_df$in_most[i], in_most_df$in_most[i - 1] == FALSE)) {
#       nchunks <- nchunks + 1
#     }
#     
#     if(nchunks > 0) {
#     in_most_df$chunk[i] <- nchunks
#     }
#   }
#   return(in_most_df)
# }

all_chunks <- list() 

for(i in 1:min(30, length(unique(all$sim)))) {
  for(j in 1:length(unique(all$smoother))) {
    for(k in 1:length(unique(all$source))) {
      this_df <- filter(all, sim == unique(all$sim)[i],
                        smoother == unique(all$smoother)[j],
                        source == unique(all$source)[k])
      if(nrow(this_df) < 1) {
        next
      }
      
      all_chunks[[length(all_chunks) + 1]] <- summarize_chunks(this_df)
    }
  }
}

all_chunks <- bind_rows(all_chunks)

all_chunks <- all_chunks %>%
  arrange(desc(nchunks)) %>%
  mutate(chunk_rank = row_number()) %>%
  arrange(desc(edges_ratio)) %>%
  mutate(edges_rank = row_number()) %>%
  group_by(smoother, nchunks) %>%
  arrange(desc(edges_ratio)) %>%
  mutate(within_nchunks_e_r = row_number()) %>%
  arrange(desc(chunk_dominance)) %>%
  mutate(dominance_rank = row_number()) %>%
  ungroup() %>%
  group_by(smoother, nchunks_cutoff) %>%
  arrange(desc(cutoff_dom)) %>%
  mutate(cutoff_dominance_rank = row_number()) %>%
  ungroup()


all <- left_join(all, all_chunks, by = c("sim", "smoother", "source")) %>%
  mutate(nchunks = as.factor(nchunks),
         nchunks_cutoff = as.factor(nchunks_cutoff)) %>%
  group_by(sim, smoother, source) %>%
  mutate(p_sd = sd(density)) %>%
  ungroup()
nchunks_plot <- ggplot(data = filter(all, chunk_rank %% 10 == 1), aes(x = wgt, y = density, color = nchunks)) +
  geom_point(size = .5) +
  theme_bw() +
  scale_color_viridis_d(end = .8) +
  facet_wrap(vars(chunk_rank))
gmm_er_plot <- ggplot(data =filter(all, smoother == "gmm", as.numeric(nchunks) > 1, in_most), aes(x = wgt, y = density, color = chunk_dominance, alpha = in_most)) +
  geom_point() +
  theme_bw() +
  scale_color_viridis_c(end = .8) +
  facet_grid(rows = vars(nchunks), cols = vars(dominance_rank), scales= "free") +
  ggtitle("GMMs")
gmm_er_plot
## Warning: Using alpha for a discrete variable is not advised.

gmm_er_plot_nou <- ggplot(data =filter(all, smoother == "gmm", source != "uniform", in_most), aes(x = wgt, y = density, color = chunk_dominance, alpha = in_most)) +
  geom_point() +
  theme_bw() +
  scale_color_viridis_c(end = .8) +
  facet_grid(rows = vars(nchunks), cols = vars(dominance_rank), scales= "free") +
  ggtitle("GMMs")
gmm_er_plot_nou
## Warning: Using alpha for a discrete variable is not advised.

kde_er_plot <- ggplot(data =filter(all, smoother == "kde", as.numeric(nchunks) > 1,in_most), aes(x = wgt, y = density, color = chunk_dominance, alpha = in_most)) +
  geom_point() +
  theme_bw() +
  scale_color_viridis_c(end = .8) +
  facet_grid(rows = vars(nchunks), cols = vars(dominance_rank), scales= "free_x") +
  ggtitle("KDEs")
kde_er_plot
## Warning: Using alpha for a discrete variable is not advised.

kde_er_plot_nou <- ggplot(data =filter(all, smoother == "kde", source != "uniform",in_most), aes(x = wgt, y = density, color = chunk_dominance, alpha = in_most)) +
  geom_point() +
  theme_bw() +
  scale_color_viridis_c(end = .8) +
  facet_grid(rows = vars(nchunks), cols = vars(dominance_rank), scales= "free_x") +
  ggtitle("KDEs")
kde_er_plot_nou
## Warning: Using alpha for a discrete variable is not advised.

try removing low-value chunks

gmm_er_plot_co <- ggplot(data =filter(all, smoother == "gmm", source != "uniform"), aes(x = wgt, y = density, color = cutoff_dom, alpha = in_most)) +
  geom_point() +
  theme_bw() +
  scale_color_viridis_c(end = .8) +
  facet_grid(rows = vars(nchunks_cutoff), cols = vars(cutoff_dominance_rank), scales= "free") +
  ggtitle("GMMs")
gmm_er_plot_co
## Warning: Using alpha for a discrete variable is not advised.

kde_er_plot_co <- ggplot(data =filter(all, smoother == "kde", as.numeric(nchunks) > 1,in_most), aes(x = wgt, y = density, color = cutoff_dom, alpha = in_most)) +
  geom_point() +
  theme_bw() +
  scale_color_viridis_c(end = .8) +
  facet_grid(rows = vars(nchunks_cutoff), cols = vars(cutoff_dominance_rank), scales= "free_x") +
  ggtitle("KDEs")
kde_er_plot_co
## Warning: Using alpha for a discrete variable is not advised.