## Plotting the ATAC predictions

##### ATAC predictions are made with different models, trimmed and untrimmed histograms, Gaussian-smoothed and unsmoothed individuals, etc.

In [1]:
library(tidyverse)

Registered S3 method overwritten by 'rvest':
  method            from
  read_xml.response xml2
── Attaching packages ─────────────────────────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
✔ ggplot2 3.3.5     ✔ purrr   0.3.4
✔ tibble  3.1.4     ✔ dplyr   1.0.7
✔ tidyr   1.1.3     ✔ stringr 1.4.0
✔ readr   1.3.1     ✔ forcats 0.4.0
── Conflicts ────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()


### Observed ATAC values

In [None]:
# Load the observed ATAC table, keeping only the bin identifier and its value.
# The value column is renamed to ATAC_observed directly inside select().
ATAC_obs <- select(
    read.csv("data/sum_control_normalized_ATAC.csv"),
    bin,
    ATAC_observed = ATAC_val
)
head(ATAC_obs)

In [None]:
# Helper for formatting the input prediction files.
#
# Keeps the rows of `input_pred` whose `sample` equals `individual`, pivots every
# column except `sample` into (bin, ATAC_prediction) pairs and merges the result
# with the global `ATAC_obs` table on `bin`. The first three characters of each
# bin label are then dropped and the remainder is split on "_" into numeric
# `chr` and `bin_number` columns; `position` is bin_number * 250000.
# Returns the merged table sorted by chr and position.
format_input <- function(input_pred, individual){
    one_sample <- filter(input_pred, sample == individual)
    long_pred <- pivot_longer(one_sample, !sample, names_to = "bin", values_to = "ATAC_prediction")
    merged <- merge(long_pred, ATAC_obs, by = "bin")

    # "..." is a regex matching any three characters, i.e. the label's 3-char prefix.
    merged[["bin"]] <- sub("...", "", merged[["bin"]])
    merged <- separate(merged, bin, c("chr", "bin_number"), sep = "_")

    # separate() yields character columns; convert both pieces to numbers.
    for (column in c("chr", "bin_number")) {
        merged[[column]] <- as.numeric(merged[[column]])
    }

    arrange(mutate(merged, position = bin_number * 250000), chr, position)
}

### Lasso predictions

In [None]:
# Read the lasso prediction table and reshape the rows of sample PGDX10344P1
# with format_input().
lasso_pred <- format_input(
    read.csv("data/ATAC_predictions_on_all_samples/ATAC_pred_lasso_std_FALSE_formatted.csv"),
    "PGDX10344P1"
)
head(lasso_pred)

In [None]:
# Keep only the rows where chr is 1.
ind_pred_chr <- filter(lasso_pred, chr == 1)

# One fixed colour per series; names match the constant strings used in aes(color = ...).
colors <- c(ATAC_observed = "firebrick", ATAC_prediction = "darkblue")

# Build the plot step by step: points plus a thin connecting line for each series.
p <- ggplot(ind_pred_chr)
p <- p +
        geom_point(aes(x = position, y = ATAC_observed, color = "ATAC_observed"), size = 0.5) +
        geom_line(aes(x = position, y = ATAC_observed, color = "ATAC_observed"), size = 0.3)
p <- p +
        geom_point(aes(x = position, y = ATAC_prediction, color = "ATAC_prediction"), size = 0.5) +
        geom_line(aes(x = position, y = ATAC_prediction, color = "ATAC_prediction"), size = 0.3)
p <- p +
        labs(x = "Position", y = "ATAC value", color = "Legend") +
        scale_color_manual(values = colors)

# Convert the ggplot object to an interactive plotly figure.
plotly::ggplotly(p)

### Lasso predictions standardized

In [None]:
# Read the standardized lasso prediction table and reshape the rows of sample
# PGDX10344P1 with format_input(). This overwrites any earlier lasso_pred.
lasso_pred <- format_input(
    read.csv("data/ATAC_predictions_on_all_samples/ATAC_pred_lasso_std_FALSE_formatted_standardized.csv"),
    "PGDX10344P1"
)
head(lasso_pred)

In [None]:
# Keep only the rows where chr is 1.
ind_pred_chr <- filter(lasso_pred, chr == 1)

# One fixed colour per series; names match the constant strings used in aes(color = ...).
colors <- c(ATAC_observed = "firebrick", ATAC_prediction = "darkblue")

# Build the plot step by step: points plus a thin connecting line for each series.
p <- ggplot(ind_pred_chr)
p <- p +
        geom_point(aes(x = position, y = ATAC_observed, color = "ATAC_observed"), size = 0.5) +
        geom_line(aes(x = position, y = ATAC_observed, color = "ATAC_observed"), size = 0.3)
p <- p +
        geom_point(aes(x = position, y = ATAC_prediction, color = "ATAC_prediction"), size = 0.5) +
        geom_line(aes(x = position, y = ATAC_prediction, color = "ATAC_prediction"), size = 0.3)
p <- p +
        labs(x = "Position", y = "ATAC value", color = "Legend") +
        scale_color_manual(values = colors)

# Convert the ggplot object to an interactive plotly figure.
plotly::ggplotly(p)

### Lasso Gaussian Trimmed predictions

In [None]:
# Read the Gaussian/trimmed lasso prediction table and reshape the rows of
# sample PGDX10344P1 with format_input().
lasso_gaussian_trim_pred <- format_input(
    read.csv("data/ATAC_predictions_on_all_samples/ATAC_pred_lasso_gaussian_trimmed_formatted.csv"),
    "PGDX10344P1"
)
head(lasso_gaussian_trim_pred)

In [None]:
# Keep only the rows where chr is 1.
ind_pred_chr <- filter(lasso_gaussian_trim_pred, chr == 1)

# One fixed colour per series; names match the constant strings used in aes(color = ...).
colors <- c(ATAC_observed = "firebrick", ATAC_prediction = "darkblue")

# Build the plot step by step: points plus a thin connecting line for each series.
p <- ggplot(ind_pred_chr)
p <- p +
        geom_point(aes(x = position, y = ATAC_observed, color = "ATAC_observed"), size = 0.5) +
        geom_line(aes(x = position, y = ATAC_observed, color = "ATAC_observed"), size = 0.3)
p <- p +
        geom_point(aes(x = position, y = ATAC_prediction, color = "ATAC_prediction"), size = 0.5) +
        geom_line(aes(x = position, y = ATAC_prediction, color = "ATAC_prediction"), size = 0.3)
p <- p +
        labs(x = "Position", y = "ATAC value", color = "Legend") +
        scale_color_manual(values = colors)

# Convert the ggplot object to an interactive plotly figure.
plotly::ggplotly(p)

### Lasso Gaussian std FALSE predictions

In [None]:
# Read the Gaussian-smoothed lasso (std FALSE) prediction table and reshape the
# rows of sample PGDX10344P1 with format_input().
lasso_gaussian_stdFALSE_pred <- format_input(
    read.csv("data/ATAC_predictions_on_all_samples/ATAC_pred_smooth_gaussian_lasso_std_FALSE_formatted.csv"),
    "PGDX10344P1"
)
head(lasso_gaussian_stdFALSE_pred)

In [None]:
# Keep only the rows where chr is 1.
ind_pred_chr <- filter(lasso_gaussian_stdFALSE_pred, chr == 1)

# One fixed colour per series; names match the constant strings used in aes(color = ...).
colors <- c(ATAC_observed = "firebrick", ATAC_prediction = "darkblue")

# Build the plot step by step: points plus a thin connecting line for each series.
p <- ggplot(ind_pred_chr)
p <- p +
        geom_point(aes(x = position, y = ATAC_observed, color = "ATAC_observed"), size = 0.5) +
        geom_line(aes(x = position, y = ATAC_observed, color = "ATAC_observed"), size = 0.3)
p <- p +
        geom_point(aes(x = position, y = ATAC_prediction, color = "ATAC_prediction"), size = 0.5) +
        geom_line(aes(x = position, y = ATAC_prediction, color = "ATAC_prediction"), size = 0.3)
p <- p +
        labs(x = "Position", y = "ATAC value", color = "Legend") +
        scale_color_manual(values = colors)

# Convert the ggplot object to an interactive plotly figure.
plotly::ggplotly(p)

### Lasso Gaussian std TRUE predictions

In [None]:
# Read the Gaussian-smoothed lasso (std TRUE) prediction table and reshape the
# rows of sample PGDX10344P1 with format_input().
lasso_gaussian_stdTRUE_pred <- format_input(
    read.csv("data/ATAC_predictions_on_all_samples/ATAC_pred_smooth_gaussian_lasso_std_TRUE_formatted.csv"),
    "PGDX10344P1"
)
head(lasso_gaussian_stdTRUE_pred)

In [None]:
# Keep only the rows where chr is 1.
ind_pred_chr <- filter(lasso_gaussian_stdTRUE_pred, chr == 1)

# One fixed colour per series; names match the constant strings used in aes(color = ...).
colors <- c(ATAC_observed = "firebrick", ATAC_prediction = "darkblue")

# Build the plot step by step: points plus a thin connecting line for each series.
p <- ggplot(ind_pred_chr)
p <- p +
        geom_point(aes(x = position, y = ATAC_observed, color = "ATAC_observed"), size = 0.5) +
        geom_line(aes(x = position, y = ATAC_observed, color = "ATAC_observed"), size = 0.3)
p <- p +
        geom_point(aes(x = position, y = ATAC_prediction, color = "ATAC_prediction"), size = 0.5) +
        geom_line(aes(x = position, y = ATAC_prediction, color = "ATAC_prediction"), size = 0.3)
p <- p +
        labs(x = "Position", y = "ATAC value", color = "Legend") +
        scale_color_manual(values = colors)

# Convert the ggplot object to an interactive plotly figure.
plotly::ggplotly(p)

### Random forest predictions

In [None]:
# Read the random forest prediction table and reshape the rows of sample
# PGDX10344P1 with format_input().
random_forest_pred <- format_input(
    read.csv("data/ATAC_predictions_on_all_samples/ATAC_pred_random_forest_formatted.csv"),
    "PGDX10344P1"
)
head(random_forest_pred)

In [None]:
# Keep only the rows where chr is 1.
ind_pred_chr <- filter(random_forest_pred, chr == 1)

# One fixed colour per series; names match the constant strings used in aes(color = ...).
colors <- c(ATAC_observed = "firebrick", ATAC_prediction = "darkblue")

# Build the plot step by step: points plus a thin connecting line for each series.
p <- ggplot(ind_pred_chr)
p <- p +
        geom_point(aes(x = position, y = ATAC_observed, color = "ATAC_observed"), size = 0.5) +
        geom_line(aes(x = position, y = ATAC_observed, color = "ATAC_observed"), size = 0.3)
p <- p +
        geom_point(aes(x = position, y = ATAC_prediction, color = "ATAC_prediction"), size = 0.5) +
        geom_line(aes(x = position, y = ATAC_prediction, color = "ATAC_prediction"), size = 0.3)
p <- p +
        labs(x = "Position", y = "ATAC value", color = "Legend") +
        scale_color_manual(values = colors)

# Convert the ggplot object to an interactive plotly figure.
plotly::ggplotly(p)

## Plots after removing the outlier (PGDX7096P) from the PCA/tSNE

### Lasso

In [None]:
# Read the lasso prediction table and reshape the rows of sample PGDX7096P
# with format_input(). This overwrites any earlier lasso_pred.
lasso_pred <- format_input(
    read.csv("data/ATAC_predictions_on_all_samples/ATAC_pred_lasso_std_FALSE_formatted.csv"),
    "PGDX7096P"
)
head(lasso_pred)

In [None]:
# Keep only the rows where chr is 1.
ind_pred_chr <- filter(lasso_pred, chr == 1)

# One fixed colour per series; names match the constant strings used in aes(color = ...).
colors <- c(ATAC_observed = "firebrick", ATAC_prediction = "darkblue")

# Build the plot step by step: points plus a thin connecting line for each series.
p <- ggplot(ind_pred_chr)
p <- p +
        geom_point(aes(x = position, y = ATAC_observed, color = "ATAC_observed"), size = 0.5) +
        geom_line(aes(x = position, y = ATAC_observed, color = "ATAC_observed"), size = 0.3)
p <- p +
        geom_point(aes(x = position, y = ATAC_prediction, color = "ATAC_prediction"), size = 0.5) +
        geom_line(aes(x = position, y = ATAC_prediction, color = "ATAC_prediction"), size = 0.3)
p <- p +
        labs(x = "Position", y = "ATAC value", color = "Legend") +
        scale_color_manual(values = colors)

# Convert the ggplot object to an interactive plotly figure.
plotly::ggplotly(p)

### Random forest 

In [None]:
# Read the random forest prediction table and reshape the rows of sample
# PGDX7096P with format_input(). This overwrites any earlier random_forest_pred.
random_forest_pred <- format_input(
    read.csv("data/ATAC_predictions_on_all_samples/ATAC_pred_random_forest_formatted.csv"),
    "PGDX7096P"
)
head(random_forest_pred)

In [None]:
# Keep only the rows where chr is 2.
# NOTE(review): the other plotting cells in this notebook select chr == 1 —
# confirm that chr == 2 is intended here.
ind_pred_chr <- filter(random_forest_pred, chr == 2)

# One fixed colour per series; names match the constant strings used in aes(color = ...).
colors <- c(ATAC_observed = "firebrick", ATAC_prediction = "darkblue")

# Build the plot step by step: points plus a thin connecting line for each series.
p <- ggplot(ind_pred_chr)
p <- p +
        geom_point(aes(x = position, y = ATAC_observed, color = "ATAC_observed"), size = 0.5) +
        geom_line(aes(x = position, y = ATAC_observed, color = "ATAC_observed"), size = 0.3)
p <- p +
        geom_point(aes(x = position, y = ATAC_prediction, color = "ATAC_prediction"), size = 0.5) +
        geom_line(aes(x = position, y = ATAC_prediction, color = "ATAC_prediction"), size = 0.3)
p <- p +
        labs(x = "Position", y = "ATAC value", color = "Legend") +
        scale_color_manual(values = colors)

# Convert the ggplot object to an interactive plotly figure.
plotly::ggplotly(p)