In [None]:
library(tidyverse)

In [None]:
# Load the per-variant consequence table. Column names are supplied
# explicitly, so the first line of the file is read as data, and "." is
# treated as a missing value.
csq <- read_tsv(
    file = "output/freebayes_at9852_ath_filt-q100-ac10-dp100-200k-only1k1g-csq_onlyNLRs_csqtab_geneids.tsv",
    col_names = c("chrom", "pos", "alt", "qual", "af", "bcsq", "gene_id"),
    na = "."
)

In [None]:
# Quick structural check of the loaded table: column names, types and the
# first few values of each column.
glimpse(csq)

In [None]:
# Reduce the raw `bcsq` annotation to a single consequence type per variant,
# add each variant's relative position within its gene, and write the result
# to CSV.
#
# `genepos` is scaled to the span of *observed variant positions* sharing a
# `gene_id` (0 = first variant, 1 = last variant); it is not derived from
# annotated gene coordinates.
csq2 <- csq %>%
    # Drop variants with no annotation and annotations starting with "@".
    # NOTE(review): presumably "@" entries are pointers to a consequence
    # recorded at another position -- confirm against the annotator's docs.
    filter(!is.na(bcsq), !grepl("^@", bcsq)) %>%
    # Keep only the first of any comma-separated consequences.
    mutate(bcsq = sub(",.*", "", bcsq)) %>%
    # Only `csqtype` (the first field) is used below, so short rows are
    # padded with NA and surplus fields are dropped instead of running in
    # "debug" mode, which warns and adds diagnostic columns.
    separate_wider_delim(
        bcsq,
        delim = "|",
        names = c(
            "csqtype", "gene", "transcript", "genetype",
            "strand", "aa", "cds"
        ),
        too_few = "align_start",
        too_many = "drop"
    ) %>%
    select(gene_id, chrom, pos, alt, qual, af, csqtype) %>%
    # For compound types joined with "&", keep only the first component.
    mutate(csqtype = sub("&.*", "", csqtype)) %>%
    # Drop consequence types prefixed with "*".
    # NOTE(review): presumably these mark consequences downstream of a stop
    # -- confirm against the annotator's docs.
    filter(!grepl("^\\*", csqtype)) %>%
    group_by(gene_id) %>%
    mutate(gene_span = max(pos) - min(pos)) %>%
    # A gene whose variants all share one position has a zero span; report
    # its relative position as NA rather than the NaN produced by 0 / 0.
    mutate(
        genepos = if_else(
            gene_span > 0,
            (pos - min(pos)) / gene_span,
            NA_real_
        )
    ) %>%
    # Ungroup so later cells operate on a plain tibble.
    ungroup() %>%
    select(-gene_span)

# Missing values are written as empty fields.
write_csv(csq2, "output/summarised_csq_pergene.csv", na = "")

In [None]:
# Count of variants per consequence type.
table(csq2[["csqtype"]])

In [None]:
# Allele frequency against relative within-gene position, one colour per
# gene. The colour legend is suppressed.
csq2 %>%
    ggplot(aes(genepos, af, colour = gene_id)) +
    geom_point() +
    guides(colour = "none") +
    theme_bw()

In [None]:
# Allele frequency against relative within-gene position, coloured by
# consequence type.
csq2 %>%
    ggplot(aes(genepos, af)) +
    geom_point(aes(colour = csqtype)) +
    labs(
        x = "Relative Gene Position",
        y = "Allele Frequency",
        colour = "Variant\nEffect"
    ) +
    theme_bw()

In [None]:
# Allele frequency against relative within-gene position, restricted to a
# selected set of consequence types (each type listed once).
csq2 %>%
    filter(
        csqtype %in% c(
            "frameshift", "stop_gained", "stop_lost",
            "start_lost", "feature_elongation"
        )
    ) %>%
    ggplot(aes(x = genepos, y = af)) +
    geom_point(aes(colour = csqtype)) +
    labs(
        x = "Relative Gene Position",
        y = "Allele Frequency",
        colour = "Variant\nEffect"
    ) +
    theme_bw()

In [None]:
# Allele frequency against relative within-gene position, restricted to a
# selected set of consequence types (each type listed once) and to variants
# with allele frequency above 0.01.
csq2 %>%
    filter(
        csqtype %in% c(
            "frameshift", "stop_gained", "stop_lost",
            "start_lost", "feature_elongation"
        ),
        af > 0.01
    ) %>%
    ggplot(aes(x = genepos, y = af)) +
    geom_point(aes(colour = csqtype)) +
    labs(
        x = "Relative Gene Position",
        y = "Allele Frequency",
        colour = "Variant\nEffect"
    ) +
    theme_bw()

In [None]:
library(ggridges)

In [None]:
# Ridgeline densities of allele frequency, one ridge per consequence type.
# The same guide object is applied to both the fill and colour scales.
gcons <- guide_legend(title = "Consequence")
csq2 %>%
    ggplot(aes(x = af, y = csqtype)) +
    geom_density_ridges(
        aes(fill = csqtype, colour = csqtype),
        scale = 0.8,
        rel_min_height = 0.01,
        # Spelled out as FALSE: `F` is an ordinary variable that can be
        # reassigned.
        panel_scaling = FALSE
    ) +
    labs(y = "Consequence Type", x = "Allele Frequency") +
    guides(fill = gcons, colour = gcons) +
    theme_bw()