In [None]:
# Helper functions used throughout this notebook (load_*, plot_*, get_*) are
# not defined here — presumably they come from this sourced file, which must
# also attach dplyr/tidyr/ggplot2/cowplot; TODO confirm.
source('results_and_plotting.R')
# brewer.pal() supplies the fill colours of the epistasis panels.
library(RColorBrewer)
# TeX() renders the math in axis titles.
library(latex2exp)

# Output directory for every ggsave() call; file names are appended with
# paste0(), so the trailing slash is required.
location_figures = "../../figures/"

In [2]:
# M1 results table for the "variant" method; most cells below read from it.
M1_results <- load_M1_results("variant")

# Figure 1: G

In [None]:
# Figure 1: fixation, mutation and selection panels for the "pan_data" key,
# laid out in one row with the selection panel's legend underneath.
key <- "pan_data"
method <- "variant"

options(repr.plot.width = 12, repr.plot.height = 5)

# Selection panel: build it, extract its legend (title removed), then drop the
# legend from the panel itself and rescale the axis labels by 1e6.
gamma_plot <- plot_M1_results(M1_results, key, method, "selection", show_genes = FALSE)
legend <- get_legend(
  gamma_plot +
    theme(legend.title = element_blank(), legend.text = element_text(size = 14))
)
gamma_plot <- gamma_plot +
  theme(legend.position = "none") +
  scale_y_continuous(
    labels = function(x) { x / 1e6 },
    limits = c(0, NA),
    breaks = c(0, 0.25, 0.5, 1, 1.5, 2) * 1e6
  ) +
  ylab(TeX(r'(Scaled selection coefficient $(\times 10^{6})$)'))

# Fixation panel.
# NOTE(review): the TeX label below has an unbalanced "(" and looks garbled;
# it is reproduced unchanged — confirm the intended axis title.
flux_plot <- plot_M1_results(M1_results, key, method, "fixation", show_freq_legend = FALSE) +
  theme(axis.ticks.y = element_line(), axis.title.y = element_blank()) +
  scale_y_continuous(limits = c(0, NA)) +
  ylab(TeX(r'(Fixation rate $(1^{!}$)'))

# Mutation panel, axis labels rescaled by 1e-6.
mu_plot <- plot_M1_results(
  M1_results, key, method, "mutation",
  show_freq_legend = FALSE, show_genes = FALSE
) +
  scale_y_continuous(
    labels = function(x) { x / 1e-6 },
    limits = c(0, NA),
    breaks = seq(0, 2e-6, 0.3e-6)
  ) +
  ylab(TeX(r'(Mutation rate $(\times 10^{-6})$)'))

# Row of three panels on top, shared legend in a thin strip below.
p <- plot_grid(
  plot_grid(flux_plot, mu_plot, gamma_plot, ncol = 3, rel_widths = c(1.25, 1, 1)),
  legend,
  ncol = 1, rel_heights = c(1, 0.1)
)
p

In [11]:
# Write Figure 1 at the same 12 x 5 size used for the inline preview.
ggsave(
  filename = paste0(location_figures, "G.png"),
  plot = p,
  width = 12,
  height = 5
)

## Analysis for results text

In [None]:
# pan_data rows ordered by increasing mu.
M1_results %>%
  filter(key == "pan_data") %>%
  arrange(mu)

In [None]:
# pan_data rows ordered by increasing freq.
M1_results %>%
  filter(key == "pan_data") %>%
  arrange(freq)

# Figure 2: GxE

In [None]:
# Figure 2, left panel: per-gene mu under the "smoking" key plotted against
# the "nonsmoking" key, one labelled point per gene, with the identity line
# for reference. Axis labels are rescaled by 1e-7.
a <- M1_results %>%
  filter(method == "variant") %>%
  filter(key %in% c("smoking", "nonsmoking")) %>%
  pivot_wider(
    names_from = key,
    id_cols = gene,
    values_from = mu
  ) %>%
  ggplot(aes(x = nonsmoking, y = smoking)) +
  # The original call had a stray leading comma (an empty first argument).
  geom_point(size = 4, color = "#7373d2") +
  geom_abline(slope = 1, intercept = 0, lty = 2) +
  geom_text_repel(aes(label = gene), fontface = "italic") +
  theme_classic() +
  labs(
    x = TeX(r'(Oncogenic mutation rate in NS-LUAD $(\times 10^{-7})$)'),
    y = TeX(r'(Oncogenic mutation rate in ES-LUAD $(\times 10^{-7})$)')
  ) +
  scale_y_continuous(
    labels = function(x) { x / 1e-7 },
    breaks = c(1, 5, 10, 15, 25, 35) * 1e-7
  ) +
  scale_x_continuous(
    labels = function(x) { x / 1e-7 },
    breaks = seq(1, 3, 0.5) * 1e-7,
    limits = c(1e-7, NA)
  ) +
  theme(
    axis.title = element_text(size = 18),
    axis.text = element_text(size = 16)
  )

# Right panel: selection coefficients from the project helper, labels rescaled
# by 1e6. FALSE is spelled out because `F` is an ordinary, reassignable name.
b <- plot_GxE_results(M1_results, "variant", ratio_plot = FALSE) +
  labs(
    x = "Mutated gene",
    title = "",
    y = TeX(r'(Scaled selection coefficient $(\times 10^{6})$)')
  ) +
  theme(legend.position = c(0.5, 0.9)) +
  scale_y_continuous(labels = function(x) { x / 1e6 }, breaks = seq(0, 2, 0.3) * 1e6)


options(repr.plot.width = 17, repr.plot.height = 6)
p <- plot_grid(a, b, ncol = 2, rel_widths = c(7, 10), label_size = 20)
p

In [13]:
# Write Figure 2 at the same 17 x 6 size used for the inline preview.
ggsave(
  filename = paste0(location_figures, "GxE.png"),
  plot = p,
  width = 17,
  height = 6
)

## Analysis for results text

In [None]:
# Per-gene ratio of mu between the smoking_plus and nonsmoking_plus keys,
# rounded to two decimals, largest ratio first.
M1_results %>%
  filter(key %in% c("smoking_plus", "nonsmoking_plus")) %>%
  pivot_wider(names_from = key, id_cols = gene, values_from = mu) %>%
  mutate(ratio = round(smoking_plus / nonsmoking_plus, 2)) %>%
  arrange(desc(ratio))

In [None]:
# Per-gene comparison of gamma_mle between the two "_plus" keys: both ratios
# and the percent difference relative to nonsmoking_plus, ordered by the
# nonsmoking_plus estimate (largest first).
M1_results %>%
  filter(key %in% c("smoking_plus", "nonsmoking_plus")) %>%
  pivot_wider(names_from = key, id_cols = gene, values_from = gamma_mle) %>%
  mutate(
    ns_over_s = nonsmoking_plus / smoking_plus,
    s_over_ns = 1 / ns_over_s,
    percent_diff = round(100 * (smoking_plus - nonsmoking_plus) / nonsmoking_plus, 2)
  ) %>%
  # Alternative ordering kept for reference: arrange(abs(percent_diff))
  arrange(desc(nonsmoking_plus))

## Supplementary Figure 1

In [14]:
# M1 results from the "all results" loader for the "variant" method
# (used by the supplementary scatter plot below).
M1_all_results <- load_M1_all_results("variant")

In [None]:
options(repr.plot.width = 8, repr.plot.height = 8)

# Manual scales: alpha and fill are keyed by the logical `signif_diff`; colour
# is keyed by the '1'/'2' code assigned to the gene labels further down.
alpha_palette <- c("TRUE" = 0.8, "FALSE" = 0.025)
color_palette <- c('1' = 'blue', '2' = 'red')
fill_palette <- c("TRUE" = "yellow", "FALSE" = "#ffffff")

# Log-log scatter of gamma_mle in smoking_plus (y) against nonsmoking_plus (x)
# with confidence-interval bars on both axes and the identity line.
p <- M1_all_results %>%
  filter(gamma_ci_low >= 1) %>%
  pivot_wider(
    names_from = key,
    values_from = c(gamma_mle, gamma_ci_low, gamma_ci_high),
    id_cols = gene
  ) %>%
  mutate(
    selection_ratio = gamma_mle_nonsmoking_plus / gamma_mle_smoking_plus,
    # TRUE when the two confidence intervals do not overlap.
    signif_diff = (gamma_ci_low_nonsmoking_plus > gamma_ci_high_smoking_plus |
                     gamma_ci_low_smoking_plus > gamma_ci_high_nonsmoking_plus)
  ) %>% {
    ggplot(., aes(x = gamma_mle_nonsmoking_plus, y = gamma_mle_smoking_plus, alpha = signif_diff)) +
      geom_errorbarh(
        aes(xmin = gamma_ci_low_nonsmoking_plus, xmax = gamma_ci_high_nonsmoking_plus),
        height = 0, linewidth = 0.3
      ) +
      geom_errorbar(
        aes(ymin = gamma_ci_low_smoking_plus, ymax = gamma_ci_high_smoking_plus),
        width = 0, linewidth = 0.3
      ) +
      geom_point(aes(fill = signif_diff), shape = 21, size = 2) +
      geom_abline(slope = 1, intercept = 0, lty = 2) +
      # Label only non-overlapping genes that also satisfy one of the ratio
      # criteria; the label colour encodes whether selection_ratio exceeds 1.
      ggrepel::geom_text_repel(
        data = . %>%
          filter(signif_diff) %>%
          filter(selection_ratio < 1 | selection_ratio > 5 |
                   (selection_ratio > 1 & gamma_mle_nonsmoking_plus > 3e5)),
        aes(label = gene, color = ifelse(selection_ratio > 1, '1', '2')),
        alpha = 1,
        size = 5,
        max.overlaps = 100
      ) +
      scale_alpha_manual(values = alpha_palette) +
      scale_color_manual(values = color_palette) +
      scale_fill_manual(values = fill_palette) +
      scale_x_log10(labels = log_labels, limits = c(3e2, 2e6)) +
      scale_y_log10(labels = log_labels, limits = c(3e2, 2e6)) +
      annotation_logticks() +
      labs(
        x = "Scaled selection coefficient in never-smoker LUAD",
        y = "Scaled selection coefficient in ever-smoker LUAD",
        alpha = "Significant Difference"
      ) +
      theme_classic() +
      theme(
        axis.title = element_text(size = 18),
        axis.text = element_text(size = 16),
        legend.position = "none"
      )
  }

p

In [None]:
# Write the supplementary scatter at the 8 x 8 preview size.
ggsave(
  filename = paste0(location_figures, "GxE_scatter.png"),
  plot = p,
  width = 8,
  height = 8
)

In [None]:
# Genes whose confidence intervals do not overlap between the two "_plus" keys
# and whose smoking/nonsmoking gamma ratio is below 1, smallest ratio first.
M1_all_results %>%
  filter(key %in% c("smoking_plus", "nonsmoking_plus")) %>%
  pivot_wider(
    names_from = key,
    id_cols = gene,
    values_from = c(gamma_mle, gamma_ci_low, gamma_ci_high)
  ) %>%
  mutate(
    signif = (gamma_ci_low_smoking_plus > gamma_ci_high_nonsmoking_plus |
                gamma_ci_low_nonsmoking_plus > gamma_ci_high_smoking_plus),
    ratio = gamma_mle_smoking_plus / gamma_mle_nonsmoking_plus
  ) %>%
  filter(signif, ratio < 1, gamma_mle_smoking_plus > 0, gamma_mle_nonsmoking_plus > 0) %>%
  arrange(ratio)

# Figure 3: TP53 + KRAS

In [None]:
# mu for TP53 and KRAS under each of the two "_plus" keys.
M1_results %>%
  filter(
    key %in% c("smoking_plus", "nonsmoking_plus"),
    gene %in% c("TP53", "KRAS")
  ) %>%
  select(gene, key, mu)

In [None]:
# M2 results table for the "variant" method.
M2_results <- load_M2_results("variant")

In [20]:
# One interaction table per cohort key, built from the matching M2 rows.
variant_M2_smoking_interaction_df <- M2_results %>%
  filter(key == "smoking_plus", method == "variant") %>%
  get_interaction_df()
variant_M2_nonsmoking_interaction_df <- M2_results %>%
  filter(key == "nonsmoking_plus", method == "variant") %>%
  get_interaction_df()

In [None]:
# TP53_KRAS rows of the smoking_plus table, largest gamma_mle first.
variant_M2_smoking_interaction_df %>%
  filter(gene_set == "TP53_KRAS") %>%
  arrange(desc(gamma_mle))

In [None]:
# TP53_KRAS rows of the nonsmoking_plus table.
variant_M2_nonsmoking_interaction_df %>%
  filter(gene_set == "TP53_KRAS")

# Figure 4: GxG

### Options for Panel A

In [None]:
# Candidates for Panel A: within each cohort keep mutated genes whose
# significant non-WT interactions are of mixed direction (some ratio < 1, some
# > 1); after stacking both cohorts keep genotype/gene pairs that occur exactly
# twice (presumably once per cohort — verify).
variant_M2_nonsmoking_interaction_df %>%
  filter(signif, epistatic_gt != "WT") %>%
  arrange(mutated_gene) %>%
  group_by(mutated_gene) %>%
  filter(!all(ratio < 1) & !all(ratio > 1)) %>%
  bind_rows(
    variant_M2_smoking_interaction_df %>%
      filter(signif, epistatic_gt != "WT") %>%
      arrange(mutated_gene) %>%
      group_by(mutated_gene) %>%
      filter(!all(ratio < 1) & !all(ratio > 1))
  ) %>%
  group_by(epistatic_gt, mutated_gene) %>%
  filter(n() == 2) %>%
  select(key, epistatic_gt, mutated_gene, starts_with("gamma"), ratio, signif) %>%
  arrange(mutated_gene)

## Panel A

In [None]:
# Panel A (smoking_plus): selection on `mg` in the wild-type context (taken
# from M1_results) next to its selection in each `epistatic_gts` context
# (taken from the M2 interaction table), drawn as one horizontal strip.
options(repr.plot.width = 15, repr.plot.height = 2)

mg <- "EGFR"
epistatic_gts <- c("KRAS", "TP53")
cohort <- "smoking_plus"

alpha_palette <- c("TRUE" = 1, "FALSE" = 0.1)
fill_palette <- c(
  "WT" = "purple",
  "Antagonism" = brewer.pal(n = 3, name = "RdYlBu")[1],
  "Synergism" = brewer.pal(n = 3, name = "RdYlBu")[3]
)

# Pick the interaction table for the requested cohort. An unknown cohort now
# stops with an error: previously neither branch ran and the rest of the cell
# silently reused whatever `tmp` an earlier cell had left behind.
tmp <- switch(
  cohort,
  smoking_plus = variant_M2_smoking_interaction_df,
  nonsmoking_plus = variant_M2_nonsmoking_interaction_df,
  stop("Unknown cohort: ", cohort, call. = FALSE)
) %>%
  filter(mutated_gene == mg, epistatic_gt %in% c(epistatic_gts, "WT"))

# Replace the M2 "WT" rows with the single M1 estimate for `mg`, keeping the
# mean `to_count` of the dropped WT rows so the point can still be sized.
# (The right-hand side still sees the unmodified `tmp` from above.)
tmp <- tmp %>%
  filter(epistatic_gt != "WT") %>%
  bind_rows(
    M1_results %>%
      filter(gene == mg) %>%
      filter(key == cohort) %>%
      mutate(mutated_gene = gene, epistatic_gt = "WT", signif = TRUE) %>%
      left_join(
        tmp %>%
          filter(epistatic_gt == "WT") %>%
          group_by(mutated_gene) %>%
          summarize(to_count = mean(to_count)),
        by = "mutated_gene"
      )
  ) %>%
  mutate(
    context = ifelse(epistatic_gt == "WT", "WT", "Non-WT\n([gene] mutated)"),
    color = ifelse(epistatic_gt == "WT", "WT", ifelse(ratio > 1, "Synergism", "Antagonism"))
  ) %>%
  # Small per-context offsets so the KRAS and TP53 points do not overlap.
  mutate(nudge_dist = case_when(
    epistatic_gt == "KRAS" ~ -0.1,
    epistatic_gt == "TP53" ~ 0.1,
    TRUE ~ 0
  )) %>%
  mutate(epistatic_gt = ifelse(epistatic_gt == "WT", "Wild-type", paste0(epistatic_gt, "-mutant")))

p <- tmp %>%
  ggplot(aes(x = mutated_gene, y = gamma_mle, size = to_count, alpha = signif)) +
  geom_errorbar(
    aes(ymin = gamma_ci_low, ymax = gamma_ci_high),
    width = 0.1,
    linewidth = 0.2,
    position = position_nudge(tmp %>% pull(nudge_dist))
  ) +
  geom_point(
    aes(fill = color),
    shape = 21, color = "black",
    position = position_nudge(tmp %>% pull(nudge_dist))
  ) +
  geom_text(
    data = tmp %>% filter(signif),
    aes(label = epistatic_gt),
    size = 5,
    nudge_x = -0.25,
    check_overlap = FALSE
  ) +
  labs(
    x = "",
    y = glue("Scaled selection coefficient for *{mg}* mutation in somatic genotype"),
    size = "Sample count"
  ) +
  scale_alpha_manual(values = alpha_palette, name = "Significant difference in selection") +
  scale_fill_manual(values = fill_palette, name = "Genetic context") +
  scale_y_continuous(labels = scientific_expr, breaks = seq(0, 2e5, 3e4)) +
  theme_classic() +
  theme(
    plot.title = element_text(size = 24, hjust = 0.5),
    # element_markdown so the *gene* in the axis title renders in italics.
    axis.title = ggtext::element_markdown(size = 24),
    axis.text = element_text(size = 20, face = "italic"),
    legend.position = c(0.8, 0.2),
    legend.key.size = unit(1.5, 'cm'),
    legend.text = element_text(size = 20),
    legend.title = element_text(size = 20)
  ) +
  guides(fill = "none", size = "none", alpha = "none") +
  coord_flip()
p

In [85]:
# Write the smoking_plus Panel A strip at the 15 x 2 preview size.
ggsave(
  filename = paste0(location_figures, "pairwise_epistasis_explainer_smoker_template.png"),
  plot = p,
  width = 15,
  height = 2
)

In [None]:
# Panel A (nonsmoking_plus): selection on `mg` in the wild-type context (taken
# from M1_results) next to its selection in each `epistatic_gts` context
# (taken from the M2 interaction table), drawn as one horizontal strip.
options(repr.plot.width = 15, repr.plot.height = 2)

alpha_palette <- c("TRUE" = 1, "FALSE" = 0.1)
fill_palette <- c(
  "WT" = "purple",
  "Antagonism" = brewer.pal(n = 3, name = "RdYlBu")[1],
  "Synergism" = brewer.pal(n = 3, name = "RdYlBu")[3]
)

mg <- "EGFR"
epistatic_gts <- c("KRAS", "TP53")
cohort <- "nonsmoking_plus"

# Pick the interaction table for the requested cohort. An unknown cohort now
# stops with an error: previously neither branch ran and the rest of the cell
# silently reused whatever `tmp` an earlier cell had left behind.
tmp <- switch(
  cohort,
  smoking_plus = variant_M2_smoking_interaction_df,
  nonsmoking_plus = variant_M2_nonsmoking_interaction_df,
  stop("Unknown cohort: ", cohort, call. = FALSE)
) %>%
  filter(mutated_gene == mg, epistatic_gt %in% c(epistatic_gts, "WT"))

# Replace the M2 "WT" rows with the single M1 estimate for `mg`, keeping the
# mean `to_count` of the dropped WT rows so the point can still be sized.
# (The right-hand side still sees the unmodified `tmp` from above.)
tmp <- tmp %>%
  filter(epistatic_gt != "WT") %>%
  bind_rows(
    M1_results %>%
      filter(gene == mg) %>%
      filter(key == cohort) %>%
      mutate(mutated_gene = gene, epistatic_gt = "WT", signif = TRUE) %>%
      left_join(
        tmp %>%
          filter(epistatic_gt == "WT") %>%
          group_by(mutated_gene) %>%
          summarize(to_count = mean(to_count)),
        by = "mutated_gene"
      )
  ) %>%
  mutate(
    context = ifelse(epistatic_gt == "WT", "WT", "Non-WT\n([gene] mutated)"),
    color = ifelse(epistatic_gt == "WT", "WT", ifelse(ratio > 1, "Synergism", "Antagonism"))
  ) %>%
  # Small per-context offsets so the KRAS and TP53 points do not overlap.
  mutate(nudge_dist = case_when(
    epistatic_gt == "KRAS" ~ -0.1,
    epistatic_gt == "TP53" ~ 0.1,
    TRUE ~ 0
  )) %>%
  mutate(epistatic_gt = ifelse(epistatic_gt == "WT", "Wild-type", paste0(epistatic_gt, "-mutant")))

p <- tmp %>%
  ggplot(aes(x = mutated_gene, y = gamma_mle, size = to_count, alpha = signif)) +
  geom_errorbar(
    aes(ymin = gamma_ci_low, ymax = gamma_ci_high),
    width = 0.1,
    linewidth = 0.2,
    position = position_nudge(tmp %>% pull(nudge_dist))
  ) +
  geom_point(
    aes(fill = color),
    shape = 21, color = "black",
    position = position_nudge(tmp %>% pull(nudge_dist))
  ) +
  geom_text(
    data = tmp %>% filter(signif),
    aes(label = epistatic_gt),
    size = 5,
    nudge_x = -0.25,
    check_overlap = FALSE
  ) +
  labs(
    x = "",
    # Built from `mg` (it used to hard-code "EGFR") so the title follows the
    # gene selected at the top of the cell.
    y = paste0("Scaled selection coefficient for *", mg, "* mutation in somatic genotype"),
    size = "Sample count"
  ) +
  scale_alpha_manual(values = alpha_palette, name = "Significant difference in selection") +
  scale_fill_manual(values = fill_palette, name = "Genetic context") +
  scale_y_continuous(labels = scientific_expr, breaks = seq(0, 2.5e6, 5e5)) +
  theme_classic() +
  theme(
    plot.title = element_text(size = 24, hjust = 0.5),
    # element_markdown so the *gene* in the axis title renders in italics.
    axis.title = ggtext::element_markdown(size = 24),
    axis.text = element_text(size = 20, face = "italic"),
    legend.position = c(0.8, 0.2),
    legend.key.size = unit(1.5, 'cm'),
    legend.text = element_text(size = 20),
    legend.title = element_text(size = 20)
  ) +
  guides(fill = "none", size = "none", alpha = "none") +
  coord_flip()
p

In [87]:
# Write the nonsmoking_plus Panel A strip at the 15 x 2 preview size.
ggsave(
  filename = paste0(location_figures, "pairwise_epistasis_explainer_nonsmoker_template.png"),
  plot = p,
  width = 15,
  height = 2
)

## Panels B and C

In [None]:
options(repr.plot.width = 15, repr.plot.height = 20)

# All pairwise epistatic effects for smoking_plus, legend hidden.
# The limit is stored as `y_ceiling` rather than `ceiling` so that
# base::ceiling() is not masked by a number in the global environment.
y_ceiling <- 1.5e6
p <- plot_all_pairwise_epistatic_effects(
  variant_M2_smoking_interaction_df,
  M1_results %>% filter(key == "smoking_plus", method == "variant"),
  ceiling = y_ceiling
) +
  scale_y_continuous(labels = scientific_expr, breaks = seq(0, y_ceiling - 1, 3e5))
p <- p + theme(legend.position = "none")
p

In [103]:
# Write the smoking_plus pairwise-epistasis figure at 15 x 20.
ggsave(
  filename = paste0(location_figures, "GxG_smoker.png"),
  plot = p,
  width = 15,
  height = 20
)

In [None]:
# Set the preview size here as well: the cell used to inherit it from
# whichever cell last called options(), so running it after a 15 x 2 panel
# squashed the plot. 15 x 20 matches the size the figure is saved at.
options(repr.plot.width = 15, repr.plot.height = 20)

# All pairwise epistatic effects for nonsmoking_plus, legend hidden.
# The limit is stored as `y_ceiling` rather than `ceiling` so that
# base::ceiling() is not masked by a number in the global environment.
y_ceiling <- 4e6
p <- plot_all_pairwise_epistatic_effects(
  variant_M2_nonsmoking_interaction_df,
  M1_results %>% filter(key == "nonsmoking_plus", method == "variant"),
  ceiling = y_ceiling
) +
  scale_y_continuous(labels = scientific_expr, breaks = seq(0, y_ceiling - 1, 6e5))
p <- p + theme(legend.position = "none")
p

In [105]:
# Write the nonsmoking_plus pairwise-epistasis figure at 15 x 20.
ggsave(
  filename = paste0(location_figures, "GxG_nonsmoker.png"),
  plot = p,
  width = 15,
  height = 20
)

## Analysis for results text

In [23]:
# Genes ordered by decreasing gamma_mle within each "_plus" key.
ranked_smoking_genes <- M1_results %>%
  filter(key == "smoking_plus") %>%
  arrange(desc(gamma_mle)) %>%
  pull(gene)
ranked_nonsmoking_genes <- M1_results %>%
  filter(key == "nonsmoking_plus") %>%
  arrange(desc(gamma_mle)) %>%
  pull(gene)

# Hand-curated gene classes used to slice the interaction tables below.
oncogenes <- c("KRAS", "BRAF", "EGFR", "CTNNB1", "PIK3CA", "MET", "GNAS", "ALK")
tsgs <- c(
  "TP53", "KEAP1", "ATM", "STK11", "BRCA2", "SETD2",
  "RBM10", "APC", "MGA", "RB1", "SMAD4"
)
chromatin_remodelers <- c("SMARCA4", "ARID1A")

Epistatic patterns of strongly selected genes in ES-LUAD

In [None]:
# Significant non-WT interactions with ratio > 1 whose mutated gene is among
# the nine highest-ranked smoking_plus genes, ordered by increasing ratio.
variant_M2_smoking_interaction_df %>%
  filter(signif, epistatic_gt != "WT") %>%
  filter(mutated_gene %in% ranked_smoking_genes[1:9], ratio > 1) %>%
  arrange(ratio)

Epistatic patterns of chromatin remodeling genes and moderately strongly selected tumor suppressor genes

In [None]:
# For the chromatin remodelers, count significant non-WT interactions per
# genetic context, most frequent context first.
# Alternative gene set kept for reference:
#   c("BRCA2", "SETD2", "RBM10", "APC", "MGA", "RB1")
variant_M2_smoking_interaction_df %>%
  filter(signif, epistatic_gt != "WT") %>%
  filter(mutated_gene %in% chromatin_remodelers) %>%
  arrange(ratio) %>%
  count(epistatic_gt) %>%
  arrange(desc(n))

Epistatic patterns of oncogenes

In [None]:
# For oncogenes, count significant non-WT interactions per genetic context,
# split by whether ratio exceeds 1; most frequent first.
# Alternative summary kept for reference:
#   filter(ratio > 1) %>% count(mutated_gene) %>% arrange(desc(n))
variant_M2_smoking_interaction_df %>%
  filter(signif, epistatic_gt != "WT") %>%
  filter(mutated_gene %in% oncogenes) %>%
  group_by(ratio > 1) %>%
  count(epistatic_gt) %>%
  arrange(desc(n))

In [None]:
# Oncogenes whose significant non-WT interactions all point the same way
# (every ratio < 1, or every ratio > 1).
variant_M2_smoking_interaction_df %>%
  filter(signif, epistatic_gt != "WT") %>%
  filter(mutated_gene %in% oncogenes) %>%
  arrange(mutated_gene) %>%
  group_by(mutated_gene) %>%
  filter(all(ratio < 1) | all(ratio > 1))

Median effect of synergistic epistasis

In [None]:
# Five-number summary (plus mean) of ratio over significant non-WT
# interactions with ratio > 1.
variant_M2_smoking_interaction_df %>%
  filter(signif, epistatic_gt != "WT", ratio > 1) %>%
  pull(ratio) %>%
  summary()

Correlation of strength of synergistic epistasis with ...

1) Strength of selection on prior mutation

In [None]:
options(repr.plot.width = 6, repr.plot.height = 6)

# For every significant non-WT interaction with ratio > 1, relabel the context
# gene as `mutated_gene` in a "WT" context and look up that row's gamma_mle in
# the same table, i.e. attach the wild-type selection estimate of the gene
# that was mutated first.
tmp <- variant_M2_smoking_interaction_df %>%
  filter(epistatic_gt != "WT", signif, ratio > 1) %>%
  select(gene_set, epistatic_gt, ratio) %>%
  rename(mutated_gene = epistatic_gt) %>%
  mutate(epistatic_gt = "WT") %>%
  left_join(
    variant_M2_smoking_interaction_df %>%
      select(gene_set, epistatic_gt, mutated_gene, gamma_mle),
    by = c("gene_set", "epistatic_gt", "mutated_gene")
  )

tmp %>%
  ggplot(aes(x = log(gamma_mle), y = ratio)) +
  geom_point() +
  # The x aesthetic is already log(gamma_mle), so the smoother must be linear
  # in x. The previous `y ~ log(x)` drew a fit against log(log(gamma_mle)),
  # which did not match `ratio_model` below.
  geom_smooth(method = "lm", formula = y ~ x, alpha = 0.5) +
  labs(x = "log(Selection coefficient) for prior mutation", y = "Epistatic ratio") +
  theme_bw()

# Same model as the line drawn above: ratio regressed on log(gamma_mle).
ratio_model <- lm(ratio ~ log(gamma_mle), data = tmp)
summary(ratio_model)

2) Strength of selection on secondary mutation

In [None]:
options(repr.plot.width = 6, repr.plot.height = 6)

# Scatter of ratio against gamma_mle for significant "WT" rows with
# 1 < ratio < 20, with a linear fit and dashed reference lines at 3 and 8.
variant_M2_smoking_interaction_df %>%
  filter(epistatic_gt == "WT", signif, ratio > 1) %>%
  filter(ratio < 20) %>%
  ggplot(aes(x = gamma_mle, y = ratio)) +
  geom_point() +
  geom_smooth(method = "lm", formula = y ~ x) +
  geom_hline(yintercept = 3, linetype = "dashed") +
  geom_hline(yintercept = 8, linetype = "dashed") +
  labs(x = "Baseline selection coefficient for affected mutation", y = "Epistatic ratio") +
  theme_bw()

# Linear model on the same subset as the plot.
ratio_model <- lm(
  ratio ~ gamma_mle,
  data = variant_M2_smoking_interaction_df %>%
    filter(epistatic_gt == "WT", signif, ratio > 1) %>%
    filter(ratio < 20)
)
summary(ratio_model)

Antagonistic sign epistasis

In [None]:
# Significant non-WT interactions whose upper confidence bound is below 1.
# Looser alternative kept for reference: filter(epistatic_gt != "WT", gamma_mle < 1)
variant_M2_smoking_interaction_df %>%
  filter(epistatic_gt != "WT", gamma_ci_high < 1, signif) %>%
  arrange(mutated_gene)

Antagonistic magnitude epistasis

In [None]:
# Non-WT interactions with ratio < 1 but gamma_mle still above 1.
variant_M2_smoking_interaction_df %>%
  filter(epistatic_gt != "WT", gamma_mle > 1, ratio < 1)

In [None]:
# For TP53 and KRAS, count significant non-WT interactions with ratio < 1 per
# genetic context, most frequent first.
variant_M2_smoking_interaction_df %>%
  filter(epistatic_gt != "WT", ratio < 1, signif) %>%
  filter(mutated_gene %in% c("TP53", "KRAS")) %>%
  count(epistatic_gt) %>%
  arrange(desc(n))

Synergistic and antagonistic epistasis in NS-LUAD

In [None]:
# Pair up the two cohorts' rows by tested_combo and genetic context, then
# reduce each non-WT pair to four flags: direction (ratio > 1) and
# significance in each cohort.
tmp <- variant_M2_smoking_interaction_df %>%
  full_join(
    variant_M2_nonsmoking_interaction_df,
    by = c("tested_combo", "epistatic_gt"),
    suffix = c("_smoking", "_nonsmoking")
  ) %>%
  filter(epistatic_gt != "WT") %>%
  select(tested_combo, ratio_smoking, ratio_nonsmoking, signif_smoking, signif_nonsmoking) %>%
  mutate(syn_smoking = ratio_smoking > 1, syn_nonsmoking = ratio_nonsmoking > 1) %>%
  select(-c(ratio_smoking, ratio_nonsmoking))

# Cross-tabulate the flags in both orientations. setDT() turns `tmp` into a
# data.table in place; no value column or aggregate is named, so dcast picks
# its defaults.
dcast(setDT(tmp), syn_smoking + signif_smoking ~ syn_nonsmoking + signif_nonsmoking)
dcast(setDT(tmp), syn_nonsmoking + signif_nonsmoking ~ syn_smoking + signif_smoking)

In [None]:
# Pairs significant in both cohorts with ratio > 1 in smoking_plus but not in
# nonsmoking_plus, counted by the second "_"-separated field of tested_combo.
tmp %>%
  filter(syn_smoking, signif_smoking, !syn_nonsmoking, signif_nonsmoking) %>%
  mutate(gene = str_split_i(tested_combo, "_", 2)) %>%
  count(gene) %>%
  arrange(desc(n))

In [None]:
# Significant non-WT interactions with ratio > 1 in nonsmoking_plus, ordered
# by increasing ratio. (Append `%>% pull(ratio) %>% summary` for a summary.)
variant_M2_nonsmoking_interaction_df %>%
  filter(epistatic_gt != "WT", ratio > 1, signif) %>%
  arrange(ratio)

Asymmetric epistasis

In [None]:
# Gene sets with exactly two significant non-WT rows, dropping sets where
# every gamma_mle is below 1.
variant_M2_smoking_interaction_df %>%
  filter(epistatic_gt != "WT", signif) %>%
  group_by(gene_set) %>%
  filter(n() == 2) %>%
  filter(!all(gamma_mle < 1))

In [None]:
# All smoking_plus rows for the KEAP1_STK11 gene set.
variant_M2_smoking_interaction_df %>%
  filter(gene_set == "KEAP1_STK11")

# Figure 5: GxGxE

In [None]:
options(repr.plot.width = 20, repr.plot.height = 6)

# The two environments compared by plot_gge_interaction(); later cells reuse
# these names.
env1_df <- variant_M2_smoking_interaction_df
env2_df <- variant_M2_nonsmoking_interaction_df

# Four example (mutated gene, genetic context) pairs; only the first panel
# keeps its y-axis title.
mg <- "RB1"
gt <- "KRAS"
a <- plot_gge_interaction(mg, gt, env1_df, env2_df)

mg <- "RB1"
gt <- "EGFR"
b <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(axis.title.y = element_blank())

# Alternative pair considered for this panel: mg = "MET"; gt = "TP53"
mg <- "PIK3CA"
gt <- "KRAS"
c <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(axis.title.y = element_blank())

mg <- "EGFR"
gt <- "TP53"
d <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(axis.title.y = element_blank()) +
  scale_y_continuous(breaks = seq(0, 25, 5) * 1e5, labels = function(y) y * 1e-5)

p <- plot_grid(a, b, c, d, nrow = 1, rel_widths = c(1.08, 1, 1, 1), label_size = 25)
p

In [111]:
# Write Figure 5 at the 20 x 6 preview size.
ggsave(
  filename = paste0(location_figures, "all_interaction_types.png"),
  plot = p,
  width = 20,
  height = 6
)

## Analysis for results text

G

In [None]:
# Candidates with a gene effect only: mutated genes not flagged by
# get_genes_with_gxe_effects(), non-significant non-WT interactions with
# gamma_mle > 1, present more than once per tested_combo across the stacked
# cohorts; the 20 rows with the smallest within-combo variance of ratio.
variant_M2_smoking_interaction_df %>%
  bind_rows(variant_M2_nonsmoking_interaction_df) %>%
  filter(
    # TRUE spelled out: `T` is an ordinary variable that can be reassigned.
    !(mutated_gene %in% get_genes_with_gxe_effects(M1_results, "variant", include_panel_data = TRUE)),
    epistatic_gt != "WT",
    !signif
  ) %>%
  filter(gamma_mle > 1) %>%
  group_by(tested_combo) %>%
  filter(n() > 1) %>%
  mutate(ratio_variance = var(ratio)) %>%
  arrange(ratio_variance) %>%
  head(20)

In [None]:
options(repr.plot.width = 22, repr.plot.height = 5)

# Four example pairs shown side by side, each without legend or y-axis title.
# Other pairs that were tried (mutated gene / context):
#   APC/KRAS, RBM10/TP53, SMARCA4/KRAS, RB1/BRAF, SMARCA4/EGFR, MGA/CTNNB1
mg <- "BRCA2"
gt <- "EGFR"
b <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(legend.position = "none", axis.title.y = element_blank())

mg <- "RB1"
gt <- "KRAS"
d <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(legend.position = "none", axis.title.y = element_blank())

mg <- "ATM"
gt <- "EGFR"
f <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(legend.position = "none", axis.title.y = element_blank())

mg <- "BRCA2"
gt <- "KRAS"
i <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(legend.position = "none", axis.title.y = element_blank())

plot_grid(b, d, f, i, nrow = 1)

In [None]:
# Significant non-WT interactions for RB1, keeping contexts that appear more
# than once in the stacked cohorts.
variant_M2_smoking_interaction_df %>%
  bind_rows(variant_M2_nonsmoking_interaction_df) %>%
  filter(mutated_gene == "RB1", epistatic_gt != "WT", signif) %>%
  group_by(epistatic_gt) %>%
  filter(n() > 1)

GxG

In [None]:
# GxG candidates: mutated genes not flagged by get_genes_with_gxe_effects(),
# significant non-WT interactions with gamma_mle > 1 that occur more than once
# per tested_combo and share one direction; the 20 rows with the smallest
# within-combo variance of ratio.
variant_M2_smoking_interaction_df %>%
  bind_rows(variant_M2_nonsmoking_interaction_df) %>%
  filter(
    # TRUE spelled out: `T` is an ordinary variable that can be reassigned.
    !(mutated_gene %in% get_genes_with_gxe_effects(M1_results, "variant", include_panel_data = TRUE)),
    epistatic_gt != "WT",
    signif
  ) %>%
  filter(gamma_mle > 1) %>%
  group_by(tested_combo) %>%
  filter(n() > 1, (all(ratio > 1) | all(ratio < 1))) %>%
  mutate(ratio_variance = var(ratio)) %>%
  arrange(ratio_variance) %>%
  head(20)

In [None]:
# Non-WT rows for the SMAD4_BRCA2 combination in both cohorts.
variant_M2_smoking_interaction_df %>%
  bind_rows(variant_M2_nonsmoking_interaction_df) %>%
  filter(tested_combo == "SMAD4_BRCA2", epistatic_gt != "WT")

GxE

In [None]:
# GxE candidates: mutated genes flagged by get_genes_with_gxe_effects() whose
# non-WT interactions are not significant, present more than once per
# tested_combo; the 20 rows with the smallest maximum |log(ratio)|.
variant_M2_smoking_interaction_df %>%
  bind_rows(variant_M2_nonsmoking_interaction_df) %>%
  filter(
    # TRUE spelled out: `T` is an ordinary variable that can be reassigned.
    mutated_gene %in% get_genes_with_gxe_effects(M1_results, "variant", include_panel_data = TRUE),
    epistatic_gt != "WT",
    !signif
  ) %>%
  group_by(tested_combo) %>%
  filter(n() > 1) %>%
  mutate(
    max_abs_log_ratio = max(abs(log(ratio))),
    ratio_variance = var(ratio)
  ) %>%
  arrange(max_abs_log_ratio) %>%
  ungroup() %>%
  slice(1:20)

In [None]:
options(repr.plot.width = 22, repr.plot.height = 10)

# Ten example pairs on a two-row grid, each without legend or y-axis title.
mg <- "SMAD4"
gt <- "ATM"
a <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(legend.position = "none", axis.title.y = element_blank())

mg <- "MET"
gt <- "ARID1A"
b <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(legend.position = "none", axis.title.y = element_blank())

mg <- "STK11"
gt <- "TP53"
c <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(legend.position = "none", axis.title.y = element_blank())

mg <- "MET"
gt <- "TP53"
d <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(legend.position = "none", axis.title.y = element_blank())

mg <- "KEAP1"
gt <- "BRAF"
e <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(legend.position = "none", axis.title.y = element_blank())

mg <- "SMAD4"
gt <- "CTNNB1"
f <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(legend.position = "none", axis.title.y = element_blank())

mg <- "PIK3CA"
gt <- "ATM"
g <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(legend.position = "none", axis.title.y = element_blank())

mg <- "PIK3CA"
gt <- "KRAS"
h <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(legend.position = "none", axis.title.y = element_blank())

mg <- "MET"
gt <- "STK11"
i <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(legend.position = "none", axis.title.y = element_blank())

mg <- "MET"
gt <- "ALK"
j <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(legend.position = "none", axis.title.y = element_blank())

plot_grid(a, b, c, d, e, f, g, h, i, j, nrow = 2, label_size = 25)
# Compact alternative: plot_grid(b, d, f, i, nrow = 1)

GxGxE

In [None]:
# GxGxE candidates: significant "WT" rows occurring more than once per
# tested_combo with one shared direction, whose confidence intervals do not
# all overlap, ordered by within-combo variance of ratio.
variant_M2_smoking_interaction_df %>%
  bind_rows(variant_M2_nonsmoking_interaction_df) %>%
  filter(signif, epistatic_gt == "WT") %>%
  group_by(tested_combo) %>%
  filter(n() > 1, (all(ratio > 1) | all(ratio < 1))) %>%
  arrange(tested_combo) %>%
  # Test for GxE between mutations from WT: the highest lower bound must
  # exceed the lowest upper bound within the combo.
  filter(max(gamma_ci_low) > min(gamma_ci_high)) %>%
  mutate(ratio_variance = var(ratio)) %>%
  arrange(ratio_variance)

In [None]:
options(repr.plot.width = 22, repr.plot.height = 5)

# Five example pairs in one row, each without legend or y-axis title.
mg <- "TP53"
gt <- "KRAS"
a <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(legend.position = "none", axis.title.y = element_blank())

mg <- "EGFR"
gt <- "TP53"
b <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(legend.position = "none", axis.title.y = element_blank())

mg <- "SMAD4"
gt <- "BRCA2"
c <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(legend.position = "none", axis.title.y = element_blank())

mg <- "STK11"
gt <- "KEAP1"
d <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(legend.position = "none", axis.title.y = element_blank())

mg <- "EGFR"
gt <- "KRAS"
e <- plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(legend.position = "none", axis.title.y = element_blank())

plot_grid(a, b, c, d, e, nrow = 1)

In [None]:
# Every row for the TP53_EGFR combination in both cohorts.
variant_M2_smoking_interaction_df %>%
  bind_rows(variant_M2_nonsmoking_interaction_df) %>%
  filter(tested_combo == "TP53_EGFR")

Opposite signs of epistasis between ES- and NS-LUAD

In [None]:
# Non-significant non-WT interactions for mutated genes not flagged by
# get_genes_with_gxe_effects(), kept when a tested_combo occurs more than once
# with one shared direction; ordered by the relative gap between the largest
# and the mean ratio within the combo.
variant_M2_smoking_interaction_df %>%
  bind_rows(variant_M2_nonsmoking_interaction_df) %>%
  filter(
    # TRUE spelled out: `T` is an ordinary variable that can be reassigned.
    !(mutated_gene %in% get_genes_with_gxe_effects(M1_results, "variant", include_panel_data = TRUE)),
    epistatic_gt != "WT",
    !signif
  ) %>%
  group_by(tested_combo) %>%
  filter(n() > 1, (all(ratio > 1) | all(ratio < 1))) %>%
  mutate(prop_diff_ratio = (max(ratio) - mean(ratio)) / mean(ratio)) %>%
  arrange(prop_diff_ratio)

GxG

In [None]:
# Names of tested_combos whose significant non-WT interactions occur more than
# once across the cohorts with one shared direction, restricted to mutated
# genes not flagged by get_genes_with_gxe_effects(); ordered by the relative
# gap between the largest and the mean ratio.
variant_M2_smoking_interaction_df %>%
  filter(signif, epistatic_gt != "WT") %>%
  bind_rows(variant_M2_nonsmoking_interaction_df %>% filter(signif, epistatic_gt != "WT")) %>%
  arrange(tested_combo) %>%
  group_by(tested_combo) %>%
  filter(n() > 1, (all(ratio > 1) | all(ratio < 1))) %>%
  # TRUE spelled out: `T` is an ordinary variable that can be reassigned.
  filter(!(mutated_gene %in% get_genes_with_gxe_effects(M1_results, "variant", include_panel_data = TRUE))) %>%
  mutate(prop_diff_ratio = (max(ratio) - mean(ratio)) / mean(ratio)) %>%
  arrange(prop_diff_ratio) %>%
  pull(tested_combo) %>%
  unique()

In [None]:
# Significant "WT" rows occurring more than once per tested_combo, with
# non-overlapping confidence intervals and exactly one row with ratio < 1.
variant_M2_smoking_interaction_df %>%
  bind_rows(variant_M2_nonsmoking_interaction_df) %>%
  filter(signif, epistatic_gt == "WT") %>%
  group_by(tested_combo) %>%
  filter(n() > 1) %>%
  filter(max(gamma_ci_low) > min(gamma_ci_high)) %>%
  filter(sum(ratio < 1) == 1)

In [None]:
# Names of tested_combos for mutated genes flagged by
# get_genes_with_gxe_effects() whose significant non-WT interactions occur
# more than once with exactly one row having ratio < 1.
variant_M2_smoking_interaction_df %>%
  bind_rows(variant_M2_nonsmoking_interaction_df) %>%
  filter(
    # TRUE spelled out: `T` is an ordinary variable that can be reassigned.
    mutated_gene %in% get_genes_with_gxe_effects(M1_results, "variant", include_panel_data = TRUE),
    epistatic_gt != "WT",
    signif
  ) %>%
  group_by(tested_combo) %>%
  filter(n() > 1, sum(ratio < 1) == 1) %>%
  pull(tested_combo) %>%
  unique()

In [None]:
options(repr.plot.width = 5, repr.plot.height = 5)

# Single example panel (MET in the SMAD4 context), no legend or y-axis title.
mg <- "MET"
gt <- "SMAD4"
plot_gge_interaction(mg, gt, env1_df, env2_df) +
  theme(legend.position = "none", axis.title.y = element_blank())

# Figure 6: GxGxG for KRAS, KEAP1, and STK11

In [112]:
# M3 results for the "variant" method and one interaction table per cohort key.
variant_M3_data <- load_M3_results("variant")

variant_M3_smoking_interaction_df <- variant_M3_data %>%
  filter(key == "smoking_plus") %>%
  get_interaction_df()
variant_M3_nonsmoking_interaction_df <- variant_M3_data %>%
  filter(key == "nonsmoking_plus") %>%
  get_interaction_df()

In [113]:
# Per-cohort plotting tables: the helper receives the unique M2 tested_combo
# names and the matching M3 interaction table.
smoking_plotting_df <- get_multi_gene_effects_2(
  variant_M2_smoking_interaction_df %>% pull(tested_combo) %>% unique(),
  variant_M3_smoking_interaction_df
)
nonsmoking_plotting_df <- get_multi_gene_effects_2(
  variant_M2_nonsmoking_interaction_df %>% pull(tested_combo) %>% unique(),
  variant_M3_nonsmoking_interaction_df
)

In [None]:
# One row per (context, mutated gene) for the KRAS_KEAP1_STK11 gene set.
# TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
variant_M3_smoking_interaction_df %>%
  filter(gene_set == "KRAS_KEAP1_STK11") %>%
  distinct(epistatic_gt, mutated_gene, .keep_all = TRUE)

In [None]:
# smoking_plus mu for KRAS, KEAP1 and STK11, largest first.
M1_results %>%
  filter(key == "smoking_plus", gene %in% c("KRAS", "KEAP1", "STK11")) %>%
  select(gene, mu) %>%
  arrange(desc(mu))

In [None]:
# One row per epistatic genotype for STK11 within the KRAS/KEAP1/STK11 gene
# set in smokers. `TRUE` replaces the reassignable shorthand `T`.
variant_M3_smoking_interaction_df %>%
    filter(gene_set == "KRAS_KEAP1_STK11", mutated_gene == "STK11") %>%
    distinct(epistatic_gt, .keep_all = TRUE)

# Figure 7: GxGxG

In [None]:
# Smoker (gene_set, mutated_gene) pairs flagged `extra_effect_strict`, joined
# back to their M3 rows; WT-background rows are dropped and the gamma, ratio
# and signif columns are shown.
smoking_plotting_df %>%
    filter(extra_effect_strict) %>%
    select(gene_set, mutated_gene) %>%
    distinct() %>%
    left_join(
        variant_M3_smoking_interaction_df,
        by = c("gene_set", "mutated_gene")
    ) %>%
    filter(epistatic_gt != "WT") %>%
    arrange(gene_set) %>%
    select(
        gene_set, mutated_gene, epistatic_gt,
        starts_with("gamma"), ratio, signif
    )

In [None]:
# Same strict-effect smoker table, restricted to (gene_set, mutated_gene)
# groups where every non-WT ratio exceeds 1. After sorting by ratio within
# groups and de-duplicating, rows are ranked by the CI width relative to the
# MLE (`rel_span`).
smoking_plotting_df %>%
    filter(extra_effect_strict) %>%
    select(gene_set, mutated_gene) %>%
    distinct() %>%
    left_join(
        variant_M3_smoking_interaction_df,
        by = c("gene_set", "mutated_gene")
    ) %>%
    filter(epistatic_gt != "WT") %>%
    arrange(gene_set) %>%
    select(
        gene_set, mutated_gene, epistatic_gt,
        starts_with("gamma"), ratio, signif
    ) %>%
    group_by(gene_set, mutated_gene) %>%
    filter(all(ratio > 1)) %>%
    arrange(ratio, .by_group = TRUE) %>%
    distinct(gene_set, .keep_all = TRUE) %>%
    arrange(ratio) %>%
    mutate(rel_span = (gamma_ci_high - gamma_ci_low) / gamma_mle) %>%
    arrange(rel_span)

In [None]:
# Nonsmoker (gene_set, mutated_gene) pairs flagged `extra_effect_strict`,
# joined to their M3 rows without the WT background; the result stays grouped
# by (gene_set, mutated_gene).
nonsmoking_plotting_df %>%
    filter(extra_effect_strict) %>%
    select(gene_set, mutated_gene) %>%
    distinct() %>%
    left_join(
        variant_M3_nonsmoking_interaction_df,
        by = c("gene_set", "mutated_gene")
    ) %>%
    filter(epistatic_gt != "WT") %>%
    arrange(gene_set) %>%
    select(
        gene_set, mutated_gene, epistatic_gt,
        starts_with("gamma"), ratio, signif
    ) %>%
    group_by(gene_set, mutated_gene)

In [None]:
# For strict-effect smoker groups where all non-WT ratios exceed 1, multiply
# the ratios per number of background mutations (1 = no '_' in epistatic_gt,
# 2 = contains '_'), spread them into epistatic_ratio_1 / epistatic_ratio_2,
# and report their quotient rounded to two decimals, largest first.
smoking_plotting_df %>%
    filter(extra_effect_strict) %>%
    select(gene_set, mutated_gene) %>%
    distinct() %>%
    left_join(
        variant_M3_smoking_interaction_df,
        by = c("gene_set", "mutated_gene")
    ) %>%
    filter(epistatic_gt != "WT") %>%
    arrange(gene_set) %>%
    select(
        gene_set, mutated_gene, epistatic_gt,
        starts_with("gamma"), ratio, signif
    ) %>%
    group_by(gene_set, mutated_gene) %>%
    filter(all(ratio > 1)) %>%
    mutate(num_mutations = 1 + str_detect(epistatic_gt, '_')) %>%
    group_by(gene_set, mutated_gene, num_mutations) %>%
    summarize(combined_effect = prod(ratio), .groups = "drop_last") %>%
    arrange(gene_set) %>%
    pivot_wider(
        id_cols = c(gene_set, mutated_gene),
        names_from = num_mutations,
        values_from = combined_effect,
        names_prefix = "epistatic_ratio_"
    ) %>%
    mutate(o_over_e = round(epistatic_ratio_2 / epistatic_ratio_1, 2)) %>%
    arrange(desc(o_over_e))

Higher-order epistasis

In [None]:
# Smokers: observed vs. expected effect of double-mutant backgrounds.
#
# For each mutated gene in each gene set we need the epistatic `ratio`
# (relative to WT) for all three non-WT genotypes.
#
# Only synergistic rows (ratio > 1) are considered.
# - sub-additive epistasis is defined here as a synergistic effect of two
#   mutations on a third that is less than the product of the individual
#   synergistic effects of the two mutations
#
# obs_over_exp_ratio = (ratio of the double-mutant background) /
#                      (product of the two single-mutant ratios)
exp_and_obs_ratios <-
    variant_M3_smoking_interaction_df %>%
    filter(epistatic_gt != "WT", ratio > 1) %>%
    # Single-mutant backgrounds (no '_' in epistatic_gt) must be flagged
    # `signif`; double-mutant backgrounds are kept regardless.
    filter(str_detect(epistatic_gt, '_') | signif) %>%
    group_by(gene_set, mutated_gene) %>%
    # keep only groups where all three non-WT genotypes survived the filters
    filter(n() == 3) %>%
    mutate(num_mutations = str_detect(epistatic_gt, '_') + 1) %>%
    select(gene_set, num_mutations, mutated_gene, ratio, signif) %>%
    group_by(gene_set, mutated_gene, num_mutations) %>%
    summarize(combined_effect = prod(ratio), .groups = "drop_last") %>%
    # pivot_wider() replaces the superseded spread(); columns are `1` and `2`
    pivot_wider(names_from = num_mutations, values_from = combined_effect) %>%
    mutate(obs_over_exp_ratio = `2` / `1`) %>%
    arrange(obs_over_exp_ratio)

# Bar per (mutated gene, gene set), sorted by decreasing ratio; the dashed red
# line marks observed == expected.
exp_and_obs_ratios %>%
    select(gene_set, mutated_gene, obs_over_exp_ratio) %>%
    ggplot(aes(x = reorder(paste0(mutated_gene, ' [', gene_set, ']'), -obs_over_exp_ratio),
               y = obs_over_exp_ratio)) +
        geom_bar(stat = 'identity') +
        geom_hline(yintercept = 1, lty = 2, color = "red") +
        theme(axis.title.x = element_blank(), axis.text.x = element_blank())

In [None]:
# Nonsmokers: observed vs. expected effect of double-mutant backgrounds.
# NOTE: this overwrites `exp_and_obs_ratios` from the smoker cell.
#
# For each mutated gene in each gene set we need the epistatic `ratio`
# (relative to WT) for all three non-WT genotypes.
#
# Only synergistic rows (ratio > 1) are considered.
# - sub-additive epistasis is defined here as a synergistic effect of two
#   mutations on a third that is less than the product of the individual
#   synergistic effects of the two mutations
#
# obs_over_exp_ratio = (ratio of the double-mutant background) /
#                      (product of the two single-mutant ratios)
exp_and_obs_ratios <-
    variant_M3_nonsmoking_interaction_df %>%
    filter(epistatic_gt != "WT", ratio > 1) %>%
    # Single-mutant backgrounds (no '_' in epistatic_gt) must be flagged
    # `signif`; double-mutant backgrounds are kept regardless.
    filter(str_detect(epistatic_gt, '_') | signif) %>%
    group_by(gene_set, mutated_gene) %>%
    # keep only groups where all three non-WT genotypes survived the filters
    filter(n() == 3) %>%
    mutate(num_mutations = str_detect(epistatic_gt, '_') + 1) %>%
    select(gene_set, num_mutations, mutated_gene, ratio, signif) %>%
    group_by(gene_set, mutated_gene, num_mutations) %>%
    summarize(combined_effect = prod(ratio), .groups = "drop_last") %>%
    # pivot_wider() replaces the superseded spread(); columns are `1` and `2`
    pivot_wider(names_from = num_mutations, values_from = combined_effect) %>%
    mutate(obs_over_exp_ratio = `2` / `1`) %>%
    arrange(obs_over_exp_ratio)

# Bar per (mutated gene, gene set), sorted by decreasing ratio; the dashed red
# line marks observed == expected.
exp_and_obs_ratios %>%
    select(gene_set, mutated_gene, obs_over_exp_ratio) %>%
    ggplot(aes(x = reorder(paste0(mutated_gene, ' [', gene_set, ']'), -obs_over_exp_ratio),
               y = obs_over_exp_ratio)) +
        geom_bar(stat = 'identity') +
        geom_hline(yintercept = 1, lty = 2, color = "red") +
        theme(axis.title.x = element_blank(), axis.text.x = element_blank())

In [None]:
# Smokers: groups where all three non-WT rows pass, i.e. single-mutant
# backgrounds (no '_' in epistatic_gt) are NOT flagged `signif` while the
# double-mutant background is flagged `signif` with ratio < 1.
variant_M3_smoking_interaction_df %>%
    filter(epistatic_gt != "WT") %>%
    filter(
        if_else(
            str_detect(epistatic_gt, '_'),
            signif & ratio < 1,
            !signif
        )
    ) %>%
    group_by(gene_set, mutated_gene) %>%
    filter(n() == 3)

In [None]:
# Nonsmokers: groups where all three non-WT rows pass, i.e. single-mutant
# backgrounds (no '_' in epistatic_gt) are NOT flagged `signif` while the
# double-mutant background is flagged `signif` with ratio < 1.
variant_M3_nonsmoking_interaction_df %>%
    filter(epistatic_gt != "WT") %>%
    filter(
        if_else(
            str_detect(epistatic_gt, '_'),
            signif & ratio < 1,
            !signif
        )
    ) %>%
    group_by(gene_set, mutated_gene) %>%
    filter(n() == 3)

In [None]:
# Smokers: groups where all three non-WT rows pass, i.e. single-mutant
# backgrounds (no '_' in epistatic_gt) are NOT flagged `signif` while the
# double-mutant background is flagged `signif` with ratio > 1.
variant_M3_smoking_interaction_df %>%
    filter(epistatic_gt != "WT") %>%
    filter(
        if_else(
            str_detect(epistatic_gt, '_'),
            signif & ratio > 1,
            !signif
        )
    ) %>%
    group_by(gene_set, mutated_gene) %>%
    filter(n() == 3)

In [None]:
# Same emergent-synergy screen (non-signif single-mutant backgrounds, signif
# double-mutant background with ratio > 1), limited to smoker
# (gene_set, mutated_gene) pairs flagged `extra_effect_strict`.
smoking_plotting_df %>%
    filter(extra_effect_strict) %>%
    select(gene_set, mutated_gene) %>%
    distinct() %>%
    left_join(
        variant_M3_smoking_interaction_df,
        by = c("gene_set", "mutated_gene")
    ) %>%
    filter(epistatic_gt != "WT") %>%
    filter(
        if_else(
            str_detect(epistatic_gt, '_'),
            signif & ratio > 1,
            !signif
        )
    ) %>%
    group_by(gene_set, mutated_gene) %>%
    filter(n() == 3)

In [None]:
# Nonsmoker (gene_set, mutated_gene) pairs flagged `extra_effect_strict`,
# joined to their M3 rows; WT-background rows are dropped and the gamma, ratio
# and signif columns are shown.
nonsmoking_plotting_df %>%
    filter(extra_effect_strict) %>%
    select(gene_set, mutated_gene) %>%
    distinct() %>%
    left_join(
        variant_M3_nonsmoking_interaction_df,
        by = c("gene_set", "mutated_gene")
    ) %>%
    filter(epistatic_gt != "WT") %>%
    arrange(gene_set) %>%
    select(
        gene_set, mutated_gene, epistatic_gt,
        starts_with("gamma"), ratio, signif
    )

In [None]:
# Build `tmp_df`, the point/label table for the smoker GxGxG figure.
# Only change to the code: `.keep_all = T` -> `.keep_all = TRUE` on the final
# distinct() (`T` is a reassignable variable). Everything else is unchanged
# because the pipeline is order-dependent.
options(repr.plot.width = 22, repr.plot.height = 5.1)
# Candidate pairwise combos; within this cell it is only referenced by the
# commented-out filter below.
smoking_interactions_to_plot =c("KRAS [TP53]", "STK11 [KRAS]", "SMARCA4 [KRAS]", "BRCA2 [KEAP1]",
                                "BRCA2 [TP53]", "SETD2 [TP53]", "APC [STK11]", "APC [KRAS]",
                                "RB1 [KEAP1]", "EGFR [TP53]")

tmp_df = 
    smoking_plotting_df %>% filter(extra_effect_strict) %>%
    # add the WT-background rows from the M3 table, using combo_name as pairwise_combo
    bind_rows(variant_M3_smoking_interaction_df %>% filter(epistatic_gt=="WT") %>% mutate(pairwise_combo = combo_name)) %>%
    #filter(pairwise_combo %in% smoking_interactions_to_plot) %>% 
    # keep only (gene_set, pairwise_combo) groups with exactly three rows
    group_by(gene_set, pairwise_combo) %>% filter(n()==3) %>% ungroup() %>%
    # group: WT / Single-mutant / Double-mutant (a '_' in epistatic_gt means double);
    # nudge_dist offsets the point horizontally, label_nudge_dist offsets its text label
    mutate(group = ifelse(epistatic_gt=="WT","WT", ifelse(str_detect(epistatic_gt, "_"), "Double-mutant","Single-mutant")),
            nudge_dist = ifelse(group=="Double-mutant",0.1,ifelse(group=="WT",-0.1,0)), 
            label_nudge_dist = case_when(
                nudge_dist<0 ~ nudge_dist-0.1,
                nudge_dist==0 ~ 0.2,
                nudge_dist>0 ~ nudge_dist+0.3#+0.27
            )) %>%
    select(gene_set, pairwise_combo, epistatic_gt, mutated_gene, starts_with("gamma"), from_count, to_count, extra_effect_strict, group, ends_with("nudge_dist")) %>%
    #mutate(label = ifelse(epistatic_gt=="WT",'\u00D8',gsub('_','+\n',epistatic_gt))) %>%
    # label: first letter of each gene in epistatic_gt joined by '+'; empty for WT
    rowwise() %>% mutate(label = ifelse(epistatic_gt=="WT",'',paste(sapply(strsplit(epistatic_gt, "_")[[1]], function(x) substr(x, 1, 1)), collapse = '+'))) %>% ungroup() %>%
    # order: sort key used by reorder() when plotting. Per group it ends up as the
    # double-mutant gamma_mle when that row has the group's largest gamma_mle,
    # otherwise as -max(gamma_mle)
    group_by(gene_set, pairwise_combo) %>% mutate(max_gamma = max(gamma_mle)) %>% ungroup() %>%
    mutate(order = ifelse(group=="Double-mutant", ifelse(gamma_mle == max_gamma, gamma_mle, -max_gamma), -Inf)) %>%
    group_by(gene_set, pairwise_combo) %>% mutate(order = max(order), max_gamma = NULL) %>% ungroup() %>% 
    # other_genes: gene_set with mutated_gene removed, trailing '_' trimmed and the
    # remaining '_'/'__' separators turned into ',<br>'.
    # NOTE(review): only trailing underscores are trimmed; if mutated_gene is the
    # first gene in gene_set the result would start with ',<br>' -- confirm this
    # never occurs in the plotted rows.
    rowwise() %>% mutate(other_genes = gsub('(__|_)',',<br>',trimws(gsub(mutated_gene, '',gene_set), "right", '_'))) %>% ungroup() %>% 
    # mutate_cond() is a project helper defined outside this cell. The two-element
    # vectors presumably map onto the two non-WT SMARCA4 rows in the order produced
    # by arrange() ("Double-mutant" sorts before "Single-mutant") -- verify.
    arrange(mutated_gene, other_genes, group) %>% mutate_cond(mutated_gene == "SMARCA4" & gene_set == "KRAS_KEAP1_SMARCA4" & group != "WT", label = c('KE+KR','KR'), label_nudge_dist = label_nudge_dist + c(0.15,0.075)) %>%
    #mutate_cond(mutated_gene == "BRCA2" & gene_set == "TP53_KEAP1_BRCA2" & group == "Single-mutant", nudge_dist = c(-0.03,0.03), label_nudge_dist = nudge_dist+c(0.2,0.25)) %>%
    distinct(epistatic_gt, mutated_gene, gene_set, .keep_all = TRUE) # removes duplicate points from TP53_KEAP1_BRCA2 set

# Smoker GxGxG figure: one x position per mutated gene (bold) with its other
# genes underneath, ordered by `order`. Each position shows gamma_mle with its
# CI for the WT, single-mutant and double-mutant rows, nudged sideways by
# tmp_df$nudge_dist, with labels nudged by tmp_df$label_nudge_dist. The y axis
# is broken between 2.1e6 and 3e6.
p <- ggplot(
    tmp_df,
    aes(
        x = reorder(
            paste0('**', mutated_gene, '**', '<br>-------<br>', stringr::str_wrap(other_genes, 10)),
            order
        ),
        y = gamma_mle
    )
) +
    geom_errorbar(
        aes(ymin = gamma_ci_low, ymax = gamma_ci_high),
        position = position_nudge(x = tmp_df$nudge_dist),
        width = 0
    ) +
    geom_point(
        aes(fill = group),
        position = position_nudge(x = tmp_df$nudge_dist),
        shape = 21,
        size = 5
    ) +
    geom_text(
        aes(label = stringr::str_wrap(label, 6)),
        nudge_x = tmp_df$label_nudge_dist,
        size = 6,
        fontface = "italic"
    ) +
    scale_fill_brewer(palette = "YlGnBu", direction = 1) +
    ggbreak::scale_y_break(c(2.1e6, 3e6)) +
    scale_y_continuous(
        breaks = c(seq(from = 0, to = 2e6, by = 5e5), 3e6),
        labels = scientific_expr
    ) +
    labs(
        x = "Mutations within neighboring genotypes",
        y = "Scaled selection coefficient"
    ) +
    theme_classic() +
    theme(
        axis.title = element_text(size = 22),
        axis.text = element_text(size = 18),
        # x labels carry markdown/HTML (bold gene name, <br> line breaks)
        axis.text.x = ggtext::element_markdown(size = 18, face = "italic"),
        # hide the right-hand axis elements
        axis.text.y.right = element_blank(),
        axis.line.y.right = element_blank(),
        axis.ticks.y.right = element_blank(),
        legend.position = "none"
    )
p

In [118]:
# Write the smoker GxGxG figure at the same size as the notebook canvas.
ggsave(
    filename = paste0(location_figures, 'GxGxG_smoker_new.png'),
    plot = p,
    width = 22,
    height = 5.1
)

In [None]:
# Build `tmp_df`, the point/label table for the nonsmoker GxGxG figure.
options(repr.plot.height = 5.6, repr.plot.width = 5)

tmp_df <-
    nonsmoking_plotting_df %>%
    filter(extra_effect_strict) %>%
    # add the WT-background rows from the M3 table, using combo_name as pairwise_combo
    bind_rows(
        variant_M3_nonsmoking_interaction_df %>%
            filter(epistatic_gt == "WT") %>%
            mutate(pairwise_combo = combo_name)
    ) %>%
    # keep only (gene_set, pairwise_combo) groups with exactly three rows
    group_by(gene_set, pairwise_combo) %>%
    filter(n() == 3) %>%
    ungroup() %>%
    # WT / Single-mutant / Double-mutant (a '_' in epistatic_gt means double)
    mutate(
        group = ifelse(
            epistatic_gt == "WT",
            "WT",
            ifelse(str_detect(epistatic_gt, "_"), "Double-mutant", "Single-mutant")
        )
    ) %>%
    # horizontal offsets for the points and for their text labels
    mutate(
        nudge_dist = ifelse(group == "Double-mutant", 0.1, ifelse(group == "WT", -0.1, 0)),
        label_nudge_dist = case_when(
            nudge_dist < 0 ~ nudge_dist - 0.1,
            nudge_dist == 0 ~ 0.2,
            nudge_dist > 0 ~ nudge_dist + 0.3
        )
    ) %>%
    select(
        gene_set, pairwise_combo, epistatic_gt, mutated_gene,
        starts_with("gamma"), from_count, to_count,
        extra_effect_strict, group, ends_with("nudge_dist")
    ) %>%
    rowwise() %>%
    mutate(
        # first letter of each gene in epistatic_gt joined by '+'; empty for WT
        label = ifelse(
            epistatic_gt == "WT",
            '',
            paste(substr(strsplit(epistatic_gt, "_")[[1]], 1, 1), collapse = '+')
        ),
        # gene_set minus mutated_gene, trailing '_' trimmed, separators -> ',<br>'
        # NOTE(review): a leading '_' (mutated_gene first in gene_set) is not
        # trimmed and would yield a leading ',<br>' -- confirm it cannot occur.
        other_genes = gsub(
            '(__|_)',
            ',<br>',
            trimws(gsub(mutated_gene, '', gene_set), "right", '_')
        )
    ) %>%
    ungroup()

# Nonsmoker GxGxG figure: one x position per mutated gene (bold) with its
# other genes underneath. Each position shows gamma_mle with its CI for the
# WT, single-mutant and double-mutant rows, nudged sideways by
# tmp_df$nudge_dist, with labels nudged by tmp_df$label_nudge_dist. The y axis
# is broken between 7e6 and 1.9e7.
p <- ggplot(
    tmp_df,
    aes(
        x = paste0('**', mutated_gene, '**', '<br>-------<br>', stringr::str_wrap(other_genes, 12)),
        y = gamma_mle
    )
) +
    geom_errorbar(
        aes(ymin = gamma_ci_low, ymax = gamma_ci_high),
        position = position_nudge(x = tmp_df$nudge_dist),
        width = 0
    ) +
    geom_point(
        aes(fill = group),
        position = position_nudge(x = tmp_df$nudge_dist),
        shape = 21,
        size = 5
    ) +
    geom_text(
        aes(label = stringr::str_wrap(label, 6)),
        nudge_x = tmp_df$label_nudge_dist,
        size = 6,
        fontface = "italic"
    ) +
    scale_fill_brewer(palette = "YlGnBu", direction = 1) +
    ggbreak::scale_y_break(c(7e6, 1.9e7)) +
    scale_y_continuous(
        breaks = c(seq(from = 0, to = 7e6, by = 2e6), 1.9e7),
        labels = scientific_expr
    ) +
    labs(
        x = stringr::str_wrap("Mutations within neighboring genotypes", 25),
        y = "Scaled selection coefficient"
    ) +
    theme_classic() +
    theme(
        axis.title = element_text(size = 22),
        axis.text.y = element_text(size = 18),
        # x labels carry markdown/HTML (bold gene name, <br> line breaks)
        axis.text.x = ggtext::element_markdown(size = 18, face = "italic"),
        # hide the right-hand axis elements
        axis.text.y.right = element_blank(),
        axis.line.y.right = element_blank(),
        axis.ticks.y.right = element_blank(),
        legend.position = "none"
    )
p

In [121]:
# Write the nonsmoker GxGxG figure at the same size as the notebook canvas.
ggsave(
    filename = paste0(location_figures, 'GxGxG_nonsmoker_new.png'),
    plot = p,
    width = 5,
    height = 5.6
)

In [None]:
# Nonsmokers: gamma_mle with its CI for ARID1A on each epistatic genotype of
# the EGFR/PIK3CA/ARID1A gene set, one row per genotype.
# `TRUE` replaces the reassignable shorthand `T`.
options(repr.plot.width = 8, repr.plot.height = 8)
variant_M3_nonsmoking_interaction_df %>%
    filter(gene_set == "EGFR_PIK3CA_ARID1A", mutated_gene == "ARID1A") %>%
    distinct(epistatic_gt, .keep_all = TRUE) %>%
    ggplot(aes(x = epistatic_gt, y = gamma_mle)) +
        geom_point() +
        geom_errorbar(aes(ymin = gamma_ci_low, ymax = gamma_ci_high), width = 0) +
        theme_bw()