# Numbers about the switches
## Read data

In [None]:
# Load the shared pipeline helpers and package setup used by the cells below.
source("~/smartas/pipeline/scripts/variablesAndFunctions.r")

# Switches: keep only rows flagged both NotNoise and IsModel.
switches <- read_tsv("../data/pancancer/candidateList_info.agg.tsv") %>%
    filter(NotNoise == 1, IsModel == 1)

# Mutations: for each source keep the tumor / gene / patient identifiers and
# drop duplicated rows.
# Whole-exome calls.
wes <- read_tsv("../data//mutations/wes_mutations.txt") %>%
    select(Tumor, Gene, Symbol, Patient) %>%
    unique()

# Whole-genome calls.
wgs <- read_tsv("../data//mutations/wgs_mutations.txt") %>%
    select(Tumor, Gene, Symbol, Patient) %>%
    unique()


Attaching package: ‘dplyr’

The following objects are masked from ‘package:plyr’:

    arrange, count, desc, failwith, id, mutate, rename, summarise,
    summarize

The following objects are masked from ‘package:stats’:

    filter, lag

The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union

: 497148 parsing failures.
row col  expected    actual
  1  -- 7 columns 6 columns
  2  -- 7 columns 6 columns
  3  -- 7 columns 6 columns
  4  -- 7 columns 6 columns
  5  -- 7 columns 6 columns
... ... ......... .........
.See problems(...) for more details.

## Some stats about switches

In [None]:
# Number of switches: rows of the filtered switch table.
nrow(switches)

# Number of distinct genes: de-duplicate the (GeneId, Symbol) pairs before
# counting them. The previous expression indexed an undefined object
# `uniqueswitches` instead of calling unique() on `switches`.
nrow(unique(switches[, c("GeneId", "Symbol")]))

In [None]:
# Counts of switches by coding status of the two isoforms.
# Both the normal and the tumor isoform have a CDS.
with(switches, sum(CDS_Normal & CDS_Tumor))

# Only the normal isoform has a CDS.
with(switches, sum(CDS_Normal & !CDS_Tumor))

# Only the tumor isoform has a CDS.
with(switches, sum(!CDS_Normal & CDS_Tumor))

# The same three categories as a percentage of all switches.
# Both the normal and the tumor isoform have a CDS.
with(switches, sum(CDS_Normal & CDS_Tumor)) / nrow(switches) * 100

# Only the normal isoform has a CDS.
with(switches, sum(CDS_Normal & !CDS_Tumor)) / nrow(switches) * 100

# Only the tumor isoform has a CDS.
with(switches, sum(!CDS_Normal & CDS_Tumor)) / nrow(switches) * 100

In [None]:
# Functional switches among those where both isoforms have a CDS.
# Absolute number.
with(switches, sum(CDS_Normal & CDS_Tumor & IsFunctional))
# Percentage relative to all switches with two coding isoforms.
with(switches, sum(CDS_Normal & CDS_Tumor & IsFunctional) / sum(CDS_Normal & CDS_Tumor)) * 100

In [None]:
# Imbalance between switches where only the normal isoform has a CDS and
# switches where only the tumor isoform has a CDS.
# x: switches with a coding normal isoform only.
x <- with(switches, sum(CDS_Normal & !CDS_Tumor))
# n: all switches where exactly one of the two isoforms has a CDS.
n <- with(switches, sum(!CDS_Normal & CDS_Tumor)) + x
# Two-sided exact binomial test against an even split.
binom.test(x, n, p = 0.5)

## Stats about patient recurrence
We will use mutation data to put switch data into context.

In [None]:
# Rows of `wes` per (Tumor, Patient). `wes` was de-duplicated on
# (Tumor, Gene, Symbol, Patient) when it was read, so this is the number of
# distinct mutated genes per patient.
genesWESMutated <- wes %>%
    group_by(Tumor, Patient) %>%
    summarise(WES = length(Patient))

# Same count for the WGS calls.
genesWGSMutated <- wgs %>%
    group_by(Tumor, Patient) %>%
    summarise(WGS = length(Patient))

# Number of switches per patient: split the comma-separated patient lists and
# tabulate how often each patient identifier appears.
genesSwitched <- as.data.frame(table(unlist(strsplit(switches$Patients_affected, ",")))) %>%
    set_colnames(c("Patient", "Switch"))

# Combine the three counts into one row per patient.
# The two mutation tables are joined first, on (Tumor, Patient), and the
# switch counts are attached afterwards by Patient alone. The switch table has
# no Tumor column; merging it with WES first left Tumor as NA for patients
# without WES data, so their WGS row could not match in the second merge and
# the patient was split into two rows.
# The final select() restores the previous column order.
recurrence <- merge(as.data.frame(genesWESMutated), as.data.frame(genesWGSMutated),
                    by = c("Tumor", "Patient"), all = TRUE) %>%
    merge(genesSwitched, ., by = "Patient", all = TRUE) %>%
    select(Patient, Tumor, Switch, WES, WGS)

In [None]:
# Boxplot of per-patient counts (log10 scale) for each alteration type,
# grouped by tumor type.
recurrence %>%
    # Long format: one row per patient and alteration type.
    melt() %>%
    set_colnames(c("Patient", "Tumor", "Alteration", "Counts")) %>%
    ggplot(aes(x = Tumor, y = log10(Counts), fill = Alteration)) +
        geom_boxplot() +
        smartas_theme() +
        theme(legend.position = "bottom") +
        labs(title = "Genes altered by mechanism per patient")