In [3]:
library(dplyr)
library(rio) #great for exporting, importing
library (stringr)
library(cowplot)
library(ggplot2)
library(tidyr)
library(ggpubr)

library(patchwork)
library(RColorBrewer)
library(viridis)

## NOTE: The Atlantic expression atlas data were DESeq-normalized separately for the Stress and Tissue sets to calculate rlog values.

Expression atlas experiments were conducted under 15 h light/9 h dark (light 6 am - 9 pm).

# Stress dataset


In [4]:
# Read the stress-experiment expression table: comma-separated, first row is
# the header, short rows are padded (`fill = TRUE`).
Stress <- import(
  file = "~/Dropbox/Potato/RNA-seq_2/RNAseq_analysis/Data/DEseq/Julia_Data/stresses_with_controls_outfile.csv",
  sep = ",",
  header = TRUE,
  fill = TRUE
)

In [5]:
# Peek at the first two rows of the imported table.
Stress[seq_len(2), ]

Unnamed: 0_level_0,V1,MejaControl_R1_ATL_CR,MejaControl_R3_ATL_CT,BTHControl_R2_ATL_CV,BTHControl_R3_ATL_CW,SaltLeaf_R1_ATL_DA,ColdLeafControl_R2_ATL_DH,DroughtLeaf_R2_ATL_DK,DroughtLeaf_R3_ATL_DL,Heat_R1_ATL_DP,⋯,ColdLeafControl_R3_ATL_DI,ColdLeaf_R1_ATL_DS,ColdLeaf_R2_ATL_DT,ColdLeaf_R3_ATL_DU,SaltRoot_R1_ATL_DV,SaltRoot_R3_ATL_DX,DroughtRoot_R1_ATL_DY,DroughtRoot_R2_ATL_DZ,DroughtRoot_R3_ATL_EA,RootControl_R1_ATL_EB
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,Soltu.Atl_v3.09_0G012740.1,-0.559407,-0.6376609,-0.5373148,-0.5119229,-0.3991029,-0.2997879,-0.5891199,-0.4704632,-0.4683695,⋯,-0.4641516,-0.6092329,-0.6068864,-0.6379836,5.425528,4.852719,3.641948,4.943948,4.548304,4.591865
2,Soltu.Atl_v3.10_2G013020.1,-2.189428,-2.1952106,-2.1877928,-2.1859099,-2.1786193,-2.1729549,-2.191625,-2.1828254,-2.1826692,⋯,-2.1823545,-2.1931109,-2.1929376,-2.1952344,-2.201074,-2.203791,-2.201255,-2.066951,-2.198884,-2.067517


In [6]:


# Grab the current column headers so they can be cleaned up, then show them.
Stress_names <- names(Stress)
Stress_names

In [7]:
# Cut the last seven characters off every header (e.g. the "_ATL_CR" sample
# code), leaving "<Condition>_<Replicate>".
Stress_names <- str_sub(Stress_names, end = -8)

In [30]:
# Display the trimmed column names to check the suffix removal.
Stress_names

In [9]:
# Discard the first entry (the header of the gene-identifier column, "V1"
# before trimming). NOTE: re-running this cell removes one more name each time.
Stress_names <- tail(Stress_names, -1)
Stress_names[seq_len(2)]

In [10]:
# Put "GeneID" in front as the header for the gene-identifier column.
Stress_names <- append(Stress_names, "GeneID", after = 0)
Stress_names[seq_len(2)]

In [11]:
# Apply the cleaned headers to the table and check the result.
names(Stress) <- Stress_names
Stress[seq_len(2), ]

Unnamed: 0_level_0,GeneID,MejaControl_R1,MejaControl_R3,BTHControl_R2,BTHControl_R3,SaltLeaf_R1,ColdLeafControl_R2,DroughtLeaf_R2,DroughtLeaf_R3,Heat_R1,⋯,ColdLeafControl_R3,ColdLeaf_R1,ColdLeaf_R2,ColdLeaf_R3,SaltRoot_R1,SaltRoot_R3,DroughtRoot_R1,DroughtRoot_R2,DroughtRoot_R3,RootControl_R1
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,Soltu.Atl_v3.09_0G012740.1,-0.559407,-0.6376609,-0.5373148,-0.5119229,-0.3991029,-0.2997879,-0.5891199,-0.4704632,-0.4683695,⋯,-0.4641516,-0.6092329,-0.6068864,-0.6379836,5.425528,4.852719,3.641948,4.943948,4.548304,4.591865
2,Soltu.Atl_v3.10_2G013020.1,-2.189428,-2.1952106,-2.1877928,-2.1859099,-2.1786193,-2.1729549,-2.191625,-2.1828254,-2.1826692,⋯,-2.1823545,-2.1931109,-2.1929376,-2.1952344,-2.201074,-2.203791,-2.201255,-2.066951,-2.198884,-2.067517


In [12]:

# Reshape to long format: one row per gene x sample. Each sample header is
# split at "_" into Condition and Replicate; the cell value goes to `rlog`.
Stress_long <- pivot_longer(
  Stress,
  cols = -GeneID,
  names_to = c("Condition", "Replicate"),
  names_sep = "_",
  values_to = "rlog"
)
Stress_long[seq_len(2), ]

GeneID,Condition,Replicate,rlog
<chr>,<chr>,<chr>,<dbl>
Soltu.Atl_v3.09_0G012740.1,MejaControl,R1,-0.559407
Soltu.Atl_v3.09_0G012740.1,MejaControl,R3,-0.6376609


### Calculate average expression

In [13]:
# Per gene and condition: mean and standard deviation of rlog across replicates.
# `std` will be NA for any condition that has only a single replicate.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping leaks into
# later steps and summarise() no longer prints its regrouping message.
Stress_long_av <- Stress_long %>%
  group_by(GeneID, Condition) %>%
  summarise(
    mean_expression = mean(rlog),
    std = sd(rlog),
    .groups = "drop"
  )


In [14]:
# Peek at the first two rows of the per-condition averages.
Stress_long_av[seq_len(2), ]

GeneID,Condition,mean_expression,std
<chr>,<chr>,<dbl>,<dbl>
Soltu.Atl_v3.01_0G000010.1,BTH,0.54395137,0.861126
Soltu.Atl_v3.01_0G000010.1,BTHControl,0.03311851,0.5498508


### Select only expressed genes (max mean rlog > 0)


In [15]:
# Row count and number of distinct genes before filtering.
nrow(Stress_long_av)
n_distinct(Stress_long_av$GeneID)

# Attach each gene's highest per-condition mean as MaxExpression, then keep
# all rows of genes where that maximum is above zero.
Stress_long_av_expr <- Stress_long_av %>%
  group_by(GeneID) %>%
  mutate(MaxExpression = max(mean_expression)) %>%
  ungroup() %>%
  filter(MaxExpression > 0)

# Row count and number of distinct genes after filtering.
nrow(Stress_long_av_expr)
n_distinct(Stress_long_av_expr$GeneID)

In [16]:
# Peek at the first two rows of the filtered table.
Stress_long_av_expr[seq_len(2), ]


GeneID,Condition,mean_expression,std,MaxExpression
<chr>,<chr>,<dbl>,<dbl>,<dbl>
Soltu.Atl_v3.01_0G000010.1,BTH,0.54395137,0.861126,0.5439514
Soltu.Atl_v3.01_0G000010.1,BTHControl,0.03311851,0.5498508,0.5439514


In [17]:
# Write the filtered per-condition averages to CSV. The output directory is
# created first (no-op if it already exists) so the export does not fail when
# ./Results is missing.
dir.create("./Results", showWarnings = FALSE, recursive = TRUE)
export(Stress_long_av_expr, file = "./Results/Stress_long_av_expressed.csv")

# Tissue dataset

In [18]:
# Read the tissue-experiment expression table: comma-separated, first row is
# the header, short rows are padded (`fill = TRUE`).
Tissue <- import(
  file = "~/Dropbox/Potato/RNA-seq_2/RNAseq_analysis/Data/DEseq/Julia_Data/tissues_without_tc_outfile.csv",
  sep = ",",
  header = TRUE,
  fill = TRUE
)

In [19]:

# Grab the current column headers so they can be cleaned up, then show them.
Tissue_names <- names(Tissue)
Tissue_names

In [20]:
# Cut the last seven characters off every header.
# NOTE(review): presumably the same 7-character sample-code suffix as in the
# Stress table - confirm against the raw Tissue headers.
Tissue_names <- str_sub(Tissue_names, end = -8)

In [21]:
# Display the trimmed column names to check the suffix removal.
Tissue_names

In [22]:
# Discard the first entry (the header of the gene-identifier column).
# NOTE: re-running this cell removes one more name each time.
Tissue_names <- tail(Tissue_names, -1)
Tissue_names[seq_len(2)]

In [23]:
# Put "GeneID" in front as the header for the gene-identifier column.
Tissue_names <- append(Tissue_names, "GeneID", after = 0)
Tissue_names[seq_len(2)]

In [24]:
# Apply the cleaned headers to the table and check the result.
names(Tissue) <- Tissue_names
Tissue[seq_len(2), ]

Unnamed: 0_level_0,GeneID,TuberS4_R1,TuberS4_R2,TuberS4_R3,TuberS3_R1,TuberS3_R2,TuberS3_R3,TuberS1_R1,TuberS1_R2,TuberS1_R3,⋯,TuberS5_R1,TuberS5_R2,TuberS5_R3,TuberS2_R2,TuberS2_R3,OpenFlower_R2,StemControl_R1,ColdLeafControl_R1,ColdLeafControl_R3,RootControl_R1
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,Soltu.Atl_v3.09_0G012740.1,-1.150968,0.04301417,-0.8950817,-1.236144,-1.23256,-1.225649,-1.277885,0.5096579,-0.05929033,⋯,-1.037691,-1.095777,0.5993646,0.482347,-1.107616,1.675304,1.515202,-0.8991277,-0.889735,5.318072
2,Soltu.Atl_v3.10_2G013020.1,-1.881905,-1.86632902,-1.8337444,-1.900255,-1.89887,-1.896208,-1.916686,-1.6341077,-1.88348821,⋯,-1.860585,-1.871518,-1.8686471,-1.885423,-1.873746,-1.781673,-1.890906,-1.8345059,-1.832738,-1.406416


In [25]:

# Reshape to long format: one row per gene x sample. Each sample header is
# split at "_" into Condition and Replicate; the cell value goes to `rlog`.
Tissue_long <- pivot_longer(
  Tissue,
  cols = -GeneID,
  names_to = c("Condition", "Replicate"),
  names_sep = "_",
  values_to = "rlog"
)
Tissue_long[seq_len(2), ]

GeneID,Condition,Replicate,rlog
<chr>,<chr>,<chr>,<dbl>
Soltu.Atl_v3.09_0G012740.1,TuberS4,R1,-1.15096827
Soltu.Atl_v3.09_0G012740.1,TuberS4,R2,0.04301417


### Calculate average expression

In [26]:
# Per gene and condition: mean and standard deviation of rlog across replicates.
# `std` will be NA for any condition that has only a single replicate.
# `.groups = "drop"` returns an ungrouped tibble, so no grouping leaks into
# later steps and summarise() no longer prints its regrouping message.
Tissue_long_av <- Tissue_long %>%
  group_by(GeneID, Condition) %>%
  summarise(
    mean_expression = mean(rlog),
    std = sd(rlog),
    .groups = "drop"
  )


In [27]:
# Peek at the first two rows of the per-condition averages.
Tissue_long_av[seq_len(2), ]

GeneID,Condition,mean_expression,std
<chr>,<chr>,<dbl>,<dbl>
Soltu.Atl_v3.01_0G000010.1,ClosedFlower,1.52559678,0.07353294
Soltu.Atl_v3.01_0G000010.1,ColdLeafControl,0.07708519,0.47580094


### Get only 'expressed' genes, i.e. max mean expression > 0

In [28]:
# Row count and number of distinct genes before filtering.
nrow(Tissue_long_av)
n_distinct(Tissue_long_av$GeneID)

# Attach each gene's highest per-condition mean as MaxExpression, then keep
# all rows of genes where that maximum is above zero.
Tissue_long_av_expr <- Tissue_long_av %>%
  group_by(GeneID) %>%
  mutate(MaxExpression = max(mean_expression)) %>%
  ungroup() %>%
  filter(MaxExpression > 0)

# Row count and number of distinct genes after filtering.
nrow(Tissue_long_av_expr)
n_distinct(Tissue_long_av_expr$GeneID)

In [29]:
# Peek at the first two rows of the filtered table.
Tissue_long_av_expr[seq_len(2), ]

GeneID,Condition,mean_expression,std,MaxExpression
<chr>,<chr>,<dbl>,<dbl>,<dbl>
Soltu.Atl_v3.01_0G000010.1,ClosedFlower,1.52559678,0.07353294,1.877467
Soltu.Atl_v3.01_0G000010.1,ColdLeafControl,0.07708519,0.47580094,1.877467


In [31]:
# Write the filtered per-condition averages to CSV. The output directory is
# created first (no-op if it already exists) so the export does not fail when
# ./Results is missing.
dir.create("./Results", showWarnings = FALSE, recursive = TRUE)
export(Tissue_long_av_expr, file = "./Results/Tissue_long_av_expressed.csv")