In [None]:
library(dplyr)
library(rio) #great for exporting, importing
library (stringr)
library(cowplot)
library(tidyr)
library(ggpubr)

library(readxl)
library(patchwork)
library(RColorBrewer)
library(viridis)
library(ggvenn)
library(gridExtra)

Original data files are from the seiko server: 

/home/farre/Ann/RNAseq/combined_files_for_PCA/within_species/diel/chc_leaf_diel_outfile.csv 

/home/farre/Ann/RNAseq/combined_files_for_PCA/within_species/diel/cnd_leaf_diel_outfile.csv 

/home/farre/Ann/RNAseq/combined_files_for_PCA/within_species/diel/atl_leaf_diel_outfile.csv 

NOTE:

CHC long day data: 11PM (ZT16) and 1 AM (ZT18) sets are switched

In [2]:
setwd("~/Dropbox/Potato/RNA-seq_2/RNAseq_analysis/Data/DEseq/combined_files_for_PCA/within_species")
list.files()

In [3]:
## Make list of files to import
#:::::::::::::::::::::::::::::::

#Make list of file names of CND and M6 data
files_list <-  c('chc_leaf_diel_outfile.csv','cnd_leaf_diel_outfile.csv','atl_leaf_diel_outfile.csv')
files_list
length(files_list)

In [4]:
#Import files as a list of dataframes
dataframes<-lapply(1:length(files_list), function(i) { i <-  import(file = files_list[[i]], 
                                                         sep =',', header = TRUE, fill = TRUE )})

In [5]:
dataframes[[2]][1:5,]

Unnamed: 0_level_0,V1,Leaf_7AM_start_R1_NCC_AA_shd,Leaf_7AM_start_R2_NCC_AB_shd,Leaf_7AM_start_R3_NCC_AC_shd,Leaf_9AM_R1_NCC_AG_shd,Leaf_9AM_R2_NCC_AH_shd,Leaf_9AM_R3_NCC_AI_shd,Leaf_11AM_R1_NCC_AM_shd,Leaf_11AM_R2_NCC_AN_shd,Leaf_11AM_R3_NCC_AO_shd,⋯,Leaf_1AM_9_15_22_R3_NCC_KP_lgd,Leaf_3AM_9_15_22_R1_NCC_KQ_lgd,Leaf_3AM_9_15_22_R2_NCC_KR_lgd,Leaf_3AM_9_15_22_R3_NCC_KS_lgd,Leaf_5AM_9_15_22_R1_NCC_KT_lgd,Leaf_5AM_9_15_22_R2_NCC_KU_lgd,Leaf_5AM_9_15_22_R3_NCC_KV_lgd,Leaf_7AM_9_15_22_R1_NCC_KW_lgd,Leaf_7AM_9_15_22_R2_NCC_KX_lgd,Leaf_7AM_9_15_22_R3_NCC_KY_lgd
Unnamed: 0_level_1,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,Solca.09G003510.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,⋯,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Solca.03G011140.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,⋯,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,Solca.06G027400.1,10.659238,10.534076,10.521902,10.605352,10.543568,10.646636,10.708969,10.614328,10.609214,⋯,10.713415,10.81244,10.646494,10.681782,10.687877,10.55661,10.729127,10.608727,10.66484,10.50478
4,Solca.06G000450.1,12.790128,12.920987,12.829115,12.674946,12.794572,12.717537,12.768035,12.598848,12.760681,⋯,12.910751,13.081362,13.131436,13.000241,13.345211,13.297529,13.425063,13.322365,13.181478,13.37196
5,Solca.01G002690.1,7.858792,7.475328,7.887766,7.451636,7.351373,7.631687,7.330379,7.527196,7.666659,⋯,7.587326,7.712444,7.445591,7.669178,7.332651,7.356843,7.414614,7.407218,7.357386,7.68489


In [6]:
colnames(dataframes[[1]])[1] 

In [7]:
colnames(dataframes[[3]])[1] 

In [8]:
for (i in 1:length(files_list))
  colnames(dataframes[[i]])[1] <- 'geneID'

In [9]:
colnames(dataframes[[2]])

In [10]:
ncol(dataframes[[2]])

### NOTE: need to fix the M6 long day 11 pm and 1 am time points

In [11]:
grep('Leaf_11PM_9_14_22_R1_NCC_KK_lgd', colnames(dataframes[[2]]))
grep('Leaf_11PM_9_14_22_R3_NCC_KM_lgd', colnames(dataframes[[2]]))
grep('Leaf_1AM_9_15_22_R1_NCC_KN_lgd', colnames(dataframes[[2]]))
grep('Leaf_1AM_9_15_22_R2_NCC_KO_lgd', colnames(dataframes[[2]]))
grep('Leaf_1AM_9_15_22_R3_NCC_KP_lgd', colnames(dataframes[[2]]))



In [12]:
#Change the time and dates
#:::::::::::::::::::::::::::::
colnames(dataframes[[2]][63]) <- 'Leaf_1AM_9_15_22_R1_NCC_KK_lgd'
colnames(dataframes[[2]][64]) <- 'Leaf_1AM_9_15_22_R3_NCC_KM_lgd'
colnames(dataframes[[2]][65]) <- 'Leaf_11PM_9_14_22_R1_NCC_KN_lgd'
colnames(dataframes[[2]][66]) <- 'Leaf_11PM_9_14_22_R2_NCC_KO_lgd'
colnames(dataframes[[2]][67]) <- 'Leaf_11PM_9_14_22_R3_NCC_KP_lgd'

In [13]:
dataframes[[2]] %>% select(contains(c('_1AM', '_11PM'))) %>% head() 

Unnamed: 0_level_0,Leaf_1AM_R1_NCC_CC_shd,Leaf_1AM_R2_NCC_CD_shd,Leaf_1AM_R3_NCC_CE_shd,Leaf_1AM_9_15_22_R1_NCC_KN_lgd,Leaf_1AM_9_15_22_R2_NCC_KO_lgd,Leaf_1AM_9_15_22_R3_NCC_KP_lgd,Leaf_11PM_R2_NCC_BX_shd,Leaf_11PM_R3_NCC_BY_shd,Leaf_11PM_9_14_22_R1_NCC_KK_lgd,Leaf_11PM_9_14_22_R3_NCC_KM_lgd
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,10.678353,10.808532,10.721815,10.676241,10.745977,10.713415,10.791129,10.982089,10.554355,10.586426
4,13.254997,13.007953,12.822098,12.894443,12.735986,12.910751,12.69893,12.764254,12.904739,12.926727
5,7.791589,7.567323,7.341418,7.297013,7.781469,7.587326,7.954517,7.426071,7.460753,7.476845
6,8.98241,9.668315,9.797556,10.464474,10.222388,10.719239,9.544242,9.122572,10.410422,9.94584


### Modifications to pivot longer

Change time to ZT, taking into account the issue with the 7AM and 9AM time points 

In [14]:
for (i in 1:length(files_list)) {
 
    names(dataframes[[i]]) <- sub("7AM_start_", "ZT0_", names(dataframes[[i]]))
    names(dataframes[[i]]) <- sub("7AM_end_", "ZT24_", names(dataframes[[i]]))
    names(dataframes[[i]]) <- sub("7am_start_", "ZT0_", names(dataframes[[i]]))
    names(dataframes[[i]]) <- sub("7am_end_", "ZT24_", names(dataframes[[i]]))
    names(dataframes[[i]]) <- sub("7AM_10_26_22_", "ZT0_", names(dataframes[[i]]))
    names(dataframes[[i]]) <- sub("7AM_10_27_22_", "ZT24_", names(dataframes[[i]]))
    names(dataframes[[i]]) <- sub("7AM_9_14_22_", "ZT0_", names(dataframes[[i]]))
    names(dataframes[[i]]) <- sub("7AM_9_15_22_", "ZT24_", names(dataframes[[i]]))
    names(dataframes[[i]]) <- sub("7AM_8_2_22_", "ZT0_", names(dataframes[[i]]))
    names(dataframes[[i]]) <- sub("7AM_8_3_22_", "ZT24_", names(dataframes[[i]]))
     

    
     


     }
     
     
     
     
     
     
     
     
     
     
     
     

#### NOT NEEDED FOR DIEL DATA
#::::::::::::::::::::::::::::::::     

#For CND LL data
     
    names(dataframes[[i]]) <- sub("7PM_10_4_21_", "ZT12_", names(dataframes[[i]]))
    names(dataframes[[i]]) <- sub("9PM_10_4_21_", "ZT14_", names(dataframes[[i]]))
     
 names(dataframes[[i]]) <- sub("_11PM_10_4_21_", "_ZT16_", names(dataframes[[i]]))
    names(dataframes[[i]]) <- sub("_1AM_10_5_21_", "_ZT18_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("3AM_10_5_21_", "ZT20_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("5AM_10_5_21_", "ZT22_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("7AM_10_5_21_", "ZT24_", names(dataframes[[i]]))
         names(dataframes[[i]]) <- sub("9AM_10_5_21_", "ZT26_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("_11AM_10_5_21_", "_ZT28_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("_1PM_10_5_21_", "_ZT30_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("3PM_10_5_21_", "ZT32_", names(dataframes[[i]]))
         names(dataframes[[i]]) <- sub("5PM_10_5_21_", "ZT34_", names(dataframes[[i]]))
         names(dataframes[[i]]) <- sub("7PM_10_5_21_", "ZT36_", names(dataframes[[i]]))    
     names(dataframes[[i]]) <- sub("9PM_10_5_21_", "ZT38_", names(dataframes[[i]]))
        names(dataframes[[i]]) <- sub("_11PM_10_5_21_", "_ZT40_", names(dataframes[[i]]))
            names(dataframes[[i]]) <- sub("1AM_10_6_21_", "ZT42_", names(dataframes[[i]]))
       names(dataframes[[i]]) <- sub("3AM_10_6_21_", "ZT44_", names(dataframes[[i]]))
       names(dataframes[[i]]) <- sub("5AM_10_6_21_", "ZT46_", names(dataframes[[i]]))
       names(dataframes[[i]]) <- sub("7AM_10_6_21_", "ZT48_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("9AM_10_6_21_", "ZT50_", names(dataframes[[i]]))
     names(dataframes[[i]]) <- sub("_11AM_10_6_21_", "_ZT52_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("_1PM_10_6_21_", "_ZT54_", names(dataframes[[i]]))
     names(dataframes[[i]]) <- sub("_3PM_10_6_21_", "_ZT56_", names(dataframes[[i]]))
     names(dataframes[[i]]) <- sub("_5PM_10_6_21_", "_ZT58_", names(dataframes[[i]]))

#### NOT NEEDED FOR DIEL DATA
#::::::::::::::::::::::::::::::::

for (i in 1:length(files_list)) {

     
#For M6 LL data
     
    names(dataframes[[i]]) <- sub("7PM_7_6_21_", "ZT12_", names(dataframes[[i]]))
    names(dataframes[[i]]) <- sub("9PM_7_6_21_", "ZT14_", names(dataframes[[i]]))
     
 names(dataframes[[i]]) <- sub("_11PM_7_6_21_", "_ZT16_", names(dataframes[[i]]))
    names(dataframes[[i]]) <- sub("_1AM_7_7_21_", "_ZT18_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("3AM_7_7_21_", "ZT20_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("5AM_7_7_21_", "ZT22_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("7AM_7_7_21_", "ZT24_", names(dataframes[[i]]))
         names(dataframes[[i]]) <- sub("9AM_7_7_21_", "ZT26_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("_11AM_7_7_21_", "_ZT28_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("_1PM_7_7_21_", "_ZT30_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("3PM_7_7_21_", "ZT32_", names(dataframes[[i]]))
         names(dataframes[[i]]) <- sub("5PM_7_7_21_", "ZT34_", names(dataframes[[i]]))
         names(dataframes[[i]]) <- sub("7PM_7_7_21_", "ZT36_", names(dataframes[[i]]))    
     names(dataframes[[i]]) <- sub("9PM_7_7_21_", "ZT38_", names(dataframes[[i]]))
        names(dataframes[[i]]) <- sub("_11PM_7_7_21_", "_ZT40_", names(dataframes[[i]]))
            names(dataframes[[i]]) <- sub("_1AM_7_8_21_", "_ZT42_", names(dataframes[[i]]))
       names(dataframes[[i]]) <- sub("3AM_7_8_21_", "ZT44_", names(dataframes[[i]]))
       names(dataframes[[i]]) <- sub("5AM_7_8_21_", "ZT46_", names(dataframes[[i]]))
       names(dataframes[[i]]) <- sub("7AM_7_8_21_", "ZT48_", names(dataframes[[i]]))
     names(dataframes[[i]]) <- sub("9AM_7_8_21_", "ZT50_", names(dataframes[[i]]))
     names(dataframes[[i]]) <- sub("_11AM_7_8_21_", "_ZT52_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("_1PM_7_8_21_", "_ZT54_", names(dataframes[[i]]))
     names(dataframes[[i]]) <- sub("_3PM_7_8_21_", "_ZT56_", names(dataframes[[i]]))
     names(dataframes[[i]]) <- sub("_5PM_7_8_21_", "_ZT58_", names(dataframes[[i]]))
    
     


     }
     


#### NOT NEEDED FOR DIEL DATA
#::::::::::::::::::::::::::::::::


for (i in 1:length(files_list)) {

     
#For ATL LL data
     
    names(dataframes[[i]]) <- sub("7PM_12_16_20_", "ZT12_", names(dataframes[[i]]))
    names(dataframes[[i]]) <- sub("9PM_12_16_20_", "ZT14_", names(dataframes[[i]]))
     
 names(dataframes[[i]]) <- sub("_11PM_12_16_20_", "_ZT16_", names(dataframes[[i]]))
    names(dataframes[[i]]) <- sub("_1AM_12_17_20_", "_ZT18_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("3AM_12_17_20_", "ZT20_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("5AM_12_17_20_", "ZT22_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("7AM_12_17_20_", "ZT24_", names(dataframes[[i]]))
         names(dataframes[[i]]) <- sub("9AM_12_17_20_", "ZT26_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("_11AM_12_17_20_", "_ZT28_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("_1PM_12_17_20_", "_ZT30_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("3PM_12_17_20_", "ZT32_", names(dataframes[[i]]))
         names(dataframes[[i]]) <- sub("5PM_12_17_20_", "ZT34_", names(dataframes[[i]]))
         names(dataframes[[i]]) <- sub("7PM_12_17_20_", "ZT36_", names(dataframes[[i]]))    
     names(dataframes[[i]]) <- sub("9PM_12_17_20_", "ZT38_", names(dataframes[[i]]))
        names(dataframes[[i]]) <- sub("_11PM_12_17_20_", "_ZT40_", names(dataframes[[i]]))
            names(dataframes[[i]]) <- sub("_1AM_12_18_20_", "_ZT42_", names(dataframes[[i]]))
       names(dataframes[[i]]) <- sub("3AM_12_18_20_", "ZT44_", names(dataframes[[i]]))
       names(dataframes[[i]]) <- sub("5AM_12_18_20_", "ZT46_", names(dataframes[[i]]))
       names(dataframes[[i]]) <- sub("7AM_12_18_20_", "ZT48_", names(dataframes[[i]]))
     names(dataframes[[i]]) <- sub("9AM_12_18_20_", "ZT50_", names(dataframes[[i]]))
     names(dataframes[[i]]) <- sub("_11AM_12_18_20_", "_ZT52_", names(dataframes[[i]]))
      names(dataframes[[i]]) <- sub("_1PM_12_18_20_", "_ZT54_", names(dataframes[[i]]))
     names(dataframes[[i]]) <- sub("_3PM_12_18_20_", "_ZT56_", names(dataframes[[i]]))
     names(dataframes[[i]]) <- sub("_5PM_12_18_20_", "_ZT58_", names(dataframes[[i]]))
    
     


     }
     

In [15]:
for (i in 1:length(files_list)) {
names(dataframes[[i]]) <- sub("10_26_22_", "", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("10_27_22_", "", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("9_14_22_", "", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("9_15_22_", "", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("8_2_22_", "", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("8_3_22_", "", names(dataframes[[i]]))
    }

In [16]:
for (i in 1:length(files_list)) {
names(dataframes[[i]]) <- sub("_9am_", "_ZT2_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_9AM_", "_ZT2_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_11am_", "_ZT4_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_11AM_", "_ZT4_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_1pm_", "_ZT6_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_1PM_", "_ZT6_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_3pm_", "_ZT8_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_3PM_", "_ZT8_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_5pm_", "_ZT10_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_5PM_", "_ZT10_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_7pm_", "_ZT12_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_7PM_", "_ZT12_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_9pm_", "_ZT14_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_9PM_", "_ZT14_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_11pm_", "_ZT16_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_11PM_", "_ZT16_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_1am_", "_ZT18_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_1AM_", "_ZT18_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_3am_", "_ZT20_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_3AM_", "_ZT20_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_5am_", "_ZT22_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_5AM_", "_ZT22_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_7am_", "_ZT24_", names(dataframes[[i]]))
names(dataframes[[i]]) <- sub("_7AM_", "_ZT24_", names(dataframes[[i]]))
    }

In [17]:
colnames(dataframes[[3]])

In [18]:


dataframes_long<- lapply(seq_along(dataframes), function (i){

                        dataframes[[i]] %>% pivot_longer (cols= -c(geneID),
                                                            names_to = c('Tissue', 'ZT', 
                                                                          'Replicate',
                                                                          'Sample1',
                                                                         'Sample2',
                                                                          'Condition'
                                                                         ),
                                                             names_sep = "[_]",
                                                           # names_prefix = c('ZT'), #can only be used for one column
                                                            #names_transform = list(ZT = as.integer),
                                                             values_to = 'Expression' )}
                         )

                                                                       
head(dataframes_long[[1]][1:2,])




geneID,Tissue,ZT,Replicate,Sample1,Sample2,Condition,Expression
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>
Solch.M6.05G010700.1,Leaf,ZT0,R1,NCM,AA,shd,10.57672
Solch.M6.05G010700.1,Leaf,ZT0,R2,NCM,AB,shd,10.37314


In [19]:
head(dataframes_long[[3]][1:2,])

geneID,Tissue,ZT,Replicate,Sample1,Sample2,Condition,Expression
<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>
Soltu.Atl_v3.04_1G015590.2,Leaf,ZT0,R1,NCA,AA,shd,10.52359
Soltu.Atl_v3.04_1G015590.2,Leaf,ZT0,R2,NCA,AB,shd,10.45528


## Select only 'expressed' genes and calculate average and standard deviation per time point

In [26]:
AvSD_function <- function(df) {
                    df %>% group_by(geneID, Tissue, Condition) %>% filter(max(Expression) >= 0)%>%  #Select genes in which at least one sample has 
                                                                            #rlog >=0
                                    ungroup() %>% 
                                group_by(geneID, ZT, Tissue, Condition) %>%
                                 dplyr::summarize (mean_expression = mean(Expression), 
                                                    std = sd(Expression) ) %>% ungroup()
                               }


In [27]:
dataframes_AvSd<- lapply(dataframes_long, AvSD_function)

[1m[22m`summarise()` has grouped output by 'geneID', 'ZT', 'Tissue'. You can override
using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'geneID', 'ZT', 'Tissue'. You can override
using the `.groups` argument.
[1m[22m`summarise()` has grouped output by 'geneID', 'ZT', 'Tissue'. You can override
using the `.groups` argument.


In [28]:
head(dataframes_AvSd[[3]][1:2,])

geneID,ZT,Tissue,Condition,mean_expression,std
<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>
Soltu.Atl_v3.01_0G000010.1,ZT0,Leaf,lgd,1.1317273,0.31389706
Soltu.Atl_v3.01_0G000010.1,ZT0,Leaf,shd,0.9914389,0.01607945


## Change format (ZT, Condition) and add target_id column without transcript version

In [29]:


myfunction <- function(df) {
                    
                    df$ZT<- gsub("ZT", "", df$ZT)
                  
                    df$ZT<- as.numeric(df$ZT)
        
                  df$Condition<- gsub("shd", "SD", df$Condition) 
                    df$Condition<- gsub("lgd", "LD", df$Condition)
    
    
                df$target_id <- lapply(df$geneID,
                             function (i){if (str_sub(i,-2,-2) == ".")
                                             { i<- str_sub(i, end = -3)} # i.e. the last character will be -3 position
                                          else if ((str_sub(i,-3,-3) == "."))
                                               { i<- str_sub(i, end = -4)} # i.e. the last character will be -4 position
                                        else {i<-i}
                                          
                                        })
                ## NOTE MAKES THE COLLUMN A LIST

                df$target_id <- as.character(df$target_id)
    
              return(df) #this is to return the full dataframe and not just the last function
    
    }




In [30]:
dataframes_AvSd <- lapply(dataframes_AvSd, myfunction)

In [31]:
head(dataframes_AvSd[[1]])

geneID,ZT,Tissue,Condition,mean_expression,std,target_id
<chr>,<dbl>,<chr>,<chr>,<dbl>,<dbl>,<chr>
Solch.M6.01G000010.3,0,Leaf,LD,6.814213,0.22542011,Solch.M6.01G000010
Solch.M6.01G000010.3,0,Leaf,SD,6.860834,0.26774006,Solch.M6.01G000010
Solch.M6.01G000010.3,10,Leaf,LD,6.773047,0.18581551,Solch.M6.01G000010
Solch.M6.01G000010.3,10,Leaf,SD,6.819131,0.02382822,Solch.M6.01G000010
Solch.M6.01G000010.3,12,Leaf,LD,6.878922,0.2699038,Solch.M6.01G000010
Solch.M6.01G000010.3,12,Leaf,SD,6.555714,0.09369649,Solch.M6.01G000010


In [32]:
dataframes_AvSd[[1]]$Genotype <- 'CHC'
dataframes_AvSd[[2]]$Genotype <- 'CND'
dataframes_AvSd[[3]]$Genotype <- 'ATL'



In [33]:
head(dataframes_AvSd[[1]])

geneID,ZT,Tissue,Condition,mean_expression,std,target_id,Genotype
<chr>,<dbl>,<chr>,<chr>,<dbl>,<dbl>,<chr>,<chr>
Solch.M6.01G000010.3,0,Leaf,LD,6.814213,0.22542011,Solch.M6.01G000010,CHC
Solch.M6.01G000010.3,0,Leaf,SD,6.860834,0.26774006,Solch.M6.01G000010,CHC
Solch.M6.01G000010.3,10,Leaf,LD,6.773047,0.18581551,Solch.M6.01G000010,CHC
Solch.M6.01G000010.3,10,Leaf,SD,6.819131,0.02382822,Solch.M6.01G000010,CHC
Solch.M6.01G000010.3,12,Leaf,LD,6.878922,0.2699038,Solch.M6.01G000010,CHC
Solch.M6.01G000010.3,12,Leaf,SD,6.555714,0.09369649,Solch.M6.01G000010,CHC


In [34]:
getwd()

In [35]:
setwd("~/Dropbox/Potato/RNA-seq_2/RNAseq_analysis/ATL_CND_M6_comparisons/01_Data_normalization")



In [36]:
export(dataframes_AvSd[[1]],
       './Results/CHC_diel_leaf_avsd_expressed.csv', sep = ',')

In [37]:
export(dataframes_AvSd[[2]],
       './Results/CND_diel_leaf_avsd_expressed.csv', sep = ',')

In [38]:
export(dataframes_AvSd[[3]],
       './Results/ATL_diel_leaf_avsd_expressed.csv', sep = ',')