In [2]:
library(dplyr)
library(rio) #great for exporting, importing
library (stringr)
library(cowplot)
library(tidyr)
library(ggpubr)

library(readxl)
library(patchwork)
library(RColorBrewer)
library(viridis)
library(ggvenn)
library(gridExtra)

In [3]:
# Print the R version, platform, locale and the attached/loaded package
# versions of this session, so the analysis environment is recorded.
sessionInfo()

R version 4.3.1 (2023-06-16)
Platform: x86_64-apple-darwin13.4.0 (64-bit)
Running under: macOS Big Sur ... 10.16

Matrix products: default
BLAS/LAPACK: /Users/Eva/opt/anaconda3/envs/R4b/lib/libopenblasp-r0.3.26.dylib;  LAPACK version 3.12.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: America/New_York
tzcode source: system (macOS)

attached base packages:
[1] grid      stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] gridExtra_2.3      ggvenn_0.1.10      viridis_0.6.4      viridisLite_0.4.2 
 [5] RColorBrewer_1.1-3 patchwork_1.1.2    readxl_1.4.3       ggpubr_0.6.0      
 [9] ggplot2_3.5.0      tidyr_1.3.1        cowplot_1.1.1      stringr_1.5.1     
[13] rio_1.0.1          dplyr_1.1.4       

loaded via a namespace (and not attached):
 [1] utf8_1.2.4       generics_0.1.3   rstatix_0.7.2    stringi_1.8.3   
 [5] digest_0.6.35    magrittr_2.0.3   evaluate_0.23    pbdZMQ_0.3-11   
 [9] fastm

In [5]:
# Load the expressed-gene table (CSV with a header row).
# As printed further down in this notebook, it carries one row per
# gene x Tissue x Condition with Average, SD and CV columns.
ExprGenes <- import(
  file = '../ATL_diel_norm_tables_rlog/Results/ATL_diel_norm_expressed.csv',
  header = TRUE,
  sep = ',',
  fill = TRUE
)

In [6]:

# Load the syntelog / allelic-group table (CSV with a header row).
# As printed further down in this notebook, it has a `Syntelog` group label
# and a `geneID` column.
SYN <- import(
  file = '~/Dropbox/Potato/RNA-seq_2/RNAseq_analysis/Data/GeneLists/Syntelogs_Ann/syntelogs_atl_only.csv',
  header = TRUE,
  sep = ',',
  fill = TRUE
)


In [7]:
# Add a `target_id` column: the gene ID with the trailing transcript version
# removed. The allelic-group table lists specific gene models (transcript
# versions), whereas `target_id` is the version-less key used for the join
# with the expression table below.

## Strip a trailing ".N" or ".NN" suffix (transcript versions .1-.99).
## IDs without such a suffix are left unchanged.
## str_remove() is vectorised, so the result is already a character vector
## (no list column, no as.character() round-trip), and an NA geneID is passed
## through as NA instead of raising an error inside a scalar `if`.
SYN$target_id <- str_remove(SYN$geneID, "\\.[0-9]{1,2}$")

SYN[1:2, ]

Unnamed: 0_level_0,Syntelog,geneID,target_id
Unnamed: 0_level_1,<chr>,<chr>,<chr>
1,Synt_1,Soltu.DM.01G000020.1,Soltu.DM.01G000020
2,Synt_1,Soltu.Atl_v3.01_4G000020.2,Soltu.Atl_v3.01_4G000020


In [8]:
# Annotate every 'Atl' row of the syntelog table with the number of 'Atl'
# gene IDs found in its Syntelog group (`ATL_alleles`).
SYN_a <- local({
  # Rows whose geneID contains 'Atl'
  atl_rows <- filter(SYN, str_detect(geneID, 'Atl'))

  # One row per Syntelog holding the count of its 'Atl' rows
  atl_counts <- atl_rows %>%
    group_by(Syntelog) %>%
    summarise(ATL_alleles = n()) %>%
    ungroup()

  # Attach the count to each 'Atl' row; the counts table goes first so the
  # columns come out as Syntelog, ATL_alleles, then the remaining columns
  right_join(atl_counts, atl_rows, by = 'Syntelog')
})

SYN_a[1:2, ]

Syntelog,ATL_alleles,geneID,target_id
<chr>,<int>,<chr>,<chr>
Synt_1,4,Soltu.Atl_v3.01_4G000020.2,Soltu.Atl_v3.01_4G000020
Synt_1,4,Soltu.Atl_v3.01_1G000020.2,Soltu.Atl_v3.01_1G000020


In [9]:
# Add the syntelog label and allele count to each expression row, matching on
# the version-less `target_id`. Rows with no match get NA in both columns.
# NOTE: this overwrites ExprGenes, so running the cell twice joins twice.
ExprGenes <- left_join(
  ExprGenes,
  select(SYN_a, target_id, ATL_alleles, Syntelog),
  by = 'target_id'
)
ExprGenes[1:2, ]

Unnamed: 0_level_0,geneID,target_id,Tissue,Condition,Average,SD,CV,ATL_alleles,Syntelog
Unnamed: 0_level_1,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<chr>
1,Soltu.Atl_v3.01_0G000010.1,Soltu.Atl_v3.01_0G000010,Leaf,LD,0.7458996,0.4044651,0.8060586,,
2,Soltu.Atl_v3.01_0G000010.1,Soltu.Atl_v3.01_0G000010,Leaf,SD,1.7492942,0.9515069,0.5537575,,


In [10]:
# Build every unordered pair of expressed alleles within each syntelog group.
# Result columns: Syntelog, Alleles (number of distinct expressed gene IDs in
# the group), Allele1, Allele2.
SynPairs <- ExprGenes %>%
  filter(!is.na(Syntelog)) %>%
  # one row per gene: drops the repeats coming from the different conditions
  distinct(Syntelog, geneID) %>%
  group_by(Syntelog) %>%
  mutate(Alleles = n()) %>%
  # a pair needs at least two expressed genes in the syntelog
  filter(Alleles > 1) %>%
  # group by syntelog (Alleles is constant within it and is kept as a column)
  group_by(Syntelog, Alleles) %>%
  # reframe() replaces the superseded do(): it returns one row per pair and
  # an ungrouped result. combn() gives a 2 x k matrix, transposed to k x 2.
  # Gene IDs are already unique per group, so no further de-duplication of
  # the pairs is needed.
  reframe(
    setNames(
      as.data.frame(t(combn(geneID, 2))),
      c("Allele1", "Allele2")
    )
  )

In [11]:
# Preview the first rows of the allelic-pair table.
head(SynPairs)

Syntelog,Alleles,Allele1,Allele2
<chr>,<int>,<chr>,<chr>
Synt_1,4,Soltu.Atl_v3.01_1G000020.2,Soltu.Atl_v3.01_2G000020.1
Synt_1,4,Soltu.Atl_v3.01_1G000020.2,Soltu.Atl_v3.01_3G000040.1
Synt_1,4,Soltu.Atl_v3.01_1G000020.2,Soltu.Atl_v3.01_4G000020.2
Synt_1,4,Soltu.Atl_v3.01_2G000020.1,Soltu.Atl_v3.01_3G000040.1
Synt_1,4,Soltu.Atl_v3.01_2G000020.1,Soltu.Atl_v3.01_4G000020.2
Synt_1,4,Soltu.Atl_v3.01_3G000040.1,Soltu.Atl_v3.01_4G000020.2


In [12]:
# Write the allelic-pair table to disk; rio picks the format from the
# file extension (.csv).
export(
  SynPairs,
  file = './Results/ExpressedAllelicPairs.csv'
)