# Frequently Asked Questions

This notebook contains tips on how to load the generated tables and how to run specific queries against them. If you have any other questions, please open an issue.

## Loading the data
Unless stated otherwise, the function below loads all the data required by the examples in this notebook. The `TablesForExploration` folder can be downloaded from [here](https://opendata.earlham.ac.uk/wheat/under_license/toronto/Ramirez-Gonzalez_etal_2018-06025-Transcriptome-Landscape/data/TablesForExploration/).

In [6]:
options(gsubfn.engine = "R")
library(ggplot2)
library(reshape2)
library(sqldf)
library(fields)
library(gridExtra)
library(ggtern)
library(clue)
library(geometry)
library(gtable)
#library(goseq)
library(plyr)
library(scales)

#' Load the tables used throughout this notebook.
#'
#' Reads the RDS/CSV/TSV files stored in `dir`, derives a few helper columns
#' on the canonical-transcript table and returns everything as a named list.
#'
#' @param dir Folder containing the exported tables
#'   (`CanonicalTranscript.rds`, `MeanTpms.rds`, `region_partition.csv`, ...).
#' @param motifs If `TRUE`, read `motifs.rds` and drop duplicated rows.
#' @param WGCNA If `TRUE`, read `WGCNA_table.csv`.
#' @param meanTpms If `TRUE`, read `MeanTpms.rds` and keep only the canonical
#'   transcripts whose `Gene` appears in its `gene` column.
#' @return A named list with the elements `canonicalTranscripts`, `meanTpms`,
#'   `triads`, `triadMovement`, `gene_universe`, `ontologies`, `id_names`,
#'   `WGCNA`, `GOSlim`, `partition`, `motifs` and `allTriads`. For the three
#'   optional tables (`motifs`, `WGCNA`, `meanTpms`) the list slot holds the
#'   loaded table when the flag is `TRUE`; otherwise it holds the flag value
#'   exactly as it was passed in (e.g. `FALSE`).
loadGeneInformation <- function(dir = "../../TablesForExploration",
                                motifs = TRUE,
                                WGCNA = FALSE,
                                meanTpms = TRUE) {

    # Fail early with a clear message instead of a low-level file error.
    if (!dir.exists(dir)) {
        stop("Data folder not found: ", dir, call. = FALSE)
    }
    table_path <- function(file_name) file.path(dir, file_name)

    canonicalTranscripts <- readRDS(table_path("CanonicalTranscript.rds"))
    canonicalTranscripts$intron_length <-
        canonicalTranscripts$mrna_length - canonicalTranscripts$exon_length
    # The 4th and 5th characters of `Chr` are stored as chromosome group and
    # genome (presumably names shaped like "chr1A" -- confirm against data).
    canonicalTranscripts$chr_group <- substr(canonicalTranscripts$Chr, 4, 4)
    canonicalTranscripts$genome    <- substr(canonicalTranscripts$Chr, 5, 5)
    expressed_genes <- canonicalTranscripts$Gene

    if (isTRUE(meanTpms)) {
        meanTpms <- readRDS(table_path("MeanTpms.rds"))
        expressed_genes <- unique(meanTpms$gene)
    }

    canonicalTranscripts <-
        canonicalTranscripts[canonicalTranscripts$Gene %in% expressed_genes, ]

    # Round the 1% position up to the next multiple of 5; a position of 0 is
    # placed in the first bin (5) rather than in a bin of its own.
    scaled_5per <- 5 * ceiling(canonicalTranscripts$scaled_1per_position / 5)
    canonicalTranscripts$scaled_5per_position <-
        ifelse(scaled_5per == 0, 5, scaled_5per)

    partition <- read.csv(table_path("region_partition.csv"), row.names = 1)

    # Express every column as a rounded percentage of the `Length` column.
    partition_percentages <- round(100 * partition / partition$Length)
    partition_percentages$Chr <- rownames(partition_percentages)
    partition$Chr <- rownames(partition)

    # NOTE: sqldf resolves `ct` and `partition_percentages` by variable name
    # in this function's environment, so these two locals must keep their
    # names. SQL string literals use single quotes: SQLite treats
    # double-quoted tokens as identifiers first, which would silently pick up
    # a column called e.g. "C" if one existed.
    ct <- canonicalTranscripts
    ct_with_partition <- sqldf(
        "SELECT ct.*, CASE
            WHEN scaled_1per_position < R1_R2a THEN 'R1'
            WHEN scaled_1per_position < R2a_C  THEN 'R2A'
            WHEN scaled_1per_position < C_R2b  THEN 'C'
            WHEN scaled_1per_position < R2b_R3 THEN 'R2B'
            ELSE 'R3' END AS partition
        FROM ct
        LEFT JOIN partition_percentages ON ct.chr = partition_percentages.chr"
    )

    # Explicit level order, so the result does not depend on alphabetical
    # sorting or on every partition being present in the data.
    ct_with_partition$partition <- factor(
        ct_with_partition$partition,
        levels = c("R1", "R2A", "C", "R2B", "R3")
    )
    canonicalTranscripts <- ct_with_partition

    triadMovement <- readRDS(table_path("TriadMovement.rds"))
    triads        <- readRDS(table_path("Triads.rds"))
    gene_universe <- read.csv(table_path("universe_table.csv"))
    ontologies    <- readRDS(table_path("OntologiesForGenes.rds"))
    id_names      <- read.csv(table_path("id_names_merged.txt"),
                              header = FALSE, sep = "\t")

    if (isTRUE(WGCNA)) {
        WGCNA <- read.csv(table_path("WGCNA_table.csv"))
    }

    go_slim <- read.csv(table_path("ObservedGOTermsWithSlim.csv"),
                        row.names = 1)

    if (isTRUE(motifs)) {
        motifs <- unique(readRDS(table_path("motifs.rds")))
    }

    # Reshape the triads from one row per group (columns A, B, D) to one row
    # per (group_id, chr_group) pair with the gene in `gene`.
    allTriads <- read.csv(table_path("SegmentalTriads.csv"),
                          stringsAsFactors = FALSE)
    only_genes <- allTriads[, c("group_id", "A", "B", "D")]
    allTriads <- melt(only_genes,
                      id.vars = "group_id",
                      variable.name = "chr_group",
                      value.name = "gene")

    list(canonicalTranscripts = canonicalTranscripts,
         meanTpms = meanTpms,
         triads = triads,
         triadMovement = triadMovement,
         gene_universe = gene_universe,
         ontologies = ontologies,
         id_names = id_names,
         WGCNA = WGCNA,
         GOSlim = go_slim,
         partition = partition,
         motifs = motifs,
         allTriads = allTriads)
}


# Folder holding the downloaded TablesForExploration data, relative to the
# current working directory.
folder <- "./TablesForExploration"

# Load the tables without the motifs; with meanTpms = TRUE the canonical
# transcripts are restricted to genes present in the mean-TPM table.
geneInformation <- loadGeneInformation(dir = folder,
                                       motifs = FALSE,
                                       meanTpms = TRUE)



In [3]:
# Preview the first rows of the mean-TPM table returned by the loader.
head(geneInformation[["meanTpms"]])

value,factor,gene,samples,subset,min_mean_tpm
40.6321681,all,TraesCS1A01G000100,140,stress,0.5
1247.8227773,all,TraesCS1A01G000100LC,140,stress,0.5
357.859167,all,TraesCS1A01G000200,140,stress,0.5
0.1167718,all,TraesCS1A01G000200LC,140,stress,0.5
17.7845964,all,TraesCS1A01G000300,140,stress,0.5
0.3985324,all,TraesCS1A01G000300LC,140,stress,0.5
