This notebook contains the instructions for reproducing results presented in  "*Environmental and genealogical signals on DNA methylation in a widespread apomictic dandelion lineage*" by V.N. Ibañez, M. van Antro, C. Peña Ponton, S. Ivanovic, C.A.M. Wagemaker, F. Gawehns, K.J.F. Verhoeven.

## Load data and set R environment

In this section, we will download the datasets and helper scripts needed to run the analysis, and configure the working directory and the R environment.

In [None]:
#@title Load files
# Enable the %%R cell magic (rpy2) that the following cells use to run R code.
%load_ext rpy2.ipython
# WARNING: this deletes every non-hidden file and folder in the current working
# directory so the notebook starts from a clean slate. Only run it in a
# throw-away session directory.
!rm -r *
# Folder layout expected by the R cells below (scripts/, rawData/, annotation/, plots/, ...).
!mkdir results rawData annotation scripts plots tmp
# Helper functions that are source()'d by the "Load packages" cell.
!wget -c -O scripts/commonFunctions.R https://raw.githubusercontent.com/VeronicaNoe/epiTree/main/Rscripts/commonFunctions.R
# Genetic (SSR) data and the table of differentially methylated cytosines (DMC).
!wget -c -O rawData/SSR_data.csv https://raw.githubusercontent.com/VeronicaNoe/epiTree/main/data4r/SSR_data.csv
!wget -c -O rawData/00_DMC_table.csv https://raw.githubusercontent.com/VeronicaNoe/epiTree/main/data4r/00_DMC_table.csv

# Experimental design tables, one per restriction-enzyme combination.
!wget -c -O rawData/AseI-NsiI_Design_withPlotInfos.txt https://raw.githubusercontent.com/VeronicaNoe/epiTree/main/data4r/AseI-NsiI_Design_withPlotInfos.txt
!wget -c -O rawData/Csp6I-NsiI_Design_withPlotInfos.txt https://raw.githubusercontent.com/VeronicaNoe/epiTree/main/data4r/Csp6I-NsiI_Design_withPlotInfos.txt

# Methylation tables. Note that the remote "*_petite.methylation.filtered" files
# are saved locally WITHOUT the "_petite" part, which is the name the R cells expect.
!wget -c -O rawData/AseI-NsiI_methylation.filtered https://raw.githubusercontent.com/VeronicaNoe/epiTree/main/data4r/AseI-NsiI_petite.methylation.filtered
!wget -c -O rawData/Csp6I-NsiI_methylation.filtered https://raw.githubusercontent.com/VeronicaNoe/epiTree/main/data4r/Csp6I-NsiI_petite.methylation.filtered

# Merged annotation tables, one per restriction-enzyme combination.
!wget -c -O annotation/Csp6I-NsiI_mergedAnnot.csv https://raw.githubusercontent.com/VeronicaNoe/epiTree/main/data4r/Csp6I-NsiI_mergedAnnot.csv
!wget -c -O annotation/AseI-NsiI_mergedAnnot.csv https://raw.githubusercontent.com/VeronicaNoe/epiTree/main/data4r/AseI-NsiI_mergedAnnot.csv


The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython
--2022-10-18 18:42:49--  https://raw.githubusercontent.com/VeronicaNoe/epiTree/main/Rscripts/commonFunctions.R
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 18802 (18K) [text/plain]
Saving to: ‘scripts/commonFunctions.R’


2022-10-18 18:42:49 (3.39 MB/s) - ‘scripts/commonFunctions.R’ saved [18802/18802]

--2022-10-18 18:42:50--  https://raw.githubusercontent.com/VeronicaNoe/epiTree/main/data4r/SSR_data.csv
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response...

In [None]:
%%R
#@title Set R environment
# Start from an empty R workspace so objects from earlier runs cannot leak in.
rm(list = ls())

# The project root is the working directory with any "/results" part removed,
# so the notebook also works when it is started from inside results/.
wd <- getwd()
baseDir <- gsub(pattern = "/results", replacement = "", x = wd)

# Folder holding the helper scripts (commonFunctions.R).
scriptDir <- file.path(baseDir, "scripts")

In [None]:
%%R
#@title Install R packages
# Install the packages that the next cell attaches; quiet = TRUE reduces the
# amount of installation output.
install.packages(
  pkgs = c("data.table", "adegenet", "poppr", "vegan", "tidyr", "dplyr"),
  quiet = TRUE
)

In [None]:
%%R
#@title Load packages silently
## load packages silently
# Attach everything the analysis needs without printing the packages'
# start-up banners. The attach order is kept as-is because it determines
# which package masks which function.
suppressPackageStartupMessages({
  library(data.table) # file reading
  library(adegenet) # df2genind(), genind2genpop()
  library(poppr) # aboot(), nei.dist()
  library(vegan) # vegdist(), decostand()
  library(tidyr) # unite()
  library(dplyr) # filter(), %>%
  # Project helpers used below (f.read.sampleTable, f.load.methylation.bed,
  # f.load.merged.annotation); scriptDir is set in the "Set R environment" cell.
  source(file.path(scriptDir, "commonFunctions.R"), local=TRUE)
})

# Analyzing data step-by-step

In this section, we will walk through the analysis chunk by chunk, using a single dataset: *AseI-NsiI*


## Load and explore data

In [None]:
#@title 
%%R
# Input file locations, all anchored on baseDir.

# Genetic data (SSR table)
infileSSRdata <- paste0(baseDir, "/rawData/", "SSR_data.csv")

# Epigenetic data: only the first restriction-enzyme combination is used in
# this step-by-step section.
RE <- c("AseI-NsiI")
designTable <- paste0(baseDir, "/rawData/", RE[1], "_Design_withPlotInfos.txt")
infileName <- paste0(baseDir, "/rawData/", RE[1], "_methylation.filtered")
annotationFile <- paste0(baseDir, "/annotation/", RE[1], "_mergedAnnot.csv")


In [None]:
#@title Load genetic data
%%R
# Read the tab-separated SSR table and show its structure.
data <- read.csv(
  infileSSRdata,
  sep = "\t",
  header = TRUE,
  row.names = NULL,
  stringsAsFactors = FALSE
)
str(data)

# Label rows by the Pop column, then keep the first column (sample) as
# individual names and the second column (genotype) as the grouping factor.
rownames(data) <- data[["Pop"]]
ind <- as.character(data[[1]])
site <- as.character(data[[2]])

# Drop the three identifier columns so only marker columns remain.
data <- data[, -(1:3)]

# Build the genind object (ploidy 3) used for the genetic dendrogram.
data_gen <- df2genind(data, sep = "_", ploidy = 3, ind.names = ind, pop = site)


'data.frame':	17 obs. of  17 variables:
 $ sample  : chr  "sample_1_AseI" "sample_2_AseI" "sample_3_AseI" "sample_4_AseI" ...
 $ genotype: chr  "DE_1a" "Fl_1" "Fl_2a" "Fl_2b" ...
 $ Pop     : chr  "DE_1a" "Fl_1" "Fl_2a" "Fl_2b" ...
 $ M58     : int  125 125 125 125 125 125 125 125 125 125 ...
 $ M44     : int  186 185 185 186 186 185 185 185 185 185 ...
 $ M31     : int  238 238 238 238 238 238 238 238 238 238 ...
 $ M78_1   : int  164 164 164 164 164 164 164 164 164 164 ...
 $ M78_2   : int  173 173 172 172 172 172 172 172 172 172 ...
 $ M61_1   : int  136 136 136 136 136 136 136 135 133 127 ...
 $ M61_2   : int  138 138 138 138 138 138 138 138 136 136 ...
 $ M67_1   : int  203 203 203 203 203 203 203 203 203 203 ...
 $ M67_2   : int  222 222 222 222 222 222 222 222 222 222 ...
 $ M72_1   : int  175 175 175 175 175 175 175 175 175 175 ...
 $ M72_2   : int  209 209 209 209 209 209 201 206 209 209 ...
 $ M143_1  : int  238 238 238 238 238 238 238 238 238 238 ...
 $ M143_2  : int  240 24

In [None]:
#@title Plot genetic dendrogram
%%R
# Write the bootstrapped dendrogram to a PDF in the plots folder.
pdf(file = paste0(baseDir, "/plots/SSR_NeiDistance_UPGMA.pdf"))

# Convert individuals to populations, then build the tree from Nei's distance
# with 1000 bootstrap replicates; cutoff = 60 is the bootstrap support threshold.
data_gen %>%
  genind2genpop() %>%
  aboot(
    distance = nei.dist,
    sample = 1000,
    cutoff = 60,
    quiet = TRUE
  )

# Close the PDF device so the file is flushed to disk.
dev.off()


 Converting data from a genind to a genpop object... 

...done.

png 
  2 


In [None]:
#@title Load epigenetic data
%%R

# Sample design table and methylation table for the selected enzyme combination.
sampleTab <- f.read.sampleTable(designTable) # see commonFunctions.R
myData <- f.load.methylation.bed(infileName) # see commonFunctions.R

# Add a "<chr>_<pos>" key (keeping the chr and pos columns) and use it as row
# name, so that rows can later be selected by position, e.g. when intersecting
# with the DMC table.
# A former `sort(myData$chrPos)` call was dropped: its result was never
# assigned, so it had no effect on myData.
myData <- unite(myData, chrPos, c(chr, pos), sep = "_", remove = FALSE)
rownames(myData) <- myData$chrPos

===  2022 Oct 18 06:43:25 PM === Removing 0 samples due to the sampleRemovalInfo column 


In [None]:
#@title Load DMC information
%%R
# Read the DMC table from the project folder. The path is anchored on baseDir,
# like every other input file, instead of depending on the current working
# directory.
temp <- read.csv(
  file.path(baseDir, "rawData", "00_DMC_table.csv"),
  header = TRUE,
  sep = "\t"
)

# Same "<chr>_<pos>" key as in the methylation table.
temp <- unite(temp, chrPos, c(chr, pos), sep = "_", remove = FALSE)

# Keep rows for the current enzyme combination, tested for the "Acc" factor,
# with a p-value of at most 0.05. The enzyme name is copied into toKeepRE
# because inside filter() the name `RE` refers to the table column, not to
# the global RE vector.
toKeepRE <- RE[1]
temp <- dplyr::filter(temp, RE == toKeepRE, factor == "Acc", pvals <= 0.05)

# A former `sort(temp$chrPos)` call was dropped: its result was never assigned.
rownames(temp) <- temp$chrPos

In [None]:
#@title Dendrograms for each context, analysis and annotation
%%R

# For every sequence context x analysis ("all" cytosines or DMC only) x
# genomic feature, cluster the Control samples on their methylation levels and
# write one dendrogram PDF to <baseDir>/plots/.
# Uses objects created by the previous cells: myData, sampleTab, temp,
# annotationFile, RE and baseDir.

context <- c("CG", "CHH", "CHG")
whichAnalysis <- c("all", "DMC")

for (j in seq_along(context)) {
  # "all" keeps every cytosine; any other value selects one sequence context.
  # The value is copied into contextFilter because inside subset() the name
  # `context` refers to the column of myData.
  if (context[j] == "all") {
    myD <- myData
  } else {
    contextFilter <- context[j]
    myD <- subset(myData, context == contextFilter)
  }

  for (a in seq_along(whichAnalysis)) {
    if (whichAnalysis[a] == "all") {
      mD <- myD
      feature <- c("all", "gene", "transposon", "repeat", "nothing")
    } else {
      # Restrict the methylation table to positions present in the DMC table.
      keep <- intersect(sort(rownames(temp)), sort(rownames(myD)))
      mD <- myD[keep, ]
      # as.character() guards against a factor column, which c() would
      # otherwise turn into integer codes on older R versions.
      feature <- c("all", as.character(unique(temp$feature)))
    }

    ## a plot for each feature
    for (i in seq_along(feature)) {
      subAnno <- f.load.merged.annotation(annotationFile, feature[i])
      # Annotation row names presumably carry a "chr" prefix that mD$chr
      # lacks; strip it before matching.
      toKeep <- gsub("chr", "", rownames(subAnno))
      commonChr <- sort(intersect(toKeep, as.character(mD$chr)))
      df <- mD[as.character(mD$chr) %in% as.character(commonChr), ]

      # Skip combinations with four or fewer positions.
      if (dim(df)[1] <= 4) {
        next
      }

      ## dendrogram
      # Per-sample methylation level = methylated reads / total reads.
      totalCols <- grep("_total$", colnames(df), value = TRUE)
      methCols <- grep("_methylated$", colnames(df), value = TRUE)
      totCov <- df[, totalCols]
      methCov <- df[, methCols]
      colnames(totCov) <- gsub("_total$", "", colnames(totCov))
      colnames(methCov) <- gsub("_methylated$", "", colnames(methCov))
      mePerc <- methCov / totCov

      # Samples in rows. Both tables are ordered by sample name so that the
      # "Treat:Acc" labels line up with the data rows (assumes the design
      # table and the methylation table contain the same samples).
      datos <- t(mePerc)
      datos <- datos[order(rownames(datos)), ]
      sampleNames <- sampleTab[order(rownames(sampleTab)), ]
      sampleNames$Sample_name <- rownames(sampleNames)
      sampleNames <- unite(sampleNames, roName, Treat, Acc, sep = ":", remove = TRUE)
      rownames(datos) <- sampleNames$roName

      # Keep only samples whose label starts with the Control treatment.
      onlyControl <- grep("Control:", rownames(datos))
      datos <- datos[onlyControl, ]

      # Euclidean distance between samples, normalised with decostand("norm"),
      # then a second Euclidean distance and average-linkage clustering.
      dis <- vegdist(datos, na.rm = TRUE, "euclid")
      dist <- vegdist(decostand(dis, "norm"), "euclidean", na.rm = TRUE)
      clus <- hclust(dist, "average")

      pdf(paste0(baseDir,"/plots/",RE[1],"_","Dendrogram_","allSamples_",context[j],"_",whichAnalysis[a],"_", feature[i], ".pdf"))
      plot(clus, cex = 0.6, hang=-0.5,main=paste0("Dendrogram_",RE[1],"_",context[j],"_",whichAnalysis[a],"_", feature[i]))
      dev.off()
    }
  }
}



# Make dendrograms for both epigenetic datasets in each context and genomic feature

In this section, the code will run the previous steps for both epigenetic datasets: *AseI-NsiI* and *Csp6I-NsiI*

In [None]:
#@title
%%R
## Process both epigenetic data sets
# Same workflow as the step-by-step section, repeated for each restriction-
# enzyme combination. Output PDFs carry a "_2" suffix so they do not overwrite
# the files written by the single-dataset cell.

RE <- c("AseI-NsiI", "Csp6I-NsiI")
context <- c("CG", "CHH", "CHG")
whichAnalysis <- c("all", "DMC")

# The DMC table is filtered per enzyme combination inside the loop, so it is
# read only once. The path is anchored on baseDir, like the other inputs,
# instead of depending on the current working directory.
dmcTable <- read.csv(
  file.path(baseDir, "rawData", "00_DMC_table.csv"),
  header = TRUE,
  sep = "\t"
)
dmcTable <- unite(dmcTable, chrPos, c(chr, pos), sep = "_", remove = FALSE)

for (r in seq_along(RE)) {
  designTable <- file.path(paste0(baseDir, "/rawData/",RE[r], "_Design_withPlotInfos.txt"))
  infileName <- file.path(paste0(baseDir,"/rawData/",RE[r],"_methylation.filtered"))
  annotationFile <- file.path(paste0(baseDir, "/annotation/",RE[r], "_mergedAnnot.csv"))

  sampleTab <- f.read.sampleTable(designTable) # see commonFunctions.R
  myData <- f.load.methylation.bed(infileName) # see commonFunctions.R
  # "<chr>_<pos>" key used as row name so rows can be selected by position.
  myData <- unite(myData, chrPos, c(chr, pos), sep = "_", remove = FALSE)
  rownames(myData) <- myData$chrPos

  # DMC rows for this enzyme combination, factor "Acc", p-value <= 0.05.
  # toKeepRE is needed because inside filter() the name `RE` refers to the
  # table column, not to the global RE vector.
  toKeepRE <- RE[r]
  temp <- dplyr::filter(dmcTable, RE == toKeepRE, factor == "Acc", pvals <= 0.05)
  rownames(temp) <- temp$chrPos

  for (j in seq_along(context)) {
    # "all" keeps every cytosine; any other value selects one sequence
    # context. Inside subset() the name `context` refers to the column of
    # myData, hence the contextFilter copy.
    if (context[j] == "all") {
      myD <- myData
    } else {
      contextFilter <- context[j]
      myD <- subset(myData, context == contextFilter)
    }

    for (a in seq_along(whichAnalysis)) {
      if (whichAnalysis[a] == "all") {
        mD <- myD
        feature <- c("all", "gene", "transposon", "repeat", "nothing")
      } else {
        # Restrict the methylation table to positions present in the DMC table.
        keep <- intersect(sort(rownames(temp)), sort(rownames(myD)))
        mD <- myD[keep, ]
        # as.character() guards against a factor column, which c() would
        # otherwise turn into integer codes on older R versions.
        feature <- c("all", as.character(unique(temp$feature)))
      }

      ## a plot for each feature
      for (i in seq_along(feature)) {
        subAnno <- f.load.merged.annotation(annotationFile, feature[i])
        # Annotation row names presumably carry a "chr" prefix that mD$chr
        # lacks; strip it before matching.
        toKeep <- gsub("chr", "", rownames(subAnno))
        commonChr <- sort(intersect(toKeep, as.character(mD$chr)))
        df <- mD[as.character(mD$chr) %in% as.character(commonChr), ]

        # Skip combinations with four or fewer positions.
        if (dim(df)[1] <= 4) {
          next
        }

        ## dendrogram
        # Per-sample methylation level = methylated reads / total reads.
        totalCols <- grep("_total$", colnames(df), value = TRUE)
        methCols <- grep("_methylated$", colnames(df), value = TRUE)
        totCov <- df[, totalCols]
        methCov <- df[, methCols]
        colnames(totCov) <- gsub("_total$", "", colnames(totCov))
        colnames(methCov) <- gsub("_methylated$", "", colnames(methCov))
        mePerc <- methCov / totCov

        # Samples in rows. Both tables are ordered by sample name so that the
        # "Treat:Acc" labels line up with the data rows (assumes the design
        # table and the methylation table contain the same samples).
        datos <- t(mePerc)
        datos <- datos[order(rownames(datos)), ]
        sampleNames <- sampleTab[order(rownames(sampleTab)), ]
        sampleNames$Sample_name <- rownames(sampleNames)
        sampleNames <- unite(sampleNames, roName, Treat, Acc, sep = ":", remove = TRUE)
        rownames(datos) <- sampleNames$roName

        # Keep only samples whose label starts with the Control treatment.
        onlyControl <- grep("Control:", rownames(datos))
        datos <- datos[onlyControl, ]

        # Euclidean distance between samples, normalised with
        # decostand("norm"), then a second Euclidean distance and
        # average-linkage clustering.
        dis <- vegdist(datos, na.rm = TRUE, "euclid")
        dist <- vegdist(decostand(dis, "norm"), "euclidean", na.rm = TRUE)
        clus <- hclust(dist, "average")

        pdf(paste0(baseDir,"/plots/",RE[r],"_","Dendrogram_","allSamples_",context[j],"_",whichAnalysis[a],"_", feature[i], "_2.pdf"))
        plot(clus, cex = 0.6, hang=-0.5,main=paste0("Dendrogram_",RE[r],"_",context[j],"_",whichAnalysis[a],"_", feature[i]))
        dev.off()
      }
    }
  }
}
