In [2]:
# Attach a package while muting its start-up messages.
# Every argument is forwarded unchanged to `library()`.
quiet_library <- function(...) {
  suppressPackageStartupMessages(
    expr = library(...)
  )
}
# Attach packages (start-up messages suppressed via quiet_library()).
quiet_library("tidyverse")
quiet_library("hise")
quiet_library("ArchR")
quiet_library("parallel")

# ArchR session settings: genome "hg38" and 60 threads.
addArchRGenome(genome = "hg38")
addArchRThreads(threads = 60)

# Set the `future.globals.maxSize` option to 1000 * 1024^5.
# NOTE(review): this value is extremely large -- confirm it is intentional.
options(future.globals.maxSize = 1000 * 1024^5)

# Remaining packages are attached with plain library() calls.
library(arrow)
library(JASPAR2020)
library(TFBSTools)


                                                   / |
                                                 /    \
            .                                  /      |.
            \\\                              /        |.
              \\\                          /           `|.
                \\\                      /              |.
                  \                    /                |\
                  \\#####\           /                  ||
                ==###########>      /                   ||
                 \\##==......\    /                     ||
            ______ =       =|__ /__                     ||      \\\
       \               '        ##_______ _____ ,--,__,=##,__   ///
        ,    __==    ___,-,__,--'#'  ==='      `-'    | ##,-/
        -,____,---'       \\####\\________________,--\\_##,/
           ___      .______        ______  __    __  .______      
          /   \     |   _  \      /      ||  |  |  | |   _  \     
         /  ^  \    |  |_) 

In [3]:
# Fetch the JASPAR2020 matrices for species "9606" as PWMs, then build a
# two-column lookup table (Motif_ID, Motif_Name) with one row per matrix.
# NOTE(review): the `tair.motif` name is kept unchanged in case other cells
# refer to it.
tair.motif <- getMatrixSet(
  x = JASPAR2020,
  opts = list(species = "9606", matrixtype = "PWM")
)

# Read the `ID` and `name` slots of every matrix in one pass and create the
# data frame once. Growing a data frame with rbind() inside a loop copies the
# whole table on every iteration.
jaspar2020_df <- data.frame(
  Motif_ID = vapply(
    tair.motif@listData,
    function(m) m@ID,
    character(1),
    USE.NAMES = FALSE
  ),
  Motif_Name = vapply(
    tair.motif@listData,
    function(m) m@name,
    character(1),
    USE.NAMES = FALSE
  ),
  stringsAsFactors = FALSE
)


In [70]:
# Install version 1.6.5 of the Matrix package through remotes.
remotes::install_version(package = "Matrix", version = "1.6.5")


Downloading package from url: https://cran.r-project.org/src/contrib/Matrix_1.6-5.tar.gz

Installing package into ‘/home/jupyter/libb’
(as ‘lib’ is unspecified)



# L2-cisbp

In [36]:
# Open the saved ArchR project stored under PenSen_ATAC_L2_cisbp/.
proj <- loadArchRProject(path = "PenSen_ATAC_L2_cisbp/")

Successfully loaded ArchRProject!


                                                   / |
                                                 /    \
            .                                  /      |.
            \\\                              /        |.
              \\\                          /           `|.
                \\\                      /              |.
                  \                    /                |\
                  \\#####\           /                  ||
                ==###########>      /                   ||
                 \\##==......\    /                     ||
            ______ =       =|__ /__                     ||      \\\
       \               '        ##_______ _____ ,--,__,=##,__   ///
        ,    __==    ___,-,__,--'#'  ==='      `-'    | ##,-/
        -,____,---'       \\####\\________________,--\\_##,/
           ___      .______        ______  __    __  .______      
          /   \     |   _  \      /      ||  |  |  | |   _ 

In [37]:
# Read the "MotifMatrix" out of the project: no seqname filter, values not
# binarized, using the session's ArchR thread setting.
MotifMatrix <- getMatrixFromProject(
  ArchRProj   = proj,
  useMatrix   = "MotifMatrix",
  useSeqnames = NULL,
  binarize    = FALSE,
  verbose     = TRUE,
  threads     = getArchRThreads(),
  logFile     = createLogFile("getMatrixFromProject")
)

ArchR logging to : ArchRLogs/ArchR-getMatrixFromProject-545d655fbdde-Date-2024-04-15_Time-22-41-11.92719.log
If there is an issue, please report to github with logFile!

2024-04-15 22:41:49.422207 : Organizing colData, 0.625 mins elapsed.

2024-04-15 22:41:50.650551 : Organizing rowData, 0.645 mins elapsed.

2024-04-15 22:41:50.659931 : Organizing rowRanges, 0.646 mins elapsed.

2024-04-15 22:41:50.6703 : Organizing Assays (1 of 2), 0.646 mins elapsed.

2024-04-15 22:42:27.49247 : Organizing Assays (2 of 2), 1.259 mins elapsed.

2024-04-15 22:43:00.759216 : Constructing SummarizedExperiment, 1.814 mins elapsed.

2024-04-15 22:43:03.035964 : Finished Matrix Creation, 1.852 mins elapsed.



In [38]:
# Compute impute weights and keep the updated project object.
proj <- addImputeWeights(ArchRProj = proj)


ArchR logging to : ArchRLogs/ArchR-addImputeWeights-545d46febff6-Date-2024-04-15_Time-22-43-03.215787.log
If there is an issue, please report to github with logFile!

2024-04-15 22:43:06.565283 : Computing Impute Weights Using Magic (Cell 2018), 0 mins elapsed.

Filtering 1 dims correlated > 0.75 to log10(depth + 1)



In [39]:
# Impute the `z` assay of MotifMatrix with the project's impute weights.
# NOTE(review): threads is fixed at 30 here rather than getArchRThreads() --
# confirm this is intentional.
ChromVar_Z_Imputed <- imputeMatrix(
  mat           = MotifMatrix@assays@data$z,
  imputeWeights = getImputeWeights(ArchRProj = proj),
  threads       = 30,
  verbose       = TRUE
)


Getting ImputeWeights

ArchR logging to : ArchRLogs/ArchR-imputeMatrix-545d110686d2-Date-2024-04-15_Time-22-46-41.970925.log
If there is an issue, please report to github with logFile!

2024-04-15 22:46:47.944725 : Imputing Matrix (1 of 2), 0 mins elapsed.

Using weights on disk



2024-04-15 22:48:33.791298 : Imputing Matrix (2 of 2), 1.764 mins elapsed.

Using weights on disk



2024-04-15 22:50:36.664639 : Finished Imputing Matrix, 3.812 mins elapsed.



In [40]:
# Convert the imputed matrix to a data frame. Coercing through as.matrix()
# first matches the JASPAR2020 cells of this notebook and is a no-op when the
# object is already a base matrix.
df <- as.data.frame(as.matrix(ChromVar_Z_Imputed))
# Copy the row names into a regular column so they are part of the table.
df$row_names <- rownames(df)

In [41]:
# Write the table to a Parquet file.
write_parquet(x = df, sink = "chromVar_Z_L2_Imputed_cisbp.parquet")

# L3-cisbp

In [44]:
# Open the saved ArchR project stored under PenSen_ATAC_L3_cisbp/.
proj <- loadArchRProject(path = "PenSen_ATAC_L3_cisbp/")

Successfully loaded ArchRProject!


                                                   / |
                                                 /    \
            .                                  /      |.
            \\\                              /        |.
              \\\                          /           `|.
                \\\                      /              |.
                  \                    /                |\
                  \\#####\           /                  ||
                ==###########>      /                   ||
                 \\##==......\    /                     ||
            ______ =       =|__ /__                     ||      \\\
       \               '        ##_______ _____ ,--,__,=##,__   ///
        ,    __==    ___,-,__,--'#'  ==='      `-'    | ##,-/
        -,____,---'       \\####\\________________,--\\_##,/
           ___      .______        ______  __    __  .______      
          /   \     |   _  \      /      ||  |  |  | |   _ 

In [45]:
# Pull the "MotifMatrix" from the currently loaded project, with all seqnames
# and values left non-binarized.
MotifMatrix <- getMatrixFromProject(
  ArchRProj   = proj,
  useMatrix   = "MotifMatrix",
  useSeqnames = NULL,
  binarize    = FALSE,
  verbose     = TRUE,
  threads     = getArchRThreads(),
  logFile     = createLogFile("getMatrixFromProject")
)

ArchR logging to : ArchRLogs/ArchR-getMatrixFromProject-545d7df610b8-Date-2024-04-16_Time-03-32-07.047392.log
If there is an issue, please report to github with logFile!

2024-04-16 03:33:01.216813 : Organizing colData, 0.904 mins elapsed.

2024-04-16 03:33:02.34095 : Organizing rowData, 0.923 mins elapsed.

2024-04-16 03:33:02.349033 : Organizing rowRanges, 0.923 mins elapsed.

2024-04-16 03:33:02.356941 : Organizing Assays (1 of 2), 0.923 mins elapsed.

2024-04-16 03:33:44.890356 : Organizing Assays (2 of 2), 1.632 mins elapsed.

2024-04-16 03:34:18.727838 : Constructing SummarizedExperiment, 2.196 mins elapsed.

2024-04-16 03:34:20.886064 : Finished Matrix Creation, 2.232 mins elapsed.



In [46]:
# Compute impute weights for this project and store the returned object.
proj <- addImputeWeights(ArchRProj = proj)


ArchR logging to : ArchRLogs/ArchR-addImputeWeights-545d2b00bb82-Date-2024-04-16_Time-03-34-20.901433.log
If there is an issue, please report to github with logFile!

2024-04-16 03:34:20.956662 : Computing Impute Weights Using Magic (Cell 2018), 0 mins elapsed.

Filtering 1 dims correlated > 0.75 to log10(depth + 1)



In [47]:
# Apply the project's impute weights to the `z` assay of MotifMatrix.
# NOTE(review): threads is hard-coded to 30 -- confirm this is intentional.
ChromVar_Z_Imputed <- imputeMatrix(
  mat           = MotifMatrix@assays@data$z,
  imputeWeights = getImputeWeights(ArchRProj = proj),
  threads       = 30,
  verbose       = TRUE
)


Getting ImputeWeights

ArchR logging to : ArchRLogs/ArchR-imputeMatrix-545d49e9f280-Date-2024-04-16_Time-03-37-59.03934.log
If there is an issue, please report to github with logFile!

2024-04-16 03:38:05.366022 : Imputing Matrix (1 of 2), 0 mins elapsed.

Using weights on disk



2024-04-16 03:39:49.89707 : Imputing Matrix (2 of 2), 1.742 mins elapsed.

Using weights on disk



2024-04-16 03:41:54.920765 : Finished Imputing Matrix, 3.826 mins elapsed.



In [48]:
# Convert the imputed matrix to a data frame. Coercing through as.matrix()
# first matches the JASPAR2020 cells of this notebook and is a no-op when the
# object is already a base matrix.
df <- as.data.frame(as.matrix(ChromVar_Z_Imputed))
# Copy the row names into a regular column so they are part of the table.
df$row_names <- rownames(df)

In [49]:
# Write the table to a Parquet file.
write_parquet(x = df, sink = "chromVar_Z_L3_Imputed_cisbp.parquet")

# L2-JASPAR2020

In [27]:
# Open the saved ArchR project stored under PenSen_ATAC_L2/.
proj <- loadArchRProject(path = "PenSen_ATAC_L2/")

Successfully loaded ArchRProject!


                                                   / |
                                                 /    \
            .                                  /      |.
            \\\                              /        |.
              \\\                          /           `|.
                \\\                      /              |.
                  \                    /                |\
                  \\#####\           /                  ||
                ==###########>      /                   ||
                 \\##==......\    /                     ||
            ______ =       =|__ /__                     ||      \\\
       \               '        ##_______ _____ ,--,__,=##,__   ///
        ,    __==    ___,-,__,--'#'  ==='      `-'    | ##,-/
        -,____,---'       \\####\\________________,--\\_##,/
           ___      .______        ______  __    __  .______      
          /   \     |   _  \      /      ||  |  |  | |   _ 

In [28]:
# Show which matrices are available in the loaded project.
getAvailableMatrices(proj)


In [29]:
# Extract the "MotifMatrix" from the project (no seqname restriction,
# non-binarized values, session thread setting).
MotifMatrix <- getMatrixFromProject(
  ArchRProj   = proj,
  useMatrix   = "MotifMatrix",
  useSeqnames = NULL,
  binarize    = FALSE,
  verbose     = TRUE,
  threads     = getArchRThreads(),
  logFile     = createLogFile("getMatrixFromProject")
)

ArchR logging to : ArchRLogs/ArchR-getMatrixFromProject-6d6719f7814d-Date-2024-04-16_Time-15-09-21.809103.log
If there is an issue, please report to github with logFile!

2024-04-16 15:10:04.748548 : Organizing colData, 0.716 mins elapsed.

2024-04-16 15:10:05.83112 : Organizing rowData, 0.734 mins elapsed.

2024-04-16 15:10:05.83961 : Organizing rowRanges, 0.734 mins elapsed.

2024-04-16 15:10:05.847299 : Organizing Assays (1 of 2), 0.734 mins elapsed.

2024-04-16 15:10:19.322411 : Organizing Assays (2 of 2), 0.959 mins elapsed.

2024-04-16 15:10:34.456259 : Constructing SummarizedExperiment, 1.211 mins elapsed.

2024-04-16 15:10:38.799389 : Finished Matrix Creation, 1.283 mins elapsed.



In [30]:
# Add impute weights to the project and keep the returned object.
proj <- addImputeWeights(ArchRProj = proj)


ArchR logging to : ArchRLogs/ArchR-addImputeWeights-6d674d7ab6ad-Date-2024-04-16_Time-15-10-38.814492.log
If there is an issue, please report to github with logFile!

2024-04-16 15:10:38.844163 : Computing Impute Weights Using Magic (Cell 2018), 0 mins elapsed.

Filtering 1 dims correlated > 0.75 to log10(depth + 1)



In [31]:
# Impute the `z` assay of MotifMatrix using the weights stored on `proj`.
# NOTE(review): threads is hard-coded to 30 -- confirm this is intentional.
ChromVar_Z_Imputed <- imputeMatrix(
  mat           = MotifMatrix@assays@data$z,
  imputeWeights = getImputeWeights(ArchRProj = proj),
  threads       = 30,
  verbose       = TRUE
)


Getting ImputeWeights

ArchR logging to : ArchRLogs/ArchR-imputeMatrix-6d6717a06c98-Date-2024-04-16_Time-15-14-09.818267.log
If there is an issue, please report to github with logFile!

2024-04-16 15:14:14.231573 : Imputing Matrix (1 of 2), 0 mins elapsed.

Using weights on disk



2024-04-16 15:15:40.665597 : Imputing Matrix (2 of 2), 1.441 mins elapsed.

Using weights on disk



2024-04-16 15:17:21.529102 : Finished Imputing Matrix, 3.122 mins elapsed.



In [33]:
# Coerce the imputed result to a base matrix, wrap it in a data frame, and
# copy the row names into a regular column.
df <- as.matrix(ChromVar_Z_Imputed)
df <- as.data.frame(df)
df[["row_names"]] <- rownames(df)

In [34]:
# Translate the motif IDs held in the row names of `df` into motif names using
# the `jaspar2020_df` lookup table.
# match() compares IDs exactly, whereas character row indexing on a data frame
# partially matches row names. It also leaves `jaspar2020_df` unmodified, so
# the lookup table can be reused by later cells.
motif_idx <- match(rownames(df), jaspar2020_df$Motif_ID)
if (anyNA(motif_idx)) {
  # Fail with the offending IDs rather than an NA row-name error further down.
  stop(
    "No JASPAR2020 entry for motif ID(s): ",
    paste(rownames(df)[is.na(motif_idx)], collapse = ", "),
    call. = FALSE
  )
}
rownames(df) <- jaspar2020_df$Motif_Name[motif_idx]
df$row_names <- rownames(df)


In [35]:
# Write the table to a Parquet file.
write_parquet(x = df, sink = "chromVar_Z_L2_Imputed_Jasper2020.parquet")

# L3-JASPAR2020

In [4]:
# Open the saved ArchR project stored under PenSen_ATAC_L3/.
proj <- loadArchRProject(path = "PenSen_ATAC_L3/")

Successfully loaded ArchRProject!


                                                   / |
                                                 /    \
            .                                  /      |.
            \\\                              /        |.
              \\\                          /           `|.
                \\\                      /              |.
                  \                    /                |\
                  \\#####\           /                  ||
                ==###########>      /                   ||
                 \\##==......\    /                     ||
            ______ =       =|__ /__                     ||      \\\
       \               '        ##_______ _____ ,--,__,=##,__   ///
        ,    __==    ___,-,__,--'#'  ==='      `-'    | ##,-/
        -,____,---'       \\####\\________________,--\\_##,/
           ___      .______        ______  __    __  .______      
          /   \     |   _  \      /      ||  |  |  | |   _ 

In [5]:
# Load the "MotifMatrix" from the project with no seqname filter and without
# binarizing, using the session's ArchR thread setting.
MotifMatrix <- getMatrixFromProject(
  ArchRProj   = proj,
  useMatrix   = "MotifMatrix",
  useSeqnames = NULL,
  binarize    = FALSE,
  verbose     = TRUE,
  threads     = getArchRThreads(),
  logFile     = createLogFile("getMatrixFromProject")
)

ArchR logging to : ArchRLogs/ArchR-getMatrixFromProject-6d675dfc0bb8-Date-2024-04-16_Time-04-05-45.314295.log
If there is an issue, please report to github with logFile!

2024-04-16 04:06:22.383507 : Organizing colData, 0.618 mins elapsed.

2024-04-16 04:06:23.246806 : Organizing rowData, 0.632 mins elapsed.

2024-04-16 04:06:23.253589 : Organizing rowRanges, 0.632 mins elapsed.

2024-04-16 04:06:23.261598 : Organizing Assays (1 of 2), 0.632 mins elapsed.

2024-04-16 04:06:42.706964 : Organizing Assays (2 of 2), 0.957 mins elapsed.

2024-04-16 04:06:59.22638 : Constructing SummarizedExperiment, 1.232 mins elapsed.

2024-04-16 04:07:00.860256 : Finished Matrix Creation, 1.259 mins elapsed.



In [6]:
# Compute impute weights and reassign the project with them attached.
proj <- addImputeWeights(ArchRProj = proj)


ArchR logging to : ArchRLogs/ArchR-addImputeWeights-6d6753fa05f2-Date-2024-04-16_Time-04-07-00.876405.log
If there is an issue, please report to github with logFile!

2024-04-16 04:07:00.906249 : Computing Impute Weights Using Magic (Cell 2018), 0 mins elapsed.

Filtering 1 dims correlated > 0.75 to log10(depth + 1)



In [7]:
# Run imputation on the `z` assay of MotifMatrix with the project's weights.
# NOTE(review): threads is hard-coded to 30 -- confirm this is intentional.
ChromVar_Z_Imputed <- imputeMatrix(
  mat           = MotifMatrix@assays@data$z,
  imputeWeights = getImputeWeights(ArchRProj = proj),
  threads       = 30,
  verbose       = TRUE
)


Getting ImputeWeights

ArchR logging to : ArchRLogs/ArchR-imputeMatrix-6d6711f1887a-Date-2024-04-16_Time-04-10-45.425955.log
If there is an issue, please report to github with logFile!

2024-04-16 04:10:49.98326 : Imputing Matrix (1 of 2), 0 mins elapsed.

Using weights on disk



2024-04-16 04:11:48.469287 : Imputing Matrix (2 of 2), 0.975 mins elapsed.

Using weights on disk



2024-04-16 04:12:57.295209 : Finished Imputing Matrix, 2.122 mins elapsed.



In [11]:
# Coerce the imputed result to a base matrix and wrap it in a data frame.
# A single as.data.frame() call is enough; wrapping it a second time returns
# the same data frame.
df <- as.data.frame(as.matrix(ChromVar_Z_Imputed))
# Copy the row names into a regular column so they are part of the table.
df$row_names <- rownames(df)

In [16]:
# Look up, for every motif ID in the row names of `df`, its position in the
# `jaspar2020_df` lookup table.
# match() compares IDs exactly, whereas character row indexing on a data frame
# partially matches row names. It also leaves `jaspar2020_df` unmodified.
motif_idx <- match(rownames(df), jaspar2020_df$Motif_ID)
if (anyNA(motif_idx)) {
  # Fail with the offending IDs rather than an NA row-name error further down.
  stop(
    "No JASPAR2020 entry for motif ID(s): ",
    paste(rownames(df)[is.na(motif_idx)], collapse = ", "),
    call. = FALSE
  )
}

In [17]:
# Replace the motif IDs with the matched motif names and refresh the column.
rownames(df) <- jaspar2020_df$Motif_Name[motif_idx]
df$row_names <- rownames(df)

In [18]:
# Write the table to a Parquet file.
write_parquet(x = df, sink = "chromVar_Z_L3_Imputed_Jasper2020.parquet")