Skip to content

Commit

Permalink
Merge 74d148a into f3b9c56
Browse files Browse the repository at this point in the history
  • Loading branch information
alex-l-kong committed Jul 19, 2022
2 parents f3b9c56 + 74d148a commit 6589247
Show file tree
Hide file tree
Showing 10 changed files with 772 additions and 209 deletions.
12 changes: 7 additions & 5 deletions ark/phenotyping/cell_consensus_cluster.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
# - clustToMeta: path to file where the SOM cluster to meta cluster mapping will be written
# - seed: random factor

library(arrow)
library(data.table)
library(ConsensusClusterPlus)
suppressPackageStartupMessages({
library(arrow)
library(data.table)
library(ConsensusClusterPlus)
})

# get the command line arguments
args <- commandArgs(trailingOnly=TRUE)
Expand Down Expand Up @@ -64,7 +66,7 @@ names(som_to_meta_map) <- clusterAvgs$cell_som_cluster
print("Writing consensus clustering")
cellClusterData <- arrow::read_feather(cellMatPath)
cellClusterData$cell_meta_cluster <- som_to_meta_map[as.character(cellClusterData$cell_som_cluster)]
arrow::write_feather(as.data.table(cellClusterData), cellMatPath)
arrow::write_feather(as.data.table(cellClusterData), cellMatPath, compression='uncompressed')

# save the mapping from cell_som_cluster to cell_meta_cluster
print("Writing SOM to meta cluster mapping table")
Expand All @@ -73,4 +75,4 @@ som_to_meta_map <- as.data.table(som_to_meta_map)
# assign cell_som_cluster column, then rename som_to_meta_map to cell_meta_cluster
som_to_meta_map$cell_som_cluster <- as.integer(rownames(som_to_meta_map))
som_to_meta_map <- setnames(som_to_meta_map, "som_to_meta_map", "cell_meta_cluster")
arrow::write_feather(som_to_meta_map, clustToMeta)
arrow::write_feather(som_to_meta_map, clustToMeta, compression='uncompressed')
10 changes: 6 additions & 4 deletions ark/phenotyping/create_cell_som.R
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
# - cellWeightsPath: path to the SOM weights file
# - seed: the random seed to use for training

library(arrow)
library(data.table)
library(FlowSOM)
suppressPackageStartupMessages({
library(arrow)
library(data.table)
library(FlowSOM)
})

# a helper function for computing 99.9%
percentile_99_9_helper <- function(x) {
Expand Down Expand Up @@ -80,4 +82,4 @@ somResults <- SOM(data=as.matrix(clusterCountsNormSub), xdim=xdim, ydim=ydim,

# write the weights to feather
print("Save trained weights")
arrow::write_feather(as.data.table(somResults$codes), cellWeightsPath)
arrow::write_feather(as.data.table(somResults$codes), cellWeightsPath, compression='uncompressed')
10 changes: 6 additions & 4 deletions ark/phenotyping/create_pixel_som.R
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,11 @@
# - pixelWeightsPath: path to the SOM weights file
# - seed: the random seed to use for training

library(arrow)
library(data.table)
library(FlowSOM)
suppressPackageStartupMessages({
library(arrow)
library(data.table)
library(FlowSOM)
})

# get the command line arguments
args <- commandArgs(trailingOnly=TRUE)
Expand Down Expand Up @@ -77,4 +79,4 @@ somResults <- SOM(data=as.matrix(pixelSubsetData), rlen=numPasses,

# write the weights to feather
print("Save trained weights")
arrow::write_feather(as.data.table(somResults$codes), pixelWeightsPath)
arrow::write_feather(as.data.table(somResults$codes), pixelWeightsPath, compression='uncompressed')
91 changes: 60 additions & 31 deletions ark/phenotyping/pixel_consensus_cluster.R
Original file line number Diff line number Diff line change
@@ -1,36 +1,25 @@
# Runs consensus clustering on the pixel data averaged across all channels

# Usage: Rscript {fovs} {markers} {maxK} {cap} {pixelMatDir} {clusterAvgPath} {clustToMeta} {seed}
# Usage: Rscript {fovs} {markers} {maxK} {cap} {pixelMatDir} {clusterAvgPath} {clustToMetaPath} {seed}

# - fovs: list of fovs to cluster
# - markers: list of channel columns to use
# - maxK: number of consensus clusters
# - cap: max z-score cutoff
# - pixelMatDir: path to the pixel data with SOM clusters
# - clusterAvgPath: path to the averaged cluster data
# - clustToMeta: path to file where the SOM cluster to meta cluster mapping will be written
# - clustToMetaPath: path to file where the SOM cluster to meta cluster mapping will be written
# - seed: random factor

library(arrow)
library(ConsensusClusterPlus)
library(data.table)
library(doParallel)
library(foreach)
library(parallel)

# helper function to map a FOV to its consensus labels
mapConsensusLabels <- function(fov, pixelMatDir, som_to_meta_map) {
# read in pixel data, we'll need the cluster column for mapping
fileName <- file.path(fov, "feather", fsep=".")
matPath <- file.path(pixelMatDir, fileName)
fovPixelData <- arrow::read_feather(matPath)

# assign hierarchical cluster labels
fovPixelData$pixel_meta_cluster <- som_to_meta_map[as.character(fovPixelData$pixel_som_cluster)]

# write data with consensus labels
arrow::write_feather(as.data.table(fovPixelData), matPath)
}
suppressPackageStartupMessages({
library(arrow)
library(ConsensusClusterPlus)
library(data.table)
library(doParallel)
library(foreach)
library(parallel)
library(stringi)
})

# get the number of cores
nCores <- parallel::detectCores() - 1
Expand All @@ -57,7 +46,7 @@ pixelMatDir <- args[5]
clusterAvgPath <- args[6]

# get the clust to meta write path
clustToMeta <- args[7]
clustToMetaPath <- args[7]

# retrieve the batch size to determine number of threads to run in parallel
batchSize <- strtoi(args[8])
Expand Down Expand Up @@ -91,7 +80,9 @@ fovsProcessed <- 0
print("Mapping pixel data to consensus cluster labels")
for (batchStart in seq(1, length(fovs), batchSize)) {
# define the parallel cluster for this batch of fovs
parallelCluster <- parallel::makeCluster(nCores, type="FORK")
# NOTE: to prevent the occasional hanging first FOV issue, we need to log to an outfile
# to "force" a return out of the foreach loop in this case
parallelCluster <- parallel::makeCluster(nCores, type="FORK", outfile='log.txt')

# register parallel cluster for dopar
doParallel::registerDoParallel(cl=parallelCluster)
Expand All @@ -100,11 +91,46 @@ for (batchStart in seq(1, length(fovs), batchSize)) {
batchEnd <- min(batchStart + batchSize - 1, length(fovs))

# run the multithreaded batch process for mapping to SOM labels and saving
foreach(
fovStatuses <- foreach(
i=batchStart:batchEnd,
.combine='c'
.combine=rbind
) %dopar% {
mapConsensusLabels(fovs[i], pixelMatDir, som_to_meta_map)
fileName <- paste0(fovs[i], '.feather')
matPath <- file.path(pixelMatDir, fileName)

status <- tryCatch(
{
fovPixelData <- arrow::read_feather(matPath)

# assign hierarchical cluster labels
fovPixelData$pixel_meta_cluster <- som_to_meta_map[as.character(fovPixelData$pixel_som_cluster)]

# write data with consensus labels
tempPath <- file.path(paste0(pixelMatDir, '_temp'), fileName)
arrow::write_feather(as.data.table(fovPixelData), tempPath, compression='uncompressed')

# this won't be displayed to the user but is used as a helper to break out
# in the rare first FOV hang issue
print(paste('Done writing fov', fovs[i]))
0
},
error=function(cond) {
# this won't be displayed to the user but is used as a helper to break out
# in the rare first FOV hang issue
print(paste('Error encountered for fov', fovs[i]))
1
}
)

data.frame(fov=fovs[i], status=status)
}

# report any erroneous feather files
for (i in 1:nrow(fovStatuses)) {
if (fovStatuses[i, 'status'] == 1) {
print(paste("The data for FOV", fovStatuses[i, 'fov'], "has been corrupted, removing"))
fovsProcessed <- fovsProcessed - 1
}
}

# unregister the parallel cluster
Expand All @@ -115,13 +141,16 @@ for (batchStart in seq(1, length(fovs), batchSize)) {

# inform user that batchSize fovs have been processed
print(paste("Processed", as.character(fovsProcessed), "fovs"))

# remove log.txt
unlink('log.txt')
}

# save the mapping from pixel_som_cluster to pixel_meta_cluster
print("Writing SOM to meta cluster mapping table")
som_to_meta_map <- as.data.table(som_to_meta_map)
som_to_meta_map_table <- as.data.table(som_to_meta_map)

# assign pixel_som_cluster column, then rename som_to_meta_map to pixel_meta_cluster
som_to_meta_map$pixel_som_cluster <- as.integer(rownames(som_to_meta_map))
som_to_meta_map <- setnames(som_to_meta_map, "som_to_meta_map", "pixel_meta_cluster")
arrow::write_feather(som_to_meta_map, clustToMeta)
som_to_meta_map_table$pixel_som_cluster <- as.integer(rownames(som_to_meta_map_table))
som_to_meta_map_table <- setnames(som_to_meta_map_table, "som_to_meta_map", "pixel_meta_cluster")
arrow::write_feather(som_to_meta_map_table, clustToMetaPath, compression='uncompressed')
10 changes: 6 additions & 4 deletions ark/phenotyping/run_cell_som.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,11 @@
# - cellWeightsPath: path to the SOM weights file
# - cellMatNormPath: the path to write the normalized pixel SOM/meta cluster count data (normalized) with cell SOM labels. This will be used for consensus clustering.

library(arrow)
library(data.table)
library(FlowSOM)
suppressPackageStartupMessages({
library(arrow)
library(data.table)
library(FlowSOM)
})

# get the command line arguments
args <- commandArgs(trailingOnly=TRUE)
Expand Down Expand Up @@ -62,4 +64,4 @@ clusterCountsNorm$cell_som_cluster <- as.integer(clusters[,1])

# write to feather
print("Writing clustered data")
arrow::write_feather(as.data.table(clusterCountsNorm), cellMatPathNorm)
arrow::write_feather(as.data.table(clusterCountsNorm), cellMatPathNorm, compression='uncompressed')
97 changes: 61 additions & 36 deletions ark/phenotyping/run_pixel_som.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,36 +7,14 @@
# - normValsPath: path to the 99.9% normalization values file (created during preprocessing)
# - pixelWeightsPath: path to the SOM weights file

library(arrow)
library(data.table)
library(doParallel)
library(FlowSOM)
library(foreach)
library(parallel)

# helper function to map a FOV to its SOM labels
mapSOMLabels <- function(fov, somWeights, pixelMatDir) {
fileName <- paste0(fov, ".feather")
matPath <- file.path(pixelMatDir, fileName)
fovPixelData_all <- data.table(arrow::read_feather(matPath))

# 99.9% normalization
fovPixelData <- fovPixelData_all[,..markers]
fovPixelData <- fovPixelData[,Map(`/`,.SD,normVals)]

# map FlowSOM data
clusters <- FlowSOM:::MapDataToCodes(somWeights, as.matrix(fovPixelData))

# add back other columns
to_add <- colnames(fovPixelData_all)[!colnames(fovPixelData_all) %in% markers]
fovPixelData <- cbind(fovPixelData_all[,..to_add],fovPixelData)

# assign cluster labels column to pixel data
fovPixelData$pixel_som_cluster <- as.integer(clusters[,1])

# write to feather
arrow::write_feather(as.data.table(fovPixelData), matPath)
}
suppressPackageStartupMessages({
library(arrow)
library(data.table)
library(doParallel)
library(FlowSOM)
library(foreach)
library(parallel)
})

# get the number of cores
nCores <- parallel::detectCores() - 1
Expand Down Expand Up @@ -78,7 +56,9 @@ fovsProcessed <- 0
print("Mapping pixel data to SOM cluster labels")
for (batchStart in seq(1, length(fovs), batchSize)) {
# define the parallel cluster for this batch of fovs
parallelCluster <- parallel::makeCluster(nCores, type="FORK")
# NOTE: to prevent the occasional hanging first FOV issue, we need to log to an outfile
# to "force" a return out of the foreach loop in this case
parallelCluster <- parallel::makeCluster(nCores, type="FORK", outfile='log.txt')

# register parallel cluster for dopar
doParallel::registerDoParallel(cl=parallelCluster)
Expand All @@ -87,11 +67,55 @@ for (batchStart in seq(1, length(fovs), batchSize)) {
batchEnd <- min(batchStart + batchSize - 1, length(fovs))

# run the multithreaded batch process for mapping to SOM labels and saving
foreach(
fovStatuses <- foreach(
i=batchStart:batchEnd,
.combine='c'
.combine=rbind
) %dopar% {
mapSOMLabels(fovs[i], somWeights, pixelMatDir)
fileName <- paste0(fovs[i], ".feather")
matPath <- file.path(pixelMatDir, fileName)

status <- tryCatch(
{
fovPixelData_all <- data.table(arrow::read_feather(matPath))
fovPixelData <- fovPixelData_all[,..markers]
fovPixelData <- fovPixelData[,Map(`/`,.SD,normVals)]

# map FlowSOM data
clusters <- FlowSOM:::MapDataToCodes(somWeights, as.matrix(fovPixelData))

# add back other columns
to_add <- colnames(fovPixelData_all)[!colnames(fovPixelData_all) %in% markers]
fovPixelData <- cbind(fovPixelData_all[,..to_add], fovPixelData)

# assign cluster labels column to pixel data
fovPixelData$pixel_som_cluster <- as.integer(clusters[,1])

# write data with SOM labels
tempPath <- file.path(paste0(pixelMatDir, '_temp'), fileName)
arrow::write_feather(as.data.table(fovPixelData), tempPath, compression='uncompressed')

# this won't be displayed to the user but is used as a helper to break out
# in the rare first FOV hang issue
print(paste('Done writing fov', fovs[i]))
0
},
error=function(cond) {
# this won't be displayed to the user but is used as a helper to break out
# in the rare first FOV hang issue
print(paste('Error encountered for fov', fovs[i]))
1
}
)

data.frame(fov=fovs[i], status=status)
}

# report any erroneous feather files
for (i in 1:nrow(fovStatuses)) {
if (fovStatuses[i, 'status'] == 1) {
print(paste("The data for FOV", fovStatuses[i, 'fov'], "has been corrupted, removing"))
fovsProcessed <- fovsProcessed - 1
}
}

# unregister the parallel cluster
Expand All @@ -102,6 +126,7 @@ for (batchStart in seq(1, length(fovs), batchSize)) {

# inform user that batchSize fovs have been processed
print(paste("Processed", as.character(fovsProcessed), "fovs"))
}

print("Done!")
# remove log.txt
unlink('log.txt')
}
Loading

0 comments on commit 6589247

Please sign in to comment.