Skip to content

Commit

Permalink
Remove old cluster batching code
Browse files Browse the repository at this point in the history
  • Loading branch information
alex-l-kong committed Mar 3, 2021
1 parent dd46518 commit 70970d2
Show file tree
Hide file tree
Showing 2 changed files with 2 additions and 61 deletions.
56 changes: 0 additions & 56 deletions ark/phenotyping/run_trained_som.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
# - normValsPath: path to the 99.9% normalized values file
# - pixelWeightsPath: path to the SOM weights file
# - pixelClusterDir: path to directory where the clustered data will be written to
# - batchSize: number of fovs to cluster at once

library(arrow)
library(data.table)
Expand All @@ -32,9 +31,6 @@ pixelWeightsPath <- args[4]
# get the cluster write path directory
pixelClusterDir <- args[5]

# get the batch size
batchSize <- strtoi(args[6])

# read the weights
somWeights <- as.matrix(arrow::read_feather(pixelWeightsPath))

Expand All @@ -44,59 +40,8 @@ normVals <- as.matrix(arrow::read_feather(normValsPath))
# get the marker names from the weights matrix
markers <- colnames(somWeights)

# Divide the fovs into consecutive batches of at most batchSize fovs each.
# ceiling(index / batchSize) labels each fov with its batch number. Passing
# length(fovs) / batchSize to cut() as an interval count fails when the ratio
# is below 2 and yields oversized batches when it is not a whole number.
fovBatches <- split(fovs, ceiling(seq_along(fovs) / batchSize))

print("Mapping data to cluster labels")
start <- proc.time()
batchNum <- 1
fovsClustered <- 0
# NOTE: the loop variable is deliberately not called `fovs`, so the
# script-level `fovs` vector is still intact after this loop finishes.
for (batchFovs in fovBatches) {
  batchFileName <- sprintf("cluster_%s.feather", batchNum)

  # read the marker columns of every fov in the batch, then stack them once
  # (avoids re-copying the growing matrix on every rbind)
  fovMatrices <- lapply(batchFovs, function(fov) {
    fovFileName <- paste(fov, ".feather", sep="")
    matPath <- paste(pixelMatDir, fovFileName, sep="/")
    as.matrix(arrow::read_feather(matPath, col_select=all_of(markers)))
  })
  batchPixelData <- do.call(rbind, fovMatrices)

  # 99.9% normalize pixel data
  for (marker in markers) {
    # this prevents all- or mostly-zero columns from getting normalized and
    # becoming NA/Inf
    if (normVals[1, marker] != 0) {
      batchPixelData[, marker] <- batchPixelData[, marker] / normVals[1, marker]
    }
  }

  # map FlowSOM data
  clusters <- FlowSOM:::MapDataToCodes(somWeights, batchPixelData)

  # assign cluster labels column to pixel data
  batchPixelData <- cbind(batchPixelData, cluster=clusters[, 1])

  # write to feather
  clusterPath <- file.path(pixelClusterDir, batchFileName)
  arrow::write_feather(as.data.table(batchPixelData), clusterPath)

  fovsClustered <- fovsClustered + length(batchFovs)

  # print an update every 10 batches; print() is required because values are
  # not auto-printed inside a for loop
  if (batchNum %% 10 == 0) {
    print(sprintf("Finished clustering %s fovs", fovsClustered))
  }

  batchNum <- batchNum + 1
}
print(proc.time() - start)

# using the trained SOM, assign cluster labels to the original dataset one fov at a time
print("Mapping data to cluster labels")
start <- proc.time()
for (i in 1:length(fovs)) {
# read in pixel data
fileName <- paste(fovs[i], ".feather", sep="")
Expand Down Expand Up @@ -126,6 +71,5 @@ for (i in 1:length(fovs)) {
sprintf("Finished clustering %s fovs", i)
}
}
print(proc.time() - start)

print("Done!")
7 changes: 2 additions & 5 deletions ark/phenotyping/som_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ def train_som(fovs, channels, base_dir,

def cluster_pixels(fovs, base_dir, pre_dir='pixel_mat_preprocessed',
norm_vals_name='norm_vals.feather', weights_name='weights.feather',
cluster_dir='pixel_mat_clustered', batch_size=5):
cluster_dir='pixel_mat_clustered'):
"""Uses trained weights to assign cluster labels on full pixel data
Saves data with cluster labels to cluster_dir
Expand All @@ -307,8 +307,6 @@ def cluster_pixels(fovs, base_dir, pre_dir='pixel_mat_preprocessed',
The name of the weights file
cluster_dir (str):
The name of the directory to write the clustered data
batch_size (int):
Number of fovs to cluster at once
"""

# define the paths to the data
Expand Down Expand Up @@ -353,8 +351,7 @@ def cluster_pixels(fovs, base_dir, pre_dir='pixel_mat_preprocessed',

# run the trained SOM on the dataset, assigning clusters
process_args = ['Rscript', '/run_trained_som.R', ','.join(fovs),
preprocessed_path, norm_vals_path, weights_path,
clustered_path, str(batch_size)]
preprocessed_path, norm_vals_path, weights_path, clustered_path]

process = subprocess.Popen(process_args, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)

Expand Down

0 comments on commit 70970d2

Please sign in to comment.