
Commit

Merge 9c11720 into 47dd33c
alex-l-kong committed Apr 22, 2021
2 parents 47dd33c + 9c11720 · commit 1574d64
Showing 4 changed files with 34 additions and 18 deletions.
18 changes: 12 additions & 6 deletions ark/phenotyping/consensus_cluster.R
@@ -55,17 +55,23 @@ hClust <- consensusClusterResults[[maxK]]$consensusClass
 names(hClust) <- clusterAvgs$cluster

 # append hClust to each fov's data
-print('Writing consensus clustering results')
-for (fov in fovs) {
+print("Writing consensus clustering results")
+for (i in 1:length(fovs)) {
   # read in pixel data, we'll need the cluster column for mapping
-  fileName <- paste(fov, ".feather", sep="")
-  matPath <- paste(pixelClusterDir, fileName, sep="/")
+  fileName <- file.path(fovs[i], "feather", fsep=".")
+  matPath <- file.path(pixelClusterDir, fileName)
   fovPixelData <- arrow::read_feather(matPath)

   # assign hierarchical cluster labels
   fovPixelData$hCluster_cap <- hClust[as.character(fovPixelData$cluster)]

-  # overwrite old cluster file with new one containing hCluster_cap
-  clusterPath <- paste(pixelMatConsensus, fileName, sep="/")
+  # write consensus clustered data
+  clusterPath <- file.path(pixelMatConsensus, fileName)
   arrow::write_feather(as.data.table(fovPixelData), clusterPath)
+
+  # print an update every 10 fovs
+  if (i %% 10 == 0) {
+    print("# fovs clustered:")
+    print(i)
+  }
 }
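
For context, a minimal sketch (not part of the commit) of how the new file.path()-based filename construction behaves; the fov name and directory value below are made up for illustration:

# illustrative values; pixelClusterDir is supplied by the calling script in practice
fov <- "fov0"
pixelClusterDir <- "pixel_mat_clustered"

# file.path() joins its arguments with fsep, so this yields "fov0.feather"
fileName <- file.path(fov, "feather", fsep=".")

# the default fsep is "/", so this yields "pixel_mat_clustered/fov0.feather"
matPath <- file.path(pixelClusterDir, fileName)

print(fileName)
print(matPath)
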
4 changes: 2 additions & 2 deletions ark/phenotyping/create_som_matrix.R
@@ -44,8 +44,8 @@ pixelSubsetData <- NULL

 for (fov in fovs) {
   # subset each matrix with only the markers columns
-  fileName <- paste(fov, ".feather", sep="")
-  subPath <- paste(pixelSubsetDir, fileName, sep="/")
+  fileName <- file.path(fov, "feather", fsep=".")
+  subPath <- file.path(pixelSubsetDir, fileName)
   fovSubsetData <- arrow::read_feather(subPath, col_select=all_of(markers))

   # attach each fov's dataset to pixelSubsetData
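
As a rough sketch of the accumulation pattern this loop relies on (marker names, directory, and fov names below are assumed for illustration): read only the marker columns of each fov's subsetted matrix, then stack the rows onto a running table.

library(arrow)
library(data.table)

# assumed example inputs
markers <- c("Marker1", "Marker2")
pixelSubsetDir <- "pixel_mat_subsetted"
fovs <- c("fov0", "fov1")

pixelSubsetData <- NULL

for (fov in fovs) {
  # col_select restricts the read to the marker columns
  subPath <- file.path(pixelSubsetDir, file.path(fov, "feather", fsep="."))
  fovSubsetData <- arrow::read_feather(subPath, col_select=all_of(markers))

  # append this fov's rows to the running matrix
  pixelSubsetData <- rbind(pixelSubsetData, as.data.table(fovSubsetData))
}
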
17 changes: 9 additions & 8 deletions ark/phenotyping/run_trained_som.R
@@ -44,31 +44,32 @@ markers <- colnames(somWeights)
 print("Mapping data to cluster labels")
 for (i in 1:length(fovs)) {
   # read in pixel data
-  fileName <- paste(fovs[i], ".feather", sep="")
-  matPath <- paste(pixelMatDir, fileName, sep="/")
-  fovPixelData <- as.matrix(arrow::read_feather(matPath, col_select=all_of(markers)))
+  fileName <- file.path(fovs[i], "feather", fsep=".")
+  matPath <- file.path(pixelMatDir, fileName)
+  fovPixelData <- arrow::read_feather(matPath)

   # 99.9% normalize pixel data
   for (marker in markers) {
     # this prevents all- or mostly-zero columns from getting normalized and becoming NA/Inf
     if (normVals[1, marker] != 0) {
-      fovPixelData[, marker] = fovPixelData[, marker] / normVals[1, marker]
+      fovPixelData[, marker] <- fovPixelData[, marker] / normVals[1, marker]
     }
   }

   # map FlowSOM data
-  clusters <- FlowSOM:::MapDataToCodes(somWeights, fovPixelData)
+  clusters <- FlowSOM:::MapDataToCodes(somWeights, as.matrix(fovPixelData[, markers]))

   # assign cluster labels column to pixel data
-  fovPixelData <- as.matrix(cbind(as.matrix(fovPixelData), cluster=clusters[,1]))
+  fovPixelData$cluster <- clusters[,1]

   # write to feather
-  clusterPath <- paste(pixelClusterDir, fileName, sep="/")
+  clusterPath <- file.path(pixelClusterDir, fileName)
   arrow::write_feather(as.data.table(fovPixelData), clusterPath)

   # print an update every 10 fovs
   if (i %% 10 == 0) {
-    sprintf("Finished clustering %s fovs", i)
+    print("# fovs clustered:")
+    print(i)
   }
 }

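
A minimal sketch of the normalization guard this hunk touches, with made-up values; normVals here stands in for the one-row table of 99.9th-percentile values per marker that the real script reads in:

# made-up example data for illustration
fovPixelData <- data.frame(Marker1=c(0.2, 0.8), Marker2=c(0, 0))
normVals <- data.frame(Marker1=0.9, Marker2=0)
markers <- c("Marker1", "Marker2")

for (marker in markers) {
  # skip all- or mostly-zero columns: dividing by a 0 norm value would give NaN/Inf
  if (normVals[1, marker] != 0) {
    fovPixelData[, marker] <- fovPixelData[, marker] / normVals[1, marker]
  }
}

# Marker1 is scaled by its norm value, Marker2 is left untouched
print(fovPixelData)

Passing as.matrix(fovPixelData[, markers]) to FlowSOM:::MapDataToCodes keeps any non-marker columns out of the distance computation, and assigning clusters[,1] back as fovPixelData$cluster preserves those columns in the written feather file.
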
13 changes: 11 additions & 2 deletions ark/phenotyping/som_utils_test.py
@@ -64,10 +64,10 @@ def mocked_cluster_pixels(fovs, base_dir, pre_dir='pixel_mat_preprocessed',
         fov_mat_pre = feather.read_dataframe(os.path.join(base_dir, pre_dir, fov + '.feather'))

         # only take the specified channel columns
-        fov_mat_pre = fov_mat_pre[weights.columns.values]
+        fov_mat_channels = fov_mat_pre[weights.columns.values].copy()

         # perform 99.9% normalization
-        fov_mat_pre = fov_mat_pre.div(norm_vals, axis=1)
+        fov_mat_channels = fov_mat_channels.div(norm_vals, axis=1)

         # get the mean weight for each channel column
         sub_means = weights.mean(axis=1)
@@ -469,6 +469,9 @@ def test_consensus_cluster(mocker):
         fovs = ['fov0', 'fov1', 'fov2']
         chans = ['Marker1', 'Marker2', 'Marker3', 'Marker4']

+        # make it easy to name metadata columns
+        meta_colnames = ['fov', 'row_index', 'col_index', 'segmentation_label']
+
         # create a dummy clustered matrix
         os.mkdir(os.path.join(temp_dir, 'pixel_mat_clustered'))

@@ -480,6 +483,12 @@
             columns=chans
         )

+        # add metadata
+        fov_cluster_matrix = pd.concat(
+            [fov_cluster_matrix, pd.DataFrame(np.random.rand(1000, 4), columns=meta_colnames)],
+            axis=1
+        )
+
         # assign dummy cluster labels
         fov_cluster_matrix['cluster'] = np.repeat(np.arange(100), repeats=10)

