In [1]:
import pathlib
from umap_manager import UMAPManager

In [2]:
# Root of the downloaded data; each case is read from DATA_DIR / <case id>.
# (Plain string literal: the original f-prefix had no placeholders to format.)
DATA_DIR = pathlib.Path('../TCGA/downloads')
case = 'TCGA-3C-AALK-01Z-00-DX1'

# Instantiate the manager for this case
manager = UMAPManager(DATA_DIR / case)

# Optional: uncomment to disable computation of the density column
# manager.compute_density = False

# Filter by class
manager.class_filters = ['CancerEpithelium']

# Sample 500 random cells
# NOTE(review): no seed is set anywhere in this cell, so a re-run may select
# different cells — confirm whether UMAPManager exposes a way to fix the sample.
manager.sample_size = 500

# Exclude every column whose name contains one of these substrings
manager.exclude_columns = [
    c for c in manager.columns
    if any(substr in c for substr in ['Identifier.', 'ClassifProbab.', 'Unconstrained.'])
]

# View data (last expression of the cell, so the notebook renders it)
manager.data

Reading HIPS data.
Found 412219 features.
Computing density column.


Unnamed: 0,Orientation.Orientation,Size.Area,Size.ConvexHullArea,Size.MajorAxisLength,Size.MinorAxisLength,Size.Perimeter,Shape.Circularity,Shape.Eccentricity,Shape.EquivalentDiameter,Shape.Extent,...,Cytoplasm.Haralick.Entropy.Range,Cytoplasm.Haralick.DifferenceVariance.Mean,Cytoplasm.Haralick.DifferenceVariance.Range,Cytoplasm.Haralick.DifferenceEntropy.Mean,Cytoplasm.Haralick.DifferenceEntropy.Range,Cytoplasm.Haralick.IMC1.Mean,Cytoplasm.Haralick.IMC1.Range,Cytoplasm.Haralick.IMC2.Mean,Cytoplasm.Haralick.IMC2.Range,density
749,-1.385343,242.0,272.0,21.629913,15.600981,62.627417,0.775347,0.692655,17.553460,0.677871,...,0.481044,0.007825,0.003167,2.219511,0.506060,-0.284566,0.145848,0.914719,0.088667,0.000691
422,0.882874,172.0,179.0,15.384361,14.440125,47.556349,0.955699,0.344943,14.798554,0.819048,...,0.524038,0.009995,0.003044,1.721968,0.441184,-0.367871,0.154932,0.953685,0.043950,0.001527
513,-1.494500,118.0,123.0,12.688416,12.190073,39.071068,0.971362,0.277504,12.257335,0.893939,...,0.551784,0.012532,0.004349,1.448168,0.538931,-0.447973,0.191320,0.963852,0.034156,0.003053
797,0.631212,175.0,179.0,15.269653,14.687574,46.727922,1.007152,0.273472,14.927053,0.892857,...,0.490426,0.009385,0.003130,1.897850,0.527474,-0.347856,0.146096,0.944193,0.062499,0.001260
84,0.507099,87.0,88.0,10.626361,10.391822,31.313708,1.114962,0.208939,10.524820,0.870000,...,0.309704,0.014521,0.003116,1.639713,0.441740,-0.437268,0.150985,0.920449,0.051164,0.001207
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
309,0.378394,117.0,122.0,14.207699,10.673089,38.970563,0.968105,0.660053,12.205287,0.759740,...,0.439731,0.008189,0.002265,2.055091,0.442075,-0.233102,0.130535,0.884140,0.098724,0.001498
125,0.478700,193.0,196.0,16.711908,14.862812,49.313708,0.997314,0.457218,15.675944,0.919048,...,0.415257,0.006726,0.002583,2.332734,0.492405,-0.235525,0.134255,0.883296,0.113753,0.001924
225,-0.083340,91.0,93.0,12.162246,9.715781,33.656854,1.009495,0.601533,10.764051,0.919192,...,0.445894,0.009938,0.002883,1.764234,0.463111,-0.292599,0.158325,0.908595,0.103284,0.001478
10,-0.918383,268.0,273.0,20.105785,17.081579,58.384776,0.987974,0.527451,18.472363,0.875817,...,0.366425,0.010081,0.002234,1.774362,0.324677,-0.388427,0.113147,0.957335,0.031594,0.003374


In [3]:
# Show the image for this case. As the last expression of the cell, the value
# is rendered by the notebook (in the captured output: a map widget with zoom controls).
manager.image

VBox(children=(Map(center=[40960.0, 47744.0], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom…

In [4]:
# Show cell thumbnails (rendered in the captured output as grids of PNG image widgets).
# NOTE(review): presumably covers the currently sampled/filtered cells — confirm
# against UMAPManager.show_cell_thumbnails.
manager.show_cell_thumbnails()

GridBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00&\x00\x00\x00$\x08\x02\x00\x00\…

GridBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00&\x00\x00\x00$\x08\x02\x00\x00\…

In [5]:
# Split the sampled cells by slicing: the first `n_training` rows are used to
# fit the transform, everything after them is held out for inference.
n_training = 400
train_set, test_set = manager.data[:n_training], manager.data[n_training:]

# No transform exists yet, so this call trains one on `train_set` and plots it
manager.reduce_dims(train_set, plot=True)

# From here on the trained transform is reused: `test_set` is only projected
manager.reduce_dims(test_set, plot=True)

Training UMAP Transform.
Completed training in 4.898204 seconds.
Running inference on 400 cells with trained UMAP Transform.
Completed inference in 0.000625 seconds.


FigureWidget({
    'data': [{'marker': {'color': {'bdata': ('AAABAAIAAwAEAAUABgAHAAgACQAKAA' ... 'UBhgGHAYgBiQGKAYsBjAGNAY4BjwE='),
                                   'dtype': 'i2'}},
              'mode': 'markers',
              'type': 'scatter',
              'uid': '51b6f99a-3c88-4656-8081-4e04267975aa',
              'x': {'bdata': ('1h6xQLr8ukB2IV5ArbC9QKO4X0C5MJ' ... '85r9pA6TS4QOMDmUBTimtA0ImlQA=='),
                    'dtype': 'f4'},
              'y': {'bdata': ('luPOQAJMnECPE8U/eyI+QJnv8EBzHE' ... 'DjC09AMIrLQNm8nkDHjGBApB+VQA=='),
                    'dtype': 'f4'}}],
    'layout': {'template': '...'}
})

VBox()

Running inference on 100 cells with trained UMAP Transform.
Completed inference in 1.660784 seconds.


FigureWidget({
    'data': [{'marker': {'color': {'bdata': ('AAECAwQFBgcICQoLDA0ODxAREhMUFR' ... '9QUVJTVFVWV1hZWltcXV5fYGFiYw=='),
                                   'dtype': 'i1'}},
              'mode': 'markers',
              'type': 'scatter',
              'uid': '93618f48-5565-4178-b815-0d4b63a813ea',
              'x': {'bdata': ('5beiQNjSbj5/DmJAhmJxQOflwkBw5q' ... 'CJExFAjE/CQABMQz9GifJAbTCOQA=='),
                    'dtype': 'f4'},
              'y': {'bdata': ('3lILQBXPdkCQ9wVAhEDyQJTeEkDxGt' ... 'AM0jJAVRemQAYT1z9kojZAxXy1QA=='),
                    'dtype': 'f4'}}],
    'layout': {'template': '...'}
})

VBox()

In [6]:
# For each target cell in a given list of ids, find the 10 most similar cells and display them.
# NOTE(review): it is not visible here whether these ids are row positions or
# index labels of manager.data — confirm against UMAPManager.nearest_neighbors.
target_cells = [1, 2]
# n is the number of similar cells requested per target (the output reports "10 most similar cells")
manager.nearest_neighbors(target_cells, n=10, show=True)

Running inference on 500 cells with trained UMAP Transform.
Completed inference in 0.513152 seconds.
Running inference on 2 cells with trained UMAP Transform.
Completed inference in 0.010166 seconds.
Cell 1:


GridBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x002\x00\x00\x00,\x08\x02\x00\x00\…

10 most similar cells:


GridBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00$\x00\x00\x00*\x08\x02\x00\x00\…


Cell 2:


GridBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00$\x00\x00\x00$\x08\x02\x00\x00\…

10 most similar cells:


GridBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00$\x00\x00\x00$\x08\x02\x00\x00\…




In [7]:
# Persist the trained transform next to the downloaded cases so that another
# manager can load it later via `transform_path`
transform_path = DATA_DIR.joinpath('my_umap.transform')
manager.save_transform(transform_path)

Saved UMAP Transform to ../TCGA/downloads/my_umap.transform.


In [8]:
# Build a second manager pointed at a different case, configured with the
# same class filter, sample size and column exclusions as the first one
manager_2 = UMAPManager(DATA_DIR.joinpath('TCGA-3C-AALI-01Z-00-DX1'))
manager_2.class_filters = ['CancerEpithelium']
manager_2.sample_size = 500
manager_2.exclude_columns = [
    column
    for column in manager_2.columns
    if any(
        pattern in column
        for pattern in ('Identifier.', 'ClassifProbab.', 'Unconstrained.')
    )
]

# Reuse the transform that was saved to `transform_path` instead of training a new one
manager_2.load_transform(transform_path)

# With a transform already loaded, this call only runs inference (and plots the result)
manager_2.reduce_dims(plot=True)

Reading HIPS data.
Found 489293 features.
Loaded UMAP Transform from ../TCGA/downloads/my_umap.transform.
Computing density column.
Running inference on 500 cells with trained UMAP Transform.
Completed inference in 0.214186 seconds.


FigureWidget({
    'data': [{'marker': {'color': {'bdata': ('AAABAAIAAwAEAAUABgAHAAgACQAKAA' ... 'HqAesB7AHtAe4B7wHwAfEB8gHzAQ=='),
                                   'dtype': 'i2'}},
              'mode': 'markers',
              'type': 'scatter',
              'uid': '21850c40-f915-4b3d-85ca-5b8c8c66c5c8',
              'x': {'bdata': ('dCE0viNhR0DDnzdAn/BCP+FBskBevE' ... 'JAzTAoPk9zUEC7ETRAI9C8PhOhoT4='),
                    'dtype': 'f4'},
              'y': {'bdata': ('f9PNP0OChkC/V1tA2j8tQOeWjEA7EC' ... 'JA1Z4mQH4Q1EC6SpxAznFGQGg7F0A='),
                    'dtype': 'f4'}}],
    'layout': {'template': '...'}
})

VBox()