In [1]:
import pathlib
from umap_manager import UMAPManager

In [2]:
# Input location (one sub-directory per case) and output location for results
DATA_DIR = pathlib.Path('../TCGA/downloads')
RESULT_DIR = pathlib.Path('./umap_results')

# Make sure the output directory exists before any later cell writes parquet
# files into it. mkdir is idempotent, so re-running this cell is harmless.
# NOTE(review): UMAPManager may already create missing directories - confirm.
RESULT_DIR.mkdir(parents=True, exist_ok=True)

case = 'TCGA-3C-AALK-01Z-00-DX1'

# Column-name fragments; any column containing one of them is excluded below
EXCLUDED_COLUMN_PATTERNS = ('Identifier.', 'ClassifProbab.', 'Unconstrained.')

# instantiate manager
manager = UMAPManager(DATA_DIR / case)

# Disable computation of density column
manager.compute_density = False

# Filter by class
manager.class_filters = ['CancerEpithelium']

# Sample 500 random cells
# NOTE(review): no random seed is set anywhere in this notebook, so the sample
# presumably differs between runs - confirm whether UMAPManager exposes a seed.
manager.sample_size = 500

# Exclude some column patterns
manager.exclude_columns = [
    c for c in manager.columns
    if any(pattern in c for pattern in EXCLUDED_COLUMN_PATTERNS)
]

# View data (kept as the last expression so the notebook renders it)
manager.data

Reading HIPS data.
Found 412219 features.


Unnamed: 0,Orientation.Orientation,Size.Area,Size.ConvexHullArea,Size.MajorAxisLength,Size.MinorAxisLength,Size.Perimeter,Shape.Circularity,Shape.Eccentricity,Shape.EquivalentDiameter,Shape.Extent,...,Cytoplasm.Haralick.Entropy.Mean,Cytoplasm.Haralick.Entropy.Range,Cytoplasm.Haralick.DifferenceVariance.Mean,Cytoplasm.Haralick.DifferenceVariance.Range,Cytoplasm.Haralick.DifferenceEntropy.Mean,Cytoplasm.Haralick.DifferenceEntropy.Range,Cytoplasm.Haralick.IMC1.Mean,Cytoplasm.Haralick.IMC1.Range,Cytoplasm.Haralick.IMC2.Mean,Cytoplasm.Haralick.IMC2.Range
556,1.351193,130.0,132.0,14.059332,11.879256,39.899495,1.026168,0.534865,12.865502,0.909091,...,4.787268,0.327396,0.011117,0.002044,1.564773,0.308880,-0.303214,0.112040,0.901905,0.062184
118,0.111431,228.0,235.0,18.898618,15.566667,54.970563,0.948166,0.567035,17.038152,0.844444,...,5.645060,0.430419,0.009547,0.003190,1.874300,0.487709,-0.414995,0.119111,0.972513,0.023938
77,-0.378582,122.0,124.0,13.170688,11.900208,38.485281,1.035095,0.428509,12.463355,0.924242,...,5.773408,0.555650,0.008029,0.003094,2.143763,0.575374,-0.295588,0.158671,0.924108,0.070024
60,1.570796,48.0,48.0,9.165151,6.831301,24.000000,1.047198,0.666667,7.817640,1.000000,...,6.206283,0.722718,0.007067,0.003460,2.309180,0.617862,-0.303550,0.189664,0.937089,0.080403
207,0.785398,48.0,48.0,8.082904,7.694695,23.414214,1.100251,0.306186,7.817640,0.979592,...,5.646288,0.485899,0.007829,0.003216,2.147113,0.455584,-0.258917,0.147203,0.894572,0.095794
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
12,1.396739,199.0,204.0,17.069172,14.935094,50.142136,0.994620,0.484169,15.917747,0.888393,...,5.524338,0.545713,0.010194,0.003174,1.705315,0.468621,-0.382605,0.158866,0.959690,0.038768
93,-1.353667,214.0,227.0,24.020784,11.605947,57.556349,0.811778,0.875531,16.506764,0.810606,...,6.145904,0.546612,0.008262,0.002755,2.075369,0.478173,-0.287378,0.152057,0.929545,0.072312
224,0.918614,56.0,58.0,9.288397,7.840318,25.899495,1.049096,0.536189,8.444016,0.777778,...,5.638143,0.612876,0.010712,0.004388,1.637141,0.530777,-0.486326,0.158083,0.985427,0.015870
50,-0.723282,258.0,279.0,21.661500,15.774957,62.041631,0.842293,0.685313,18.124453,0.714681,...,5.205793,0.558722,0.010029,0.003687,1.872471,0.610957,-0.329695,0.192929,0.927218,0.093629


In [3]:
# Export the first case's data as a parquet file for Girder
aalk_parquet = RESULT_DIR / 'AALK_CancerEpithelium.parquet'
manager.write_data_parquet(aalk_parquet)

In [4]:
# Show the slide image. Left as the cell's last expression so the notebook
# renders it; the recorded output is a VBox wrapping a Map widget with zoom
# controls.
manager.image

VBox(children=(Map(center=[40960.0, 47744.0], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom…

In [5]:
# Display thumbnails for the cells selected by 0..19
# NOTE(review): whether these are row positions or cell ids is not visible
# here - confirm against UMAPManager.show_cell_thumbnails.
thumbnail_cells = range(20)
manager.show_cell_thumbnails(thumbnail_cells)

GridBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00,\x00\x00\x00$\x08\x02\x00\x00\…

GridBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00,\x00\x00\x00$\x08\x02\x00\x00\…

In [6]:
# Split the data: the first n_training rows train the transform,
# the remaining rows are held out for inference.
n_training = 400
train_set, test_set = manager.data[:n_training], manager.data[n_training:]

# Where each embedding is written
train_parquet = RESULT_DIR / 'AALK_CancerEpithelium_UMAP_train.parquet'
predict_parquet = RESULT_DIR / 'AALK_CancerEpithelium_UMAP_predict.parquet'

# The first reduce_dims call has no trained transform yet, so it trains one
manager.reduce_dims(train_set, plot=True, parquet_path=train_parquet)

# Later calls reuse the transform trained above instead of re-training
manager.reduce_dims(test_set, plot=True, parquet_path=predict_parquet)

Training UMAP Transform.
Completed training in 9.809561 seconds.
Running inference on 400 cells with trained UMAP Transform.
Completed inference in 0.000694 seconds.


FigureWidget({
    'data': [{'marker': {'color': {'bdata': ('AAABAAIAAwAEAAUABgAHAAgACQAKAA' ... 'UBhgGHAYgBiQGKAYsBjAGNAY4BjwE='),
                                   'dtype': 'i2'}},
              'mode': 'markers',
              'type': 'scatter',
              'uid': 'f26f573a-1508-4cc1-96ee-2e90c5668788',
              'x': {'bdata': ('s6rPQFKYZUBG2sNARXwkQZONu0CKea' ... 'H9UClBJM65QJ/FG0ENYoVAmBXgQA=='),
                    'dtype': 'f4'},
              'y': {'bdata': ('59UDQIPk2D1epURAxp5fQAnssECtJh' ... 'D94GFAp7PPP+8tgEBh/j8/WQGjQA=='),
                    'dtype': 'f4'}}],
    'layout': {'template': '...'}
})

VBox()

Running inference on 100 cells with trained UMAP Transform.
Completed inference in 2.757389 seconds.


FigureWidget({
    'data': [{'marker': {'color': {'bdata': ('AAECAwQFBgcICQoLDA0ODxAREhMUFR' ... '9QUVJTVFVWV1hZWltcXV5fYGFiYw=='),
                                   'dtype': 'i1'}},
              'mode': 'markers',
              'type': 'scatter',
              'uid': 'ff8dacd8-0802-4569-9890-8f25f6e2afd9',
              'x': {'bdata': ('Lq/wQM+0qkDMSH1Ax3uRQJFbdUDFYv' ... 'GOrVBAFqCyQPliF0GsFUhAiRbwQA=='),
                    'dtype': 'f4'},
              'y': {'bdata': ('KkiHP+fnsz/Xo8g/kFrCP8vJhkBQ1V' ... 'BeYxw/wvAJP9PvT0B1aOs/MAG8Pw=='),
                    'dtype': 'f4'}}],
    'layout': {'template': '...'}
})

VBox()

In [7]:
# Look up and display the most similar cells for every id in target_cells
target_cells = [1, 2]
n_neighbors = 10  # number of similar cells requested per target
manager.nearest_neighbors(target_cells, n=n_neighbors, show=True)

Running inference on 500 cells with trained UMAP Transform.
Completed inference in 1.124306 seconds.
Running inference on 2 cells with trained UMAP Transform.
Completed inference in 0.788249 seconds.
Cell 1:


GridBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x002\x00\x00\x00,\x08\x02\x00\x00\…

10 most similar cells:


GridBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00,\x00\x00\x008\x08\x02\x00\x00\…


Cell 2:


GridBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00$\x00\x00\x00$\x08\x02\x00\x00\…

10 most similar cells:


GridBox(children=(Image(value=b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00$\x00\x00\x00$\x08\x02\x00\x00\…




In [8]:
# Persist the trained transform so another manager can load it later
transform_path = DATA_DIR.joinpath('my_umap.transform')
manager.save_transform(transform_path)

Saved UMAP Transform to ../TCGA/downloads/my_umap.transform.


In [9]:
# Second manager: a different case, configured the same way as the first
manager_2 = UMAPManager(DATA_DIR / 'TCGA-3C-AALI-01Z-00-DX1')
manager_2.compute_density = False
manager_2.class_filters = ['CancerEpithelium']
manager_2.sample_size = 500

# Exclude every column whose name contains one of these fragments
unwanted_fragments = ('Identifier.', 'ClassifProbab.', 'Unconstrained.')
manager_2.exclude_columns = [
    column for column in manager_2.columns
    if any(fragment in column for fragment in unwanted_fragments)
]

# Export this case's data as a parquet file for Girder
aali_parquet = RESULT_DIR / 'AALI_CancerEpithelium.parquet'
manager_2.write_data_parquet(aali_parquet)

# Reuse the transform saved earlier rather than training a new one
manager_2.load_transform(transform_path)

# Embed this case's cells with the loaded transform and write the result
aali_predict_parquet = RESULT_DIR / 'AALI_CancerEpithelium_UMAP_predict.parquet'
manager_2.reduce_dims(plot=True, parquet_path=aali_predict_parquet)

Reading HIPS data.
Found 489293 features.
Loaded UMAP Transform from ../TCGA/downloads/my_umap.transform.
Running inference on 500 cells with trained UMAP Transform.
Completed inference in 0.635995 seconds.


FigureWidget({
    'data': [{'marker': {'color': {'bdata': ('AAABAAIAAwAEAAUABgAHAAgACQAKAA' ... 'HqAesB7AHtAe4B7wHwAfEB8gHzAQ=='),
                                   'dtype': 'i2'}},
              'mode': 'markers',
              'type': 'scatter',
              'uid': '3bfcc102-fdd1-46c6-9aa1-51c81f85e83c',
              'x': {'bdata': ('E50zQWnt30B6mlBAm82bQAncBkGg1f' ... '5AaArTQKKZEUFaRtBA2weIQO1Fy0A='),
                    'dtype': 'f4'},
              'y': {'bdata': ('d9NfQP12bEBXwi9AaW6+QJw7YkBoNZ' ... 'Q/5ziwP+OXG0AW1HRAc9YjP1KOekA='),
                    'dtype': 'f4'}}],
    'layout': {'template': '...'}
})

VBox()