In [None]:
%matplotlib inline

In [None]:
import os

import deepometry.model
import deepometry.utils
import deepometry.visualize
import matplotlib.pyplot
import pandas
import skimage.io

# Extract

Suppose we have the following directory structure. Data from this experiment was shown to the model during training. Images are saved as NPY files:

    /data/parsed/
        patient_A/
            positive/
                patient_A__32e88e1ac3a8f44bf8f77371155553b9.npy
                patient_A__3dc56a0c446942aa0da170acfa922091.npy  
                ...
            negative/
                patient_A__8068ef7dcddd89da4ca9740bd2ccb31e.npy
                patient_A__8348deaa70dfc95c46bd02984d28b873.npy
                ...
        patient_B/
            positive/
                patient_B__3618e715e62a229aa78a7e373b49b888.npy
                patient_B__3cf53cea7f4db1cfd101e06c366c9868.npy
                ...
            negative/
                patient_B__84949e1eba7802b00d4a1755fa9af15e.npy
                patient_B__852a1edbf5729fe8721e9e5404a8ad20.npy
                ...
                

Use `deepometry.utils.load` to load images and their corresponding labels. The parameter `convert=False` ensures the values of `labels` are the subdirectory names of each patient (`positive` or `negative`).

In [None]:
# One directory of parsed NPY images per patient.
directories = [
    "/data/parsed/patient_A",
    "/data/parsed/patient_B",
]

# convert=False keeps `labels` as the subdirectory names
# ("positive" / "negative") of each patient directory.
x, labels, units = deepometry.utils.load(directories, convert=False)

Once the data is loaded, we can redefine the model and extract the embedded features of `x`. The features will be saved as a TSV file using pandas. We omit the column headers and the data frame indexes from the exported data.

In [None]:
import keras.backend
import tensorflow

# Index (as a string) of the GPU this notebook should run on. Change this to
# a device that exists on your machine, e.g. "0" on a single-GPU workstation.
GPU_DEVICE = "3"

configuration = tensorflow.ConfigProto()
# Grow GPU memory usage on demand rather than reserving it all up front.
configuration.gpu_options.allow_growth = True
# Restrict TensorFlow to the selected GPU only.
configuration.gpu_options.visible_device_list = GPU_DEVICE

# Make Keras use a session built from this configuration.
session = tensorflow.Session(config=configuration)
keras.backend.set_session(session)

In [None]:
# Instantiate the model. The input shape is the shape of `x` without its
# first axis (presumably the sample axis -- confirm against `load`).
model = deepometry.model.Model(shape=x.shape[1:], units=units)
model.compile()

# Extract the features
features = model.extract(x, batch_size=32, standardize=True, verbose=1)

# Make sure the output directory exists: `to_csv` does not create missing
# parent directories and would otherwise fail on a fresh checkout.
if not os.path.isdir("data"):
    os.makedirs("data")

# Export features to data/features.tsv (tab-separated, no header, no index)
features_df = pandas.DataFrame(data=features)
features_df.to_csv("data/features.tsv", header=False, index=False, sep="\t")

Additional metadata can also be exported as a TSV. In this example, we export a single-column TSV containing the label data. Single-column metadata should exclude the column header (`header=False`). Metadata containing more than one column should include the column header (`header=True`).

In [None]:
# Make sure the output directory exists: `to_csv` does not create missing
# parent directories.
if not os.path.isdir("data"):
    os.makedirs("data")

# Export the labels as a single-column TSV (no header, no index).
metadata_df = pandas.DataFrame(data=labels)
metadata_df.to_csv("data/metadata.tsv", header=False, index=False, sep="\t")

When using TensorBoard to visualize an embedding, you can display a "sprite" image for each data point. A "sprite" is a small image. Use `deepometry.visualize.images_to_sprite` to create an NxN grid of sprites which TensorBoard can use to display the image associated with each data point.

In [None]:
# Create the sprites image.
sprites = deepometry.visualize.images_to_sprite(x)

# Display the sprites image.
matplotlib.pyplot.rcParams["figure.figsize"] = (12, 12)
matplotlib.pyplot.imshow(sprites)

# Make sure the output directory exists before writing the image; saving
# into a missing directory would otherwise fail.
if not os.path.isdir("data"):
    os.makedirs("data")

# Export the sprites image.
skimage.io.imsave("data/sprites.png", sprites)