In [2]:
# import libraries
import os
import sys

import h5py
import matplotlib.pylab as plt
import numpy as np
import pandas as pd
from jupyterlab_h5web import H5Web

# import modules with the functionalities offered by CompositionSpace
from compositionspace.autophase import ProcessAutomatedPhaseAssignment
from compositionspace.clustering import ProcessClustering
# from compositionspace.io import get_reconstructed_positions, get_iontypes, get_ranging_info
from compositionspace.preparation import ProcessPreparation
from compositionspace.segmentation import ProcessSegmentation
from compositionspace.utils import get_file_size

In [3]:
# ! pip list
# Everything this notebook writes goes below the directory it was started from.
MY_PROCESSED_DATA_PATH = os.getcwd()
print("Executing compositionspace in the following working directory: " + MY_PROCESSED_DATA_PATH)

Executing compositionspace in the following working directory: /home/kaiobach/Research/hu_hu_hu/sprint22/conda-compspace-step01/CompositionSpace


## Load reconstruction and ranging and voxelize with rectangular transfer function without creating slices

In [4]:
# Directory holding the paraprobe-toolbox results used as input.
# NOTE(review): this is an absolute, machine-specific path; adjust it to your checkout.
# Alternative example dataset (swap the two lines to use it):
# workdir = "/home/kaiobach/Research/paraprobe-toolbox/teaching/example_analyses/iuc09_saksena"
workdir = "/home/kaiobach/Research/paraprobe-toolbox/teaching/example_analyses/usa_denton_smith"
simid = 1
# Pair of input files: index 0 is the reconstruction (transcoder result),
# index 1 is the ranging definitions (ranger result).
RECONSTRUCTION_AND_RANGING = (f"{workdir}/PARAPROBE.Transcoder.Results.SimID.1.nxs",
                              f"{workdir}/PARAPROBE.Ranger.Results.SimID.1.nxs")
# Configuration and results files live below the notebook's working directory.
config_file_path = f"{MY_PROCESSED_DATA_PATH}/tests/experiment_params.yaml"
results_file_path = f"{MY_PROCESSED_DATA_PATH}/CompositionSpace.Results.{simid}.nxs"

In [6]:
# Report the size of both input files (reconstruction first, ranging second).
for nexus_input_path in RECONSTRUCTION_AND_RANGING:
    get_file_size(nexus_input_path)
# H5Web(RECONSTRUCTION_AND_RANGING[0])
# H5Web(RECONSTRUCTION_AND_RANGING[1])

16.239 MiB
0.871 MiB


In [12]:
# Run the preparation (voxelization) step on the reconstruction/ranging pair.
recon_path, range_path = RECONSTRUCTION_AND_RANGING
voxelize = ProcessPreparation(
    config_file_path,
    results_file_path,
    entry_id=1,
    verbose=True,
)
voxelize.run(recon_file_path=recon_path, range_file_path=range_path)

Load reconstructed positions shape (945211, 3), type <class 'numpy.ndarray'>, dtype float32
26 iontypes distinguished:
	ion0, ('unknown iontype', np.uint8(0))
	ion1, ('Si ++', np.uint8(1))
	ion2, ('Si +', np.uint8(2))
	ion3, ('Cr +', np.uint8(3))
	ion4, ('Si +', np.uint8(4))
	ion5, ('Si +', np.uint8(5))
	ion6, ('Si ++', np.uint8(6))
	ion7, ('Si ++', np.uint8(7))
	ion8, ('Cr +', np.uint8(8))
	ion9, ('Cr ++', np.uint8(9))
	ion10, ('Cr ++', np.uint8(10))
	ion11, ('Cu +', np.uint8(11))
	ion12, ('Cu +', np.uint8(12))
	ion13, ('C +', np.uint8(13))
	ion14, ('C ++', np.uint8(14))
	ion15, ('O +', np.uint8(15))
	ion16, ('O +', np.uint8(16))
	ion17, ('Cr O +', np.uint8(17))
	ion18, ('Cr O +', np.uint8(18))
	ion19, ('Cr O +', np.uint8(19))
	ion20, ('Cr O ++', np.uint8(20))
	ion21, ('Cr O ++', np.uint8(21))
	ion22, ('Cr O ++', np.uint8(22))
	ion23, ('Cr O O +', np.uint8(23))
	ion24, ('Cr O O ++', np.uint8(24))
	ion25, ('Cr Cr O ++', np.uint8(25))
9 charge-agnostic iontypes distinguished:
	unknown i

'\nvoxelize.init_ranging(ityp_info, elements)\nvoxelize.write_init_results()\nvoxelize.define_voxelization_grid(xyz_val)\nvoxelize.define_lookup_table(ityp_val)\nvoxelize.write_voxelization_grid_info()\nvoxelize.write_voxelization_results()\n'

In [13]:
# Report the size of the results file after the voxelization step.
get_file_size(results_file_path)
# Uncomment to browse the results file interactively.
# H5Web(results_file_path)

2.279 MiB


Voxelization is performed on elements, not on iontypes, i.e. using an atomic decomposition!

## Automated phase assignment

In [14]:
# Automated phase assignment, operating on the same config and results files as before.
autophase = ProcessAutomatedPhaseAssignment(
    config_file_path,
    results_file_path,
    entry_id=1,
    verbose=True,
)
autophase.run()

Composition matrix has 5 chemical classes
Populating composition table column 1
Populating composition table column 2
Populating composition table column 3
Populating composition table column 4
Populating composition table column 5
sorted_index, feature_importance[sorted_index]
1, 0.8392149718450334
2, 0.08843114534741313
3, 0.06303204111209978
4, 0.005043336884034228
5, 0.004278504811419446
0, 0.0


In [15]:
# Report the size of the results file after the automated phase assignment.
get_file_size(results_file_path)
# Uncomment to browse the results file interactively.
# H5Web(results_file_path)

2.292 MiB


<jupyterlab_h5web.widget.H5Web object>

## Segmentation PCA and IC minimization

In [None]:
# Segmentation: PCA first, then the BICs minimization; each call writes its own results.
segmentation = ProcessSegmentation(
    config_file_path,
    results_file_path,
    entry_id=1,
    verbose=True,
)
segmentation.perform_pca_and_write_results()
segmentation.perform_bics_minimization_and_write_results()

In [None]:
# Report the size of the results file after the segmentation step.
get_file_size(results_file_path)
# Uncomment to browse the results file interactively.
# H5Web(results_file_path)

## DBSCAN clustering

In [None]:
# Clustering step, operating on the same config and results files as the previous steps.
clustering = ProcessClustering(config_file_path, results_file_path, entry_id=1, verbose=True)
clustering.run_and_write_results()

In [None]:
# Report the size of the results file after clustering, then open it in the H5Web viewer.
get_file_size(results_file_path)
H5Web(results_file_path)

<div class="alert alert-block alert-danger">
Discussion points:<br>
- Tests are too specific, hardcoded file names<br>
- Readthedocs documentation needs to be updated<br>
- GM and ML models are variables collect over<br>
- Loading file formats from the community should use the ifes-apt-tc-data-modeling library; currently paraprobe results are used<br>
- Ion handling should use ifes-apt-tc-data-modeling, which is not yet added as a dependency and is not loading properly<br>
-  tests/experiment_params.json should be removed?<br>
- NeXus renaming<br>
- CompositionSpace by design does not distinguish charge states; iontypes should be decomposed into their atoms<br>
- Why is the center of the voxel defined by the median position of the ions and not by the barycenter of the voxel? (currently using the voxel barycenter)<br>
- Ran with a discretization of 2 and even 0.5; the speed is comparable<br>
- The triple loop in the preparation step should be replaced with fancier NumPy indexing code that I know exists somewhere but couldn't find quickly<br>
- Move test data out of this repository<br>
</div>

## Meshing

Test for now with the SiGe dataset.

In [None]:
# Input file for the meshing test; the relative path is resolved against the
# current working directory.
sige_file_path = "Output_DBSCAN_segmentation_phase_1.h5"
# Open the file in the H5Web viewer.
H5Web(sige_file_path)

In [None]:
# Scan every dataset in group "1" of the SiGe file and compute
#   * aabb3d: axis-aligned bounding box of columns 0..2 (one row per axis,
#     column 0 = minimum, column 1 = maximum),
#   * n_vxls: total number of rows over all datasets.
with h5py.File(sige_file_path, "r") as h5r:
    n_vxls = 0
    aabb3d = np.zeros((3, 2), np.float64)
    # Start from an "inverted" box so that the first dataset always tightens it.
    # Note that np.finfo(...).min is the most negative finite float, not the
    # smallest positive one.
    aabb3d[:, 0] = np.finfo(np.float64).max
    aabb3d[:, 1] = np.finfo(np.float64).min
    # print(aabb3d)
    for key in h5r["1"].keys():
        # Read the dataset from disk once instead of once per axis and bound.
        rows = h5r["1"][key][:]
        if rows.shape[0] == 0:
            # Nothing to contribute; min/max of an empty array would raise.
            continue
        xyz = rows[:, 0:3]
        aabb3d[:, 0] = np.minimum(aabb3d[:, 0], xyz.min(axis=0))
        aabb3d[:, 1] = np.maximum(aabb3d[:, 1], xyz.max(axis=0))
        # Count each dataset exactly once (previously this ran once per axis,
        # which tripled the reported total).
        n_vxls += int(rows.shape[0])
    print(aabb3d)
    print(n_vxls)
    # assume cubic vxl 2nm edge length

Assume that the data were discretized on the following rectangular grid with cubic voxels of 2 nm edge length.

In [None]:
# Edge length of the (assumed cubic) voxels.
VOXEL_EDGE_NM = 2
# (lower, upper) extent of the grid along x, y, z.
# NOTE(review): presumably read off the bounding box printed by the previous cell - confirm.
GRID_BOUNDS_NM = ((-96, 88), (-96, 94), (-222, 0))
# Number of voxels along each axis.
nx, ny, nz = (int((upper - lower) / VOXEL_EDGE_NM) for lower, upper in GRID_BOUNDS_NM)
grid = np.zeros((nx, ny, nz), np.uint32)
# that grid should intentionally be a cuboid to enable checking correct dimensions


def i_to_xyz(i):
    """Convert a linear voxel index into (x, y, z) grid coordinates.

    The linear index runs fastest along x, then y, then z, i.e.
    ``i = x + nx * y + nx * ny * z`` with the module-level ``nx`` and ``ny``.

    Parameters
    ----------
    i : int or NumPy integer scalar
        Linear voxel index.

    Returns
    -------
    tuple of int
        ``(x, y, z)`` as plain Python integers.
    """
    # Integer divmod avoids the float round trip of int(i / n) and always
    # yields Python ints, regardless of the (NumPy) type of the input.
    z, rem = divmod(int(i), nx * ny)
    y, x = divmod(rem, nx)
    return (x, y, z)

# Label the voxel grid: every dataset in group "1" lists, in column 3, the
# linear indices of the voxels it covers; those voxels get the label key + 1
# (0 stays reserved for voxels that no dataset touches).
with h5py.File(sige_file_path, "r") as h5r:
    for key in h5r["1"].keys():
        ids = np.asarray(h5r["1"][key][:, 3], np.int64)
        # Vectorised equivalent of calling i_to_xyz per voxel: the linear index
        # runs fastest along x, which is Fortran order for a (nx, ny, nz) grid.
        # Out-of-range indices raise a ValueError here.
        x, y, z = np.unravel_index(ids, grid.shape, order="F")
        grid[x, y, z] = int(key) + 1
        print(key)
print(np.shape(grid))
print(np.unique(grid))

In [None]:
# Persist the labelled voxel grid as a gzip-compressed dataset (lowest compression level).
gzip_options = {"compression": "gzip", "compression_opts": 1}
with h5py.File("input.grid.nxs", "w") as h5w:
    h5w.create_dataset("/grid", data=grid, **gzip_options)

In [None]:
# Open the grid file written by the previous cell in the H5Web viewer.
H5Web("input.grid.nxs")