In [1]:
# import libraries
import sys
import os
import yaml
import h5py
import pandas as pd
import flatdict as fd
import matplotlib.pylab as plt
from jupyterlab_h5web import H5Web
from IPython.display import Image
# import modules with the functionalities offered by CompositionSpace
sys.path.append("../")
from compositionspace.utils import get_file_size
from compositionspace.visualization import generate_xdmf_for_visualizing_content
from compositionspace.preparation import ProcessPreparation
from compositionspace.autophase import ProcessAutomatedPhaseAssignment
from compositionspace.segmentation import ProcessSegmentation
from compositionspace.clustering import ProcessClustering

In [2]:
# ! pip list
# Directory the kernel was started in; the configuration, input and results
# paths used further below are all built from this value.
MY_PROCESSED_DATA_PATH = os.getcwd()
print("Executing compositionspace in the following working directory: " + MY_PROCESSED_DATA_PATH)

Executing compositionspace in the following working directory: /home/kaiobach/Research/hu_hu_hu/sprint22/compspace-mpie-remaining-fixes/CompositionSpace/tests


## Load reconstruction and ranging and voxelize with rectangular transfer function without creating slices

Prerequisite: Properly formatted reconstructed dataset and ranging definitions:<br>
* **Either** as NeXus/HDF5 files generated with the paraprobe-toolbox
* **or** as a pair of an APT or POS reconstruction file and an RRNG ranging definitions file.

In [3]:
# simid is used in the results file name and, in the NeXus branch, to pick
# the example directory (1-based index into workdir).
simid = 1
use_nexus = False

if not use_nexus:
    # APT or POS + RRNG
    reconstruction_path = f"{MY_PROCESSED_DATA_PATH}/data/Si.pos"
    ranging_path = f"{MY_PROCESSED_DATA_PATH}/data/Si.RRNG"
else:
    # NeXus/HDF and example for typical file locations with paraprobe-toolbox
    paraprobe_toolbox_root = ""
    workdir = [f"{paraprobe_toolbox_root}/teaching/example_analyses/usa_denton_smith",
               f"{paraprobe_toolbox_root}/teaching/example_analyses/iuc09_saksena"]
    example_dir = workdir[simid - 1]
    reconstruction_path = f"{example_dir}/PARAPROBE.Transcoder.Results.SimID.1.nxs"
    ranging_path = f"{example_dir}/PARAPROBE.Ranger.Results.SimID.1.nxs"

# Pair of (reconstruction file, ranging definitions file) consumed by later cells.
RECONSTRUCTION_AND_RANGING = (reconstruction_path, ranging_path)

# Configuration input and results output, both located in the working directory.
config_file_path = f"{MY_PROCESSED_DATA_PATH}/experiment_params.yaml"
results_file_path = f"{MY_PROCESSED_DATA_PATH}/CompositionSpace.Results.{simid}.nxs"
print(config_file_path, results_file_path, sep="\n")

/home/kaiobach/Research/hu_hu_hu/sprint22/compspace-mpie-remaining-fixes/CompositionSpace/tests/experiment_params.yaml
/home/kaiobach/Research/hu_hu_hu/sprint22/compspace-mpie-remaining-fixes/CompositionSpace/tests/CompositionSpace.Results.1.nxs


In [4]:
# Report the size of both input files: reconstruction first, ranging definitions second.
for input_file_path in RECONSTRUCTION_AND_RANGING:
    get_file_size(input_file_path)
# Optional interactive inspection of the inputs with the H5Web widget
# (presumably only useful for the NeXus/HDF5 inputs, i.e. use_nexus = True; confirm):
# H5Web(RECONSTRUCTION_AND_RANGING[0])
# H5Web(RECONSTRUCTION_AND_RANGING[1])

14.423 MiB
0.001 MiB


In [5]:
# Create the ProcessPreparation object for the load-and-voxelize stage of this
# section, passing the YAML configuration path and the results file path.
# NOTE(review): entry_id=1 is hard-coded here and in every later Process* cell,
# independent of simid; presumably it selects an entry in the results file (confirm).
voxelize = ProcessPreparation(config_file_path, results_file_path, entry_id=1, verbose=True)

In [6]:
# Run the preparation stage on the reconstruction file (tuple item 0) and the
# ranging definitions file (tuple item 1) chosen in the configuration cell.
voxelize.run(recon_file_path=RECONSTRUCTION_AND_RANGING[0],
             range_file_path=RECONSTRUCTION_AND_RANGING[1])

Load reconstructed positions shape (945211, 3), type <class 'numpy.ndarray'>, dtype float32
Found 25 ranging definitions, performed reduction to 25 unique ones
/home/kaiobach/Research/hu_hu_hu/sprint22/compspace-mpie-remaining-fixes/CompositionSpace/tests/data/Si.RRNG parsed successfully
('unknown', np.uint8(0), array([0.    , 0.0005]))
('Si ++', np.uint8(1), array([13.8745, 14.241 ]))
('Si +', np.uint8(2), array([27.856, 28.595]))
('Si +', np.uint8(3), array([28.826, 29.255]))
('Si +', np.uint8(4), array([29.783, 30.252]))
('Si ++', np.uint8(5), array([14.407, 14.643]))
('Si ++', np.uint8(6), array([14.912, 15.171]))
('Cr +', np.uint8(7), array([51.699, 54.243]))
('Cr +', np.uint8(8), array([49.612, 50.526]))
('Cr ++', np.uint8(9), array([25.771, 27.211]))
('Cr ++', np.uint8(10), array([24.895, 25.445]))
('Cu +', np.uint8(11), array([62.567, 63.496]))
('Cu +', np.uint8(12), array([64.619, 65.548]))
('C +', np.uint8(13), array([11.866, 12.198]))
('C ++', np.uint8(14), array([5.896, 6.1

In [7]:
# Report the size of the results file after the preparation stage.
get_file_size(results_file_path)
# Optional: uncomment to browse the results file with the H5Web widget.
# H5Web(results_file_path)

2.312 MiB


Voxelization is performed on elements, not on ion types, i.e. using an atomic decomposition!

## Automated phase assignment

In [8]:
# Automated phase assignment stage: constructed with the same configuration and
# results file paths as the preparation stage, then executed right away.
# run() takes no arguments here; the stage only receives those two paths.
autophase = ProcessAutomatedPhaseAssignment(config_file_path, results_file_path, entry_id=1, verbose=True)
autophase.run()

Composition matrix has 5 elements
Populating composition table for element1
Populating composition table for element2
Populating composition table for element3
Populating composition table for element4
Populating composition table for element5
sorted_indices [5 2 4 1 3 0] in decreasing feature importance
sorted_index, feature_importance[sorted_index]
5, 0.8425501856917044
2, 0.08796359545615835
4, 0.06032072282260312
1, 0.005201161349588438
3, 0.003964334679945743
0, 0.0


In [9]:
# Report the size of the results file after the automated phase assignment stage.
get_file_size(results_file_path)
# Optional: uncomment to browse the results file with the H5Web widget.
# H5Web(results_file_path)

2.325 MiB


## Segmentation PCA and IC minimization

In [10]:
# Segmentation stage (see the section heading): constructed with the same
# configuration and results file paths as the earlier stages, then executed.
segmentation = ProcessSegmentation(config_file_path, results_file_path, entry_id=1, verbose=True)
segmentation.run()

Composition matrix has 5 elements
Populating composition table for element1
Populating composition table for element2
Populating composition table for element3
Populating composition table for element4
Populating composition table for element5
Composition matrix has 5 elements
Populating composition table for element1
Populating composition table for element2
Populating composition table for element3
Populating composition table for element4
Populating composition table for element5
Using results with automated phase assignment
np.shape(X_train) (158400, 2)
GaussianMixture ML analysis with n_cluster 1
Using results with automated phase assignment
np.shape(X_train) (158400, 2)
GaussianMixture ML analysis with n_cluster 2
Using results with automated phase assignment
np.shape(X_train) (158400, 2)
GaussianMixture ML analysis with n_cluster 3
Using results with automated phase assignment
np.shape(X_train) (158400, 2)
GaussianMixture ML analysis with n_cluster 4
Using results with automated

In [11]:
# Report the size of the results file after the segmentation stage.
get_file_size(results_file_path)
# Optional: uncomment to browse the results file with the H5Web widget.
# H5Web(results_file_path)

2.516 MiB


## DBScan clustering

In [12]:
# Clustering stage (see the section heading): constructed with the same
# configuration and results file paths as the earlier stages, then executed.
clustering = ProcessClustering(config_file_path, results_file_path, entry_id=1, verbose=True)
clustering.run()

DBScan configuration: eps 3 nm, min_samples 5
['cluster_analysis0', 'cluster_analysis1', 'cluster_analysis2', 'cluster_analysis3', 'cluster_analysis4', 'result', 'sequence_index']
cluster_analysis0
ic_run_id 0 >>>>
np.shape(all_vxl_pos) (158400, 3) list(set(phase_identifier) [np.uint64(0)]
	Loop 0
	np.shape(trg_vxl_pos) (158400, 3)
	np.shape(trg_vxl_idx) (158400,)
	1
	type(db.labels_) <class 'numpy.ndarray'> dtype int64
[0]
cluster_analysis1
ic_run_id 1 >>>>
np.shape(all_vxl_pos) (158400, 3) list(set(phase_identifier) [np.uint64(0), np.uint64(1)]
	Loop 0
	np.shape(trg_vxl_pos) (110080, 3)
	np.shape(trg_vxl_idx) (110080,)
	1
	type(db.labels_) <class 'numpy.ndarray'> dtype int64
[0]
	Loop 1
	np.shape(trg_vxl_pos) (48320, 3)
	np.shape(trg_vxl_idx) (48320,)
	1
	type(db.labels_) <class 'numpy.ndarray'> dtype int64
[0]
cluster_analysis2
ic_run_id 2 >>>>
np.shape(all_vxl_pos) (158400, 3) list(set(phase_identifier) [np.uint64(0), np.uint64(1), np.uint64(2)]
	Loop 0
	np.shape(trg_vxl_pos) (1100

## Default plotting to explore the results using HDF5/XDMF via Paraview

In [14]:
# Generate the XDMF description for the results file so that its content can be
# explored in Paraview, as described in this section.
generate_xdmf_for_visualizing_content(results_file_path)

Inspecting /home/kaiobach/Research/hu_hu_hu/sprint22/compspace-mpie-remaining-fixes/CompositionSpace/tests/CompositionSpace.Results.1.nxs...
Found dimensionality, uint64, (), 3
Found extent, uint64, (3,), [45 44 80]
Found origin, float64, (3,), [-23. -20. -78.]
Found cell_dimensions, float64, (3,), [1. 1. 1.]


In [None]:
# Display the Paraview screenshot that belongs to the results of this simid.
# The file name is relative, so it is resolved against the kernel's working directory.
Image(f"CompositionSpace.Results.{simid}.nxs.xdmf.png")

This example screenshot of an interactive visualization was created with https://www.paraview.org/.<br>
To reproduce it, drag and drop the XDMF file into Paraview. Note that the original location of the<br>
HDF5 file where the results are stored (the heavy data) is encoded with absolute paths in the XDMF file.<br>
Make sure to choose the legacy "XDMFReader", i.e. not the "XdmfReader3 S" or "XdmfReader3 T" readers!<br>

## Inspect the results interactively with H5Web

In [18]:
# Report the final size of the results file, then open it in the H5Web widget.
# H5Web(...) is the last expression of the cell, so the notebook renders the widget.
get_file_size(results_file_path)
H5Web(results_file_path)

3.923 MiB


<jupyterlab_h5web.widget.H5Web object>

## Correlation plots

As exemplified by A. Saxena here: https://github.com/Alaukiksaxena/CompositionSpaceNFDI/commit/44a81ffe9e9bd994f41e1c501b9d1630f7dcf019

## Meshing

Use MeshAnalysis.ipynb