In [None]:
# import libraries
import sys
import os
import pandas as pd
import matplotlib.pylab as plt
from jupyterlab_h5web import H5Web
# import modules with the functionalities offered by CompositionSpace
from compositionspace.utils import get_file_size
from compositionspace.io import get_reconstructed_positions, get_iontypes, get_iontypes_info
from compositionspace.segmentation import ProcessSegmentation

In [None]:
# Working directory that holds the configuration file and receives the results file.
# (To inspect the installed packages, run `%pip list` in a separate cell.)
# os.getcwd() already returns a str, so it is stored directly and reused for the
# message instead of being queried a second time.
MY_PROCESSED_DATA_PATH = os.getcwd()
print(f"Executing compositionspace in the following working directory: {MY_PROCESSED_DATA_PATH}")

In [None]:
# Pair of (reconstruction file, ranging file) names.
# Alternative input pair that was previously assigned here and then immediately
# overwritten (dead store): ("R21_08680-v02.pos", "R21_08680.rrng")
RECONSTRUCTION_AND_RANGING = ("PARAPROBE.Transcoder.Results.SimID.636502001.nxs",
                              "PARAPROBE.Ranger.Results.SimID.636502001.nxs")
# Configuration and results files are both located in the working directory.
config_file_path = f"{MY_PROCESSED_DATA_PATH}/experiment_params.yaml"
results_file_path = f"{MY_PROCESSED_DATA_PATH}/CompositionSpace.Results.nxs"

In [None]:
# Set up the segmentation workflow from the configuration file and the results file path.
# NOTE(review): entry_id=2 presumably selects the entry inside the results file — confirm.
segmentation = ProcessSegmentation(
    config_file_path,
    results_file_path,
    entry_id=2,
    verbose=False)
# Run the PCA step and then the BICs minimization step; judging by the method names
# each step writes its own results (presumably into results_file_path — confirm).
segmentation.perform_pca_and_write_results()
segmentation.perform_bics_minimization_and_write_results()

In [None]:
# Show the results file with the H5Web widget (imported from jupyterlab_h5web);
# as the last expression of the cell, the widget becomes the cell output.
H5Web(results_file_path)

In [None]:
# Re-create the segmentation object (this time without an explicit entry_id)
# and query the cumulative explained variance of the PCA.
segmentation = ProcessSegmentation(
    config_file_path,
    results_file_path,
    verbose=False)
segmentation.get_pca_cumsum()
# NOTE(review): a truncated statement `segmentation.write_pca_` used to follow here.
# It was a bare attribute access without a call, i.e. either a no-op or an
# AttributeError, so it was removed. TODO: call the intended write method once its
# full name is confirmed.

from sklearn.decomposition import PCA
import h5py
import numpy as np

# Build the composition matrix: one row per entry of the total-weight array and one
# column per ion type; each value is the ion-type weight divided by the total weight.
input_file_name = "apm.composition.space.nxs"
src = "/entry1/voxelization"
n_ion_types = 73  # TODO: make flexible instead of hard-coding the number of ion types
EPSILON = 1.0e-6  # move to params
# The context manager closes the file even when reading or validation fails.
with h5py.File(input_file_name, "r") as h5r:
    total_weights = h5r[f"{src}/total"][:]
    composition_matrix = np.zeros([np.shape(total_weights)[0], n_ion_types], np.float64)
    # Only entries with a total weight of at least EPSILON are divided; all other
    # entries keep a composition of exactly zero.
    has_weight = total_weights >= EPSILON
    for ityp in range(n_ion_types):
        ityp_weights = h5r[f"{src}/ion{ityp}/weight"][:]
        if np.shape(ityp_weights) != np.shape(total_weights):
            # ValueError replaces the undefined name LogicError, which raised NameError.
            raise ValueError("Length of iontype-specific and total weight arrays needs to be the same!")
        # np.divide(..., where=...) leaves masked-out entries untouched, so an explicit
        # zero-initialised output buffer is required; without `out=` those entries
        # would contain uninitialised memory.
        fractions = np.zeros(np.shape(total_weights), np.float64)
        np.divide(ityp_weights, total_weights, out=fractions, where=has_weight)
        # Clamp values below EPSILON and NaNs to zero.
        fractions[fractions < EPSILON] = 0.
        fractions[np.isnan(fractions)] = 0.
        composition_matrix[:, ityp] = fractions
print(composition_matrix)

# Fit a PCA with as many components as ion types on the composition matrix and
# accumulate the explained-variance ratios of the components.
run = True
if run:
    # Previously derived via: ratios.drop(['Total_no','vox'], axis=1)
    X_train = composition_matrix
    # Previously sized via: len(spec_lst)
    PCAObj = PCA(n_components=n_ion_types)
    PCATrans = PCAObj.fit_transform(X_train)
    PCACumsumArr = PCAObj.explained_variance_ratio_.cumsum()

# Optional plot of the cumulative explained variance against the number of PCA
# dimensions. Disabled by default; set run = True to create and save the figure.
run = False
if run is True:
    plt.figure(figsize=(5, 5))
    # Dimensions are counted from 1 on the x axis.
    plt.plot(range(1, len(PCACumsumArr) + 1, 1), PCACumsumArr, "-o")
    plt.ylabel("Explained Variance")
    plt.xlabel('Dimensions')
    plt.grid()
    output_path = "output"
    # savefig does not create missing directories, so make sure the target exists.
    os.makedirs(output_path, exist_ok=True)
    output_path = os.path.join(output_path, "PCA_cumsum.png")
    plt.savefig(output_path)
    plt.show()

In [None]:
# Store the cumulative explained variance of the PCA in a new HDF5 file.
output_file_name = "apm.composition.space.pca.nxs"
# The context manager closes the file even if one of the writes below fails,
# so no half-written file stays open in the kernel.
with h5py.File(output_file_name, "w") as h5w:
    # Process group for the PCA step.
    trg = "/entry1/composition_clustering_pca"
    grp = h5w.create_group(trg)
    grp.attrs["NX_class"] = "NXprocess"
    dst = h5w.create_dataset(f"{trg}/sequence_index", data=np.uint64(2))

    # Data group holding the axis (PCA dimension) and the signal (explained variance).
    trg = "/entry1/composition_clustering_pca/result"
    grp = h5w.create_group(trg)
    grp.attrs["NX_class"] = "NXdata"
    grp.attrs["axes"] = "axis_pca_dimension"
    # NOTE(review): NeXus NXdata may expect this attribute to be named
    # "axis_pca_dimension_indices" — confirm against the NXdata definition.
    grp.attrs["axis_pca_dimension"] = np.uint64(0)
    grp.attrs["signal"] = "axis_explained_variance"
    # further attributes
    # to render it a proper NeXus NXdata object

    # Axis values 0 .. n_ion_types - 1, one per PCA dimension.
    axis_dim = np.asarray(np.linspace(0, n_ion_types - 1, num=n_ion_types, endpoint=True), np.uint32)
    dst = h5w.create_dataset(f"{trg}/axis_pca_dimension", compression="gzip", compression_opts=1, data=axis_dim)
    dst.attrs["long_name"] = "Dimension"
    # dst.attrs["unit"] = ""

    # Signal values: the cumulative sum computed from the PCA explained-variance ratios.
    axis_expl_var = np.asarray(PCACumsumArr, np.float64)
    dst = h5w.create_dataset(f"{trg}/axis_explained_variance", compression="gzip", compression_opts=1, data=axis_expl_var)
    dst.attrs["long_name"] = "Explained variance"
    # dst.attrs["unit"] = "1"

In [None]:
# Show the file named by output_file_name with the H5Web widget; as the last
# expression of the cell, the widget becomes the cell output.
H5Web(output_file_name)

In [None]:
# NOTE(review): neither `CompositionClustering` nor `data` is imported or defined in
# the visible cells of this notebook, so on a fresh kernel this cell is expected to
# raise NameError unless they are defined elsewhere — confirm or remove.
comps = CompositionClustering("experiment_params.yaml")
# Called with the voxel ratio file and the first voxel file of `data`.
res = comps.get_PCA_cumsum(data.voxel_ratio_file, data.voxel_files[0])

In [None]:
# Open the first voxel file read-only and list the second attribute value of group "0".
# NOTE(review): `data` is not defined in the visible cells of this notebook — confirm.
with h5py.File(data.voxel_files[0],"r") as hdf:
    # `group` is looked up but not used further in this cell.
    group = hdf.get("Group_sm_vox_xyz_Da_spec")
    group0 = hdf.get("0")
    # Relies on the order in which the attributes of group "0" are returned: the
    # value at index 1 is converted to a list (presumably the species list — confirm).
    spec_lst = list(list(group0.attrs.values())[1])
    print(f"value {spec_lst}, type {type(spec_lst)}, len {len(spec_lst)}")

In [None]:
# Run the BICs minimization on the voxel ratio file and the first voxel file.
# NOTE(review): relies on `comps` and `data`, which are not defined in the visible
# cells other than the `comps` assignment in the earlier legacy cell — confirm.
res = comps.get_bics_minimization(data.voxel_ratio_file, data.voxel_files[0])

In [None]:
## Prepare storage for the results of the BICS minimization via Gaussian mixture

In [None]:
# Create the output file for the BICs minimization step with its process group.
output_file_name = "apm.composition.space.bics.nxs"
# The context manager closes the file even if creating the group or dataset fails.
with h5py.File(output_file_name, "w") as h5w:
    trg = "/entry1/composition_clustering_ic_opt"  # information criterion optimization (minimization)
    grp = h5w.create_group(trg)
    grp.attrs["NX_class"] = "NXprocess"
    dst = h5w.create_dataset(f"{trg}/sequence_index", data=np.uint64(3))

<div class="alert alert-block alert-danger">
Discussion points:<br>
* Modify NXapm_composition_space<br>
* What to show, and how to show it?<br>
* Wording: "Number of cluster" vs. "Number of clusters"?<br>
* Why run the GM several times: once in the BICs loop and once in get composition cluster files?
</div>