In [4]:
import sys 
sys.path.append('../')
from read_file import *
import dionysus as d

In [23]:
# Example input: three structure files whose topological signatures are built
# and compared in the cells below.
struct_path = '../files/mof_structs/'
structures = [
    struct_path + cif_name
    for cif_name in (
        'str_m4_o1_o1_acs_sym.10.cif',
        'str_m4_o1_o1_acs_sym.46.cif',
        'str_m4_o1_o14_acs_sym.68.cif',
    )
]

In [25]:
def structure_to_pd(filename, supercell_size):
    """Build the persistence diagrams (all dimensions) for one structure file.

    Args:
        filename: Path to the structure file to read.
        supercell_size: Dimension (length/width/height), in Angstrom, of the
            cubic supercell to create. Any falsy value (e.g. None) means no
            supercell is requested.

    Return:
        List[pd_objects], i.e. the result of ``construct_pds`` applied to the
        coordinates read from ``filename``.

    """
    use_supercell = bool(supercell_size)
    # When no supercell is requested, the size is always forwarded as None.
    cell_size = supercell_size if use_supercell else None
    points = read_data(filename, size=cell_size, supercell=use_supercell)
    return construct_pds(points)

# Only a handful of structures are used here, so every set of diagrams is
# computed eagerly and kept in memory. For larger collections, calculate once
# and store the results instead.
all_dgms = [
    structure_to_pd(cif_path, supercell_size=None)
    for cif_path in structures
]

In [31]:
def compute_dist(dgms1, dgms2, n, q=1, delta=0.05):
    """Wasserstein distance between the n-th diagrams of two structures.

    Args:
        dgms1: all PDs (dionysus object) for the first structure
        dgms2: all PDs (dionysus object) for the second structure
        n: index of the persistence diagram to compare (e.g., 0, 1, 2)
        q: order of the Wasserstein distance, forwarded as ``q``
        delta: approximation term, forwarded as ``delta``

    Return:
        Distance between the two topological signatures, as returned by
        ``d.wasserstein_distance``
    """
    wasserstein = d.wasserstein_distance
    return wasserstein(dgms1[n], dgms2[n], q=q, delta=delta)

In [38]:
# Enumerate every unordered pair of topological signatures.
from itertools import combinations
combos = combinations(all_dgms, 2)

# Print the distance for each pair, comparing the diagrams at index 2.
# The pairs are independent of one another, so this loop can be easily
# parallelized.
for combo in combos:
    dgms1, dgms2 = combo
    wdist = compute_dist(dgms1, dgms2, n=2)
    print(wdist)

9.752080917358398
13.34542465209961
10.466400146484375
