This is the test code for checking the speed of neighbor_list ase vs matscipy vs torch_geometric vs pymatgen

In [3]:
from ase.io import read
from ase.neighborlist import primitive_neighbor_list # ase
from matscipy.neighbours import neighbour_list # matscipy
# torch_geometric
import torch
from torch_geometric.data import Data
from torch_geometric.transforms import RadiusGraph 

# pymatgen
from pymatgen.core import Structure
from pymatgen.analysis.local_env import CutOffDictNN
from pymatgen.analysis.graphs import StructureGraph

import sys
import numpy as np
# from mace.data.neighborhood import get_neighborhood
from sevenn.train.dataload import unlabeled_atoms_to_graph
import matplotlib.pyplot as plt

In [63]:
# atoms = read('POSCAR_Li28La12Zr8O48')
# atoms = read('Li20Ge2P4S24.cif') # 1714
# atoms = read('Li20Ge2P4S24_112.cif') # 3428
atoms = read('Li20Ge2P4S24_122.cif') # 6856
# atoms = read('Li20Ge2P4S24_222.cif') # 13712
pos = atoms.get_positions()
cell = np.array(atoms.get_cell())
cutoff = 5.0
pbc = atoms.get_pbc()

## Time check
---
### ASE

In [64]:
%%timeit -n 30
# ase.neighborlist
edge_src, edge_dst, edge_vec, shifts = primitive_neighbor_list(
        'ijDS', pbc, cell, pos, cutoff, self_interaction=False
    )

41.1 ms ± 6.17 ms per loop (mean ± std. dev. of 7 runs, 30 loops each)


### Matscipy

In [61]:
%%timeit -n 50
# matscipy.neighbours
edge_src, edge_dst, edge_vec, shifts = neighbour_list(
        quantities="ijDS",
        pbc=pbc,
        cell=cell,
        positions=pos,
        cutoff=5.0,
        # self_interaction=True,  # we want edges from atom to itself in different periodic images
        # use_scaled_positions=False,  # positions are not scaled positions
    )

4.09 ms ± 232 µs per loop (mean ± std. dev. of 7 runs, 50 loops each)


## Identical Graph?
---

### ASE

In [17]:
edge_src_ase, edge_dst_ase, edge_vec_ase, shifts_ase = primitive_neighbor_list(
        'ijDS', pbc, cell, pos, cutoff, self_interaction=False
    )

In [18]:
print(edge_src_ase, len(edge_src_ase))
print(edge_dst_ase, len(edge_dst_ase))
print(edge_vec_ase, len(edge_vec_ase))
print(shifts_ase, len(shifts_ase))

[ 0  0  0 ... 49 49 49] 1714
[12  7 30 ... 32 33 32] 1714
[[ 1.14124607  1.26613539 -4.34042884]
 [ 1.222837    3.17278221 -1.55589879]
 [-1.04476372  3.60125877 -1.57947462]
 ...
 [-2.12953627 -3.38584678 -2.45500792]
 [-3.74929744 -0.49800834 -1.47275623]
 [ 0.39346799  4.24146521 -2.45500792]] 1714
[[ 0  1 -1]
 [ 0  1 -1]
 [ 0  1 -1]
 ...
 [ 0  0  0]
 [-1  0  0]
 [ 0  1  0]] 1714


### Matscipy

In [136]:
edge_src_mat, edge_dst_mat, edge_vec_mat, shifts_mat = neighbour_list(
        quantities="ijDS",
        pbc=pbc,
        cell=cell,
        positions=pos,
        cutoff=5.0,
        # self_interaction=True,  # we want edges from atom to itself in different periodic images
        # use_scaled_positions=False,  # positions are not scaled positions
    )

In [137]:
print(edge_src_mat, len(edge_src_mat))
print(edge_dst_mat, len(edge_dst_mat))
print(edge_vec_mat, len(edge_vec_mat))
print(shifts_mat, len(shifts_mat))

[ 0  0  0 ... 95 95 95] 4288
[ 6  9 10 ... 49 66 89] 4288
[[-4.21951352  2.39852148  0.        ]
 [-2.39852148 -4.21951352  0.        ]
 [-2.39852148 -2.39852148  3.1754265 ]
 ...
 [ 2.4964684   0.57833684  1.906018  ]
 [ 4.31059092 -1.26880967  0.24680685]
 [ 0.95834441  0.95834441  4.54121554]] 4288
[[ 0  0 -1]
 [ 0  0 -1]
 [ 0  0 -1]
 ...
 [ 0  1  1]
 [ 0  1  1]
 [ 0  1  1]] 4288


### Torch_geometric

In [138]:
pos_tensor = torch.tensor(pos, dtype=torch.float)
data = Data(pos=pos_tensor)
data = RadiusGraph(5.0)(data)

In [115]:
structure = Structure.from_file("POSCAR_Li28La12Zr8O48")
cutoff = 5.0

# Use a cutoff-based nearest neighbors strategy
nn_strategy = CutOffDictNN(cut_off_dict={"Li": cutoff, "La": cutoff, "Zr": cutoff, "O": cutoff})
# structure_graph = StructureGraph.with_local_env_strategy(structure, nn_strategy)

# Extract edges (source, destination) and edge vectors
# edges = structure_graph.graph.edges(data=True)
# edge_src, edge_dst, edge_vec = [], [], []

# for u, v, d in edges:
#     edge_src.append(u)
#     edge_dst.append(v)
#     edge_vec.append(d["to_jimage"])  # Fractional lattice vector

# print("Pymatgen edges:", len(edge_src))

ValueError: not enough values to unpack (expected 2, got 1)

## Compare graphs
---

In [142]:
def compare_graphs(edge_vec_1, edge_src_1, edge_dst_1, 
                   edge_vec_2, edge_src_2, edge_dst_2):
    """
    Compare two graphs (ASE and matscipy) based on edge vectors, sources, and destinations.

    Parameters:
        edge_vec_1: np.ndarray
            Edge vectors from ASE.
        edge_src_1: np.ndarray
            Source nodes from ASE.
        edge_dst_1: np.ndarray
            Destination nodes from ASE.
        edge_vec_2: np.ndarray
            Edge vectors from matscipy.
        edge_src_2: np.ndarray
            Source nodes from matscipy.
        edge_dst_2: np.ndarray
            Destination nodes from matscipy.
    """
    # Sort edge_vec and get sorting indices
    sorted_indices_ase = np.lexsort(edge_vec_1.T)
    sorted_indices_mat = np.lexsort(edge_vec_2.T)

    # Sorted edge_vec
    sorted_vec_ase = edge_vec_1[sorted_indices_ase]
    sorted_vec_mat = edge_vec_2[sorted_indices_mat]

    # Compare sorted edge_vec
    are_equivalent = np.allclose(sorted_vec_ase, sorted_vec_mat, atol=1e-6)
    print("Are the edge_vec matrices row-equivalent?", are_equivalent)

    if not are_equivalent:
        # Find rows that differ
        differences = np.abs(sorted_vec_ase - sorted_vec_mat)
        differing_rows = np.where(np.max(differences, axis=1) > 1e-6)[0]

        print("\nRows that differ (exceeding atol):")
        for row in differing_rows:
            print(f"ASE: {sorted_vec_ase[row]}, matscipy: {sorted_vec_mat[row]}, difference: {differences[row]}")

    if are_equivalent:
        # Match edge_src and edge_dst for ASE
        sorted_src_1 = edge_src_1[sorted_indices_ase]
        sorted_dst_1 = edge_dst_1[sorted_indices_ase]

        # Match edge_src and edge_dst for matscipy
        sorted_src_2 = edge_src_2[sorted_indices_mat]
        sorted_dst_2 = edge_dst_2[sorted_indices_mat]

        # Check if the sources and destinations align
        sources_match = np.array_equal(sorted_src_1, sorted_src_2)
        destinations_match = np.array_equal(sorted_dst_1, sorted_dst_2)

        # Check if the sources and destinations align
        sources_match = np.array_equal(sorted_src_1, sorted_src_2)
        destinations_match = np.array_equal(sorted_dst_1, sorted_dst_2)

        if sources_match and destinations_match:
            print("Two graphs have the same node indices.")
        else:
            print("The graphs have different node indices.")

        # Print edges side by side for comparison
        print("ASE (src, dst) | matscipy (src, dst)")
        print("-------------------------------------")
        for ase_edge, mat_edge in zip(zip(sorted_src_1, sorted_dst_1), zip(sorted_src_2, sorted_dst_2)):
            print(f"{ase_edge} | {mat_edge}")
    else:
        print("The graphs are not equivalent.")

def compare_graphs_ver2(edge_vec_1, edge_src_1, edge_dst_1, shift_1,
                   edge_vec_2, edge_src_2, edge_dst_2, shift_2):
    """
    Compare two graphs (ASE and matscipy) based on edge vectors, sources, and destinations.

    Parameters:
        edge_vec_1: np.ndarray
            Edge vectors from ASE.
        edge_src_1: np.ndarray
            Source nodes from ASE.
        edge_dst_1: np.ndarray
            Destination nodes from ASE.
        edge_vec_2: np.ndarray
            Edge vectors from matscipy.
        edge_src_2: np.ndarray
            Source nodes from matscipy.
        edge_dst_2: np.ndarray
            Destination nodes from matscipy.
    """
    # Sort edge_vec and get sorting indices
    sorted_indices_ase = np.lexsort(edge_vec_1.T)
    sorted_indices_mat = np.lexsort(edge_vec_2.T)

    # Sorted edge_vec
    sorted_vec_ase = edge_vec_1[sorted_indices_ase]
    sorted_vec_mat = edge_vec_2[sorted_indices_mat]

    # Sorted shifts
    sorted_shifts_ase = shift_1[sorted_indices_ase]
    sorted_shifts_mat = shift_2[sorted_indices_mat]

    for i in range(len(sorted_vec_ase)):
        if np.allclose(sorted_vec_ase[i], sorted_vec_mat[i], atol=1e-6):
            # print(f"ASE: {sorted_vec_ase[i]}, matscipy: {sorted_vec_mat[i]}")
            # print(f"ASE shift: {sorted_shifts_ase[i]}, matscipy shift: {sorted_shifts_mat[i]}")
            # print()
            if not np.array_equal(sorted_shifts_ase[i], sorted_shifts_mat[i]):
                print(f"ASE: {sorted_vec_ase[i]}, matscipy: {sorted_vec_mat[i]}")
                print(f"ASE shift: {sorted_shifts_ase[i]}, matscipy shift: {sorted_shifts_mat[i]}")
                print()

    

In [140]:
compare_graphs(edge_vec_ase, edge_src_ase, edge_dst_ase, edge_vec_mat, edge_src_mat, edge_dst_mat)

Are the edge_vec matrices row-equivalent? True
Two graphs have the same node indices.
ASE (src, dst) | matscipy (src, dst)
-------------------------------------
(35, 8) | (35, 8)
(33, 4) | (33, 4)
(7, 33) | (7, 33)
(11, 38) | (11, 38)
(34, 9) | (34, 9)
(36, 10) | (36, 10)
(37, 11) | (37, 11)
(38, 6) | (38, 6)
(10, 39) | (10, 39)
(5, 35) | (5, 35)
(9, 36) | (9, 36)
(6, 32) | (6, 32)
(32, 5) | (32, 5)
(8, 37) | (8, 37)
(4, 34) | (4, 34)
(39, 7) | (39, 7)
(16, 14) | (16, 14)
(24, 22) | (24, 22)
(18, 12) | (18, 12)
(27, 21) | (27, 21)
(19, 13) | (19, 13)
(25, 23) | (25, 23)
(26, 20) | (26, 20)
(17, 15) | (17, 15)
(90, 22) | (90, 22)
(81, 13) | (81, 13)
(26, 94) | (26, 94)
(17, 85) | (17, 85)
(16, 84) | (16, 84)
(82, 14) | (82, 14)
(89, 21) | (89, 21)
(80, 12) | (80, 12)
(27, 95) | (27, 95)
(18, 86) | (18, 86)
(25, 93) | (25, 93)
(91, 23) | (91, 23)
(19, 87) | (19, 87)
(88, 20) | (88, 20)
(24, 92) | (24, 92)
(83, 15) | (83, 15)
(81, 87) | (81, 87)
(82, 84) | (82, 84)
(89, 95) | (89, 95)
(88

In [143]:
compare_graphs_ver2(edge_vec_ase, edge_src_ase, edge_dst_ase, shifts_ase, edge_vec_mat, edge_src_mat, edge_dst_mat, shifts_mat)