In [15]:
# To visualize the data, we will use the pymatviz package
# ! pip install pymatviz

In [5]:
import numpy as np
import pandas as pd
from ast import literal_eval

from pymatgen.core import Structure
from pymatviz import StructureWidget
from pymatviz import structure_3d

from symmetry import find_spg
from pyxtal.symmetry import Group

In [6]:
#data = pd.read_csv('/home/user_wanglei/private/datafile/crystalgpt/csp/alex20s/csp-8c3cb/adam_bs_8000_lr_0.001_decay_0_clip_1_A_119_W_28_N_21_a_1_w_1_l_1_Nf_5_Kx_16_Kl_4_h0_256_l_16_H_16_k_64_m_64_e_32_drop_0.1_0.1//relaxed_structures_CsPbI3_200_ehull.csv')

#data = pd.read_csv('/home/user_wanglei/private/datafile/crystalgpt/csp/alex20s/csp-07d3f/adam_bs_8000_lr_0.0001_decay_0_clip_1_A_119_W_28_N_21_a_1_w_1_l_1_Nf_5_Kx_16_Kl_4_h0_256_l_16_H_16_k_64_m_64_e_32_drop_0.1_0.1//relaxed_structures_Cd3As2_4100_ehull.csv')

#data = pd.read_csv('/home/user_wanglei/private/datafile/crystalgpt/csp/alex20s/csp-0d128/adam_bs_8000_lr_0.0001_decay_0_clip_1_A_119_W_28_N_21_a_1_w_1_l_1_Nf_5_Kx_16_Kl_4_h0_256_l_16_H_8_k_16_m_128_e_128_drop_0.1_0.1/relaxed_structures_Ti13Al9Co8_ehull.csv')

#data = pd.read_csv('/home/user_wanglei/private/datafile/crystalgpt/csp/alex20s/csp-84e88/Ti13Al9Co8_orb-v2_ppo_5_a_0.01_b_0_spg_160_g_0_w_5_a_5_xyz_1_l_1_adam_bs_1000_lr_1e-05_Nf_5_Kx_16_Kl_4_h0_256_l_16_H_8_k_16_m_128_e_128_drop_0//relaxed_structures_Ti13Al9Co8_010070_ehull.csv')

data = pd.read_csv('/home/user_wanglei/private/datafile/crystalgpt/csp/alex20s/csp-f5171/adam_bs_8000_lr_0.0001_decay_0_clip_1_A_119_W_28_N_21_a_1_w_1_l_1_Nf_5_Kx_16_Kl_4_h0_256_l_16_H_8_k_32_m_256_e_256_drop_0.1_0.1/relaxed_structures_Nb3Cl8_ehull.csv')


In [7]:
data = data.sort_values(by='relaxed_ehull', ascending=True).reset_index(drop=True)


In [8]:
cif_strings = data['relaxed_cif']   # string of pymatgen structure dict

In [9]:
structures =[Structure.from_dict(literal_eval(cif)) for cif in cif_strings]

In [10]:
for i, struct in enumerate(structures):
    
    sg_num = find_spg(struct)
    
    g = Group(sg_num)
    # Get ehull values if available
    try:
        unrelaxed_ehull = data.loc[i, 'unrelaxed_ehull'] if 'unrelaxed_ehull' in data.columns else None
    except Exception:
        unrelaxed_ehull = None
    try:
        relaxed_ehull = data.loc[i, 'relaxed_ehull'] if 'relaxed_ehull' in data.columns else None
    except Exception:
        relaxed_ehull = None

    reduced_formula = struct.composition.reduced_formula
    num_atoms = len(struct)

    print(f"{i}: {reduced_formula} {num_atoms}, {sg_num} {g.symbol}, {unrelaxed_ehull:.6f}, {relaxed_ehull:.6f}")




0: Nb3Cl8 22, 164 P-3m1, 1.392900, -0.005674
1: Nb3Cl8 11, 156 P3m1, 1.793844, -0.005385
2: Nb3Cl8 22, 1 P1, 3.256946, 0.179240
3: Nb3Cl8 22, 1 P1, 1.923617, 0.329090
4: Nb3Cl8 22, 1 P1, 3.731092, 0.341069
5: Nb3Cl8 22, 1 P1, 2.686131, 0.448291
6: Nb3Cl8 22, 1 P1, 15.341636, 0.456991
7: Nb3Cl8 11, 12 C2/m, 1.966177, 0.461197
8: Nb3Cl8 11, 8 Cm, 1.995100, 0.461717
9: Nb3Cl8 22, 1 P1, 67.874143, 0.462954
10: Nb3Cl8 11, 1 P1, 1.194702, 0.470199
11: Nb3Cl8 11, 1 P1, 0.891580, 0.496094
12: Nb3Cl8 11, 164 P-3m1, 1.570044, 0.497224
13: Nb3Cl8 11, 1 P1, 4.426661, 0.498822
14: Nb3Cl8 11, 8 Cm, 1.615744, 0.500835
15: Nb3Cl8 22, 1 P1, 2.182288, 0.616266
16: Nb3Cl8 22, 1 P1, 1.750665, 0.618515
17: Nb3Cl8 22, 1 P1, 4.181227, 0.625537
18: Nb3Cl8 22, 1 P1, 4.738461, 0.625680
19: Nb3Cl8 22, 1 P1, 645.299030, 0.628368


In [11]:
from pymatgen.analysis.structure_matcher import StructureMatcher


structures = structures[:50]
 
# Create a StructureMatcher instance with reasonable defaults
matcher = StructureMatcher()

unique_structures = []
unique_indices = []

for i, s in enumerate(structures):
    is_unique = True
    for us in unique_structures:
        if matcher.fit(us, s):
            is_unique = False
            break
    if is_unique:
        unique_structures.append(s)
        unique_indices.append(i)

# Print summary of unique structures
for idx, s in zip(unique_indices, unique_structures):
    sg_num = find_spg(s)
    g = Group(sg_num)
    reduced_formula = s.composition.reduced_formula
    num_atoms = len(s)
    try:
        unrelaxed_ehull = data.loc[idx, 'unrelaxed_ehull'] if 'unrelaxed_ehull' in data.columns else None
    except Exception:
        unrelaxed_ehull = None
    try:
        relaxed_ehull = data.loc[idx, 'relaxed_ehull'] if 'relaxed_ehull' in data.columns else None
    except Exception:
        relaxed_ehull = None
    print(f"Unique {idx}: {reduced_formula} {num_atoms}, {sg_num} {g.symbol}, {unrelaxed_ehull:.6f}, {relaxed_ehull:.6f}")


Unique 0: Nb3Cl8 22, 164 P-3m1, 1.392900, -0.005674
Unique 1: Nb3Cl8 11, 156 P3m1, 1.793844, -0.005385
Unique 2: Nb3Cl8 22, 1 P1, 3.256946, 0.179240
Unique 3: Nb3Cl8 22, 1 P1, 1.923617, 0.329090
Unique 4: Nb3Cl8 22, 1 P1, 3.731092, 0.341069
Unique 5: Nb3Cl8 22, 1 P1, 2.686131, 0.448291
Unique 6: Nb3Cl8 22, 1 P1, 15.341636, 0.456991
Unique 7: Nb3Cl8 11, 12 C2/m, 1.966177, 0.461197
Unique 9: Nb3Cl8 22, 1 P1, 67.874143, 0.462954
Unique 11: Nb3Cl8 11, 1 P1, 0.891580, 0.496094
Unique 15: Nb3Cl8 22, 1 P1, 2.182288, 0.616266
Unique 16: Nb3Cl8 22, 1 P1, 1.750665, 0.618515
Unique 17: Nb3Cl8 22, 1 P1, 4.181227, 0.625537
Unique 19: Nb3Cl8 22, 1 P1, 645.299030, 0.628368


In [12]:
structures = unique_structures

In [17]:
matcher = StructureMatcher()
for s in structures:
    print (matcher.fit(nb3cl8, s))


False
False
False
False
False
False
False
False
False
False
False
False
False
False


In [183]:
nb3cl8, structures[0]

(Structure Summary
 Lattice
     abc : 6.801299020000001 6.80129902 12.76792964
  angles : 90.00000000000001 90.0 120.00000000000001
  volume : 511.4872950115204
       A : np.float64(-3.400649510000001) np.float64(-5.890097730054207) np.float64(-8.329189074887031e-16)
       B : np.float64(-3.400649509999999) np.float64(5.890097730054207) np.float64(4.1645945374435153e-16)
       C : np.float64(0.0) np.float64(0.0) np.float64(-12.76792964)
     pbc : True True True
 PeriodicSite: Nb0 (Nb2.67+) (-4.806, -2.775, -3.138) [0.9422, 0.4711, 0.2458]
 PeriodicSite: Nb0 (Nb2.67+) (-1.995, 2.775, -9.63) [0.05779, 0.5289, 0.7542]
 PeriodicSite: Nb0 (Nb2.67+) (-1.995, -2.775, -3.138) [0.5289, 0.05779, 0.2458]
 PeriodicSite: Nb0 (Nb2.67+) (-4.806, 2.775, -9.63) [0.4711, 0.9422, 0.7542]
 PeriodicSite: Nb0 (Nb2.67+) (-3.401, -0.3404, -3.138) [0.5289, 0.4711, 0.2458]
 PeriodicSite: Nb0 (Nb2.67+) (-3.401, 0.3404, -9.63) [0.4711, 0.5289, 0.7542]
 PeriodicSite: Cl1 (Cl-) (-3.401, -3.946, -10.98) [0.835,

In [205]:

structure_3d([nb3cl8, structures[0], gt_structure], n_cols=1)

In [None]:
from pyxtal import pyxtal
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer

tol = 0.01
crystal = structures[0]
spga = SpacegroupAnalyzer(crystal, symprec=tol)
crystal = spga.get_refined_structure()
c = pyxtal()
c.from_seed(crystal, tol=tol)
c



------Crystal from Seed------
Dimension: 3
Composition: Nb6Cl16
Group: P -3 m 1 (164)
  6.8458,   6.8458,  13.5517,  90.0000,  90.0000, 120.0000, trigonal
Wyckoff sites:
	Nb @ [ 0.4716 -0.4716  0.2578], WP [6i] Site [.m.]
	Cl @ [ 0.3333  0.6667  0.8420], WP [2d] Site [3m.]
	Cl @ [ 0.3333  0.6667  0.3929], WP [2d] Site [3m.]
	Cl @ [ 0.1699 -0.1699  0.1367], WP [6i] Site [.m.]
	Cl @ [ 0.8348 -0.8348  0.3572], WP [6i] Site [.m.]

In [16]:
tol = 0.01
crystal = nb3cl8
spga = SpacegroupAnalyzer(crystal, symprec=tol)
crystal = spga.get_refined_structure()
c = pyxtal()
c.from_seed(crystal, tol=tol)
c

NameError: name 'SpacegroupAnalyzer' is not defined

In [32]:
# Output the cif string for the first structure using pymatgen's Structure to_cif method
from pymatgen.io.cif import CifWriter

# Get the first structure in the list
first_structure = structures[0]

# Create a CIF writer and get string output
cif_writer = CifWriter(first_structure)
cif_output = cif_writer.__str__()

print(cif_output)


# generated using pymatgen
data_Nb3Cl8
_symmetry_space_group_name_H-M   'P 1'
_cell_length_a   6.84593475
_cell_length_b   6.84561907
_cell_length_c   13.55169916
_cell_angle_alpha   89.99934161
_cell_angle_beta   90.00943785
_cell_angle_gamma   120.00387625
_symmetry_Int_Tables_number   1
_chemical_formula_structural   Nb3Cl8
_chemical_formula_sum   'Nb6 Cl16'
_cell_volume   549.98759769
_cell_formula_units_Z   2
loop_
 _symmetry_equiv_pos_site_id
 _symmetry_equiv_pos_as_xyz
  1  'x, y, z'
loop_
 _atom_site_type_symbol
 _atom_site_label
 _atom_site_symmetry_multiplicity
 _atom_site_fract_x
 _atom_site_fract_y
 _atom_site_fract_z
 _atom_site_occupancy
  Cl  Cl0  1  0.33319261  0.66665858  0.34203551  1
  Cl  Cl1  1  0.66656988  0.33316580  0.65800747  1
  Cl  Cl2  1  0.33356469  0.66664328  0.89287545  1
  Cl  Cl3  1  0.66641617  0.33322528  0.10710549  1
  Cl  Cl4  1  0.16988805  0.82985567  0.63664628  1
  Cl  Cl5  1  0.16983375  0.33964157  0.63664591  1
  Cl  Cl6  1  0.66009981  0.

In [26]:
cif_string="""# generated using pymatgen
data_TiO2
_symmetry_space_group_name_H-M   'P 1'
_cell_length_a   3.78253968
_cell_length_b   3.78253968
_cell_length_c   9.61502157
_cell_angle_alpha   90.00000000
_cell_angle_beta   90.00000000
_cell_angle_gamma   90.00000000
_symmetry_Int_Tables_number   1
_chemical_formula_structural   TiO2
_chemical_formula_sum   'Ti4 O8'
_cell_volume   137.56794451
_cell_formula_units_Z   4
loop_
 _symmetry_equiv_pos_site_id
 _symmetry_equiv_pos_as_xyz
  1  'x, y, z'
loop_
 _atom_type_symbol
 _atom_type_oxidation_number
  Ti4+  4.0
  O2-  -2.0
loop_
 _atom_site_type_symbol
 _atom_site_label
 _atom_site_symmetry_multiplicity
 _atom_site_fract_x
 _atom_site_fract_y
 _atom_site_fract_z
 _atom_site_occupancy
  Ti4+  Ti0  1  0.50000000  0.50000000  0.50000000  1
  Ti4+  Ti1  1  0.50000000  0.00000000  0.75000000  1
  Ti4+  Ti2  1  0.00000000  0.00000000  0.00000000  1
  Ti4+  Ti3  1  0.00000000  0.50000000  0.25000000  1
  O2-  O4  1  0.00000000  0.50000000  0.45715213  1
  O2-  O5  1  0.50000000  0.50000000  0.70715213  1
  O2-  O6  1  0.50000000  0.00000000  0.54284787  1
  O2-  O7  1  0.00000000  0.00000000  0.79284788  1
  O2-  O8  1  0.50000000  0.00000000  0.95715213  1
  O2-  O9  1  0.00000000  0.00000000  0.20715212  1
  O2-  O10  1  0.00000000  0.50000000  0.04284788  1
  O2-  O11  1  0.50000000  0.50000000  0.29284788  1"""

In [14]:
cif_string='''# generated using pymatgen
data_Nb3Cl8
_symmetry_space_group_name_H-M   P-3m1
_cell_length_a   6.80129902
_cell_length_b   6.80129902
_cell_length_c   12.76792964
_cell_angle_alpha   90.00000000
_cell_angle_beta   90.00000000
_cell_angle_gamma   120.00000000
_symmetry_Int_Tables_number   164
_chemical_formula_structural   Nb3Cl8
_chemical_formula_sum   'Nb6 Cl16'
_cell_volume   511.48729426
_cell_formula_units_Z   2
loop_
 _symmetry_equiv_pos_site_id
 _symmetry_equiv_pos_as_xyz
  1  'x, y, z'
  2  '-x, -y, -z'
  3  '-y, x-y, z'
  4  'y, -x+y, -z'
  5  '-x+y, -x, z'
  6  'x-y, x, -z'
  7  'y, x, -z'
  8  '-y, -x, z'
  9  'x-y, -y, -z'
  10  '-x+y, y, z'
  11  '-x, -x+y, -z'
  12  'x, x-y, z'
loop_
 _atom_type_symbol
 _atom_type_oxidation_number
  Nb2.67+  2.666666666666666
  Cl-  -1.0
loop_
 _atom_site_type_symbol
 _atom_site_label
 _atom_site_symmetry_multiplicity
 _atom_site_fract_x
 _atom_site_fract_y
 _atom_site_fract_z
 _atom_site_occupancy
  Nb2.67+  Nb0  6  0.05778762  0.52889381  0.75423880  1
  Cl-  Cl1  6  0.16504503  0.33009007  0.13968306  1
  Cl-  Cl2  6  0.16844803  0.33689606  0.62475816  1
  Cl-  Cl3  2  0.33333333  0.66666667  0.35153654  1
  Cl-  Cl4  2  0.33333333  0.66666667  0.89921843  1
'''

In [15]:
from pymatgen.io.cif import CifParser
parser = CifParser.from_str(cif_string)
nb3cl8 = parser.get_structures()[0]

The only difference is that primitive defaults to False in the new parse_structures method.So parse_structures(primitive=True) is equivalent to the old behavior of get_structures().
  nb3cl8 = parser.get_structures()[0]
  return self.parse_structures(*args, **kwargs)


In [28]:
spga = SpacegroupAnalyzer(crystal, symprec=0.01)
c = pyxtal()
c.from_seed(crystal, tol=0.01)

In [29]:
c


------Crystal from Seed------
Dimension: 3
Composition: O8Ti4
Group: I 41/a m d:2 (141)
  3.7825,   3.7825,   9.6150,  90.0000,  90.0000,  90.0000, tetragonal
Wyckoff sites:
	 O @ [ 0.0000  0.2500  0.5822], WP [8e] Site [2mm.]
	Ti @ [ 0.0000  0.2500  0.3750], WP [4b] Site [-4m2]

In [141]:
StructureWidget(structures[1])

StructureWidget(structure={'@module': 'pymatgen.core.structure', '@class': 'Structure', 'charge': 0.0, 'lattic…

In [18]:
from crystalformer.src.utils import GLXYZAW_from_file
#alex20_folder = '/opt/data/bcmdata/ZONES/data/PROJECTS/datafile/PRIVATE/zdcao/crystal_gpt/dataset/alex/PBE/alex20/'
alex20_folder = '/opt/data/bcmdata/ZONES/data/PROJECTS/datafile/PRIVATE/zdcao/crystal_gpt/dataset/alex/PBE_20241204/'

train_path = alex20_folder+'/train.lmdb'
valid_path = alex20_folder+'/val.lmdb'
test_path = alex20_folder+'/test.lmdb'

atom_types = 119 
wyck_types = 28 
n_max = 21

train_dataset = GLXYZAW_from_file(train_path, atom_types, wyck_types, n_max)
valid_dataset = GLXYZAW_from_file(valid_path, atom_types, wyck_types, n_max)
test_dataset = GLXYZAW_from_file(test_path, atom_types, wyck_types, n_max)

ERROR:2025-10-22 21:30:37,995:jax._src.xla_bridge:487: Jax plugin configuration error: Exception when calling jax_plugins.xla_cuda12.initialize()
Traceback (most recent call last):
  File "/home/user_wanglei/.local/lib/python3.13/site-packages/jax/_src/xla_bridge.py", line 485, in discover_pjrt_plugins
    plugin_module.initialize()
    ~~~~~~~~~~~~~~~~~~~~~~~~^^
  File "/home/user_wanglei/.local/lib/python3.13/site-packages/jax_plugins/xla_cuda12/__init__.py", line 328, in initialize
    _check_cuda_versions(raise_on_first_error=True)
    ~~~~~~~~~~~~~~~~~~~~^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/user_wanglei/.local/lib/python3.13/site-packages/jax_plugins/xla_cuda12/__init__.py", line 285, in _check_cuda_versions
    local_device_count = cuda_versions.cuda_device_count()
RuntimeError: jaxlib/cuda/versions_helpers.cc:113: operation cuInit(0) failed: Unknown CUDA error 303; cuGetErrorName failed. This probably means that JAX was unable to load the CUDA libraries.


G: (1387800,)
L: (1387800, 6)
XYZ: (1387800, 21, 3)
A: (1387800, 21)
W: (1387800, 21)
G: (173475,)
L: (173475, 6)
XYZ: (173475, 21, 3)
A: (173475, 21)
W: (173475, 21)
G: (173475,)
L: (173475, 6)
XYZ: (173475, 21, 3)
A: (173475, 21)
W: (173475, 21)


In [19]:
G, L, XYZ, A, W = valid_dataset

In [20]:
idx =  89347

In [21]:
G, L, XYZ, A, W = G[idx], L[idx], XYZ[idx], A[idx], W[idx]

In [22]:
import sys
sys.path.append('../crystalformer/src/')
from awl2struct import get_struct_from_lawx
from crystalformer.src.wyckoff import mult_table


In [23]:
M  = mult_table[G-1, W]
num_atoms = np.sum(M)

length, angle = np.split(L, 2)
length = length*num_atoms**(1/3)
angle = angle * (180.0 / np.pi) # to deg
L = np.concatenate([length, angle])


In [24]:
L, A, W, XYZ = np.array(L), np.array(A), np.array(W), np.array(XYZ)
gt_structure = get_struct_from_lawx(G, L, A, W, XYZ)
gt_structure = Structure.from_dict(gt_structure)

In [27]:
from pymatgen.analysis.structure_matcher import StructureMatcher

In [28]:
matcher = StructureMatcher()
matcher.fit(gt_structure, structures[2])


False

In [37]:
print(CifWriter(nb3cl8).__str__())

# generated using pymatgen
data_Nb3Cl8
_symmetry_space_group_name_H-M   'P 1'
_cell_length_a   6.80129902
_cell_length_b   6.80129902
_cell_length_c   12.76792964
_cell_angle_alpha   90.00000000
_cell_angle_beta   90.00000000
_cell_angle_gamma   120.00000000
_symmetry_Int_Tables_number   1
_chemical_formula_structural   Nb3Cl8
_chemical_formula_sum   'Nb6 Cl16'
_cell_volume   511.48729501
_cell_formula_units_Z   2
loop_
 _symmetry_equiv_pos_site_id
 _symmetry_equiv_pos_as_xyz
  1  'x, y, z'
loop_
 _atom_type_symbol
 _atom_type_oxidation_number
  Nb2.67+  2.666666666666666
  Cl-  -1.0
loop_
 _atom_site_type_symbol
 _atom_site_label
 _atom_site_symmetry_multiplicity
 _atom_site_fract_x
 _atom_site_fract_y
 _atom_site_fract_z
 _atom_site_occupancy
  Nb2.67+  Nb0  1  0.94221238  0.47110619  0.24576120  1.0
  Nb2.67+  Nb0  1  0.05778762  0.52889381  0.75423880  1.0
  Nb2.67+  Nb0  1  0.52889381  0.05778762  0.24576120  1.0
  Nb2.67+  Nb0  1  0.47110619  0.94221238  0.75423880  1.0
  Nb2.67+

  print(CifWriter(nb3cl8).__str__())
