In [2]:
import os
import re
import numpy as np
from ase.io import read

def parse_lattice(line: str):
    """Extract the 3x3 lattice matrix from an extended-XYZ comment line.

    The line is expected to contain ``Lattice="a1 a2 a3 b1 b2 b3 c1 c2 c3"``;
    the nine numbers are laid out row by row in the returned array.

    Args:
        line: Comment line of an XYZ frame.

    Returns:
        A ``(3, 3)`` float array built from the nine quoted values.

    Raises:
        ValueError: If no ``Lattice="..."`` entry is present, if a token is
            not a valid float, or if the entry does not hold exactly nine
            values (the reshape fails).
    """
    found = re.search(r'Lattice="([^"]+)"', line)
    if found is None:
        raise ValueError("No lattice found in XYZ comment line.")

    components = [float(token) for token in found.group(1).split()]
    matrix = np.array(components)
    return matrix.reshape(3, 3)

def xyz_to_cif(xyz_path, cif_path):
    """Convert one extended-XYZ file to a minimal P1 CIF file.

    The cell is taken from the ``Lattice="..."`` entry on the second line of
    the file (the comment line of the first frame) and the atoms are read
    from that same first frame, so the cell and the coordinates always belong
    to the same configuration. Atom positions are written as fractional
    coordinates with respect to that cell.

    Args:
        xyz_path: Path of the XYZ file to read.
        cif_path: Path of the CIF file to write (overwritten if it exists).

    Raises:
        ValueError: If the comment line has no usable ``Lattice="..."`` entry
            (raised by ``parse_lattice``).
        numpy.linalg.LinAlgError: If the lattice matrix is singular.
    """
    # Only the second line is needed for the lattice, so do not load the
    # whole (possibly multi-frame) file into memory.
    with open(xyz_path, 'r') as f:
        f.readline()
        comment_line = f.readline()

    # A file with fewer than two lines has no comment line, hence no lattice.
    lattice = parse_lattice(comment_line) if comment_line else None

    # index=0 selects the first frame, i.e. the one whose comment line was
    # parsed above; without it ASE returns the last frame of the file.
    atoms = read(xyz_path, index=0, format='xyz')
    if lattice is not None:
        atoms.set_cell(lattice)
        atoms.set_pbc([True, True, True])

    # Build the complete CIF text before touching the output file, so that a
    # numerical failure cannot leave a truncated CIF behind.
    out = [f"data_{os.path.splitext(os.path.basename(xyz_path))[0]}\n"]
    positions = atoms.get_positions()

    if lattice is not None:
        a, b, c = np.linalg.norm(lattice, axis=1)
        cosines = [
            np.dot(lattice[1], lattice[2]) / (b * c),  # alpha: between b and c
            np.dot(lattice[0], lattice[2]) / (a * c),  # beta:  between a and c
            np.dot(lattice[0], lattice[1]) / (a * b),  # gamma: between a and b
        ]
        # Clip so rounding slightly outside [-1, 1] cannot turn into NaN.
        alpha, beta, gamma = np.degrees(np.arccos(np.clip(cosines, -1.0, 1.0)))
        out.append(f"_cell_length_a      {a:.6f}\n")
        out.append(f"_cell_length_b      {b:.6f}\n")
        out.append(f"_cell_length_c      {c:.6f}\n")
        out.append(f"_cell_angle_alpha   {alpha:.6f}\n")
        out.append(f"_cell_angle_beta    {beta:.6f}\n")
        out.append(f"_cell_angle_gamma   {gamma:.6f}\n")

        # Rows of `lattice` are the cell vectors, so cart = frac @ lattice,
        # i.e. lattice.T @ frac = cart. Solve once for all atoms.
        frac_coords = np.linalg.solve(lattice.T, positions.T).T
    else:
        # No lattice available: Cartesian coordinates are written unchanged.
        frac_coords = positions

    out.append("_space_group_name_H-M_alt 'P 1'\n")
    out.append("loop_\n  _atom_site_type_symbol\n  _atom_site_label\n  _atom_site_fract_x\n  _atom_site_fract_y\n  _atom_site_fract_z\n")

    # One row per atom; the label is the element symbol plus a 1-based index.
    for i, atom in enumerate(atoms):
        symbol = atom.symbol
        frac_pos = frac_coords[i]
        out.append(f"{symbol}  {symbol}{i+1}  {frac_pos[0]:.6f}  {frac_pos[1]:.6f}  {frac_pos[2]:.6f}\n")

    with open(cif_path, 'w') as f:
        f.writelines(out)

# Batch-convert every .xyz file found directly inside the structure_*
# sub-folders of root_dir, writing the .cif next to its source file.
root_dir = "visualization"
for folder in os.listdir(root_dir):
    folder_path = os.path.join(root_dir, folder)
    # Skip anything that is not a structure_* directory. A regular file with
    # such a name would make os.listdir() raise and abort the whole batch.
    if not (folder.startswith("structure_") and os.path.isdir(folder_path)):
        continue
    for file in os.listdir(folder_path):
        if not file.endswith(".xyz"):
            continue
        xyz_path = os.path.join(folder_path, file)
        # splitext swaps only the trailing extension; str.replace would also
        # rewrite an earlier ".xyz" occurring inside the file name.
        cif_path = os.path.splitext(xyz_path)[0] + ".cif"
        try:
            xyz_to_cif(xyz_path, cif_path)
            print(f"Converted: {xyz_path} → {cif_path}")
        except Exception as e:
            # Best-effort batch: report the failure and continue with the
            # remaining files.
            print(f"Failed to convert {xyz_path}: {str(e)}")

Converted: visualization/structure_0/train.xyz → visualization/structure_0/train.cif
Converted: visualization/structure_0/cluster_0.xyz → visualization/structure_0/cluster_0.cif
Converted: visualization/structure_0/cluster_1.xyz → visualization/structure_0/cluster_1.cif
Converted: visualization/structure_0/cluster_2.xyz → visualization/structure_0/cluster_2.cif
Converted: visualization/structure_0/cluster_3.xyz → visualization/structure_0/cluster_3.cif
Converted: visualization/structure_1/train.xyz → visualization/structure_1/train.cif
Converted: visualization/structure_1/cluster_0.xyz → visualization/structure_1/cluster_0.cif
Converted: visualization/structure_1/cluster_1.xyz → visualization/structure_1/cluster_1.cif
Converted: visualization/structure_1/cluster_2.xyz → visualization/structure_1/cluster_2.cif
Converted: visualization/structure_2/train.xyz → visualization/structure_2/train.cif
Converted: visualization/structure_2/cluster_0.xyz → visualization/structure_2/cluster_0.cif
C