In [1]:
import warnings

import numpy as np
import pandas as pd

# Convert an extended-XYZ trajectory into a wide table: one row per frame,
# with energy, the 9 lattice / 9 stress components and per-atom
# position/force columns.


def _comment_value_start(comment, key):
    """Return the index just past ``key=`` in *comment*, or -1 if absent.

    Only occurrences at the start of the line or directly after whitespace
    count, so looking up ``energy`` does not land inside ``free_energy=``.
    """
    marker = f'{key}='
    pos = comment.find(marker)
    while pos != -1:
        if pos == 0 or comment[pos - 1].isspace():
            return pos + len(marker)
        pos = comment.find(marker, pos + 1)
    return -1


def _comment_scalar(comment, key):
    """Return the float stored as ``key=<number>``; None if missing or unparsable."""
    start = _comment_value_start(comment, key)
    if start == -1:
        return None
    tokens = comment[start:].split()
    try:
        return float(tokens[0])
    except (IndexError, ValueError):
        # Best effort: a missing or non-numeric value is recorded as None.
        return None


def _comment_matrix(comment, key, width=9):
    """Return *width* floats read from ``key="v1 v2 ..."``.

    The result always has exactly *width* entries: extra values are dropped,
    missing ones are padded with None. If the key is absent, is not followed
    by a double quote, or any token is not a number, every entry is None.
    """
    empty = [None] * width
    start = _comment_value_start(comment, key)
    if start == -1 or not comment.startswith('"', start):
        return empty
    # Text between the opening quote and the next quote (or end of line).
    raw = comment[start + 1:].split('"')[0]
    try:
        values = [float(token) for token in raw.split()]
    except ValueError:
        return empty
    values = values[:width]
    return values + [None] * (width - len(values))


def parse_extxyz_frames(lines):
    """Parse the lines of an extended-XYZ file into one dict per frame.

    Each frame is expected to be: an atom-count line, a comment line, then
    that many atom lines of the form ``element x y z fx fy fz``. The atom
    count is taken from each frame's own header instead of a fixed constant.

    Args:
        lines: list of text lines (as returned by ``file.readlines()``).

    Returns:
        A list of dicts with keys ``step``, ``energy``, ``lattice[1..9]``,
        ``stress[1..9]`` and ``atom_<i>_<element>_<x|y|z|fx|fy|fz>``
        (``<i>`` is 1-based). Values missing from the comment line are None.

    Raises:
        ValueError: if a frame header is not a non-negative integer, or an
            atom line has fewer than 7 fields or non-numeric coordinates.

    A trailing frame with fewer lines than its header announces is dropped
    with a warning rather than raising.
    """
    rows = []
    cursor = 0
    while cursor < len(lines):
        header = lines[cursor].strip()
        if not header:
            # Tolerate blank lines between frames / at the end of the file.
            cursor += 1
            continue
        try:
            n_atoms = int(header)
        except ValueError:
            n_atoms = -1
        if n_atoms < 0:
            raise ValueError(
                f"line {cursor + 1}: expected an atom count, got {header!r}"
            )

        frame_end = cursor + 2 + n_atoms
        if frame_end > len(lines):
            warnings.warn(
                f"frame {len(rows)} starting at line {cursor + 1} announces "
                f"{n_atoms} atoms but the file ends early; frame dropped",
                stacklevel=2,
            )
            break

        comment = lines[cursor + 1].strip()
        row = {'step': len(rows), 'energy': _comment_scalar(comment, 'energy')}

        lattice_values = _comment_matrix(comment, 'Lattice')
        stress_values = _comment_matrix(comment, 'stress')
        for k in range(9):
            row[f'lattice[{k+1}]'] = lattice_values[k]
            row[f'stress[{k+1}]'] = stress_values[k]

        # Lay the atoms out horizontally: six columns per atom.
        for i, atom_line in enumerate(lines[cursor + 2:frame_end], start=1):
            fields = atom_line.split()
            if len(fields) < 7:
                raise ValueError(
                    f"line {cursor + 2 + i}: expected 'element x y z fx fy fz', "
                    f"got {atom_line.strip()!r}"
                )
            element = fields[0]
            for name, token in zip(('x', 'y', 'z', 'fx', 'fy', 'fz'), fields[1:7]):
                row[f'atom_{i}_{element}_{name}'] = float(token)

        rows.append(row)
        cursor = frame_end
    return rows


# `__name__` is "__main__" inside a notebook kernel, so this runs exactly as
# before; the guard only lets the helpers above be imported without file I/O.
if __name__ == "__main__":
    # Read the trajectory file.
    with open('cp2k.extxyz', 'r') as f:
        lines = f.readlines()

    # One row per frame.
    data = parse_extxyz_frames(lines)
    num_steps = len(data)

    # Convert to a DataFrame and save as CSV.
    df = pd.DataFrame(data)
    df.to_csv('test.csv', index=False)

In [3]:
import pandas as pd

# Rebuild an extended-XYZ trajectory from the wide per-frame CSV table.


def _atom_columns(columns):
    """Return ``[(index, element), ...]`` sorted by index.

    Atoms are discovered from column names of the exact form
    ``atom_<index>_<element>_x``; this is done once instead of rescanning
    every column for every atom of every frame.
    """
    atoms = []
    for col in columns:
        parts = str(col).split('_')
        if (len(parts) == 4 and parts[0] == 'atom'
                and parts[1].isdigit() and parts[3] == 'x'):
            atoms.append((int(parts[1]), parts[2]))
    return sorted(atoms)


def _row_matrix(row, prefix):
    """Join ``prefix[1]`` .. ``prefix[9]`` of *row* with spaces.

    Returns None when any of the nine columns is absent or NaN, so the caller
    can leave the field out instead of writing ``nan`` into the file.
    """
    values = [row.get(f'{prefix}[{k}]') for k in range(1, 10)]
    if any(pd.isna(value) for value in values):
        return None
    return ' '.join(map(str, values))


def write_extxyz_frames(df, f):
    """Write every row of *df* as one extended-XYZ frame to text handle *f*.

    Args:
        df: DataFrame with columns ``energy``, ``lattice[1..9]``,
            ``stress[1..9]`` and ``atom_<i>_<element>_<x|y|z|fx|fy|fz>``.
        f: writable text file object.

    Per frame the output is: atom count, a comment line
    (``Lattice``, ``Properties``, ``energy``, ``stress``, ``pbc``), then one
    ``element x y z fx fy fz`` line per atom. ``Lattice``, ``energy`` and
    ``stress`` are omitted when their values are missing; atoms whose ``x``
    is NaN in a frame are skipped and the count reflects the atoms written.
    Missing force columns default to 0.0.
    """
    atoms = _atom_columns(df.columns)

    for step in range(len(df)):
        row = df.iloc[step]

        # Atom lines are built first so the header carries the real count.
        atom_lines = []
        for i, element in atoms:
            prefix = f'atom_{i}_{element}_'
            x = row[prefix + 'x']
            if pd.isna(x):
                continue  # atom not present in this frame
            y = row[prefix + 'y']
            z = row[prefix + 'z']
            # Forces fall back to 0.0 when the CSV has no force columns.
            fx = row.get(prefix + 'fx', 0.0)
            fy = row.get(prefix + 'fy', 0.0)
            fz = row.get(prefix + 'fz', 0.0)
            atom_lines.append(f"{element} {x} {y} {z} {fx} {fy} {fz}\n")

        # Comment line: same field order as before, minus missing values.
        fields = []
        lattice = _row_matrix(row, 'lattice')
        if lattice is not None:
            fields.append(f'Lattice="{lattice}"')
        fields.append('Properties=species:S:1:pos:R:3:forces:R:3')
        energy = row.get('energy')
        if not pd.isna(energy):
            fields.append(f'energy={energy}')
        stress = _row_matrix(row, 'stress')
        if stress is not None:
            fields.append(f'stress="{stress}"')
        fields.append('pbc="T T T"')

        f.write(f"{len(atom_lines)}\n")
        f.write(' '.join(fields) + '\n')
        f.writelines(atom_lines)


# `__name__` is "__main__" inside a notebook kernel, so this runs exactly as
# before; the guard only lets the helpers above be imported without file I/O.
if __name__ == "__main__":
    # Read the CSV file.
    df = pd.read_csv('test.csv')

    # Number of atoms, derived from the atom columns present in the CSV.
    num_atoms = len(_atom_columns(df.columns))

    # Write the reconstructed trajectory.
    with open('cp2k_reconstructed.extxyz', 'w') as f:
        write_extxyz_frames(df, f)