# In [9]:
import numpy as np
import pandas as pd

# Convert an extended-XYZ trajectory into a wide CSV with one row per frame:
# step, energy, 9 lattice components, 9 stress components, and x/y/z columns
# for every atom.


def _quoted_field(comment, key):
    """Return the text between ``key="`` and the next ``"`` in *comment*.

    Returns None when ``key="`` does not occur in the comment line.
    """
    marker = f'{key}="'
    if marker not in comment:
        return None
    return comment.split(marker)[1].split('"')[0]


def _nine_floats(text):
    """Parse whitespace-separated floats from *text* into exactly 9 slots.

    Missing, empty, or unparsable text yields nine None values. Fewer than
    nine numbers are padded with None; anything past the ninth is dropped.
    """
    values = []
    if text:
        try:
            values = [float(token) for token in text.split()]
        except ValueError:
            # One bad token invalidates the whole field.
            values = []
    return (values + [None] * 9)[:9]


def _extract_energy(comment):
    """Return the float following ``energy=`` in *comment*, or None.

    A standalone ``energy=`` token is preferred so that a key which merely
    ends in "energy=" (for example ``free_energy=``) appearing earlier on the
    line is not picked up by mistake. If no standalone token parses, the first
    occurrence of the substring ``energy=`` is used as a fallback.
    """
    prefix = 'energy='
    for token in comment.split():
        if token.startswith(prefix):
            try:
                return float(token[len(prefix):])
            except ValueError:
                break  # fall through to the substring-based lookup
    if prefix in comment:
        try:
            return float(comment.split(prefix)[1].split()[0])
        except (IndexError, ValueError):
            return None
    return None


def split_extxyz_frames(lines):
    """Split the lines of an XYZ-style trajectory into frames.

    Each frame is an atom-count line, one comment line, and then that many
    atom lines. The atom count is read from every frame, so the number of
    frames and atoms per frame need not be known in advance. Blank lines
    where a frame would start (typically at the end of the file) are skipped.

    Args:
        lines: list of text lines, with or without trailing newlines.

    Returns:
        A list of ``(comment, atom_lines)`` tuples in file order, where
        ``comment`` is the stripped comment line and ``atom_lines`` is the
        list of raw atom lines of that frame.

    Raises:
        ValueError: if an atom-count line is not a non-negative integer or
            the file ends before a frame is complete.
    """
    frames = []
    total = len(lines)
    cursor = 0
    while cursor < total:
        header = lines[cursor].strip()
        if not header:
            cursor += 1
            continue
        try:
            n_atoms = int(header)
        except ValueError as err:
            raise ValueError(
                f"line {cursor + 1}: expected an atom count, got {header!r}"
            ) from err
        if n_atoms < 0:
            raise ValueError(f"line {cursor + 1}: negative atom count {n_atoms}")
        frame_end = cursor + 2 + n_atoms
        if frame_end > total:
            raise ValueError(
                f"frame starting at line {cursor + 1} declares {n_atoms} atoms "
                f"but the input ends after line {total}"
            )
        comment = lines[cursor + 1].strip()
        frames.append((comment, lines[cursor + 2:frame_end]))
        cursor = frame_end
    return frames


def frame_to_row(step, comment, atom_lines):
    """Build one wide-format row (a dict) for a single frame.

    Args:
        step: frame index stored in the ``step`` column.
        comment: the frame's comment line; ``Lattice="..."``, ``energy=...``
            and ``stress="..."`` are extracted from it when present.
        atom_lines: lines of the form ``element x y z [...]``; only the first
            four whitespace-separated fields are used.

    Returns:
        A dict whose keys are, in order: ``step``, ``energy``, the interleaved
        ``lattice[i]`` / ``stress[i]`` pairs for i = 1..9, then
        ``atom_<n>_<element>_x|y|z`` for each atom (n is 1-based).

    Raises:
        ValueError: if an atom line has fewer than four fields or a
            coordinate is not a valid float.
    """
    lattice_values = _nine_floats(_quoted_field(comment, 'Lattice'))
    stress_values = _nine_floats(_quoted_field(comment, 'stress'))

    row = {'step': step, 'energy': _extract_energy(comment)}
    for i in range(9):
        row[f'lattice[{i+1}]'] = lattice_values[i]
        row[f'stress[{i+1}]'] = stress_values[i]

    for index, atom_line in enumerate(atom_lines, start=1):
        fields = atom_line.split()
        if len(fields) < 4:
            raise ValueError(
                f"frame {step}, atom {index}: expected 'element x y z', "
                f"got {atom_line.strip()!r}"
            )
        element = fields[0]
        x, y, z = (float(value) for value in fields[1:4])
        # The element symbol is embedded in the column name instead of being
        # stored in a separate atom_<n>_element column.
        row[f'atom_{index}_{element}_x'] = x
        row[f'atom_{index}_{element}_y'] = y
        row[f'atom_{index}_{element}_z'] = z
    return row


if __name__ == "__main__":
    # Read the whole trajectory file.
    with open('cp2k.extxyz', 'r') as f:
        lines = f.readlines()

    # Parse every frame found in the file.
    frames = split_extxyz_frames(lines)
    data = [
        frame_to_row(step, comment, atom_lines)
        for step, (comment, atom_lines) in enumerate(frames)
    ]

    # Derived from the file rather than hard-coded; num_atoms is the atom
    # count of the first frame.
    num_steps = len(frames)
    num_atoms = len(frames[0][1]) if frames else 0

    # Convert to a DataFrame.
    df = pd.DataFrame(data)

    # Save as a CSV file.
    df.to_csv('cp2k_trajectory_wide_final3333.csv', index=False)