In [1]:
import re
import pandas as pd

In [2]:
def parse_string_to_dict(s):
    # 먼저 key=value 쌍을 구분
    pairs = re.findall(r'(\w+)=(".*?"|\S+)', s)
    
    # 각 key=value 쌍을 딕셔너리에 저장
    parsed_dict = {}
    for key, value in pairs:
        # 따옴표 제거 및 값의 형태에 따라 변환

        if re.match(r'^-?\d+(\.\d+)?$', value):
            value = float(value) if '.' in value else int(value)
        elif value.startswith('"') and value.endswith('"'):
            value = list(value[1:-1].split(' '))
        parsed_dict[key] = value

    return parsed_dict

def parse_xyz(filename):
    with open(filename, 'r') as file:
        lines = file.readlines()
    
    data = []
    start_idx = 0
    
    while start_idx < len(lines):
        num_atoms = int(lines[start_idx].strip())
        properties_line = lines[start_idx + 1].strip()
        pro_dict = parse_string_to_dict(properties_line)
        print(pro_dict)
        
        energy = float(properties_line.split('energy=')[1].split()[0])
        atom_type = []
        position = []
        force = []
        for i in range(start_idx + 2, start_idx + 2 + num_atoms):
            parts = lines[i].split()
            atom_type.append(parts[0])
            position.append([float(parts[1]), float(parts[2]), float(parts[3])])
            force.append([float(parts[4]), float(parts[5]), float(parts[6])])
        
        data.append({
            'e': energy,
            't': atom_type,
            'p': position,
            'f': force
        })
        start_idx += 2 + num_atoms
    
    return data

def reindexing(org, prd):
    # org 데이터의 첫 번째 p 값을 키로, 해당 인덱스를 값으로 하는 딕셔너리 생성
    org_map = {tuple(item['p'][0]): i for i, item in enumerate(org)}
    print(org_map)
    # prd 데이터를 재정렬할 리스트
    reordered_prd = [None] * len(prd)
    
    # prd 데이터에서 p 값을 찾아 org_map에 있는 인덱스로 재정렬
    for item in prd:
        p_value = tuple(item['p'][0])
        if p_value in org_map:
            reordered_prd[org_map[p_value]] = item
    
    # 재정렬된 prd 반환
    return reordered_prd
    

def make_df(data):
    d = []
    for i, item in enumerate(data):
        d.append({
            'ID': f'TEST_{i:04d}',
            'energy': item['e'],
            'forces': item['f']
        })
    df = pd.DataFrame(d)
    df['energy_uncertainty'] = 1
    cols = ['ID', 'energy', 'energy_uncertainty', 'forces']
    df = df[cols]
    return df


In [3]:
prd_data = parse_xyz('output.xyz')
prd_df = make_df(prd_data)

{'Lattice': ['11.07075023651123', '0.0', '0.0', '0.0', '11.07075023651123', '0.0', '0.0', '0.0', '11.07075023651123'], 'Properties': 'species:S:1:pos:R:3:energies:R:1:forces:R:3', 'original_dataset_index': 1500, 'energy': -935.5009638117633, 'stress': ['0.015231227058282735', '0.011092548143728249', '-0.009007896007943142', '0.011092548143728249', '0.004126900691110468', '0.006716275255536929', '-0.009007896007943142', '0.006716275255536929', '-0.01775453875035384'], 'pbc': ['F', 'F', 'F']}
{'Lattice': ['11.07075023651123', '0.0', '0.0', '0.0', '11.07075023651123', '0.0', '0.0', '0.0', '11.07075023651123'], 'Properties': 'species:S:1:pos:R:3:energies:R:1:forces:R:3', 'original_dataset_index': 1501, 'energy': -919.2319128210048, 'stress': ['-0.0005505010880860654', '-0.008527133771437904', '-0.00665196479025389', '-0.008527133771437904', '-0.03026582883343552', '0.019961495606283432', '-0.00665196479025389', '0.019961495606283432', '-0.040327771198191316'], 'pbc': ['F', 'F', 'F']}
{'Lat

In [4]:
prd_df.to_csv('sub.csv', index=False)

In [9]:
!tar -xf si6.zip -C /

results/silicon-tutorial/si/: Can't create '\\?\C:\results\silicon-tutorial\si': No such file or directory
results/silicon-tutorial/si/trainer.pth: Can't create '\\?\C:\results\silicon-tutorial\si\trainer.pth': No such file or directory
results/silicon-tutorial/si/last_model.pth: Can't create '\\?\C:\results\silicon-tutorial\si\last_model.pth': No such file or directory
results/silicon-tutorial/si/config.yaml: Can't create '\\?\C:\results\silicon-tutorial\si\config.yaml': No such file or directory
results/silicon-tutorial/si/metrics_initialization.csv: Can't create '\\?\C:\results\silicon-tutorial\si\metrics_initialization.csv': No such file or directory
results/silicon-tutorial/si/best_model.pth: Can't create '\\?\C:\results\silicon-tutorial\si\best_model.pth': No such file or directory
results/silicon-tutorial/si/metrics_epoch.csv: Can't create '\\?\C:\results\silicon-tutorial\si\metrics_epoch.csv': No such file or directory
results/silicon-tutorial/si/metrics_batch_train.csv: Can't 