# Convert Data to ASE DBs
ASE databases are the format that SchNetPack prefers for its data.

In [1]:
from schnetpack.data import AtomsData
from ase.io.xyz import read_xyz
from io import StringIO
import pandas as pd
import numpy as np
import os

Configuration

In [2]:
# Name of the CSV column whose value is stored as the 'ip' property of each entry
target = 'oxidation_potential.smb-vacuum'
# Name of the CSV column holding each row's geometry as an XYZ-format string
geom_column = 'data.xtb.neutral.xyz'

## Save Each Dataset
Loop over the train, test, and validation splits and store each one as an ASE database.

In [3]:
# Make sure the output directory for the .db files exists (no error if it is already there)
os.makedirs('datasets', exist_ok=True)

In [4]:
def _first_frame(xyz_text):
    """Parse an XYZ-format string and return the first structure read from it."""
    frames = read_xyz(StringIO(xyz_text), slice(None))
    return next(frames)


for name in ['train', 'test', 'valid']:
    # Read this split from its CSV file
    data = pd.read_csv(f'../datasets/{name}.csv')

    # Turn each XYZ string in the geometry column into an ASE atoms object
    data['atoms'] = data[geom_column].apply(_first_frame)

    # Start from a clean slate: remove any database file left by a previous run
    out_path = f'datasets/{name}.db'
    if os.path.isfile(out_path):
        os.unlink(out_path)

    # Create the database, declaring 'ip' as its only available property
    db = AtomsData(out_path, available_properties=['ip'])

    # Build one property dict per row (the target wrapped in a 1-D array),
    # then write every structure together with its property dict
    properties = [{'ip': np.atleast_1d(value)} for value in data[target]]
    db.add_systems(data['atoms'], properties)