In [6]:
import pandas as pd
import sys
import numpy as np
import os
sys.path.insert(0, '/home/misa/git_repositories/APDFT/prototyping/atomic_energies/hitp/')
sys.path.insert(0, '/home/misa/git_repositories/APDFT/prototyping/atomic_energies/')
import prepare_calculations
import explore_qml_data as eqd
import utils_qm as uqm

In [7]:
def read_xyz_data(data, look_for_charge=True):
    """
    Parse xyz data given as a list where every item is one line of an xyz-file.

    Line 0 must hold the number of atoms and line 1 is the title line. Every
    following line that starts with a letter is read as
    ``<symbol> <x> <y> <z>``; parsing stops at the first line that is empty
    or does not start with a letter.

    Parameters
    ----------
    data : sequence of str
        Lines of the xyz-file.
    look_for_charge : bool, optional
        If True (default), a ``charge=<int>`` entry in the title line sets
        the returned charge. If False, the title line is not inspected and
        the charge stays 0.

    Returns
    -------
    atoms : list of str
        Upper-cased atomic symbols in file order.
    charge : int
        Charge read from the title line, 0 if none was found/looked for.
    xyz_coordinates : list of [float, float, float]
        Cartesian coordinates in file order.

    Raises
    ------
    ValueError
        If the first line is not an integer, if the title contains
        ``charge=`` without an integer after it, or if an atom line does
        not consist of exactly four fields with numeric coordinates.
    """
    import re  # local import keeps the function self-contained in the notebook

    atomic_symbols = []
    xyz_coordinates = []
    charge = 0

    for line_number, line in enumerate(data):
        if line_number == 0:
            # The atom count is not needed afterwards, but a non-integer
            # first line should still be rejected.
            int(line)
        elif line_number == 1:
            if look_for_charge and "charge=" in line:
                # Extract only the integer that follows "charge=" so that
                # other "key=value" entries or trailing text in the title
                # do not break the conversion.
                match = re.search(r"charge=\s*([+-]?\d+)", line)
                if match is None:
                    raise ValueError(f"could not parse charge from title line: {line!r}")
                charge = int(match.group(1))
        elif line and str.isalpha(line[0]):
            atomic_symbol, x, y, z = line.split()
            atomic_symbols.append(atomic_symbol)
            xyz_coordinates.append([float(x), float(y), float(z)])
        else:
            # Empty line or a line not starting with a letter: end of the atom block.
            break

    atoms = [atom.upper() for atom in atomic_symbols]

    return atoms, charge, xyz_coordinates

In [8]:
# Number of valence electrons that defines the batch of amons to process.
num_ve = 38
amons_db = pd.read_pickle('/home/misa/datasets/amons_qm9_11k/amons_database.pd')

# Keep only the SMILES of database rows with the requested valence-electron count.
has_requested_num_ve = amons_db['num_ve'] == num_ve
smiles_list = list(amons_db.loc[has_requested_num_ve, 'smiles'])

In [4]:
# Number of SMILES strings selected for the chosen valence-electron count.
len(smiles_list)

3359

In [5]:
print(f'Generating input files for {len(smiles_list)} compounds')
amons_dict = uqm.load_obj('/home/misa/datasets/amons_qm9_11k/unique_amons_dict')

template_inp = '/home/misa/projects/atomic-energies/data/cpmd_params_template_pbe.inp'
template_inp_small_lambda ='/home/misa/projects/atomic-energies/data/cpmd_params_template_pbe_small_lambda.inp'
batch_no = f'amons_{num_ve}'

# Settings that are identical for every compound are defined once, outside the loop.
lambda_values = np.array([0.2, 0.6, 0.8, 1.0])
elements = {'H':1, 'C':6, 'N':7, 'O':8, 'F':9}  # atomic symbol -> nuclear charge

for i, smiles in enumerate(smiles_list):

    xyz_data = amons_dict[smiles]

    # compounds are numbered from 1 and zero-padded to six digits
    compound_name = f'amon_{i+1:06d}'
    compound_path = f'/home/misa/projects/atomic-energies/data/ueg_reference/amons/{batch_no}/{compound_name}'

    # The charge parsed from the xyz title line is not used: the charge written
    # to the input files is derived from the scaled electron count below.
    atom_symbols, xyz_charge, coords_initial = read_xyz_data(xyz_data)
    nuc_charges = [elements[a] for a in atom_symbols]

    # calculation parameters (independent of lambda value)
    # A separate name is used so the notebook-level `num_ve` (from which
    # `batch_no` is built) is not overwritten when this cell runs.
    compound_num_ve = eqd.get_num_val_elec(nuc_charges) # get number of ve
    boxsize = prepare_calculations.get_boxsize(compound_num_ve) # get boxsize
    num_gpts_lower, num_gpts_higher = prepare_calculations.get_gpts(compound_num_ve) # get gridpoints
    num_gpts = num_gpts_higher

    # shift molecule to center of box
    coords_final = eqd.shift2center(coords_initial, np.array([boxsize, boxsize, boxsize])/2)

    # write smiles once per compound (it does not depend on the lambda value)
    os.makedirs(compound_path, exist_ok=True)
    with open(os.path.join(compound_path, 'smiles'), 'w') as f:
        f.write(smiles+'\n')

    # get correct lambda value
    for lam_val in lambda_values:
        new_lambda, scaled_ve = prepare_calculations.get_lambda(lam_val, compound_num_ve)
        # scaled_ve is number of electrons added from pseudopotential file, the remaining electrons must be added in form of a negative charge
        charge = scaled_ve - compound_num_ve

        # create directory if necessary; single-digit electron counts get a leading zero
        if scaled_ve < 10:
            scaled_ve_str = '0'+str(scaled_ve)
        else:
            scaled_ve_str = str(scaled_ve)
        work_dir = os.path.join(compound_path, f've_{scaled_ve_str}/')
        os.makedirs(work_dir, exist_ok=True)

        # generate input file; lambda values up to 0.5 use the dedicated small-lambda template
        input_path = os.path.join(work_dir, 'run.inp')
        template = template_inp if new_lambda > 0.5 else template_inp_small_lambda
        prepare_calculations.write_input(atom_symbols, charge, coords_final, num_gpts, boxsize, input_path, template, debug = False)

        # generate pp-files
        prepare_calculations.write_pp_files_compound(atom_symbols, new_lambda, work_dir, pp_dir='/home/misa/PP_LIBRARY', pp_type='_GH_PBE')

Generating input files for 3359 compounds
