In [1]:
import os
import pathlib
import glob
import pandas as pd
from rdkit import Chem
from rdkit.Chem import Descriptors

import sys
sys.path.insert(0, '/scicore/home/lilienfeld/sahre0000/APDFT/prototyping/atomic_energies/hitp/')
import cpmd_io
import get_status_report

sys.path.insert(0, '/scicore/home/lilienfeld/sahre0000/APDFT/prototyping/atomic_energies/')
import utils_qm as uqm

In [2]:
# Load the pickled calculation-manager object of the amons_32 run.
# NOTE(review): presumably a pandas DataFrame -- confirm. pd.read_pickle
# unpickles arbitrary objects, so only point it at trusted files.
db = pd.read_pickle('/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/amons/amons_32/calculation_manager.pd')
# Keep a separate copy next to the object that was loaded from disk.
db_copy = db.copy()

# Disabled call: would hand db_copy to get_status_report.update.
#get_status_report.update(db_copy)

In [6]:
# Collect the work directories whose error file (as returned by
# cpmd_io.get_last_errorfile) contains the message 'forcing job termination'.
job_term = []

# NOTE(review): `workdirs` is not defined in any visible cell -- it must come
# from earlier kernel state; define it explicitly before running this cell.
for wd in workdirs:
    error_file = cpmd_io.get_last_errorfile(wd)
    with open(error_file, 'r') as f:
        # Register each directory at most once, even when the message occurs on
        # several lines; any() also stops reading at the first match.
        if any('forcing job termination' in line for line in f):
            job_term.append(wd)

In [12]:
# Dump the collected work directories to the list file, one path per line.
with open('/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/amons/amons_32/broken_fixed', 'w') as out_file:
    out_file.writelines(wd_path + '\n' for wd_path in job_term)

In [4]:
# Sort every work directory into a bucket keyed by the error code that
# cpmd_io.parse_error_file extracts from its error file.
error_code = {code: [] for code in cpmd_io.error_message_lookup.keys()}
for wd in workdirs:
    error_file = cpmd_io.get_last_errorfile(wd)
    with open(error_file, 'r') as f:
        ec = cpmd_io.parse_error_file(f.readlines())

    # An 'e000' result is reclassified as 'e007' when the directory contains
    # at least one LocalError-* file.
    if ec == 'e000' and glob.glob(os.path.join(wd, 'LocalError-*')):
        ec = 'e007'
    error_code[ec].append(wd)

In [5]:
# Print the number of work directories collected under each error code.
for k, dirs in error_code.items():
    print(f'{k}:{len(dirs)}')

e000:1
e001:0
e002:0
e003:126
e004:0
e005:0
e006:0
e007:0


In [10]:
# Display the work directories that were filed under error code 'e000'.
error_code['e000']

['/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/amons/amons_32/amon_000001/ve_19']

### Enable random initialization for inputs with lambda < 0.5

In [8]:
def get_tot_num_ve(wd):
    """
    Return the total number of valence electrons of the molecule a work
    directory belongs to.

    The SMILES string is taken from the first line of the file ``smiles``
    located in the parent directory of ``wd``.

    Parameters
    ----------
    wd : str
        Path of the work directory.

    Returns
    -------
    int
        Value of ``Descriptors.NumValenceElectrons`` for the parsed molecule.

    Raises
    ------
    ValueError
        If the first line of the smiles file is empty or cannot be parsed
        by RDKit.
    """
    smiles_path = os.path.join(os.path.dirname(wd), 'smiles')
    with open(smiles_path, 'r') as f:
        # only the first line is needed, so do not load the whole file
        smiles = f.readline().strip()
    if not smiles:
        raise ValueError(f'no SMILES string found in {smiles_path}')

    mol = Chem.MolFromSmiles(smiles)
    # MolFromSmiles reports a parse failure by returning None, not by raising
    if mol is None:
        raise ValueError(f'could not parse SMILES {smiles!r} from {smiles_path}')
    return Descriptors.NumValenceElectrons(mol)

In [10]:
# Rewrite run.inp with random initialisation enabled for every work directory
# whose lambda value lies below 0.5.
error_fix = workdirs
for wd in error_fix:
    # get lambda value: number in the directory name 've_<n>' divided by the
    # total number of valence electrons of the molecule
    tot_num_ve = get_tot_num_ve(wd)
    ve_label = wd.rpartition('/')[2]
    num_ve = int(ve_label.split('_')[1])
    lam = num_ve / tot_num_ve

    # add random initialize only if necessary
    if not lam < 0.5:
        continue

    print(wd)
    path_inp = os.path.join(wd, 'run.inp')
    with open(path_inp, 'r') as inp_handle:
        inp_file = inp_handle.readlines()
    # build the new content before the file is reopened (and truncated) for writing
    inp_file_modified = cpmd_io.enable_initialize_random(inp_file)
    with open(path_inp, 'w') as inp_handle:
        inp_handle.writelines(inp_file_modified)

/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/amons/amon_000406/ve_06
/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/amons/amon_000406/ve_11


In [None]:
# Save the work directories listed in new_inp to a list file, one path per line.
with open('/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/amons/new_inp', 'w') as out_file:
    out_file.writelines(wd_path + '\n' for wd_path in new_inp)

### Rewrite input

In [6]:
import numpy as np
def shift2center(coordinates_initial, centroid_final):
    """
    Translate a set of coordinates rigidly so that its centroid (mean over
    the first axis) coincides with centroid_final; returns the translated
    coordinates.
    """
    displacement = centroid_final - np.mean(coordinates_initial, axis=0)
    return coordinates_initial + displacement

def get_xyz(xyz_data):
    """
    Parse the lines of an XYZ-style geometry.

    The first two entries of ``xyz_data`` are skipped (in the XYZ convention
    these are the atom count and a comment line). Every following non-blank
    line is split on whitespace into an element symbol and numeric columns.

    Parameters
    ----------
    xyz_data : iterable of str
        Lines of the geometry data.

    Returns
    -------
    tuple
        ``(nuc_charges, elements, positions)``: ``elements`` holds the element
        symbols, ``positions`` the float-converted columns of each atom line,
        and ``nuc_charges`` the nuclear charges of the H, C, N, O and F atoms.

    Raises
    ------
    ValueError
        If a column after the element symbol cannot be converted to float.
    """
    # nuclear charges of the supported elements
    charge_of = {'H': 1, 'C': 6, 'N': 7, 'O': 8, 'F': 9}

    elements = []
    positions = []
    for idx, line in enumerate(xyz_data):
        if idx < 2:
            continue
        fields = line.split()
        if not fields:
            # blank line (e.g. a trailing newline); indexing fields[0] here
            # used to raise IndexError
            continue
        elements.append(fields[0])
        positions.append([float(value) for value in fields[1:]])

    # NOTE(review): symbols missing from the table are skipped, exactly as the
    # former if/elif chain did, so nuc_charges can be shorter than elements for
    # molecules containing other elements -- confirm whether that is intended.
    nuc_charges = [charge_of[el] for el in elements if el in charge_of]
    return nuc_charges, elements, positions

def get_tot_num_ve(wd):
    """
    Return the total number of valence electrons of the molecule a work
    directory belongs to.

    The SMILES string is taken from the first line of the file ``smiles``
    located in the parent directory of ``wd``.

    Parameters
    ----------
    wd : str
        Path of the work directory.

    Returns
    -------
    int
        Value of ``Descriptors.NumValenceElectrons`` for the parsed molecule.

    Raises
    ------
    ValueError
        If the first line of the smiles file is empty or cannot be parsed
        by RDKit.
    """
    smiles_path = os.path.join(os.path.dirname(wd), 'smiles')
    with open(smiles_path, 'r') as f:
        # only the first line is needed, so do not load the whole file
        smiles = f.readline().strip()
    if not smiles:
        raise ValueError(f'no SMILES string found in {smiles_path}')

    mol = Chem.MolFromSmiles(smiles)
    # MolFromSmiles reports a parse failure by returning None, not by raising
    if mol is None:
        raise ValueError(f'could not parse SMILES {smiles!r} from {smiles_path}')
    return Descriptors.NumValenceElectrons(mol)

In [9]:
import prepare_calculations

In [10]:
# make new input files
# This cell only defines the data and parameters consumed by the loop that
# regenerates the input files.

# get amons dict
# The loop below indexes amons_dict with a SMILES string and passes the result
# to get_xyz as XYZ lines.
amons_dict = uqm.load_obj('/scicore/home/lilienfeld/sahre0000/datasets/amons_qm9_11k/unique_amons_dict')

# define parameters
# pp_dir / pp_type are handed to prepare_calculations.write_pp_files_compound.
pp_dir = '/scicore/home/lilienfeld/sahre0000/opt/PP_LIBRARY'
pp_type = '_GH_PBE'
# template_inp is used for lambda > 0.5, template_inp_small_lambda otherwise.
template_inp = '/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/cpmd_params_template_pbe.inp'
template_inp_small_lambda = '/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/cpmd_params_template_pbe_small_lambda.inp'

In [11]:
# Work directories whose input files are regenerated (alias of workdirs, not a copy).
new_inp = workdirs

In [12]:
# Regenerate run.inp and the pseudopotential files for every work directory
# listed in new_inp.
#wd = '/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/amons/amon_000644/ve_18'
for wd in new_inp:
    # the SMILES of the molecule is stored in the parent directory of wd
    smiles_path = os.path.join('/'.join(wd.split('/')[:-1]), 'smiles')
    with open(smiles_path, 'r') as f:
        smiles = f.readlines()[0].strip('\n')

    xyz_data = amons_dict[smiles]

    # the nuclear charges returned by get_xyz are not needed here
    _nuc_charges, atom_symbols, coords_initial = get_xyz(xyz_data)

    # calculation parameters (independent of lambda value)
    num_ve = Descriptors.NumValenceElectrons(Chem.MolFromSmiles(smiles)) # get number of ve
    boxsize = prepare_calculations.get_boxsize(num_ve) # get boxsize
    # get gridpoints; only the higher of the two returned values is used
    _num_gpts_lower, num_gpts = prepare_calculations.get_gpts(num_ve)

    # shift molecule to center of box
    coords_final = shift2center(coords_initial, np.array([boxsize, boxsize, boxsize])/2)

    # get correct lambda value: number in the directory name 've_<n>' divided
    # by the total number of valence electrons. num_ve was computed from the
    # same smiles file that get_tot_num_ve(wd) reads, so it is reused instead
    # of reading and parsing that file a second time.
    lam_val = int(wd.split('/')[-1].split('_')[1])/num_ve
    new_lambda, scaled_ve = prepare_calculations.get_lambda(lam_val, num_ve)

    # scaled_ve is number of electrons added from pseudopotential file, the remaining electrons must be added in form of a negative charge
    charge = scaled_ve - num_ve

    # create directory if necessary
    os.makedirs(wd, exist_ok=True)

    # generate input file; lambda values up to and including 0.5 use the small-lambda template
    # NOTE(review): the "Add initialize random" cell tests `lam < 0.5`, so a
    # lambda of exactly 0.5 is treated differently there -- confirm which
    # boundary is intended.
    input_path = os.path.join(wd, 'run.inp')
    if new_lambda > 0.5:
        template = template_inp
    else:
        print(wd)
        template = template_inp_small_lambda
    prepare_calculations.write_input(atom_symbols, charge, coords_final, num_gpts, boxsize, input_path, template, debug = False)

    # generate pp-files
    prepare_calculations.write_pp_files_compound(atom_symbols, new_lambda, wd, pp_dir, pp_type)
