In [1]:
#import find_converged as fc
import os
import glob
import pandas as pd

import shutil

In [2]:
def fix_input_file(p, restart_corrupted=False):
    """
    Rewrite ``run.inp`` in directory ``p`` in place so that it is ready for a (re)start.

    Two things are adjusted:

    * the restart option: ``RESTART WAVEFUNCTION LATEST`` is kept/enabled only if
      ``p/RESTART.1`` exists and ``restart_corrupted`` is False; otherwise the line
      is rewritten as ``rESTART WAVEFUNCTION LATEST`` (the spelling this notebook
      uses for a disabled option). If no restart line exists but a usable restart
      file does, the option is inserted after ``OPTIMIZE WAVEFUNCTION``.
    * the minimizer: the line after every ``TIMESTEP`` line is set to ``5`` and a
      ``PCG MINIMIZE`` line is inserted before ``TIMESTEP`` if it is not already there.

    Parameters
    ----------
    p : str
        Directory that contains ``run.inp`` (and possibly ``RESTART.1``).
    restart_corrupted : bool, optional
        Set to True if an existing restart file must not be used.

    Raises
    ------
    AssertionError
        If ``run.inp`` contains no ``TIMESTEP`` line; the file is not rewritten then.
    """
    inp_path = os.path.join(p, 'run.inp')
    with open(inp_path, 'r') as f:
        inp_file = f.readlines()

    # set restart option correctly
    restart_exists = os.path.isfile(os.path.join(p, 'RESTART.1'))
    restart_usable = restart_exists and not restart_corrupted
    found_restart_option = False
    for i, line in enumerate(inp_file):
        if 'RESTART WAVEFUNCTION LATEST' in line:
            found_restart_option = True
            if restart_usable:
                print('Everything ok')
            # disable restart if there is no restart file
            if not restart_exists:
                inp_file[i] = '  rESTART WAVEFUNCTION LATEST\n'
                print('Disabled restart because couldnt find a restart file. I hope you dont mind.')
            # disable restart if there is restart file that is corrupted
            if restart_corrupted:
                inp_file[i] = '  rESTART WAVEFUNCTION LATEST\n'
                print('Disabled restart because restart file is corrupted. Is that ok with you?')
        # enable restart if there is a restart file that is not corrupted
        elif 'rESTART WAVEFUNCTION LATEST' in line and restart_usable:
            inp_file[i] = '  RESTART WAVEFUNCTION LATEST\n'
            found_restart_option = True
            print('Enabled restart (from rESTART) because there is an intact restart file. Isn\'t that great, sunshine?')

    # enable restart if there is a restart file that is not corrupted and restart not added yet
    if not found_restart_option and restart_usable:
        for i, line in enumerate(inp_file):
            if 'OPTIMIZE WAVEFUNCTION' in line:
                print('Added the restart option because there is an intact restart file. This should help you to finish your PhD in time.')
                inp_file.insert(i+1, '  RESTART WAVEFUNCTION LATEST\n')
                break

    # check that PCG MINIMIZE is used
    # change TIMESTEP to 5 if necessary
    # An explicit index is used because lines may be inserted while scanning.
    timestep = False
    i = 0
    while i < len(inp_file):
        if 'TIMESTEP' in inp_file[i]:
            timestep = True
            if i + 1 < len(inp_file):
                inp_file[i+1] = '    5\n'
            else:
                # TIMESTEP is the last line: append the value instead of indexing past the end
                if not inp_file[i].endswith('\n'):
                    inp_file[i] += '\n'
                inp_file.append('    5\n')
            # i == 0 has no preceding line; do not let i-1 wrap around to the last line
            if i == 0 or 'PCG MINIMIZE' not in inp_file[i-1]:
                inp_file.insert(i, '  PCG MINIMIZE\n')
                print('I think you forgot to use the PCG minimizer instead of DIIS. I changed that for you. Aint I a nice program?')
                i += 1  # the TIMESTEP line moved down by one; step over it
        i += 1
    # raised explicitly (same exception type as before) so the check survives `python -O`
    if not timestep:
        raise AssertionError(f'No TIMESTEP line found in {inp_path}')

    with open(inp_path, 'w') as f:
        f.writelines(inp_file)

In [None]:
def check_input(p, restart_corrupted=False):
    """
    Return the corrected contents of ``p/run.inp`` without writing anything to disk.

    The same adjustments as in ``fix_input_file`` are applied to an in-memory copy
    of the file: the restart option is enabled only when ``p/RESTART.1`` exists and
    ``restart_corrupted`` is False (otherwise it is spelled ``rESTART ...``), the
    line after every ``TIMESTEP`` is set to ``5`` and ``PCG MINIMIZE`` is inserted
    before ``TIMESTEP`` when missing.

    Parameters
    ----------
    p : str
        Directory that contains ``run.inp`` (and possibly ``RESTART.1``).
    restart_corrupted : bool, optional
        Set to True if an existing restart file must not be used.

    Returns
    -------
    list of str
        The adjusted lines of the input file.

    Raises
    ------
    AssertionError
        If ``run.inp`` contains no ``TIMESTEP`` line.
    """
    inp_path = os.path.join(p, 'run.inp')
    with open(inp_path, 'r') as f:
        inp_file = f.readlines()

    # set restart option correctly
    restart_exists = os.path.isfile(os.path.join(p, 'RESTART.1'))
    restart_usable = restart_exists and not restart_corrupted
    found_restart_option = False
    for i, line in enumerate(inp_file):
        if 'RESTART WAVEFUNCTION LATEST' in line:
            found_restart_option = True
            if restart_usable:
                print('Everything ok')
            # disable restart if there is no restart file
            if not restart_exists:
                inp_file[i] = '  rESTART WAVEFUNCTION LATEST\n'
                print('Disabled restart because couldnt find a restart file. I hope you dont mind.')
            # disable restart if there is restart file that is corrupted
            if restart_corrupted:
                inp_file[i] = '  rESTART WAVEFUNCTION LATEST\n'
                print('Disabled restart because restart file is corrupted. Is that ok with you?')
        # enable restart if there is a restart file that is not corrupted
        elif 'rESTART WAVEFUNCTION LATEST' in line and restart_usable:
            inp_file[i] = '  RESTART WAVEFUNCTION LATEST\n'
            found_restart_option = True
            print('Enabled restart (from rESTART) because there is an intact restart file. Isnt that great, sunshine?')

    # enable restart if there is a restart file that is not corrupted and restart not added yet
    if not found_restart_option and restart_usable:
        for i, line in enumerate(inp_file):
            if 'OPTIMIZE WAVEFUNCTION' in line:
                print('Added the restart option because there is an intact restart file. This should help you to finish your PhD in time.')
                inp_file.insert(i+1, '  RESTART WAVEFUNCTION LATEST\n')
                break

    # check that PCG MINIMIZE is used
    # change TIMESTEP to 5 if necessary
    # An explicit index is used because lines may be inserted while scanning.
    timestep = False
    i = 0
    while i < len(inp_file):
        if 'TIMESTEP' in inp_file[i]:
            timestep = True
            if i + 1 < len(inp_file):
                inp_file[i+1] = '    5\n'
            else:
                # TIMESTEP is the last line: append the value instead of indexing past the end
                if not inp_file[i].endswith('\n'):
                    inp_file[i] += '\n'
                inp_file.append('    5\n')
            # i == 0 has no preceding line; do not let i-1 wrap around to the last line
            if i == 0 or 'PCG MINIMIZE' not in inp_file[i-1]:
                inp_file.insert(i, '  PCG MINIMIZE\n')
                print('I think you forgot to use the PCG minimizer instead of DIIS. I changed that for you. Aint I a nice program?')
                i += 1  # the TIMESTEP line moved down by one; step over it
        i += 1
    # raised explicitly (same exception type as before) so the check survives `python -O`
    if not timestep:
        raise AssertionError(f'No TIMESTEP line found in {inp_path}')
    return(inp_file)

In [None]:
# derive the name of the next run directory (the directory itself is not created here)
def make_new_dir_name(p):
    """
    Return the path of the follow-up run directory for ``p``.

    ``p`` is expected to end in ``<comp_name>/<run_number>/<lam_val>`` where
    ``<run_number>`` is a three-character prefix followed by an integer
    (e.g. ``run5``). The returned path has the integer incremented by one;
    everything else is unchanged. A trailing ``/`` on ``p`` is ignored.

    Raises
    ------
    ValueError
        If ``p`` has fewer than three path components or the run number
        cannot be parsed as an integer.
    """
    p = p.rstrip('/') # sometimes path ends with / sometimes not, stripping makes sure that there is never a /
    # peel off the last three components; os.path.split keeps a leading root '/'
    # intact, which the former slice arithmetic dropped for paths like '/comp/run1/ve_8'
    parent, lam_val = os.path.split(p)
    parent, run_number = os.path.split(parent)
    basename, comp_name = os.path.split(parent)
    if not (comp_name and run_number and lam_val):
        raise ValueError(f'Expected a path ending in <comp>/<run>/<lam_val>, got {p!r}')
    run_number_new = run_number[:3] + str(int(run_number[3:])+1)
    new_dir = os.path.join(basename, comp_name, run_number_new, lam_val)
    return(new_dir)

# move files
def get_PP_names(p):
    """
    Return the file names (without directory) of all entries in ``p`` whose
    name matches ``*_SG_LDA``.
    """
    pattern = os.path.join(p, '*_SG_LDA')
    # keep only the part after the last '/'
    return [match.rsplit('/', 1)[-1] for match in glob.glob(pattern)]

def make_filenames(p):
    """
    List the file names to transfer from ``p``: every ``*_SG_LDA`` file found
    there followed by ``RESTART.1``, ``run.inp`` and ``LATEST``.
    """
    fixed_names = ['RESTART.1', 'run.inp', 'LATEST']
    return get_PP_names(p) + fixed_names

def generate_full_paths(dirname, filenames):
    """
    Join ``dirname`` with every entry of ``filenames`` and return the
    resulting paths as a list (same order as ``filenames``).
    """
    full_paths = []
    for name in filenames:
        full_paths.append(os.path.join(dirname, name))
    return full_paths

In [None]:
# Read the directories of broken calculations.
# presumably each line looks like "<path>\tfs_cropped is empty" — TODO confirm against the file
paths = []
with open('/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/slice_ve38/broken', 'r') as f:
    for line in f:
        entry = line.rstrip('\n')
        # remove the annotation as a suffix; str.strip(chars) would treat it as a
        # set of characters and could also eat characters belonging to the path
        if entry.endswith('fs_cropped is empty'):
            entry = entry[:-len('fs_cropped is empty')]
        paths.append(entry.strip())

In [None]:
# Move the files of every broken calculation into a fresh run directory and
# repair the input file there.
# NOTE(review): paths[5:] skips the first five entries — presumably they were
# handled in an earlier session; confirm before re-running.
for p in paths[5:]:
    old_dir = p#paths[0]
    new_dir = make_new_dir_name(old_dir)
    #check_input(old_dir)
    # create new directory if not exists
    os.makedirs(new_dir, exist_ok=True)

    # generate filenames of source and destination
    fnames = make_filenames(old_dir)
    source_paths = generate_full_paths(old_dir, fnames)
    dest_paths = generate_full_paths(new_dir, fnames)
    # move files
    for source, dest in zip(source_paths, dest_paths):
        shutil.move(source, dest)

    # check input file (the function is called fix_input_file; the former
    # call to fix_input_files raised a NameError after the files had been moved)
    fix_input_file(new_dir)

In [None]:
# one newline-terminated follow-up directory per entry in `paths`
new_submit_paths = [make_new_dir_name(p) + '\n' for p in paths]

In [None]:
# write the list of directories to submit (entries already end with a newline)
with open('/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/slice_ve38/broken_submit', 'w') as f:
    f.writelines(new_submit_paths)

In [None]:
def check_convergence(file, linenumber):
    """
    Parse the last iteration line above the ``FINAL RESULTS`` line of a log file.

    The line 8 lines above ``linenumber`` is tried first, then the line 9 lines
    above. A line counts as parsed if its first whitespace-separated field is an
    integer and its second field is a float.

    Parameters
    ----------
    file : sequence of str
        The lines of the log file.
    linenumber : int
        Index of the ``FINAL RESULTS`` line in ``file``.

    Returns
    -------
    tuple
        ``(iteration, conv, True)`` on success, ``(None, None, False)`` if
        neither candidate line could be parsed.
    """
    for offset in (8, 9):
        index = linenumber - offset
        # a negative index would silently wrap around to the end of the log
        if index < 0 or index >= len(file):
            continue
        fields = file[index].split()
        try:
            iteration, conv = int(fields[0]), float(fields[1])
        except (IndexError, ValueError):
            continue
        return(iteration, conv, True)
    print("Could not parse log-file")
    return(None, None, False)

def analyse_logfile(logfile):
    """
    Determine the status and the CPU time of a calculation from its log file.

    Parameters
    ----------
    logfile : sequence of str
        The lines of the log file.

    Returns
    -------
    tuple
        ``(status, time)`` where ``status`` is ``'converged'``, ``'not converged'``
        or ``'broken'`` and ``time`` is the CPU time in seconds taken from the
        ``CPU TIME`` line. ``time`` is None if the status is ``'broken'`` or no
        ``CPU TIME`` line was found.
    """
    status = 'broken'
    cpu_time = None  # stays None when the log contains no CPU TIME line
    # determine status of calculation
    for i, line in enumerate(logfile):
        if ' JOB LIMIT TIME EXCEEDED FOR A NEW LOOP' in line:
            status = 'not converged'
        # FINAL RESULTS also appears in unconverged calculations but only after JOB LIMIT ..., so this should never be seen for unconverged calcs
        elif 'FINAL RESULTS' in line and not status == 'not converged':
            iteration, conv, success = check_convergence(logfile, i)
            if not success:
                status = 'broken'
            elif conv < 1e-6:
                status = 'converged'
            else:
                # terminated normally without running out of time, but did not converge
                status = 'not converged'
        elif 'CPU TIME' in line:
            # fields 3, 5 and 7 hold hours, minutes and seconds
            timeline = line.split()
            cpu_time = float(timeline[3])*3600 + float(timeline[5])*60 + float(timeline[7])

    if status == 'broken':
        cpu_time = None
    return(status, cpu_time)

In [None]:
# TODO(review): the compound list was left unfinished (`comps = ` is a syntax
# error); an empty list keeps the cell runnable until the real values are filled in.
comps = []
lambdas = ['ve_8', 've_15', 've_23', 've_30', 've_38']

In [None]:
# directory names of the lambda values
lambdas = [f've_{ve}' for ve in (8, 15, 23, 30, 38)]

In [None]:
# show the lambda values in reverse order
list(reversed(lambdas))

In [None]:
# Initialize powerful dict

In [None]:
# get and sort run dirs
# test initialization for small subset
# find out if just submitted

- list of all compounds
- make database
- update database

In [None]:
# every line of the `all_comps` file (newlines removed) is one entry
with open('/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/slice_ve38/all_comps', 'r') as f:
    all_comps = [line.strip('\n') for line in f]

In [None]:
# take the second-to-last '/'-separated component of every listed path
with open('/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/slice_ve38/paths_atomic_energies', 'r') as f:
    completed_comps = [line.split('/')[-2] for line in f]

In [None]:
# entries of all_comps that do not appear in completed_comps
new_comps = list(set(all_comps).difference(completed_comps))

In [None]:
def initialize_database(compounds, lam_vals=('ve_08', 've_15', 've_23', 've_30', 've_38')):
    """
    Create an empty bookkeeping table with one row per compound and lambda value.

    Parameters
    ----------
    compounds : iterable of str
        Compound names; they are processed in sorted order. The input itself
        is not modified.
    lam_vals : sequence of str, optional
        Lambda value labels; defaults to the five values used in this notebook.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``'<comp_name>:<lam_val>'`` with the columns ``comp_name``,
        ``lam_val``, ``status``, ``time`` and ``workdir``. Only ``comp_name`` and
        ``lam_val`` are filled; the other columns are left empty.
    """
    pairs = [(c, l) for c in sorted(compounds) for l in lam_vals]
    indices = [f'{c}:{l}' for c, l in pairs]
    database = pd.DataFrame(index = indices, columns=['comp_name', 'lam_val', 'status', 'time', 'workdir'])
    # assign whole columns; the former database.loc[i]['col'] = ... is chained
    # indexing and may write to a temporary copy instead of the frame
    database['comp_name'] = [c for c, _ in pairs]
    database['lam_val'] = [l for _, l in pairs]
    return(database)

In [None]:
def sort_rundirs(run_dirs):
    """
    Return ``run_dirs`` ordered by run number.

    The run number is the integer that follows the first three characters of
    the last path component (e.g. ``run12`` -> 12); ties are broken by the
    path string itself.
    """
    def run_key(path):
        return (int(path.split('/')[-1][3:]), path)

    return sorted(run_dirs, key=run_key)

def update_workdir(basepath, comp_name, lam_val):
    """
    Return ``<run dir>/<lam_val>`` for the highest-numbered run directory of
    ``comp_name`` under ``basepath`` that contains a ``lam_val`` subdirectory.

    Raises
    ------
    Exception
        If no run directory of the compound contains ``lam_val``.
    """
    run_dirs = sort_rundirs(glob.glob(os.path.join(basepath, f'{comp_name}/run*')))
    # walk from the highest run number downwards and stop at the first hit
    for run_dir in reversed(run_dirs):
        candidate = os.path.join(run_dir, lam_val)
        if os.path.isdir(candidate):
            return candidate
    raise Exception(f"There does not exist a directory for {comp_name}:{lam_val}")

### Find latest directory

In [None]:
basepath =  '/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/slice_ve38'

# NOTE(review): `database` is not created in any visible cell — presumably via
# initialize_database(...); confirm before running on a fresh kernel.
for i in database.index:
    comp_name = database.loc[i, 'comp_name']
    lam_val = database.loc[i, 'lam_val']
    # the table uses 've_08' while the directory on disk is looked up as 've_8'
    if lam_val == 've_08':
        lam_val = 've_8'
    # single .loc[row, col] assignment; database.loc[i]['workdir'] = ... is chained
    # indexing and may write to a temporary copy
    database.loc[i, 'workdir'] = update_workdir(basepath, comp_name, lam_val)

### Determine status of system

In [2]:
import os
import sys
# sys.path entries are used verbatim; '~' must be expanded explicitly or the
# directory is never found
sys.path.insert(0, os.path.expanduser('~/sahre0000/git_repositories/APDFT/prototyping/atomic_energies/hitp'))

import get_status_report

In [3]:
# directories to inspect, one per line of the `dirs` file
with open('/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/density_slices/dirs', 'r') as f:
    paths = [line.strip('\n') for line in f]

# pair every directory with the first value returned by get_status
status = []
for p in paths:
    s, t = get_status_report.get_status(p)
    status += [(p, s)]

### Rename cube-files

In [23]:
import sys
import os
import shutil
# sys.path entries are used verbatim; '~' must be expanded explicitly or the
# directory is never found
sys.path.insert(0, os.path.expanduser('~/sahre0000/git_repositories/APDFT/prototyping/atomic_energies/hitp'))
import get_status_report

In [24]:
# directories whose DENSITY.cube should be collected, one per line of `dirs`
with open('/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/density_slices/dirs', 'r') as f:
    paths = [line.strip('\n') for line in f]

for p in paths:
    parts = p.split('/')
    parent = '/'.join(parts[:-1])
    name = parts[-1]
    # collect the cube file in <parent>/cube-files/, named after the directory it came from
    os.makedirs(os.path.join(parent, 'cube-files'), exist_ok=True)
    src = os.path.join(p, "DENSITY.cube")
    dest = os.path.join(parent, f'cube-files/{name}.cube')
    shutil.move(src, dest)

In [21]:
# display the current contents of `paths`
paths

['/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/density_slices/dsgdb9nsd_000940/ve_07',
 '/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/density_slices/dsgdb9nsd_000940/ve_14',
 '/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/density_slices/dsgdb9nsd_000940/ve_22',
 '/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/density_slices/dsgdb9nsd_000940/ve_29',
 '/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/density_slices/dsgdb9nsd_000940/ve_36',
 '/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/density_slices/dsgdb9nsd_000949/ve_07',
 '/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/density_slices/dsgdb9nsd_000949/ve_14',
 '/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/density_slices/dsgdb9nsd_000949/ve_22',
 '/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/density_slices/dsgdb9nsd_000949/ve_29',
 '/scicore

### Load database

In [3]:
# Load the stored calculations table.
# NOTE(review): unpickling can execute arbitrary code — only load files you trust.
# NOTE(review): the file name is spelled "databse" here; presumably that matches
# the file on disk — confirm.
db = pd.read_pickle("/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/slice_ve38/calculations_databse")

In [None]:
# number of entries in `db` without a log file; the mask has to come from `db`
# itself (it was built from the unrelated `database` frame before)
len(db.loc[db['status']=='no logfile'])

### Restart calculations

In [4]:
# repair the input file of every calculation marked as not converged
not_converged = db['status'] == 'not converged'
for wd in db.loc[not_converged, 'workdir']:
    print(wd)
    fix_input_file(wd)

/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/slice_ve38/dsgdb9nsd_000229/run5/ve_8
Everything ok
/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/slice_ve38/dsgdb9nsd_000276/run2/ve_8
Everything ok
/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/slice_ve38/dsgdb9nsd_000276/run5/ve_15
Everything ok
/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/slice_ve38/dsgdb9nsd_000278/run5/ve_30
Everything ok
/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/slice_ve38/dsgdb9nsd_000291/run2/ve_15
Everything ok
/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/slice_ve38/dsgdb9nsd_000544/run5/ve_30
Everything ok
/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/slice_ve38/dsgdb9nsd_000545/run5/ve_30
Everything ok
/scicore/home/lilienfeld/sahre0000/projects/atomic_energies/results/slice_ve38/dsgdb9nsd_000814/run2/ve_15
Everything ok
/scicore/home/lilienfeld/sahre0000