In [None]:
import glob
from tqdm.auto import tqdm

import numpy as np

from ase import units
from ase.db import connect
from ase.calculators.singlepoint import SinglePointCalculator
from pymatgen.io.vasp import Vasprun


In [None]:
import os
import numpy as np
import warnings
from datetime import datetime

from pymatgen.core.structure import Structure
from pymatgen.io.vasp.outputs import Vasprun


def parse_steps(ionic_steps, number_of_steps=10):
    """
    Pick equally spaced ionic steps and keep only the quantities of interest.

    Args:
        ionic_steps (list): ionic-step dicts providing the keys
            'e_0_energy', 'forces', 'stress' and 'structure'.
        number_of_steps (int): number of equally spaced steps to pick; the
            first and the last step are always included. Defaults to 10.

    Returns:
        list: one new dict per selected step with the keys 'e_0_energy',
            'forces', 'stress' (input stress multiplied by -0.1) and
            'structure'. Empty when `ionic_steps` is empty. When fewer than
            `number_of_steps` steps are available, steps are repeated.
    """
    if len(ionic_steps) == 0:
        # linspace(0, -1, n) would yield indexes into an empty list.
        return []
    # Fractional positions are truncated towards zero to obtain list indexes.
    positions = np.linspace(
        start=0, stop=len(ionic_steps) - 1, num=number_of_steps
    )
    # Keep only: Energy, Force, Stress, Structure
    return [
        {
            'e_0_energy': step['e_0_energy'],
            'forces': step['forces'],
            # Sign flip and factor 0.1 to reach GPa; that factor corresponds
            # to kbar input (NOTE(review): the original comment said kPa).
            'stress': (np.array(step['stress']) * -0.1).tolist(),
            "structure": step["structure"],
        }
        for step in (ionic_steps[int(pos)] for pos in positions)
    ]


def _vasprun_timestamp(suffix):
    """
    Turn the text that follows "vasprun.xml" in a file name into a sortable
    (year, month, day, hour, minute) tuple.

    Expected layout: "_<hour>_<minute>on<day>_<month>_<year>", optionally
    followed by a ".<extension>". Raises IndexError or ValueError for any
    other layout.
    """
    time_part, date_part = suffix.split("on")[:2]
    date_fields = date_part.split("_")
    time_fields = time_part.split("_", 1)[1].split("_")
    return (
        int(date_fields[2].split(".")[0]),  # year
        int(date_fields[1]),  # month
        int(date_fields[0]),  # day
        int(time_fields[0]),  # hour
        int(time_fields[1]),  # minute
    )


def sort_vaspruns_by_date(list_of_vasprun_files):
    """
    Sort vasprun file names from oldest to newest.

    The date is read from the part of the name that follows "vasprun.xml".
    Names without a parsable date (typically the plain "vasprun.xml" of the
    latest run) are treated as the most recent and placed last, in the order
    they were given.

    Args:
        list_of_vasprun_files (list): vasprun file names.

    Returns:
        list | None: the given names ordered oldest first, or None (after
            printing a message) when no file name was given.
    """
    if not list_of_vasprun_files:
        print("No vasprun files found!")
        return None

    dated = []
    undated = []
    for name in list_of_vasprun_files:
        try:
            stamp = _vasprun_timestamp(name.split("vasprun.xml")[-1])
        except (IndexError, ValueError):
            # No (valid) date in the name: only return names that really
            # exist in the input instead of assuming a "vasprun.xml".
            undated.append(name)
        else:
            dated.append((stamp, name))
    # Sort on the timestamp only; the sort is stable, so files sharing a
    # timestamp keep their input order.
    dated.sort(key=lambda pair: pair[0])
    return [name for _, name in dated] + undated


def parse_distortion_vaspruns(defect_path, dist):
    """
    Collect the electronically converged ionic steps of one distortion folder.

    Only the first and the last file returned by `sort_vaspruns_by_date`
    (oldest and newest run) are read. An ionic step counts as converged when
    it took fewer electronic steps than NELM. A file that cannot be read, or
    whose parameters lack NELM, is reported on stdout and skipped.

    Args:
        defect_path (str): path of the defect folder.
        dist (str): name of the distortion sub-folder inside `defect_path`.

    Returns:
        list | None: converged ionic steps without duplicates (possibly
            empty), or None when the folder holds no vasprun file.
    """
    dist_path = os.path.join(defect_path, dist)
    # Get all vasprun files in the folder
    vasprun_files = [f for f in os.listdir(dist_path) if "vasprun.xml" in f]
    # Sort by date (oldest first)
    sorted_vasprun_files = sort_vaspruns_by_date(vasprun_files)
    if not sorted_vasprun_files:
        return None
    print(f"Dist {dist}, files to parse:", sorted_vasprun_files)

    # Oldest and newest file; dict.fromkeys drops the repeat when only one
    # file exists, so it is not parsed twice.
    files_to_parse = list(
        dict.fromkeys([sorted_vasprun_files[0], sorted_vasprun_files[-1]])
    )
    all_ionic_steps = []
    for vasprun_file in files_to_parse:
        vasprun_path = os.path.join(dist_path, vasprun_file)
        try:
            vasprun = Vasprun(
                filename=vasprun_path,
                parse_potcar_file=False,
                parse_dos=False,
                parse_eigen=False,
                exception_on_bad_xml=False,
            )
            # Calc may have run out of time -> last step not converged even
            # though n_elec_steps < NELM, so drop it explicitly.
            if not vasprun.converged_electronic:
                print(f"Calculation did not converge for {vasprun_file}. Removing last ionic step.")
                ionic_steps_tmp = vasprun.ionic_steps[:-1]
            else:
                ionic_steps_tmp = vasprun.ionic_steps
            # Read NELM
            try:
                nelm = vasprun.parameters["NELM"]
            except KeyError as err:
                raise ValueError("NELM not found in vasprun parameters") from err

            # Filter electronically-converged ionic steps
            for ionic_step in ionic_steps_tmp:
                if len(ionic_step["electronic_steps"]) < nelm:  # Converged
                    # Skip steps already collected (repeat vasprun files)
                    if ionic_step not in all_ionic_steps:
                        all_ionic_steps.append(ionic_step)
                    else:
                        print(f"Ionic step already present in {vasprun_file}")
        except Exception:
            # Best effort: report the unreadable file and move on.
            # KeyboardInterrupt/SystemExit are no longer swallowed.
            print(f"Error reading {vasprun_path} file")
    return all_ionic_steps


def parse_defects(
    base_path="../../02_Defects/01_Training",
    calculated_compositions=None,
    compositions=None,
):
    """
    Parse the ionic steps of all neutral defects found under `base_path`.

    The expected folder layout is base_path/<composition>/<defect>/<distortion>.
    Only defect folders whose name ends with "_0" are parsed.

    Args:
        base_path (str): folder holding one sub-folder per host composition.
        calculated_compositions (list | None): compositions to skip.
            Defaults to None (skip nothing).
        compositions (list | None): compositions to parse. Defaults to None
            (every entry of `base_path`).

    Returns:
        dict: maps "<composition>_<defect>" to a dict that maps each
            distortion name to the non-empty list of ionic steps returned by
            `parse_distortion_vaspruns`.
    """
    skipped = calculated_compositions or ()
    if not compositions:
        compositions = os.listdir(base_path)
    data = {}
    for comp in os.listdir(base_path):
        comp_path = os.path.join(base_path, comp)
        if (
            not os.path.isdir(comp_path)
            or comp in skipped
            or comp not in compositions
        ):
            continue
        print("\n", comp)
        for defect in tqdm(os.listdir(comp_path)):
            defect_path = os.path.join(comp_path, defect)
            # Neutral defects only
            if not (os.path.isdir(defect_path) and defect.endswith("_0")):
                continue
            print(defect)
            steps_by_distortion = {}
            data[f"{comp}_{defect}"] = steps_by_distortion
            # Loop over distortions
            for dist in os.listdir(defect_path):
                if not os.path.isdir(os.path.join(defect_path, dist)):
                    continue
                ionic_steps = parse_distortion_vaspruns(defect_path, dist)
                # Distortions without vasprun files or converged steps
                # are left out.
                if ionic_steps:
                    steps_by_distortion[dist] = ionic_steps
    return data


def select_steps(data, number_of_steps=10):
    """
    Keep `number_of_steps` equally spaced ionic steps for every distortion.

    Args:
        data (dict): defect -> distortion -> list of ionic steps.
        number_of_steps (int): number of steps kept per distortion; the first
            and the last step are always included. Defaults to 10.

    Returns:
        dict: same nesting as `data`, each list replaced by the selected
            steps (steps repeat when a list is shorter than
            `number_of_steps`).
    """
    selected_data = {}
    for defect_name, distortions in data.items():
        picked_by_dist = {}
        for dist_name, steps in distortions.items():
            # Fractional positions are truncated towards zero.
            positions = np.linspace(
                start=0, stop=len(steps) - 1, num=number_of_steps
            )
            picked_by_dist[dist_name] = [steps[int(pos)] for pos in positions]
        selected_data[defect_name] = picked_by_dist
    return selected_data


def save_ionic_indexes(data, number_of_steps=10):
    """
    Compute the indexes of `number_of_steps` equally spaced ionic steps for
    every distortion.

    Args:
        data (dict): defect -> distortion -> list of ionic steps.
        number_of_steps (int): number of indexes per distortion. Defaults
            to 10.

    Returns:
        dict: defect -> distortion -> list of int indexes, running from 0 to
            len(steps) - 1.
    """
    return {
        defect_name: {
            dist_name: [
                int(pos)  # truncate the fractional position towards zero
                for pos in np.linspace(
                    start=0, stop=len(steps) - 1, num=number_of_steps
                )
            ]
            for dist_name, steps in distortions.items()
        }
        for defect_name, distortions in data.items()
    }


def refactor_to_lists_with_defect(data):
    """
    Flatten the nested data dict (defect -> distortion -> ionic steps) into
    parallel lists.

    Only steps with a negative 'e_0_energy' are kept. Stresses are
    multiplied by -0.1 (sign flip and conversion to GPa).

    Returns:
        dict: keys "energies", "forces", "stresses", "structures" and
            "defects"; entry i of every list belongs to the same ionic step.
    """
    energies, forces, stresses, structures, defect_names = [], [], [], [], []
    for defect_name, distortions in data.items():
        for steps in distortions.values():
            for step in steps:
                # Skip steps whose energy is not negative
                if not step["e_0_energy"] < 0:
                    continue
                energies.append(step["e_0_energy"])
                forces.append(step["forces"])
                scaled_stress = np.array(step['stress']) * -0.1
                stresses.append(scaled_stress.tolist())
                structures.append(step["structure"])
                defect_names.append(defect_name)
    return {
        "energies": energies,
        "forces": forces,
        "stresses": stresses,
        "structures": structures,
        "defects": defect_names,
    }

In [24]:
from collections import defaultdict

db = connect('HSE06_.db')

vrun_files = glob.glob("./bulk_primitive_folders/*/vasprun.xml")

# Store the last ionic step of every converged bulk calculation as a
# 'pristine' row of the database.
for file in tqdm(vrun_files):

    # The name of the parent folder is used as the host composition label.
    composition = file.split('/')[-2]

    try:
        vrun = Vasprun(
            file,
            parse_potcar_file=False,
            parse_dos=False,
            parse_eigen=False,
        )
        # Explicit checks instead of `assert`: they survive `python -O` and
        # give the handler below a message to print.
        if not vrun.converged_electronic:
            raise RuntimeError("calculation is not electronically converged")
        if not vrun.converged_ionic:
            raise RuntimeError("calculation is not ionically converged")

        last_step = vrun.ionic_steps[-1]
        energy = last_step['e_0_energy']
        # Sign flip, then x0.1 and ASE's GPa unit constant
        stress = np.array(last_step['stress']) * -0.1 * units.GPa
        forces = np.array(last_step['forces'])
        atoms = last_step['structure'].to_ase_atoms()

        atoms.calc = SinglePointCalculator(
            atoms=atoms,
            energy=energy,
            forces=forces,
            stress=stress
        )

        # reserve() returns None when a row with these key-value pairs
        # already exists; otherwise it creates an empty placeholder row.
        row_id = db.reserve(host=composition, functional='HSE06', defect='pristine')

        if row_id is None:
            continue

        # Fill the reserved row. Writing without `id` would add a second row
        # and leave the placeholder behind with no atoms.
        db.write(
            atoms,
            id=row_id,
            host=composition,
            functional='HSE06',
            defect='pristine',
        )

    except Exception as e:
        # Best effort: report the problem and the file, then carry on.
        print(e)
        print(file)

  0%|          | 0/68 [00:00<?, ?it/s]

no element found: line 5804, column 0
./bulk_primitive_folders/BiSI/vasprun.xml


In [None]:
# Parse the ionic steps of the neutral defects stored under the training folder.
defects = parse_defects(base_path="./defect_relaxations/01_Training/")

In [23]:
# Display the distortion names held by `distortions`. The name is not assigned
# in this cell (NOTE(review): presumably left over from the loop over
# `defects.items()` in another cell — confirm the execution order).
distortions.keys()

dict_keys(['Bond_Distortion_20.0%', 'Unperturbed', 'Bond_Distortion_40.0%', 'Bond_Distortion_-10.0%', 'Bond_Distortion_0.0%', 'Bond_Distortion_-60.0%', 'Bond_Distortion_-20.0%', 'Bond_Distortion_30.0%', 'Bond_Distortion_10.0%', 'Dimer', 'Bond_Distortion_-40.0%', 'Bond_Distortion_-50.0%', 'Bond_Distortion_50.0%', 'Bond_Distortion_60.0%', 'Bond_Distortion_-30.0%'])

In [25]:
# For every defect/distortion pair, write two database rows: the first parsed
# ionic step (relaxed=False) and the last one (relaxed=True).
for defect, distortions in defects.items():
    # Keys look like "<host>_<defect name>": split at the first underscore.
    name_parts = defect.split('_')
    host_name = name_parts[0]
    defect_name = '_'.join(name_parts[1:])

    for distortion in distortions:
        for step_index, is_relaxed in ((0, False), (-1, True)):

            step = distortions[distortion][step_index]

            energy = step['e_0_energy']
            # Sign flip, then x0.1 and ASE's GPa unit constant
            stress = np.array(step['stress']) * -0.1 * units.GPa
            forces = np.array(step['forces'])
            atoms = step['structure'].to_ase_atoms()

            atoms.calc = SinglePointCalculator(
                atoms=atoms,
                energy=energy,
                forces=forces,
                stress=stress
            )

            db.write(
                atoms,
                host=host_name,
                functional='HSE06',
                defect=defect_name,
                distortion=distortion,
                relaxed=is_relaxed
            )
        

In [16]:

db = connect('HSE06.db')

# Collect the ids of all rows whose structure holds no atoms, then remove
# those rows in a single call.
ids = []
for row in db.select():
    if len(row.toatoms()) == 0:
        ids.append(row.id)

db.delete(ids)