# AIRSS Workflow
### Workflow that generates AIRSS metal-chalcogen inorganic structures, charge balances them with H, calculates their energetics, and ranks the structures by their energetics

## Import libraries, global variables

In [1]:
import numpy as np
import os
import ase
import subprocess as sp
from ase.io import read, write
import ase.neighborlist as nl
from ase import Atoms
from pymatgen.io.vasp.inputs import Kpoints, Poscar
from pymatgen.io.vasp.sets import MPRelaxSet
from pymatgen.core import Structure
from pymatgen.io.vasp import Vasprun
import time
import pandas as pd
import warnings

warnings.filterwarnings('ignore')

# path = "/Users/adrianaladera/Desktop/MIT/research/MOChAs/airss/AgS_test/" # local
path = "/pscratch/sd/a/aladera/airss/AgS/var2/"
os.chdir(path)
metal, chalc = 'Ag', 'S'
seed = metal + chalc
path = "/pscratch/sd/a/aladera/airss/{}/var2/".format(seed)
incar_settings_1 = {"NELMIN": 5, "ALGO": 'Normal', "ISPIN": 1, "ISMEAR": 0, "NSW": 4, "ENCUT": 600, "EDIFF" : 1e-4, "EDIFFG" : -0.001, "KPAR": 1, "NCORE": 32, "SIGMA" : 0.01, "NELM" : 150}  # user custom incar settings
incar_settings_2 = {"NELMIN": 5, "ALGO": 'Normal', "ISPIN": 1, "ISMEAR": 0, "NSW": 20, "ENCUT": 700, "EDIFF" : 1e-6, "EDIFFG" : -0.001, "KPAR": 1, "NCORE": 32, "SIGMA" : 0.01}  # user custom incar settings

## Helper Functions

In [2]:
def generate_structs(path, n_structures, seed):
    '''Generates structures with AIRSS and converts from .res to .vasp
    - path: path to main directory storing all AIRSS generated structures
    - n_structures: max number of structures to be generated
    - seed: <seed>.cell, source file for generating structures'''
    sp.run("airss.pl -vasp -build -max {} -seed {}".format(n_structures, seed), shell=True)
    cunt = 0
    for file in os.listdir(path):
        if ".res" in file:
            struct = read(file)
            write("{}.vasp".format(cunt+1), struct)
            cunt += 1
    sp.run("rm *.res", shell=True)
    print("{} VASP files generated".format(cunt))

def attach_hydrogens(path, metal, chalcogen, met_chalc_dist, chalc_H_dist, file):
    mocha = Structure.from_file("{}{}".format(path,file))
    ind_largest_v = np.argmax(mocha.lattice.abc)
    positions = []
    for atom in range(len(mocha)):
        if str(mocha[atom].species) == "{}1".format(chalcogen):
            pos = np.array(mocha[atom].coords) # coords are Cartesian
            neighbors = mocha.get_neighbors(site = mocha[atom], r = met_chalc_dist)
            for n in neighbors:
                flag = 0
                if str(n.species) == "{}1".format(metal):
                    flag = 1
                    if mocha[atom].coords[ind_largest_v] > n.coords[ind_largest_v]: # chalc above metal
                        pos[ind_largest_v] += chalc_H_dist  # add H above chalc
                    elif mocha[atom].coords[ind_largest_v] < n.coords[ind_largest_v]: # chalc below metal
                        pos[ind_largest_v] -= chalc_H_dist  # add H below chalc
                if flag:
                    break
            positions.append(pos)
    for p in positions:
        mocha.append('H', p, coords_are_cartesian=True)
    poscar = Poscar(mocha)
    poscar.write_file("{}{}{}H-{}".format(path, metal, chalcogen, file))

def create_job_dirs(path, seed): 
    '''
    - relax 4 x NSW = 4
    - then relax NSW = 20, get energies but no calculations (see Aria's notebook)
    - pick a subset based on energies and how many times each structures are repeated (based on space group)
    - relax subset until convergence + run static; final structures
    '''
    job_dirs = []
    for file in os.listdir(path):
        if ".vasp" in file and seed in file:
            sp.run("mkdir {} ".format(file[:-5]), shell=True)
            job_dirs.append(str(file[:-5]))
            sp.run("cp {} {}/POSCAR".format(file, file[:-5]), shell=True) # POSCAR
            sp.run("cp {} {}/POSCAR_0".format(file, file[:-5]), shell=True) # POSCAR
            sp.run("pot".format(file[:-5]), shell=True) # POINTS
            structure = Structure.from_file("{}/POSCAR".format(file[:-5])) # cp POSCAR, make KPOINTS, INCAR
            relax = MPRelaxSet(structure, user_incar_settings=incar_settings_1)
            relax.write_input("{}".format(file[:-5]), potcar_spec=False)
            sp.run("python pot.py {}/POSCAR".format(file[:-5]), shell=True)
        elif ".vasp" in file and seed not in file:
            sp.run("rm {}".format(file), shell=True)
    return job_dirs

def is_converged(path, seed, ionic_steps):
    '''ionic_steps - number of steps (AIRSS needs only 3 or 4 steps, considered "converged")'''
    if os.path.isfile("{}/job.out".format(path)):
        sp.run("grep \"{} F=\" {}/job.out | awk '{}print $1{}' > {}/ionic_steps".format(ionic_steps, path, '{','}', path), shell=True)
        with open("{}/ionic_steps".format(path), "r") as f:
            if str(ionic_steps) in f.read():
                return 1
            else:
                return 0
        
def is_running(path, dir_name):
    '''Checks to see if a job is currently running in a directory. If so, the job is not overridden'''
    # os.chdir(path)
    os.system("squeue -u aladera --format \"%Z\" > running_jobs")
    with open("running_jobs", 'r') as f:  
        if dir_name in f.read():
            return 1
        else:
            return 0

## Main code that calls all the functions
### Do not run until doing another test on NERSC!

In [7]:
generate_structs(path, 10, seed)

10 VASP files generated


In [34]:
generate_structs(path, 10, seed)

# for each structure, take generated structures and attach H, then write to new vasp file
for file in os.listdir(path):
    if seed not in file and ".vasp" in file:
        attach_hydrogens(path, metal, chalc, 3.0, 1.3, file)
        sp.run("rm {}".format(file), shell=True)
job_dirs = create_job_dirs(path, seed)

10 VASP files generated


/bin/sh: pot: command not found
python: can't open file 'pot.py': [Errno 2] No such file or directory
/bin/sh: pot: command not found
python: can't open file 'pot.py': [Errno 2] No such file or directory
/bin/sh: pot: command not found
python: can't open file 'pot.py': [Errno 2] No such file or directory
/bin/sh: pot: command not found
python: can't open file 'pot.py': [Errno 2] No such file or directory
/bin/sh: pot: command not found
python: can't open file 'pot.py': [Errno 2] No such file or directory
/bin/sh: pot: command not found
python: can't open file 'pot.py': [Errno 2] No such file or directory
/bin/sh: pot: command not found
python: can't open file 'pot.py': [Errno 2] No such file or directory
/bin/sh: pot: command not found
python: can't open file 'pot.py': [Errno 2] No such file or directory
/bin/sh: pot: command not found
python: can't open file 'pot.py': [Errno 2] No such file or directory
/bin/sh: pot: command not found
python: can't open file 'pot.py': [Errno 2] No suc

In [15]:
# run the calculations
cunt = 0
for j in job_dirs:
    if os.path.isdir("{}{}".format(path, j)):
        os.chdir("{}{}".format(path, j))
        os.system("vrun 1 32 4 {} > submit_note".format(j));
        os.chdir(path)
        cunt += 1
print("{} {} jobs submitted".format(cunt, seed))

0 AgS jobs submitted


### Only do this for the first round of convergence 4x4
Will overwrite with new CSV of other converged structures otherwise

In [7]:
converged = []
unconverged = []
for i in os.listdir(path):
    if os.path.isdir(i) and seed in i:
        if is_converged("{}{}".format(path, i), seed, 4):
            converged.append(i)
        else:
            unconverged.append(i)
            
print("Converged: {}, Unconverged: {}".format(len(converged), len(unconverged)))
# dfc = pd.DataFrame({"converged":converged})
# dfu = pd.DataFrame({"unconverged":unconverged})
# dfc.to_csv("{}1st_round_converged.csv".format(path), index=False)
# dfu.to_csv("{}1st_round_unconverged.csv".format(path), index=False)

Converged: 698, Unconverged: 302


### Checking how many structures are in the queue

In [3]:
# cunt = 1
# while cunt < 10:
    # time.sleep(14400)
running, queued = 0,0
# dtf = pd.read_csv("{}1st_round_converged.csv".format(path))
# converged = dtf["converged"]
for i in range(len(converged)):
    # os.chdir(path + converged[i])
    if is_running(path, converged[i]):
        running += 1
    else:
        os.chdir(path + converged[i])
        queued += 1
        os.system("cp CONTCAR POSCAR")
        for j in range(1,5):
            if not os.path.exists("POSCAR_{}".format(j)):
                os.system("cp CONTCAR POSCAR_{}".format(j))
                break
        os.system("vrun 1 32 5 {} > submit_note".format(converged[i]))
        os.chdir(path)
print("{} structures already running, {} structures queued".format(running, queued))

85 structures already running, 856 structures queued


In [None]:
# now we check the convergence again and run the incar_settings_2
converged_calcs, unconverged_calcs = check_convergence(path, seed, 4)

for i in range(len(converged_calcs)):
    os.chdir(path + converged_calcs[i])
    os.system("cp CONTCAR POSCAR_4")
    relax_2 = MPRelaxSet.from_prev_calc(".", user_incar_settings=incar_settings_2)
    relax_2.write_input("{}".format(converged_calcs[i]), potcar_spec=True)

# We need to get the energies of the structures that are converged and rank them from lowest to highest

converged_calcs = check_convergence(PATH, 20, structures)

for i in range(len(converged_calcs)):
    energies = []
    os.chdir(PATH + f"struc_{converged_calcs[i]}" + f"r2_struc_{converged_calcs[i]}") # need to change this to the correct path
    energy = Vasprun("vasprun.xml").final_energy #write energy of each structure
    energies.append(energy)

# now match the energy list and the structure name as a dataframe and sort the energies from lowest to highest

import pandas as pd

df = pd.DataFrame(list(zip(converged_calcs, energies)), columns = ["structure", "energy"])
df.sort_values(by=["energy"], inplace=True)
#more proper relaxations after analysis