In [None]:
import os
import re
import io
from pathlib import Path
import shutil
import subprocess
from scipy.constants import physical_constants, angstrom
import numpy as np
from numpy.linalg import norm
import xml.etree.ElementTree as et
import matplotlib.pyplot as plt

from ase.io import read,write
from ase.visualize import view,ngl
from ase.build import molecule
from ase.spacegroup import crystal
from ase.build import bulk
from ase.dft.kpoints import get_special_points, bandpath
import nglview as nv

### Preparation of workspace

In [None]:
clean = True  # When True, every pre-existing task directory is wiped and recreated
current_dir = Path.cwd().absolute()

# One working directory per task, created next to the notebook's current directory.
for task in ['TASK_0', 'TASK_0b', 'TASK_0c', 'TASK_2', 'TASK_3', 'TASK_4', 'TASK_free']:
    task_dir = current_dir / task

    if task_dir.exists():
        if not clean:
            # Keep the directory and whatever it already contains.
            continue
        shutil.rmtree(task_dir)  # removes the directory together with all its contents
        print(f"Removed existing directory: {task_dir}")

    task_dir.mkdir()
    print(f"Directory created: {task_dir}")

In [None]:
# Location of the pw.x executable that run_pw launches.
# NOTE(review): absolute path tied to one specific environment; adapt it when running elsewhere.
pw_exec=Path('/home/jovyan/.conda/envs/quantum-espresso-7.4/bin/pw.x')

In [None]:
# Bohr radius expressed in angstrom: the scipy value divided by scipy's `angstrom` constant
bohr_radius = physical_constants['Bohr radius'][0]/angstrom

In [None]:
def view_structure(structure,myvec=[]):
    """Build an nglview widget showing `structure` with its unit cell.

    The widget gets a ball-and-stick representation, black labels showing the
    atom index, and a blue spacefill representation restricted to `myvec`.

    Parameters:
      - structure: object wrapped with nv.ASEStructure
      - myvec: forwarded unchanged as the `selection` of the spacefill
        representation (presumably a list of atom indices; verify against
        nglview's selection rules)

    Returns:
      - the configured nv.NGLWidget
    """
    widget = nv.NGLWidget(nv.ASEStructure(structure), gui=True)
    widget.add_unitcell()
    widget.add_ball_and_stick()
    widget.add_representation('label', label_type='atomindex', color='black')
    widget.add_representation('spacefill', selection=myvec, color="blue", radius=0.5)
    return widget

### Function to convert into a multiline string the atomic coordinates and the cell of an ASE Atoms object

In [None]:
def atoms2string(atoms):
    """Render the cell vectors and the atomic positions of `atoms` as text.

    Parameters:
      - atoms: iterable of atoms exposing `.symbol` and `.index`, which also
        provides `get_scaled_positions()` and `get_cell()` (e.g. an ASE Atoms
        object)

    Returns:
      - (cellvectors, positions): two multiline strings. `cellvectors` has one
        "x y z" line per cell vector; `positions` has one
        "symbol x y z" line per atom, using the scaled positions.
    """
    # Fetch the scaled positions once instead of recomputing the full array for every atom.
    scaled_positions = atoms.get_scaled_positions()

    position_lines = []
    for atom in atoms:
        x, y, z = scaled_positions[atom.index]
        position_lines.append(f"{atom.symbol} {x} {y} {z}")
    positions = "\n".join(position_lines)

    cell_lines = [f"{vector[0]} {vector[1]} {vector[2]}" for vector in atoms.get_cell()]
    cellvectors = "\n".join(cell_lines)
    return cellvectors ,positions

### Function to extract from a QE output the total energy

In [None]:
def parse_energy(pw_out):
    """Return the last total energy reported in a pw.x output string.

    Only lines of the form "!  total energy = <number>" are considered; the
    number is returned as a float exactly as printed (no unit conversion).

    Raises:
      - IndexError if `pw_out` contains no such line.
    """
    energy_re = re.compile(r"!\s+total energy\s+=\s+(-?\d+\.\d+)")
    matches = energy_re.findall(pw_out)
    last_match = matches[-1]
    return float(last_match)

### Function responsible for parsing the XML output file of QE. It reads the XML file at the provided path and retrieves the Fermi energy, the k-points, the KS eigenvalues needed to plot the bands, and the reciprocal lattice vectors in units of $2\pi/a$

In [None]:
def read_qe_xml(xml_file):
    """
    Reads data from QE bands calculations (new XML)

    Parameters:
      - xml_file: path or file object accepted by ElementTree's parse()

    Returns a dict with:
      - 'kpts'[i_kpt] = [kx, ky, kz] in [2*pi/a]
      - 'eigvals'[i_kpt, i_band] in [eV]
      - 'fermi_en' in [eV]
      - 'rep_lat': 3x3 array whose rows are the b1, b2, b3 reciprocal vectors
      - 'alat': the `alat` attribute of <atomic_structure>, as a float
    """
    # Factor applied to the energies stored in the XML to obtain eV.
    hartree_to_ev = 27.21138602

    output_node = et.parse(xml_file).getroot().find('output')
    band_node = output_node.find('band_structure')

    # Only the elements that are actually returned are read, so an XML file
    # lacking unrelated entries (e.g. <spinorbit>) can still be parsed.
    fermi_en = float(band_node.find('fermi_energy').text) * hartree_to_ev

    kpts = []
    eigvals = []
    for ks_node in band_node.findall('ks_energies'):
        kpts.append(np.array(ks_node.find('k_point').text.split(), dtype=float))
        eigs = np.array(ks_node.find('eigenvalues').text.split(), dtype=float)
        eigvals.append(eigs * hartree_to_ev)
    kpts = np.array(kpts)
    eigvals = np.array(eigvals)

    rec_lat_node = output_node.find('basis_set').find('reciprocal_lattice')
    rep_lat = np.array([
        np.array(rec_lat_node.find(tag).text.split(), dtype=float)
        for tag in ('b1', 'b2', 'b3')
    ])

    return {'kpts':kpts,'eigvals': eigvals,'fermi_en': fermi_en,'rep_lat':rep_lat,
            'alat':float(output_node.find('atomic_structure').attrib['alat'])}

In [None]:
def add_x_tick(x_ticks,label,dkm_sum):
    """Register a tick at position `dkm_sum` under `label` in `x_ticks`.

    The dict is keyed by label, so a label that is already present is padded
    with trailing spaces until the key is unused. This keeps every occurrence
    of a repeated label instead of overwriting an earlier one.

    Parameters:
      - x_ticks: dict mapping tick label -> position; modified in place
      - label: text of the new tick
      - dkm_sum: position of the new tick

    Returns:
      - the same `x_ticks` dict
    """
    key = label
    while key in x_ticks:
        key += ' '
    x_ticks[key] = dkm_sum
    return x_ticks
    

### Function to retrieve from the eigenvalues the ones belonging to a specific k-point that is provided in crystal coordinates

In [None]:
def get_eig_k(kpt_cryst,rep_lat,kpts,eigvals):
    """Return the eigenvalues stored for the k-point `kpt_cryst`.

    `kpt_cryst` is multiplied by `rep_lat` and compared with every row of
    `kpts` using an absolute tolerance of 0.001; the row of `eigvals`
    belonging to the first matching k-point is returned.

    Raises:
      - IndexError if no row of `kpts` matches.
    """
    target = kpt_cryst @ rep_lat
    componentwise_close = np.isclose(kpts, target, atol=0.001)
    # Row numbers for which all three components agree with the target.
    hits = np.flatnonzero(componentwise_close.all(axis=1))
    return eigvals[hits][0]

### This function plots the eigenvalues wrt kpoints producing a bandplot. The path labels provided in input are used.

In [None]:
def plot_bands(bands_path,kpts,eigvals,fermi_en,emin,emax):
    """Plot the eigenvalues, shifted by `fermi_en`, along the k-point path.

    The x axis is the cumulative distance between consecutive k-points. A tick
    is placed at the first k-point, at every k-point where the step vector
    differs from the previous one (absolute tolerance 0.001), and at the last
    k-point. Tick labels are taken from `bands_path` in order.

    Parameters:
      - bands_path: sequence of labels, e.g. 'LGXWL'
      - kpts: k-points in path order (at least two)
      - eigvals: eigenvalues, one row per k-point
      - fermi_en: energy subtracted from `eigvals` before plotting
      - emin, emax: limits of the energy axis
    """
    path_length = 0.0
    k_axis = [path_length]
    tick_positions = {bands_path[0]:0}
    next_label = 1
    previous_step = kpts[1]-kpts[0]

    for k_prev, k_curr in zip(kpts[:-1], kpts[1:]):
        step = k_curr-k_prev
        # A change of direction marks a special point; record it before
        # path_length is advanced so the tick sits on the corner itself.
        if not np.allclose(step,previous_step,atol=0.001):
            add_x_tick(tick_positions,bands_path[next_label],path_length)
            next_label+=1
        previous_step = step
        path_length += np.linalg.norm(step)
        k_axis.append(path_length)

    # The end of the path always gets the next label.
    add_x_tick(tick_positions,bands_path[next_label],path_length)

    plt.figure(figsize=(6, 10))
    plt.plot(k_axis, eigvals-fermi_en, 'b', lw=2.0)
    plt.xlim([0.0, np.max(k_axis)])
    plt.ylim(emin, emax)
    plt.xticks(list(tick_positions.values()), list(tick_positions.keys()))
    for tick_x in list(tick_positions.values()):
        plt.axvline(tick_x, color='gray')
    plt.ylabel("energy [eV]")
    plt.show()
#plt.savefig("./task3_bands.png", dpi=200)

### The function executes the pw executable (path provided by pw_exec) using the input string "pw_in". The output file and standard QE files are written in workdir

In [None]:
def run_pw(pw_in, pw_exec, workdir, file_out,live=False):
    """Run pw.x on the input text `pw_in` and return everything it printed.

    The input is written to `workdir/pw.inp`, the executable is started as
    `pw_exec -in workdir/pw.inp`, and its stdout (with stderr merged in) is
    written line by line to `workdir/file_out`.

    Unless the input requests a 'bands' calculation, `workdir/mycalc.xml` and
    `workdir/mycalc.save` are removed first, so a bands run can reuse the data
    of the preceding run while any other run starts clean.

    Parameters:
      - pw_in: full text of the pw.x input file
      - pw_exec: path of the pw.x executable
      - workdir: directory for input and output files (created if missing)
      - file_out: name of the output file inside `workdir`
      - live: if True, also print each output line as it arrives

    Returns:
      - the complete output as a single string

    The exit status of the executable is not checked.
    """
    workdir = Path(workdir)
    file_out = workdir / file_out
    pw_exec = Path(pw_exec)
    pw_inp = workdir / 'pw.inp'

    # Ensure workdir exists
    workdir.mkdir(parents=True, exist_ok=True)

    # Look only at the value assigned to `calculation`: a plain substring test
    # on the whole input would also hit paths that happen to contain "bands".
    is_bands_run = re.search(r"calculation\s*=\s*['\"]\s*bands\s*['\"]", pw_in) is not None

    # Clean previous calculation data unless this is a bands run
    if not is_bands_run:
        (workdir / 'mycalc.xml').unlink(missing_ok=True)
        shutil.rmtree(workdir / 'mycalc.save', ignore_errors=True)

    # Write pw_in to the input file
    pw_inp.write_text(pw_in)

    output = []
    # Popen as a context manager closes the pipe and waits for the process on exit.
    with file_out.open('w') as output_file, subprocess.Popen(
        [str(pw_exec), '-in', str(pw_inp)],  # Pass input file as argument
        stdout=subprocess.PIPE,              # Capture stdout
        stderr=subprocess.STDOUT,            # Merge stderr with stdout
        text=True,
    ) as process:
        for line in process.stdout:
            if live:
                print(line, end='')          # Optionally print to console
            output_file.write(line)          # Write to file
            output.append(line)

    # Return the full output as a string
    return ''.join(output)

### We use this template for QE input to compute the equilibrium charge density and the band structure. Depending on the type of calculation, the set of k-points changes from a 3D grid (scf calculation) to a 1D path (bandstructure).

In [None]:
def pw_input(atoms,calc_type,kpt_mesh,workdir,ecutwfc,ecutrho):
    """Build the text of a pw.x input file for `atoms`.

    Parameters:
      - atoms: ASE Atoms object; its cell and scaled positions are written out
      - calc_type: value of `calculation` (e.g. 'scf', 'bands', 'vc-relax')
      - kpt_mesh: for calc_type == 'bands', a string "<path> <npoints>" such
        as 'GWXL 100'; otherwise the text placed below "K_POINTS automatic"
        (e.g. '5 5 5 1 1 1')
      - workdir: used for `outdir`; `pseudo_dir` is set to its parent
      - ecutwfc, ecutrho: values written to the &system namelist

    Returns:
      - the input file as a string

    The template is written for a single species (ntyp=1) and a fixed Si
    pseudopotential file name; adapt it for other elements.
    """
    cell,positions = atoms2string(atoms)
    automatic='automatic'
    kpts = kpt_mesh
    if calc_type=='bands':
        automatic='crystal'
        kpoints=kpt_mesh.split() # splits 'GWXL 100' into 'GWXL' , 100
        path = atoms.cell.bandpath(kpoints[0], npoints=int(kpoints[1]))

        kpt_lines = [f"{kpt[0]:.8f} {kpt[1]:.8f} {kpt[2]:.8f} 1" for kpt in path.kpts]
        # The count on the first line must equal the number of k-point lines
        # that follow, so take it from the generated path rather than from
        # the requested number of points.
        kpts = "\n".join([str(len(kpt_lines))] + kpt_lines)
    pw_inp=f"""&control
verbosity='high'
calculation='{calc_type}'
prefix='mycalc'
pseudo_dir = '{workdir}/../'
outdir='{workdir}/'
/
&system
ibrav=0
nat={len(atoms)},
ntyp=1,
ecutwfc = {ecutwfc},
ecutrho = {ecutrho},
occupations='smearing',
degauss=0.001
/
&electrons
conv_thr = 1.0e-8
mixing_beta = 0.5
/
&ions
/
&cell
/
ATOMIC_SPECIES
  Si  28.086  Si.pbesol-n-rrkjus_psl.1.0.0.UPF
ATOMIC_POSITIONS  crystal
{positions}
K_POINTS {automatic}
{kpts}
CELL_PARAMETERS angstrom
{cell}
"""
    return pw_inp

## Cutoff setting for all tasks. Adapt if you use other elements (e.g. Carbon)

In [None]:
# Cutoffs passed to pw_input, which writes them as `ecutwfc` and `ecutrho` in the &system namelist
ecutwfc=30
ecutrho=240

## TASK_0: SCF for Si bulk in conventional cell (cubic, 8 atoms), $\Gamma$  point only. We check total energy and eigenvalues.

In [None]:
# Diamond-structure Si built with cubic=True and a=5.43.
# NOTE(review): this rebinds `crystal`, hiding the function imported from ase.spacegroup.
crystal = bulk('Si', 'diamond', a=5.43, cubic=True)

In [None]:
# Directory in which the TASK_0 input and output files are written
workdir=current_dir / 'TASK_0'

In [None]:
# SCF input; '1 1 1 0 0 0' is the text placed below "K_POINTS automatic"
pw_in=pw_input(crystal,'scf','1 1 1 0 0 0',workdir,ecutwfc,ecutrho)

In [None]:
#print(pw_in)

In [None]:
# Run pw.x; live=True also prints each output line while it is written to pw.out
scf_out0 = run_pw(pw_in,pw_exec,workdir,'pw.out',live=True)

In [None]:
# Last "! total energy" value found in the TASK_0 output
ene0 = parse_energy(scf_out0)

In [None]:
#print(scf_out0)

## TASK_0b: we repeat the calculation above using a non shifted 2x2x2 grid of k-points

In [None]:
# Directory in which the TASK_0b input and output files are written
workdir=current_dir / 'TASK_0b'

In [None]:
# Same cell as TASK_0, now with '2 2 2 0 0 0' below "K_POINTS automatic"
pw_in=pw_input(crystal,'scf','2 2 2 0 0 0',workdir,ecutwfc,ecutrho)

In [None]:
# Run pw.x without echoing; the output is returned and also saved to pw.out in workdir
scf_out0b = run_pw(pw_in,pw_exec,workdir,'pw.out')

In [None]:
# Last "! total energy" value found in the TASK_0b output
ene0b = parse_energy(scf_out0b)

## TASK_0c: we repeat the calculation above using a  shifted 2x2x2 grid of k-points

In [None]:
# TASK_0c gets its own directory so that it does not overwrite the TASK_0b files
workdir=current_dir / 'TASK_0c'

In [None]:
# Same cell again, with '2 2 2 1 1 1' below "K_POINTS automatic"
pw_in=pw_input(crystal,'scf','2 2 2 1 1 1',workdir,ecutwfc,ecutrho)

In [None]:
# Run pw.x; the output is returned and also saved to pw.out in workdir
scf_out0c = run_pw(pw_in,pw_exec,workdir,'pw.out')

In [None]:
# Last "! total energy" value found in the TASK_0c output
ene0c = parse_energy(scf_out0c)

## TASK_2: now we use a 2x2x2 supercell and only the $\Gamma$ k-point. (In the lecture notes this example is done with a 3x3x3 supercell to compare with TASK_1; here we compare with TASK_0b.)

In [None]:
# Directory in which the TASK_2 input and output files are written
workdir=current_dir / 'TASK_2'

In [None]:
# The cubic cell of TASK_0 repeated twice along each of the three directions
crystal = bulk('Si', 'diamond', a=5.43, cubic=True).repeat((2,2,2))

In [None]:
# SCF input for the supercell, with '1 1 1 0 0 0' below "K_POINTS automatic"
pw_in=pw_input(crystal,'scf','1 1 1 0 0 0',workdir,ecutwfc,ecutrho)

In [None]:
# Run pw.x; the output is returned and also saved to pw.out in workdir
scf_out2 = run_pw(pw_in,pw_exec,workdir,'pw.out')

In [None]:
# Last "! total energy" value found in the TASK_2 output (whole supercell)
ene2 = parse_energy(scf_out2)

In [None]:
# The supercell holds 2x2x2 = 8 copies of the single cell, hence the division by 8.
# The backslash is escaped so the literal contains no invalid escape sequence; the printed text is unchanged.
print(f"Energy single cell 2x2x2 ptk: {ene0b} Energy/8 2x2x2 cell $\\Gamma$ poit only: {ene2/8}" )

## TASK_3 we use the primitive cell for bulk silicon, we perform an accurate scf (5x5x5 grid of k-points) and we compute the bands starting from the accurate scf.

In [None]:
# Directory in which the TASK_3 input and output files are written
workdir=current_dir / 'TASK_3'

In [None]:
# Diamond-structure Si without cubic=True (the primitive cell, per the task heading)
crystal = bulk('Si', 'diamond', a=5.43)

In [None]:
# SCF input with '5 5 5 1 1 1' below "K_POINTS automatic"
pw_in=pw_input(crystal,'scf','5 5 5 1 1 1',workdir,ecutwfc,ecutrho)

In [None]:
# SCF run; its data in workdir is what the following bands run starts from
scf_out3 = run_pw(pw_in,pw_exec,workdir,'pw.out')

In [None]:
# Band-structure input: pw_input expects "<path labels> <number of points>" for 'bands'
bands_path='LGXWL'
npoints=50
pw_inb=pw_input(crystal,'bands',bands_path+' '+str(npoints),workdir,ecutwfc,ecutrho)

In [None]:
# Bands run in the same workdir; run_pw keeps the existing mycalc data for 'bands' inputs
scf_out3b = run_pw(pw_inb,pw_exec,workdir,'bands.out')

In [None]:
# Read k-points, eigenvalues and Fermi energy from the XML and plot between -13 and 4 eV
# relative to the Fermi energy. The label string repeats the value of bands_path.
xml_data = read_qe_xml(f"{workdir}/mycalc.xml")
plot_bands('LGXWL',xml_data['kpts'], xml_data['eigvals'], xml_data['fermi_en'],-13,4)

## TASK_4 we repeat the calculation above with an unphysically large lattice parameter

In [None]:
# Same workflow as TASK_3 (SCF followed by bands along LGXWL), with a=15 instead of 5.43
workdir=current_dir / 'TASK_4'
crystal = bulk('Si', 'diamond', a=15)
# SCF step, with '2 2 2 1 1 1' below "K_POINTS automatic"
pw_in=pw_input(crystal,'scf','2 2 2 1 1 1',workdir,ecutwfc,ecutrho)
scf_out4 = run_pw(pw_in,pw_exec,workdir,'pw.out')
# Bands step, run in the same workdir as the SCF step
bands_path='LGXWL'
npoints=50
pw_inb=pw_input(crystal,'bands',bands_path+' '+str(npoints),workdir,ecutwfc,ecutrho)
scf_out4b = run_pw(pw_inb,pw_exec,workdir,'bands.out')

In [None]:
# Plot the TASK_4 bands with the same energy window as used for TASK_3
xml_data = read_qe_xml(f"{workdir}/mycalc.xml")
plot_bands('LGXWL',xml_data['kpts'], xml_data['eigvals'], xml_data['fermi_en'],-13,4)

# Playing around: convergence test, cell/geometry relaxation

### Let's check for example the bandgap. For bulk Si  the top of the valence band is at $\Gamma$ and the bottom of the conduction band is close to X (we take X for simplicity).

In [None]:
# Two-point path from G to X, used to look up the coordinates of these special points
path = crystal.cell.bandpath('GX', npoints=2)

In [None]:
# Display the k-point coordinates of the path
path.kpts

### We need the vectors of the reciprocal lattice from the bands output:

In [None]:
# NOTE(review): when the notebook is run top to bottom, workdir still points at TASK_4 here,
# so these are the bands of the a=15 cell — confirm whether TASK_3 was intended.
xml_data = read_qe_xml(f"{workdir}/mycalc.xml")
rep_lat = xml_data['rep_lat']
eigvals=xml_data['eigvals']
kpts=xml_data['kpts']

In [None]:
# Eigenvalues at Gamma, given as (0, 0, 0) in crystal coordinates
eig_G = get_eig_k(np.array([0. , 0. , 0. ]),rep_lat,kpts,eigvals)

In [None]:
# Eigenvalues at X, given as (0.5, 0, 0.5) in crystal coordinates
eig_X=get_eig_k(np.array([0.5, 0. , 0.5]),rep_lat,kpts,eigvals)

In [None]:
# Gap estimate: band index 4 at X minus band index 3 at Gamma
# (taken as conduction-band bottom and valence-band top, per the text above)
gap = eig_X[4] - eig_G[3]

In [None]:
# Display the gap (eV, since read_qe_xml returns eigenvalues in eV)
gap

In [None]:
# Gap between X and Gamma as a function of the k-point sampling used in the SCF step.
bands_path='GX'
npoints=2
gaps=[]  # NOTE(review): never filled in this cell; results are collected in `gap`
scf_out=[]
bands_out=[]
xml_data=[]  # rebinds xml_data (a dict in earlier cells) to a list with one dict per sampling
gap=[]       # rebinds gap (a scalar in earlier cells) to a list with one value per sampling
workdir=current_dir / 'TASK_free'
crystal = bulk('Si', 'diamond', a=5.43)
for sampling in ['1 1 1 0 0 0','2 2 2 0 0 0','2 2 2 1 1 1','5 5 5 1 1 1']:   
    # SCF with the current sampling; run_pw clears the previous mycalc data for non-bands inputs
    pw_in=pw_input(crystal,'scf',sampling,workdir,ecutwfc,ecutrho)
    scf_out.append(run_pw(pw_in,pw_exec,workdir,'pw.out'))
    # Bands step on the two-point G-X path, run in the same workdir
    pw_inb=pw_input(crystal,'bands',bands_path+' '+str(npoints),workdir,ecutwfc,ecutrho)
    bands_out.append(run_pw(pw_inb,pw_exec,workdir,'bands.out'))
    xml_data.append(read_qe_xml(f"{workdir}/mycalc.xml"))
    rep_lat = xml_data[-1]['rep_lat']
    eigvals=xml_data[-1]['eigvals']
    kpts=xml_data[-1]['kpts']
    # Same band indices as in the single-gap cells: 3 at Gamma, 4 at X
    vbm=get_eig_k(np.array([0. , 0. , 0. ]),rep_lat,kpts,eigvals)[3]
    cbm=get_eig_k(np.array([0.5, 0. , 0.5]),rep_lat,kpts,eigvals)[4]
    gap.append(cbm - vbm)

In [None]:
# Display the gaps, one per sampling, in the order of the loop
gap

### Example of cell (and geometry) optimization

In [None]:
# Cell relaxation: same template as before with calculation='vc-relax'.
# The &ions and &cell namelists written by pw_input are empty.
workdir=current_dir / 'TASK_free'
crystal = bulk('Si', 'diamond', a=5.43)
pw_in=pw_input(crystal,'vc-relax','5 5 5 1 1 1',workdir,ecutwfc,ecutrho)
relax_out = run_pw(pw_in,pw_exec,workdir,'pw.out')

### Extract equilibrium geometry:

In [None]:
# Use StringIO to simulate a file
#out = io.StringIO(relax_out)

# Read from the simulated file into an ASE Atoms object
#atoms = read(out, format='espresso-out')