# Evalaute the Band Structure
Get the band gap energy and the center of the 2p band

In [1]:
from tarfile import TarFile
from pathlib import Path
from ase.io import read
from tqdm import tqdm
import pandas as pd
import numpy as np

## Make loading functions
We need a function to load the band structure data then functions to read out the band gap energy and band centers.

In [2]:
def read_band_structure(tar_path) -> tuple[pd.DataFrame, float]:
    """"Read the band structure from a tar file of pdos files from CP2K

    Args:
        tar_path: Path to the tar file containing the data
    Returns:
        - A DataFrame containing all atom kinds and types
        - Fermi level (a.u.)
    """

    kinds = []
    with TarFile.gzopen(tar_path) as tar:
        for member in tar.getmembers():
            # Read the top of the file
            with tar.extractfile(member) as fp:
                # Header with the kind information
                first_line = fp.readline().decode().split()
                elem = first_line[6]
                e_fermi = float(first_line[-2])

                # Header with the column names
                columns = ['MO', 'Energy'] + fp.readline().decode().split()[4:]

            with tar.extractfile(member) as fp:
                data = pd.read_csv(fp, skiprows=2, sep='\s+', names=columns)
                data['elem'] = elem
                data['alpha'] = 'ALPHA' in member.name
                kinds.append(data)

    return pd.concat(kinds, axis=0, ignore_index=True), e_fermi

In [3]:
band_data, e_fermi = read_band_structure('atoms-relax/CeAlO3/2-cells/pdos.pbe-plus-u.tar.gz')

In [4]:
def get_band_center(band_data, orbitals: list[str] = ['px', 'py', 'pz']):
    """Get the band center for all available orbitals

    Args:
        band_data: Band data of interest
        orbitals: Which orbitals to sum
    Returns:
        The weighted mean of the energies of the target orbitals (a.u.)
    """

    return np.dot(
        band_data[orbitals].sum(axis=1) * 
        band_data['Occupation'],
        band_data['Energy'],
    ) / band_data[orbitals].values.sum()

In [5]:
def get_band_gap(band_data: pd.DataFrame) -> float:
    """Compute the band gap energy

    Args:
        band_gap: Band data
    Returns:
        Band gap
    """

    homo = band_data.query('Occupation > 0.99')['Energy'].max()
    lumo = band_data.query(f'Occupation < 0.01 and Energy > {homo}')['Energy'].min()
    return lumo - homo

In [6]:
get_band_gap(band_data)

0.102524

In [7]:
get_band_center(band_data.query('elem == "O"')) - e_fermi

-0.2949090599461517

## Get it for all materials
Find all tar files and save accordingly

In [8]:
pband_centers = []
for path in tqdm(Path('atoms-relax/').glob('*/2-cells/pdos.pbe-plus-u.tar.gz')):
    # Load the band structure data
    name = path.parts[1]
    band_data, e_fermi = read_band_structure(path)
    center = get_band_center(band_data.query('elem == "O"')) - e_fermi
    gap = get_band_gap(band_data)

    # Load the composition
    traj_path = path.parent / 'relax.traj'
    atoms = read(traj_path, -1)
    comp = atoms.get_chemical_formula('metal', True)
    
    pband_centers.append({
        'name': name,
        'comp': comp,
        'center': center * 27.211,  # Convert to eV
        'gap': gap * 27.211 
    })
pband_centers = pd.DataFrame(pband_centers)
print(f'Loaded {len(pband_centers)} band structures')
pband_centers.sort_values('gap').head()

114it [00:06, 16.56it/s]

Loaded 114 band structures





Unnamed: 0,name,comp,center,gap
95,YbMnO3,MnYbO3,-8.222623,0.000354
22,Yb(Mn7Mo1)O3,Mn7MoYb8O24,-8.275702,0.001333
16,KReO3,KReO3,-9.027639,0.001415
75,CaRhO3,CaRhO3,-6.84722,0.001932
53,(K7Na1)ReO3,K7NaRe8O24,-9.037867,0.00332


Save the band center data

In [9]:
pband_centers.to_csv('band-centers.csv', index=False)