In [1]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
###################### Change this line before release! #######################
import constants as cst
###############################################################################
global cst

# __all__ = [
#     "read_output",
# ]

# def read_output(output, read_eigenvector=True, unit='HARTREE'):
#     """
#     """
    
class Output_reader:
    """
    Open and parse a single GULP phonon output file.

    After construction the instance exposes:

    * Dimensionality
    * Lattice and atomic positions (combined into Pymatgen structures)
    * Supercell expansion matrix
    * Total lattice energy
    * Number, fractional coordinates and weight of the sampled q points
    * Number, frequency and (optionally) eigenvectors of phonon modes at q

    Unit conversion factors are taken from the module-level ``cst`` object
    (``cst.ev``, ``cst.ang``, ``cst.cm_1``).
    """

    def __init__(self, output, read_eigenvector=True):
        """
        Check that the output exists, that it is a phonon output and that it
        terminated normally, then parse it.

        The identifier for phonon output:
        *  phonon       -

        The identifier for eigenvectors output:
        *  eigenvectors -

        The identifier for normal termination:
          Job Finished at

        Input:
            output (string)
                Output file name. Must be a full name.
            read_eigenvector (bool)
                Whether to read eigenvectors from file. Switched off (with a
                warning) if the 'eigenvectors' identifier is not in the file.
        Output:
            self.ncalc (int)
                Number of calculations (configurations)
            self.dimension (nCalc*1 list of int)
                Dimensionality of each configuration
            self.structure (nCalc*1 list of Pymatgen structures)
                Geometric information of the computed system
            self.supercell (nCalc*1 list of 3*3 array)
                Diagonal supercell expansion matrix, identity by default
            self.eint (nCalc*1 list of floats)
                Total lattice energy, divided by cst.ev
            self.nqpoint (nCalc*1 list of ints)
                Number of q points sampled
            self.qpoint (nCalc*1 list of arrays)
                Sampled q points as returned by read_qpoint
            self.nmode (nCalc*1 list of nqpoint*1 array)
                Number of modes at sampled q points
            self.frequency (nCalc*1 list of nqpoint*nmode array)
                Frequencies of phonon modes, divided by cst.cm_1
            self.eigenvector (nCalc*1 list of nqpoint*nmode*natom*3 array)
                Eigenvectors of phonon modes. Each mode is normalized to 1.
        Raises:
            FileNotFoundError
                If 'output' is not an existing file.
            Exception
                If the job did not terminate normally or is not a phonon run.
        """
        import re
        import os
        import warnings

        # File existence. Must be a full name
        if not os.path.isfile(output):
            err_msg = 'Specified file \'' + output + '\' not found.'
            raise FileNotFoundError(err_msg)

        # Context manager guarantees the handle is closed even on read errors
        with open(output, 'r', errors='ignore') as file:
            data = file.readlines()

        # Initial scan - clean exception messages, check file.
        # Walk backwards so that deleting a line does not shift the indices
        # of the lines still to be visited.
        is_normal_termination = False
        is_freq = False
        is_eigvt = False
        for idx_line in range(len(data) - 1, -1, -1):
            line = data[idx_line]
            if re.search(r'\s*Note:[\s\S]+exceptions', line):
                del data[idx_line]
            # Normal termination
            elif re.match(r'^\s+Job\sFinished\sat', line):
                is_normal_termination = True
            # Phonon option activated
            elif re.match(r'^\s*\*\s+phonon\s+\-', line):
                is_freq = True
            # (Optional) Eigenvector option activated
            elif re.match(r'^\s*\*\s+eigenvectors\s+\-', line):
                is_eigvt = True

        if not is_normal_termination:
            err_msg = 'Specified file \'' + output + '\' is interrupted.'
            raise Exception(err_msg)

        if not is_freq:
            err_msg = 'Specified file \'' + output + '\' is not a frequency output.'
            raise Exception(err_msg)

        if read_eigenvector and not is_eigvt:
            warnings.warn("GULP option 'eigenvectors' not found. 'read_eigenvector' is set to 'False'.", stacklevel=2)
            # Actually do what the warning announces
            read_eigenvector = False

        self.ncalc, self.dimension, lattice, atom, self.supercell, self.eint, \
        self.nqpoint, self.qpoint, self.nmode, self.frequency, self.eigenvector \
            = self.auto_read(data, read_eigenvector)

        # Combine lattice and atomic information into Pymatgen structure
        self.structure = []
        for idx_calc in range(self.ncalc):
            self.structure.append(self.get_structure(lattice[idx_calc], atom[idx_calc]))

        # Clean imaginary modes: anything below -0.01 cm^-1 is replaced by NaN
        self.clean_imaginary(threshold=-1e-2)

    def auto_read(self, data, read_eigenvector):
        """
        Scan the whole file once and collect every quantity of interest.

        Input:
            data (list of string)
                Line-by-line list of the input file
            read_eigenvector (bool)
        Output:
            ncalc (int)
            dimension (nCalc*1 list of int)
            lattice (nCalc*1 list of Pymatgen lattice)
            atom (nCalc*1 list of 2*1 list)
                1st element: species (natom*1 list)
                2nd element: coord (natom*3 array)
            supercell (nCalc*1 list of 3*3 array)
            eint (nCalc*1 list of floats)
            nqpoint (nCalc*1 list of ints)
            qpoint (nCalc*1 list of nqpoint*4 array)
            nmode (nCalc*1 list of nqpoint*1 array)
            frequency (nCalc*1 list of nqpoint*nmode array)
            eigenvector (nCalc*1 list of nqpoint*nmode*natom*3 array)
        Raises:
            Exception
                If the 'Total number of configurations' line is missing.
        """
        import re
        import numpy as np

        # Get ncalc (number of configurations)
        ncalc = None
        for line in data:
            if re.match(r'^\s+Total number of configurations', line):
                ncalc = int(line.strip().split()[6])
                break
        if ncalc is None:
            raise Exception("Line 'Total number of configurations' not found in the output.")

        # Per-configuration containers, pre-filled with defaults
        dimension = [0 for i in range(ncalc)]
        lattice = [0. for i in range(ncalc)]
        atom = [[] for i in range(ncalc)]
        supercell = [np.eye(3) for i in range(ncalc)]
        eint = [0. for i in range(ncalc)]
        nqpoint = [0 for i in range(ncalc)]
        qpoint = [[] for i in range(ncalc)]
        nmode = [[] for i in range(ncalc)]
        frequency = [[] for i in range(ncalc)]
        eigenvector = [[] for i in range(ncalc)]

        # Scan over the file. Data found before any configuration banner is
        # attributed to the first configuration instead of raising NameError.
        idx_calc = 0
        idx_line = 0
        while idx_line < len(data):
            # Get the index of configuration
            if re.match(r'^\*\s+(?:In|Out)put for [Cc]onfiguration', data[idx_line]):
                idx_calc = int(re.search('[0-9]+', data[idx_line]).group(0)) - 1

            # Get dimensionality
            elif re.match(r'^\s+Dimensionality = ', data[idx_line]):
                dimension[idx_calc] = int(data[idx_line].strip().split()[2])

            # Get supercell expansion matrix
            elif re.match(r'^\s+Supercell dimensions :', data[idx_line]):
                line = data[idx_line].strip().split()
                idx_line += 1

                # Expansion factors sit at tokens 5, 8 and 11 (x, y, z); only
                # the periodic directions are read. The z factor goes to
                # [2, 2] - it used to overwrite the y factor at [1, 1].
                for axis in range(dimension[idx_calc]):
                    supercell[idx_calc][axis, axis] = float(line[5 + 3 * axis])

            # Get lattice matrix
            elif re.match(r'^[\s\S]+Cartesian lattice vectors', data[idx_line]):
                idx_line, lattice[idx_calc] = self.read_lattice(data, idx_line, dimension[idx_calc])

            # Get atomic coordinates
            elif re.match(r'^[\s\S]+[Ff]ractional coordinates of', data[idx_line]):
                idx_line, atom[idx_calc] = self.read_atom(data, idx_line)

            # Get internal energy
            elif re.match(r'^\s+Total lattice energy\s+=[\s\S]+eV', data[idx_line]):
                eint[idx_calc] = float(data[idx_line].strip().split()[4]) / cst.ev

            # Get q point
            elif re.match(r'^\s+Brillouin zone sampling points :',  data[idx_line]):
                idx_line, nqpoint[idx_calc], qpoint[idx_calc] = \
                    self.read_qpoint(data, idx_line)

            # Get phonons
            elif re.match(r'^\s+K point\s+[0-9]+\s=[\s\.\-0-9]+Weight\s=', data[idx_line]):
                idx_line, nmode[idx_calc], frequency[idx_calc], eigenvector[idx_calc] = \
                    self.read_phonon(data, idx_line, nqpoint[idx_calc], read_eigenvector)

            idx_line += 1

        return ncalc, dimension, lattice, atom, supercell, eint, nqpoint, qpoint, nmode, frequency, eigenvector

    def read_lattice(self, data, idx_line, dimension):
        """
        Get lattice matrix

        Input:
            data (list of string)
            idx_line (int)
                Index of the 'Cartesian lattice vectors' line
            dimension (int)
                Dimensionality (1, 2 or 3); selects the periodic directions
        Output:
            idx_line (int)
                Index of the last lattice vector line
            cell (Pymatgen Lattice)
                Built from the three vectors divided by cst.ang
        """
        import numpy as np
        from pymatgen.core.lattice import Lattice

        vecs = np.zeros([3, 3], dtype=float)
        pbc = {
            1: (True, False, False),
            2: (True, True, False),
            3: (True, True, True)
        }

        # The three vectors start two lines below the title line
        idx_line += 2
        for row in range(3):
            vecs[row, :] = [float(tok) for tok in data[idx_line + row].strip().split()[0:3]]
        idx_line += 2
        cell = Lattice(vecs / cst.ang, pbc[dimension])

        return idx_line, cell

    def read_atom(self, data, idx_line):
        """
        Get label and internal coordinates of atoms

        Input:
            data (list of string)
            idx_line (int)
                Index of the 'fractional coordinates of' title line
        Output:
            idx_line (int)
                Index of the dashed line closing the table (or len(data) if
                the table is not closed)
            atom (2*1 list)
                1st element: species (natom*1 list of string)
                2nd element: coord (natom*3 array)
        """
        import re
        import numpy as np

        # The table body starts six lines below the title line
        idx_line += 6
        coord = []
        species = []
        while idx_line < len(data) and not re.match(r'^\-+', data[idx_line]):
            # Split on blanks and on the '*' flags that may follow a value
            line = re.split(r'[\s\*]+', data[idx_line].strip())
            coord.append(line[3:6])
            # Keep only the leading letters of the label
            species.append(re.match(r'^[A-Za-z]+', line[1]).group(0))
            idx_line += 1

        coord = np.array(coord, dtype=float)
        atom = [species, coord]

        return idx_line, atom

    def read_qpoint(self, data, idx_line):
        """
        Get the number, fractional coordinate and weight of sampling q points

        Input:
            data (list of string)
            idx_line (int)
                Index of the 'Brillouin zone sampling points' title line
        Output:
            idx_line (int)
                Index of the dashed line closing the table (or len(data) if
                the table is not closed)
            nqpoint (int)
            qpoint (nqpoint*4 array)
                Every column after the first (index) column of each row
        """
        import re
        import numpy as np

        # The table body starts five lines below the title line
        idx_line += 5
        qpoint = []
        while idx_line < len(data) and not re.match(r'^\-+', data[idx_line]):
            qpoint.append(data[idx_line].strip().split()[1:])
            idx_line += 1

        qpoint = np.array(qpoint, dtype=float)
        nqpoint = np.shape(qpoint)[0]

        return idx_line, nqpoint, qpoint

    def read_phonon(self, data, idx_line, nqpoint, read_eigenvector):
        """
        Get the number, frequency and eigenvectors of phonons at each q point

        A q point section ends at the next 'K point' line or at the
        'Phonon properties' line. Eigenvectors are read in blocks: every
        'Frequency' line opens a new block whose rows are the x/y/z
        components of each atom and whose columns are the modes listed on
        that 'Frequency' line.

        Input:
            data (list of string)
            idx_line (int)
                Index of the first 'K point' line
            nqpoint (int)
            read_eigenvector (bool)
        Output:
            idx_line (int)
                Index of the line that closed the last q point
            nmode (nqpoint*1 array)
            frequency (nqpoint*nmode array)
                Divided by cst.cm_1
            eigenvector (nqpoint*nmode*natom*3 array)
                Each mode normalized to 1. Shape nqpoint*0 if
                read_eigenvector is False.
        Raises:
            Exception
                If the file ends before all q points are closed.
        """
        import re
        import numpy as np

        re_kpoint = re.compile(r'^\s+K point\s+[0-9]+\s=[\s\.\-0-9]+Weight\s=')
        re_end = re.compile(r'^\s+Phonon properties')
        re_freq_only = re.compile(r'^\s+[0-9\-]+\.[0-9]{2}')
        re_freq_block = re.compile(r'^\s+Frequency\s+[0-9\.\-]+')
        re_eigvt = re.compile(r'^\s+[0-9]+\s[xyz]\s+[0-9\-\.]+')

        idx_q = 0
        nmode = np.zeros(nqpoint, dtype=int)
        frequency = [[] for i in range(nqpoint)]
        eigenvector = [[] for i in range(nqpoint)]
        freq_q = []
        eigvt_blocks = []
        while idx_q < nqpoint:
            idx_line += 1
            if idx_line >= len(data):
                raise Exception('Unexpected end of file while reading phonon modes.')
            line = data[idx_line]

            if re_kpoint.match(line) or re_end.match(line):
                # Close the current q point
                frequency[idx_q] = np.array(freq_q, dtype=float)
                nmode[idx_q] = len(frequency[idx_q])
                freq_q = []
                if read_eigenvector:
                    # Each block is (3*natom) rows * (modes in block) columns.
                    # Transpose to modes * (3*natom) and stack the blocks so
                    # the mode order follows the frequency order.
                    per_block = [
                        np.array(block, dtype=float).transpose()
                        for block in eigvt_blocks if block
                    ]
                    if per_block:
                        eigvt_q = np.concatenate(per_block, axis=0)
                        eigenvector[idx_q] = eigvt_q.reshape(int(nmode[idx_q]), -1, 3)
                    else:
                        eigenvector[idx_q] = np.zeros([int(nmode[idx_q]), 0, 3])
                    eigvt_blocks = []

                idx_q += 1

            elif re_freq_only.match(line):
                freq_q += line.strip().split()

            elif re_freq_block.match(line):
                freq_q += line.strip().split()[1:]
                eigvt_blocks.append([])

            elif read_eigenvector and re_eigvt.match(line):
                # NOTE(review): the original code left an open question here
                # about complex eigenvectors; all matching rows are treated
                # as real components - confirm for q points away from gamma.
                if not eigvt_blocks:
                    eigvt_blocks.append([])
                eigvt_blocks[-1].append(line.strip().split()[2:])

        frequency = np.array(frequency, dtype=float) / cst.cm_1
        eigenvector = np.array(eigenvector, dtype=float)
        # Normalize eigenvectors of each mode to 1
        if read_eigenvector:
            for idx_q, eigv_q in enumerate(eigenvector):
                for idx_m, eigv_m in enumerate(eigv_q):
                    norm = np.linalg.norm(eigv_m)
                    # Leave all-zero (or empty) modes untouched
                    if norm > 0.:
                        eigenvector[idx_q, idx_m] /= norm

        return idx_line, nmode, frequency, eigenvector

    def get_structure(self, cell, atom):
        """
        Combine the lattice and internal coordinate information from GULP
        output file into Pymatgen structure object.

        Input:
            cell (Pymatgen Lattice)
            atom (2*1 list)
                1st element: species, 2nd element: coordinates
        Output:
            struc (Pymatgen Structure)
        """
        from pymatgen.core import Structure

        struc = Structure(lattice=cell, species=atom[0], coords=atom[1])

        return struc

    def clean_imaginary(self, threshold):
        """
        Replace imaginary modes, or any mode with frequency lower than
        threshold, and the corresponding eigenvectors by numpy NaN and raise
        a warning. Configurations without phonon data are skipped.

        Input:
            threshold (float)
                Threshold of frequencies. Unit: cm^-1. Divided by cst.cm_1
                before comparison, like the stored frequencies.
        Output:
            self.frequency (nCalc*1 list of nqpoint*nmode array)
            self.eigenvector (nCalc*1 list of nqpoint*nmode*natom*3 array)
            Returns self.
        """
        import numpy as np
        import warnings

        threshold /= cst.cm_1
        ncalc = len(self.nmode)

        for idx_calc in range(ncalc):
            freq = self.frequency[idx_calc]
            # Entries never filled by read_phonon are still plain empty lists
            if not isinstance(freq, np.ndarray) or freq.size == 0:
                continue
            if np.all(freq >= threshold):
                continue

            warnings.warn('Imaginary mode detected. Frequency, symmetry and eigenvector are substituted by numpy.nan.',
                          stacklevel=2)
            idx_img = np.where(freq < threshold)
            freq[idx_img] = np.nan
            eigvt = self.eigenvector[idx_calc]
            if isinstance(eigvt, np.ndarray) and eigvt.size:
                eigvt[idx_img] = np.nan

        return self

In [2]:
# Unit conversion table; also used by Output_reader through the global `cst`.
import constants as cst

# Parse the GULP output 'F1-gamma.gout' without reading eigenvectors.
output = Output_reader('F1-gamma.gout', read_eigenvector=False)
# Print the frequencies of the first configuration at its first q point,
# one per line, scaled by cst.thz (presumably a conversion to THz - confirm
# against the constants module).
for freq in output.frequency[0][0]:
    print(freq * cst.thz)

-0.0
-0.0
0.0
1.0897455848300002
1.1266200571640002
1.510654195862
1.580206046118
2.0424860163539997
2.116234961022
2.202575188926
2.373456889986
2.445407079906
2.563825100816
2.8159505579940007
2.850126898206
2.90498891802
2.929871692034
3.1082482045440005
3.296517868168
3.3576755296000003
3.4365209460540003
3.4464140971679997
3.4509109840380003
3.637381892914
3.6877470258580005
3.737212781428
3.740810290924
4.365877565854
4.423137925332
4.51937130435
4.69624885457
4.950173066496001
5.933792121194
6.033023424792
6.266861542032
6.270159259070001
6.390675827186
6.425451752314
6.911115534274001
7.094288726112
7.404573920142001
7.419563543042001
7.839272984242001
8.043131855682
9.540295390934
9.752848243656
9.753148036114
9.775032885548
10.569183106790002
10.63963433442
10.97690084967
11.043154982888002
11.345645573010001
11.413698460976
12.283096589176001
12.368837232164001
12.820324673912001
12.825121353240002
12.873387938978
12.928849543707999
14.601691459348
14.627773403194002
14.6475

In [6]:
# NOTE: this cell uses `cst` without importing it; it relies on the
# `import constants as cst` executed in an earlier cell.
import crystal

# crystal.read_output is unpacked into ten values; only the eighth one
# (bound to `frequency`) is used, the rest are discarded.
_, _, _, _, _, _, _, frequency, _, _ = crystal.read_output('f1-r0freq.out', read_eigenvector=False)

# Print the first entry of the first element of `frequency`, one value per
# line, scaled by cst.thz - the same indexing and scaling as the GULP cell
# above, so the two printed lists can be compared side by side.
for freq in frequency[0][0]:
    print(freq * cst.thz)

0.0
0.0
0.0
1.0304
1.2306
1.5365
1.6430000000000002
1.6977
1.8721
2.1338
2.221
2.2324
2.3595
2.5566
2.5586
2.7860999999999994
2.8327
2.9938
3.0093
3.1291
3.1473
3.1856
3.3159
3.3258
3.4868
3.5048
3.6001
3.7779
4.0568
4.3416
4.3921
4.6319
4.7869
4.8288
4.8551
4.8838
5.450199999999999
5.5184
6.0243
6.184499999999999
6.7777
6.8931
6.9396
7.0179
10.2718
10.4081
10.4157
10.4677
10.7718
10.8114
11.5232
11.564799999999998
12.4201
12.443099999999998
12.5095
12.5456
13.0306
13.104
13.1225
13.1787
14.5325
14.604
14.8334
14.8397
15.813
15.833299999999998
15.845
15.8516
16.5551
16.7278
16.7892
16.798
19.2278
19.2624
19.2839
19.3477
19.6539
19.715
19.7814
19.87
20.5073
20.5269
20.5424
20.546
22.448
22.4704
22.5183
22.5221
22.6658
22.7282
22.8261
22.829400000000003
23.0555
23.1902
23.4712
23.6664
25.313
25.3452
25.3824
25.4066
25.612300000000005
25.799099999999996
25.8987
25.957
26.4341
26.4797
26.5308
26.6203
27.263
27.278399999999998
27.330799999999996
27.3421
29.173399999999997
29.232800000000005