# Compute Number of Basis Functions and Composition of Molecule in XYZ file

Assumes the XYZ file stores the geometry in Angstrom.

In [None]:
from nbasis_analysis import *

## Single Molecule: Calculate the number of basis functions

In [10]:
## Just want to know how many basis functions? ##

# Inputs: the geometry file to read and the name of the basis set.
ff = "benzene.xyz"
basis = 'def2-tzvp'

# Atomic numbers and element symbols of the atoms listed in the geometry file.
elements_numbers, elements_symbols = get_elements_from_xyz_file(ff)

# Fetch the basis-set data restricted to the elements present.
basis_set_bse = bse.get_basis(basis, elements=elements_numbers)

# Total for the whole molecule, then report it.
# NOTE(review): the output recorded for this cell (90) appears to equal the
# number of contracted shells for C6H6 in def2-TZVP (6*11 + 6*4) rather than
# the number of basis functions -- confirm what calculate_nbasis counts.
nbasis = calculate_nbasis(elements_numbers, basis_set_bse)
print(f"Number of basis functions: {nbasis}")

Number of basis functions: 90


## Set of Geometries: collate data of basis and atom types

In [5]:
def create_default_mol_dict():
    """Return a new per-molecule record filled with placeholder values.

    Every call builds fresh containers, so records never share their lists.
    The keys cover three groups: composition of the molecule, flags for
    element families, and basis-set information.
    """
    # (A "geom_file" key was considered by the original author but is disabled.)
    composition = {
        "molecule": [],
        "unique_atoms": [],
        "molecule_num": [],
        "unique_atoms_num": [],
        "NAtoms": 0,
        "NAtomTypes": 0,
        "heaviest_element": 0,
    }
    element_family_flags = {
        "transition_metal_present": False,
        "lanthanide_present": False,
        "actinide_present": False,
    }
    basis_info = {
        "Basis": '',
        "NBasis": 0,
        "highest_angular_momentum": '',
        "ECP": False,
    }
    # Merge in the original key order: composition, flags, basis info.
    return {**composition, **element_family_flags, **basis_info}

In [13]:
import glob

# Where are the molecules?
# glob.glob returns matches in arbitrary order, so sort them: this makes the
# insertion order of molecule_dicts (and its printed form) reproducible.
geomFileList = sorted(glob.glob("*.xyz"))

# Specify the basis set being used
basis = 'def2-tzvp'

# NOTE(review): output_dict and icount are not used anywhere in this cell;
# kept only in case another cell reads them -- remove if not.
output_dict = {}
icount = 0

# Maps geometry file name -> record describing that molecule
# (record layout comes from create_default_mol_dict).
molecule_dicts = {}

# Loop over all geometry files:
for g in geomFileList:
    p = create_default_mol_dict()

    # Get the elements from the geometry file: atomic numbers and symbols
    elements_numbers, elements_symbols = get_elements_from_xyz_file(g)
    p["molecule"] = elements_symbols
    p["molecule_num"] = elements_numbers

    # NAtoms from the length of the symbol list
    # (could check that the symbol and number lists have the same length!)
    p["NAtoms"] = len(p["molecule"])
    p["unique_atoms"] = set(p["molecule"])
    p["unique_atoms_num"] = set(p["molecule_num"])

    # Number of distinct atom types, and the largest atomic number present
    p["NAtomTypes"] = len(p["unique_atoms_num"])
    p["heaviest_element"] = max(p["unique_atoms_num"])

    # Check for transition metals / lanthanides / actinides
    p["transition_metal_present"] = contains_transition_metal(elements_numbers)
    p["lanthanide_present"] = contains_lanthanide(elements_numbers)
    p["actinide_present"] = contains_actinide(elements_numbers)

    # Basis set info:
    # Get the basis set for the elements
    basis_set_bse = bse.get_basis(basis, elements=elements_numbers)
    # Get the total number of basis functions for the molecule
    p["NBasis"] = calculate_nbasis(elements_numbers, basis_set_bse)
    p["Basis"] = basis
    # Highest angular momentum is looked up for the heaviest element only
    p["highest_angular_momentum"] = find_atom_highest_ang_mom(basis, p["heaviest_element"])
    # ECP usage is likewise decided from the heaviest element only;
    # the record's default for "ECP" is False, so only the True case is set.
    if uses_ecp(basis, p["heaviest_element"]):
        p["ECP"] = True

    # Store the record under its geometry file name
    molecule_dicts[g] = p

    

In [14]:
# Spot-check a single entry: the NBasis value stored for 'h2o.xyz'
print(molecule_dicts["h2o.xyz"]["NBasis"])

19


In [15]:
# Display the whole mapping of geometry file name -> molecule record
# (useful if you have multiple geometries)
molecule_dicts

{'10055627.xyz': {'molecule': ['Si',
   'Si',
   'Si',
   'O',
   'O',
   'O',
   'O',
   'O',
   'N',
   'N',
   'N',
   'N',
   'N',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'C',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H',
   'H'],
  'unique_atoms': {'C', 'H', 'N', 'O', 'Si'},
  'molecule_num': [14,
   14,
   14,
   8,
   8,
   8,
   8,
   8,