Parser for basis sets from the [Basis Set Exchange](https://www.basissetexchange.org/) in JSON format

In [5]:
import json

# Inline JSON document in the Basis Set Exchange layout (basis name "STO-3G",
# entries for elements "1" and "8" only). It is passed to load_basis_set_test
# below so the parsing logic can be exercised without reading a file.
# Note that exponents and coefficients are stored as strings, not numbers.
example_data = """{
    "molssi_bse_schema": {
        "schema_type": "complete",
        "schema_version": "0.1"
    },
    "revision_description": "Data from Gaussian09",
    "revision_date": "2018-06-19",
    "elements": {
        "1": {
            "electron_shells": [
                {
                    "function_type": "gto",
                    "region": "",
                    "angular_momentum": [
                        0
                    ],
                    "exponents": [
                        "0.3425250914E+01",
                        "0.6239137298E+00",
                        "0.1688554040E+00"
                    ],
                    "coefficients": [
                        [
                            "0.1543289673E+00",
                            "0.5353281423E+00",
                            "0.4446345422E+00"
                        ]
                    ]
                }
            ],
            "references": [
                {
                    "reference_description": "STO-3G Minimal Basis (3 functions/AO)",
                    "reference_keys": [
                        "hehre1969a"
                    ]
                }
            ]
        },
        "8": {
            "electron_shells": [
                {
                    "function_type": "gto",
                    "region": "",
                    "angular_momentum": [
                        0
                    ],
                    "exponents": [
                        "0.1307093214E+03",
                        "0.2380886605E+02",
                        "0.6443608313E+01"
                    ],
                    "coefficients": [
                        [
                            "0.1543289673E+00",
                            "0.5353281423E+00",
                            "0.4446345422E+00"
                        ]
                    ]
                },
                {
                    "function_type": "gto",
                    "region": "",
                    "angular_momentum": [
                        0,
                        1
                    ],
                    "exponents": [
                        "0.5033151319E+01",
                        "0.1169596125E+01",
                        "0.3803889600E+00"
                    ],
                    "coefficients": [
                        [
                            "-0.9996722919E-01",
                            "0.3995128261E+00",
                            "0.7001154689E+00"
                        ],
                        [
                            "0.1559162750E+00",
                            "0.6076837186E+00",
                            "0.3919573931E+00"
                        ]
                    ]
                }
            ],
            "references": [
                {
                    "reference_description": "STO-3G Minimal Basis (3 functions/AO)",
                    "reference_keys": [
                        "hehre1969a"
                    ]
                }
            ]
        }
    },
    "version": "1",
    "function_types": [
        "gto"
    ],
    "names": [
        "STO-3G"
    ],
    "tags": [],
    "family": "sto",
    "description": "STO-3G Minimal Basis (3 functions/AO)",
    "role": "orbital",
    "auxiliaries": {},
    "name": "STO-3G"
}"""

###Opening from file
def load_basis_set(atomic_number, basis_set_file):
    '''
    Load the electron shells of one element from a basis set JSON file.

    Args:
        atomic_number (int): Atomic number of element
        basis_set_file (str): Path to basis set json file

    Returns:
        basis_functions (list): One dictionary per electron shell, in file
            order, with the keys 'exponents' (list of float) and
            'coefficients' (list of lists of float)

    Raises:
        OSError: If the file cannot be opened (e.g. FileNotFoundError)
        json.JSONDecodeError: If the file is not valid JSON
        KeyError: If the file has no data for the requested atomic number,
            or a shell lacks 'exponents' / 'coefficients'

    TODO:
        - Implement input of entire molecule and looping inside instead of inputting atomic number from external loop
    '''
    # JSON text is UTF-8; be explicit so the result does not depend on the
    # platform's default text encoding.
    with open(basis_set_file, encoding='utf-8') as f:
        basis_set = json.load(f)

    try:
        # Element keys in the file are strings ("1", "8", ...), hence str().
        element_data = basis_set['elements'][str(atomic_number)]
    except KeyError:
        # Same exception type as a plain lookup, but with enough context to
        # tell which element and which file were involved.
        raise KeyError(
            f"No basis set data for atomic number {atomic_number} in {basis_set_file}"
        ) from None

    basis_functions = []
    for shell in element_data['electron_shells']:
        # The file stores numbers as strings such as "0.3425250914E+01".
        exponents = [float(e) for e in shell['exponents']]
        coefficients = [[float(c) for c in coef] for coef in shell['coefficients']]
        basis_functions.append({'exponents': exponents, 'coefficients': coefficients})
    return basis_functions

###Opening from string
def load_basis_set_test(atomic_number, basis_set_string):
    '''
    String-based counterpart of load_basis_set: parses the basis set from a
    JSON string held in Python code instead of reading it from a file.

    Args:
        atomic_number (int): Atomic number of element
        basis_set_string (str): Basis set JSON document as a string

    Returns:
        list: One dictionary per electron shell with the keys 'exponents'
            and 'coefficients', values converted to float
    '''
    # Element keys in the JSON document are strings, hence str().
    shells = json.loads(basis_set_string)['elements'][str(atomic_number)]['electron_shells']
    return [
        {
            'exponents': list(map(float, shell['exponents'])),
            'coefficients': [list(map(float, row)) for row in shell['coefficients']],
        }
        for shell in shells
    ]

# Atomic numbers to look up; both 1 and 8 have entries in example_data.
molecule = [1,8]

# Collects one [atomic_number, shells] pair per atom, in input order.
molecule_w_basis = []
for atom in molecule:
    # Parse this atom's shells from the inline JSON string rather than a file.
    ao_basis = load_basis_set_test(atom, example_data)
    print(ao_basis)
    molecule_w_basis.append([atom, ao_basis])
print(molecule_w_basis)


[{'exponents': [3.425250914, 0.6239137298, 0.168855404], 'coefficients': [[0.1543289673, 0.5353281423, 0.4446345422]]}]
[{'exponents': [130.7093214, 23.80886605, 6.443608313], 'coefficients': [[0.1543289673, 0.5353281423, 0.4446345422]]}, {'exponents': [5.033151319, 1.169596125, 0.38038896], 'coefficients': [[-0.09996722919, 0.3995128261, 0.7001154689], [0.155916275, 0.6076837186, 0.3919573931]]}]
[[1, [{'exponents': [3.425250914, 0.6239137298, 0.168855404], 'coefficients': [[0.1543289673, 0.5353281423, 0.4446345422]]}]], [8, [{'exponents': [130.7093214, 23.80886605, 6.443608313], 'coefficients': [[0.1543289673, 0.5353281423, 0.4446345422]]}, {'exponents': [5.033151319, 1.169596125, 0.38038896], 'coefficients': [[-0.09996722919, 0.3995128261, 0.7001154689], [0.155916275, 0.6076837186, 0.3919573931]]}]]]


In [30]:
import json
import os

class BasisFunction:
    '''
    Plain container for the data of one electron shell entry.

    Attributes:
        angular_momentum: Angular momentum value(s) given for the shell
        exponents: Exponents given for the shell
        coefficients: Coefficient rows given for the shell
    '''

    def __init__(self, angular_momentum, exponents, coefficients):
        '''Store the three arguments as attributes, without copying or validating them.'''
        self.angular_momentum, self.exponents, self.coefficients = (
            angular_momentum,
            exponents,
            coefficients,
        )

    def __repr__(self):
        '''Return a constructor-style string showing the three stored attributes.'''
        return f"BasisFunction(angular_momentum={self.angular_momentum}, exponents={self.exponents}, coefficients={self.coefficients})"

def load_basis_set(atomic_number, basis_set_file):
    '''
    Load basis set from json file and creates a list of BasisFunction instances for a given atomic number
    Args:
        atomic_number (int): Atomic number of element
        basis_set_file (str): Path to basis set json file

    Returns:
        electron_shell (list): One BasisFunction per entry of the element's
            'electron_shells' list, in file order. An absent
            'angular_momentum', 'exponents' or 'coefficients' field is
            treated as an empty list.

    Raises:
        FileNotFoundError: If the file does not exist
        ValueError: If the file is not valid JSON, its content is not
            structured as expected, or the atomic number is not found
    '''
    # Open directly rather than testing os.path.exists() first: the separate
    # check can go stale before the open happens. The original message is kept.
    try:
        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(basis_set_file, 'r', encoding='utf-8') as f:
            basis_set = json.load(f)  # JSONDecodeError is a ValueError subclass
    except FileNotFoundError:
        raise FileNotFoundError(f"File not found: {basis_set_file}") from None

    # The top level must be an object holding an 'elements' object.
    elements = basis_set.get('elements') if isinstance(basis_set, dict) else None
    if not isinstance(elements, dict):
        raise ValueError("Invalid basis set file format.")

    # Element keys in the file are strings ("1", "8", ...), hence str().
    element_key = str(atomic_number)
    if element_key not in elements:
        raise ValueError(f"Atomic number {atomic_number} not found in the basis set file.")

    element_data = elements[element_key]
    if not isinstance(element_data, dict):
        raise ValueError("Invalid basis set file format.")

    # Convert the raw fields first, so that structural problems (a shell that
    # is not an object, a null where a list or number is expected, ...) are
    # reported as the documented ValueError instead of TypeError/AttributeError.
    try:
        parsed_shells = [
            (
                [int(l) for l in shell.get('angular_momentum', [])],
                [float(e) for e in shell.get('exponents', [])],
                [[float(c) for c in coef] for coef in shell.get('coefficients', [])],
            )
            for shell in element_data.get('electron_shells', [])
        ]
    except (AttributeError, TypeError) as exc:
        raise ValueError("Invalid basis set file format.") from exc

    electron_shell = [
        BasisFunction(angular_momentum, exponents, coefficients)
        for angular_momentum, exponents, coefficients in parsed_shells
    ]
    return electron_shell

def load_basis_set_for_molecule(atomic_numbers, basis_set_file):
    '''
    Load basis set from json file and creates a dictionary of atomic number to list of BasisFunction instances
    Args:
        atomic_numbers (list): List of atomic numbers of elements in the molecule.
            Repeated atomic numbers are allowed; each distinct one is loaded once.
        basis_set_file (str): Path to basis set json file

    Returns:
        basis_set_for_molecule (dict): Dictionary with atomic numbers as keys and lists of BasisFunction instances as values.
            Keys appear in order of first occurrence in atomic_numbers; an empty input gives an empty dictionary.

    Raises:
        Whatever load_basis_set raises for the given file or atomic number;
        errors are not caught here, so the first failure aborts the whole load.
    '''
    basis_set_for_molecule = {}
    for atomic_number in atomic_numbers:
        # A molecule usually contains the same element several times. The
        # result is keyed by atomic number, so loading a repeat would only
        # re-read the file to overwrite an identical entry.
        if atomic_number in basis_set_for_molecule:
            continue
        basis_set_for_molecule[atomic_number] = load_basis_set(atomic_number, basis_set_file)

    return basis_set_for_molecule


In [31]:
atomic_numbers = [1, 8]  # Example atomic numbers for H and O
# Relative path: the file is looked up in the notebook's working directory.
basis_set_file = "sto-3g_h_o.json"

# Maps each atomic number to its list of BasisFunction objects.
basis_set_data = load_basis_set_for_molecule(atomic_numbers, basis_set_file)
# Detailed listing: for every element, print each shell's exponents followed
# by the full BasisFunction repr.
for atomic_number, basis_functions in basis_set_data.items():
    print(f"Atomic number: {atomic_number}")
    for bf in basis_functions:
        exponents = bf.exponents
        print(exponents)
        print(bf)


# Compact listing: one line per element with its whole list of shells.
for atomic_number, basis_functions in basis_set_data.items():
    print(atomic_number, basis_functions)

Atomic number: 1
[3.425250914, 0.6239137298, 0.168855404]
BasisFunction(angular_momentum=[0], exponents=[3.425250914, 0.6239137298, 0.168855404], coefficients=[[0.1543289673, 0.5353281423, 0.4446345422]])
Atomic number: 8
[130.7093214, 23.80886605, 6.443608313]
BasisFunction(angular_momentum=[0], exponents=[130.7093214, 23.80886605, 6.443608313], coefficients=[[0.1543289673, 0.5353281423, 0.4446345422]])
[5.033151319, 1.169596125, 0.38038896]
BasisFunction(angular_momentum=[0, 1], exponents=[5.033151319, 1.169596125, 0.38038896], coefficients=[[-0.09996722919, 0.3995128261, 0.7001154689], [0.155916275, 0.6076837186, 0.3919573931]])
1 [BasisFunction(angular_momentum=[0], exponents=[3.425250914, 0.6239137298, 0.168855404], coefficients=[[0.1543289673, 0.5353281423, 0.4446345422]])]
8 [BasisFunction(angular_momentum=[0], exponents=[130.7093214, 23.80886605, 6.443608313], coefficients=[[0.1543289673, 0.5353281423, 0.4446345422]]), BasisFunction(angular_momentum=[0, 1], exponents=[5.0331513