In [55]:
import pandas as pd
import tensorflow as tf
import tempfile
import os
import numpy as np
import sympy as sympy
import mendeleev as ptable
import random as random
from pathlib import Path

In [56]:
class molecular_graph:
    def __init__(self, orbital_overlaps, n_atoms, maximum_s_el, maximum_p_el, maximum_d_el, maximum_f_el):
        self.n_atoms = n_atoms
        self.orbital_overlaps = orbital_overlaps
        self.maximum_s_el = maximum_s_el
        self.maximum_p_el = maximum_p_el
        self.maximum_d_el = maximum_d_el
        self.maximum_f_el = maximum_f_el

    def build_adj_matrix(self, n_atoms, orbital_overlaps):
        self.labeled_atom_list = np.arange(0, n_atoms)
        self.adj_matrix = np.array([])
        self.adj_matrix = self.adj_matrix[np.newaxis, :]
        #classifying all atoms in complex as either engaging in orbital overlap (bonding M.O) or not (nonbonding state), starting from atom 1 to the nth atom in the system:
        i = 0
        j = 0
        for i in np.arange(0, n_atoms):
            ith_adjacency_row = np.zeros(n_atoms)
            ith_adjacecy_row = list(ith_adjacency_row)
            for j in np.arange(0, len(orbital_overlaps)):
                if str(i+1) in orbital_overlaps[j]:
                    if orbital_overlaps[j][0] == str(i+1):
                        overlapped_adjacent = int(orbital_overlaps[j][2])
                        ith_adjacency_row[overlapped_adjacent - 1] = 1
                j += 1
            self.adj_matrix = np.append(self.adj_matrix, ith_adjacency_row)
            i += 1
        self.adj_matrix = np.reshape(self.adj_matrix, (n_atoms, n_atoms))
        return self.adj_matrix

In [57]:
def convert_data_to_records():
    test_csv = "/users/haydenprescott/documents/test.csv"
    record_filepath = test_path = os.path.join(os.getcwd(), "test.tfrecords")
    test_dataset = pd.read_csv(test_csv).values
    with tf.compat.v1.python_io.TFRecordWriter(record_filepath) as writer:
        for row in test_dataset:
            features, label = row[:-1], row[-1]
            example_data_piece = tf.train.Example()
            example_data_piece.features.feature["features"].float_list.value.extend(features)
            example_data_piece.features.feature["label"].float_list.value.append(label)
            writer.write(example_data_piece.SerializeToString())
            print(example_data_piece)
    return record_filepath


record_types = convert_data_to_records()
print(record_types)
#contents = contents.read()
#print(contents)

features {
  feature {
    key: "label"
    value {
      float_list {
        value: 0.244
      }
    }
  }
  feature {
    key: "features"
    value {
      float_list {
        value: 0
        value: 3.75
        value: 8
        value: 0.097
        value: 0.055
        value: 498
        value: 4.62
        value: 2.91
      }
    }
  }
}

features {
  feature {
    key: "label"
    value {
      float_list {
        value: 3.15
      }
    }
  }
  feature {
    key: "features"
    value {
      float_list {
        value: 2.5
        value: 2.99
        value: 57
        value: 0.068
        value: -0.19
        value: 1.18
        value: 10.98
        value: 2.8
      }
    }
  }
}

features {
  feature {
    key: "label"
    value {
      float_list {
        value: 0.88
      }
    }
  }
  feature {
    key: "features"
    value {
      float_list {
        value: 3.1
        value: 3.5
        value: 8
        value: 590
        value: 0.00024
        value: 0.0543
        

In [58]:
def initialize_dataset(csv_filepath):
    records_dict = {}
    r_set = np.array([])
    ACSF_set = np.array([])
    z_set = np.array([])
    d1_set = np.array([])
    d2_set = np.array([])
    d3_set = np.array([])
    exp1_set = np.array([])
    exp2_set = np.array([])
    exp3_set = np.array([])
    molecular_datafile = open(csv_filepath, "r")
    features = pd.read_csv(csv_filepath)
    columns = features.columns
    placeholder_column = columns[0]
    del placeholder_column
    r_vals = features["r"]
    ACSF_vals = features["ACSF"]
    z_vals = features["z"]
    d1_vals = features["d1"]
    d2_vals = features["d2"]
    d3_vals = features["d3"]
    exp1_vals = features["exp1"]
    exp2_vals = features["exp2"]
    exp3_vals = features["exp3"]
    for i in r_vals:
        r_set = np.append(r_set, i)
    for j in ACSF_vals:
        ACSF_set = np.append(ACSF_set, j)
    for k in z_vals:
        z_set = np.append(z_set, k) 
    for l in d1_vals:
        d1_set = np.append(d1_set, l)
    for m in d2_vals:
        d2_set = np.append(d2_set, m)
    for n in d3_vals:
        d3_set = np.append(d3_set, n)
    for o in exp1_vals:
        exp1_set = np.append(exp1_set, o)
    for p in exp2_vals:
        exp2_set = np.append(exp2_set, p)
    for q in exp3_vals:
        exp3_set = np.append(exp3_set, q)
    record_keys = ["r", "ACSF", "z", "d1", "d2", "d3", "exp1", "exp2", "exp3"]
    num_records = np.arange(0, len(record_keys))
    for i in num_records:
        if record_keys[i] == "r":
            records_dict.update({record_keys[i]:list(r_set)})
        elif record_keys[i] == "ACSF":
            records_dict.update({record_keys[i]:list(ACSF_set)})
        elif record_keys[i] == "z":
            records_dict.update({record_keys[i]:list(z_set)})
        elif record_keys[i] == "d1":
            records_dict.update({record_keys[i]:list(d1_set)})
        elif record_keys[i] == "d2":
            records_dict.update({record_keys[i]:list(d2_set)})
        elif record_keys[i] == "d3":
            records_dict.update({record_keys[i]:list(d3_set)})
        elif record_keys[i] == "exp1":
            records_dict.update({record_keys[i]:list(exp1_set)})
        elif record_keys[i] == "exp2":
            records_dict.update({record_keys[i]:list(exp2_set)})
        elif record_keys[i] == "exp3":
            records_dict.update({record_keys[i]:list(exp3_set)})
        i += 1        
    return records_dict

def take_data_batch(csv_filepath):
    data_dict = initialize_dataset(csv_filepath)
    example_set = tf.train.Example(features = tf.train.Features(feature={
    'r': tf.train.Feature(
        float_list=tf.train.FloatList(value=[data_dict["r"][0]])),
    'ACSF': tf.train.Feature(
        float_list=tf.train.FloatList(
            value=[data_dict["ACSF"][0]])),
    'z': tf.train.Feature(
        float_list=tf.train.FloatList(value=[data_dict['z'][0]])),
    'd1': tf.train.Feature(
        float_list=tf.train.FloatList(
            value=[data_dict['d1'][0]])),
    'd2': tf.train.Feature(
        float_list=tf.train.FloatList(
            value=[data_dict['d2'][0]])),
    'd3': tf.train.Feature(
        float_list=tf.train.FloatList(value=[data_dict['d3'][0]])),
    'exp1': tf.train.Feature(
        float_list=tf.train.FloatList(
            value=[data_dict['exp1'][0]])),
    'exp2': tf.train.Feature(
        float_list=tf.train.FloatList(
            value=[data_dict['exp2'][0]])),
    'exp3': tf.train.Feature(
        float_list=tf.train.FloatList(value=[data_dict['exp3'][0]]))
    }))
    return example_set
csv_filepath = csv_filepath = "/users/haydenprescott/documents/test.csv"
print(initialize_dataset(csv_filepath))

{'r': [0.0, 2.5, 3.1, 5.4, 1.9, 2.7, 4.7], 'ACSF': [3.75, 2.99, 3.5, 5.0, 3.0, 3.38, 5.2], 'z': [8.0, 57.0, 8.0, 8.0, 57.0, 57.0, 8.0], 'd1': [0.097, 0.068, 590.0, 0.0071, -0.0048, -0.00011, 45.0], 'd2': [0.055, -0.19, 0.00024, 4.1, 0.022, 0.0091, -0.088], 'd3': [498.0, 1.18, 0.0543, 0.00044, -0.095, 0.228, 0.00512], 'exp1': [4.62, 10.98, 3.88, 2.85, 2.56, 3.19, 4.73], 'exp2': [2.91, 2.8, 2.98, 3.25, 9.58, 7.43, 2.9], 'exp3': [0.244, 3.15, 0.88, 0.7, 2.98, 0.97, 0.66]}


In [59]:
def write_TF_records_file(csv_filepath):
    encoded_dataset = take_data_batch(csv_filepath)
    with tf.io.TFRecordWriter("test_data.tfrecord") as writer:
        writer.write(encoded_dataset.SerializeToString())

csv_filepath = "/users/haydenprescott/documents/test.csv"
write_TF_records_file(csv_filepath)

In [60]:
def nodeset_position_fxn(nodeset_proto):
    feature_descriptor = {"z" : tf.io.FixedLenFeature([], tf.float32), "r" : tf.io.FixedLenFeature([], tf.float32), "ACSF" : tf.io.FixedLenFeature([], tf.float32), "d1" : tf.io.FixedLenFeature([], tf.float32), "d2" : tf.io.FixedLenFeature([], tf.float32), "d3" : tf.io.FixedLenFeature([], tf.float32), "exp1" : tf.io.FixedLenFeature([], tf.float32), "exp2" : tf.io.FixedLenFeature([], tf.float32), "exp3" : tf.io.FixedLenFeature([], tf.float32)}
    position_fxn = tf.io.parse_single_example(nodeset_proto, feature_descriptor)
    return position_fxn

csv_filepath = "/users/haydenprescott/documents/test.csv"
def record_floats(value):
    r_floats = tf.train.Feature(float_list = tf.train.FloatList(value = [value]))
    return r_floats

def serialize_floats(r_val, ACSF_val, z_val, d1_val, d2_val, d3_val, exp1_val, exp2_val, exp3_val, csv_filepath):
    initial_state_feature = {"r":record_floats(r_val), "ACSF":record_floats(ACSF_val), "z":record_floats(z_val), "d1":record_floats(d1_val), "d2":record_floats(d2_val), "d3":record_floats(d3_val), "exp1":record_floats(exp1_val), "exp2":record_floats(exp2_val), "exp3":record_floats(exp3_val)}
    initial_state_proto = tf.train.Example(features = tf.train.Features(feature = initial_state_feature))
    initial_state_records = initial_state_proto.SerializeToString()
    return initial_state_records

def convert_floats_to_records(csv_filepath, record_filepath, record_filename):
    data_dict = initialize_dataset(csv_filepath)
    previous_record_path = Path(record_filename)
    serialized_input_floats = []
    n_input_vals = np.arange(0, (len(data_dict['r'])))
    i = 0
    for i in n_input_vals:
        r, ACSF, z, d1, d2, d3, exp1, exp2, exp3 = data_dict['r'][i], data_dict['ACSF'][i], data_dict['z'][i], data_dict['d1'][i], data_dict['d2'][i], data_dict['d3'][i], data_dict['exp1'][i], data_dict['exp2'][i], data_dict['exp3'][i]
        all_floats_per_molecule = (r, ACSF, z, d1, d2, d3, exp1, exp2, exp3)
        serialized_input_floats.append(all_floats_per_molecule)
        initial_state_data = tf.constant([['r', 'ACSF', 'z', 'd1', 'd2', 'd3','exp1', 'exp2', 'exp3']])
    with tf.io.TFRecordWriter(record_filepath) as record_writer:
        for r, ACSF, z, d1, d2, d3, exp1, exp2, exp3 in serialized_input_floats:
            serialized_input_floats = serialize_floats(r, ACSF, z, d1, d2, d3, exp1, exp2, exp3, csv_filepath)
            record_writer.write(serialized_input_floats)
            record_batch = tf.data.TFRecordDataset(record_filename)
            record_batch = record_batch.map(nodeset_position_fxn)
            ith_atom_features = tf.constant([[str(r), str(ACSF), str(z), str(d1), str(d2), str(d3), str(exp1), str(exp2), str(exp3)]])
            initial_state_data = tf.concat([initial_state_data, ith_atom_features], 0)
    return initial_state_data
            
csv_filepath = "/users/haydenprescott/documents/test.csv"
record_filename = "test_data.tfrecord" 
record_filepath = "/users/haydenprescott/test_data.tfrecord"
print(convert_floats_to_records(csv_filepath, record_filepath, record_filename))

tf.Tensor(
[[b'r' b'ACSF' b'z' b'd1' b'd2' b'd3' b'exp1' b'exp2' b'exp3']
 [b'0.0' b'3.75' b'8.0' b'0.097' b'0.055' b'498.0' b'4.62' b'2.91'
  b'0.244']
 [b'2.5' b'2.99' b'57.0' b'0.068' b'-0.19' b'1.18' b'10.98' b'2.8'
  b'3.15']
 [b'3.1' b'3.5' b'8.0' b'590.0' b'0.00024' b'0.0543' b'3.88' b'2.98'
  b'0.88']
 [b'5.4' b'5.0' b'8.0' b'0.0071' b'4.1' b'0.00044' b'2.85' b'3.25' b'0.7']
 [b'1.9' b'3.0' b'57.0' b'-0.0048' b'0.022' b'-0.095' b'2.56' b'9.58'
  b'2.98']
 [b'2.7' b'3.38' b'57.0' b'-0.00011' b'0.0091' b'0.228' b'3.19' b'7.43'
  b'0.97']
 [b'4.7' b'5.2' b'8.0' b'45.0' b'-0.088' b'0.00512' b'4.73' b'2.9'
  b'0.66']], shape=(8, 9), dtype=string)


In [61]:
def build_graph_tensor(csv_filepath, record_filepath, record_filename):
    initial_graph_tensor = convert_floats_to_records(csv_filepath, record_filepath, record_filename)
    input_graph_tensor = tf.transpose(initial_graph_tensor)
    input_rvals = input_graph_tensor[0][1:len(input_graph_tensor) - 1].numpy()
    input_ACSFvals = input_graph_tensor[1][1:len(input_graph_tensor) - 1].numpy()
    input_zvals = input_graph_tensor[2][1:len(input_graph_tensor) - 1].numpy()
    input_d1vals = input_graph_tensor[3][1:len(input_graph_tensor) - 1].numpy()
    input_d2vals = input_graph_tensor[4][1:len(input_graph_tensor) - 1].numpy()
    input_d3vals = input_graph_tensor[5][1:len(input_graph_tensor) - 1].numpy()
    input_exp1vals = input_graph_tensor[6][1:len(input_graph_tensor) - 1].numpy()
    input_exp2vals = input_graph_tensor[7][1:len(input_graph_tensor) - 1].numpy()
    input_exp3vals = input_graph_tensor[8][1:len(input_graph_tensor) - 1].numpy()
    input_graph_tensor = np.array([input_rvals, input_ACSFvals, input_zvals, input_d1vals, input_d2vals, input_d3vals, input_exp1vals, input_exp2vals, input_exp3vals])
    input_graph_tensor = input_graph_tensor.transpose()
    initial_graph_tensor = initial_graph_tensor.numpy()
    initial_state_and_GNN_input = (initial_graph_tensor, input_graph_tensor)
    return initial_state_and_GNN_input

graph_tensor = build_graph_tensor(csv_filepath, record_filepath, record_filename)
print(build_graph_tensor(csv_filepath, record_filepath, record_filename))
print(type(build_graph_tensor(csv_filepath, record_filepath, record_filename)))

(array([[b'r', b'ACSF', b'z', b'd1', b'd2', b'd3', b'exp1', b'exp2',
        b'exp3'],
       [b'0.0', b'3.75', b'8.0', b'0.097', b'0.055', b'498.0', b'4.62',
        b'2.91', b'0.244'],
       [b'2.5', b'2.99', b'57.0', b'0.068', b'-0.19', b'1.18', b'10.98',
        b'2.8', b'3.15'],
       [b'3.1', b'3.5', b'8.0', b'590.0', b'0.00024', b'0.0543', b'3.88',
        b'2.98', b'0.88'],
       [b'5.4', b'5.0', b'8.0', b'0.0071', b'4.1', b'0.00044', b'2.85',
        b'3.25', b'0.7'],
       [b'1.9', b'3.0', b'57.0', b'-0.0048', b'0.022', b'-0.095',
        b'2.56', b'9.58', b'2.98'],
       [b'2.7', b'3.38', b'57.0', b'-0.00011', b'0.0091', b'0.228',
        b'3.19', b'7.43', b'0.97'],
       [b'4.7', b'5.2', b'8.0', b'45.0', b'-0.088', b'0.00512', b'4.73',
        b'2.9', b'0.66']], dtype=object), array([[b'0.0', b'3.75', b'8.0', b'0.097', b'0.055', b'498.0', b'4.62',
        b'2.91', b'0.244'],
       [b'2.5', b'2.99', b'57.0', b'0.068', b'-0.19', b'1.18', b'10.98',
        b'2.8', b'3.1

In [62]:
class find_orbitals_get_characteristics:
    def __init__(self, molecular_formula, seed_structure_directory, element, graph_tensor):
        self.molecular_formula = molecular_formula
        self.seed_structure_directory = seed_structure_directory
        self.element = element
        self.graph_tensor = graph_tensor

    def find_atomic_symbol(self, element):
        characteristic_list = []
        blanks = np.array([])
        for i in np.arange(0, len(str(element))):
            characteristic_list.append(str(element)[i])
        for j in np.arange(0, len(characteristic_list)):
            if characteristic_list[j] == " ":
                blanks = np.append(blanks, j)
        atomic_symbol = str(element)[int(blanks[0]) + 1:int(blanks[1])]
        return atomic_symbol

    def find_atomic_number(self, element):
        characteristic_list = []
        blanks = np.array([])
        for i in np.arange(0, len(str(element))):
            characteristic_list.append(str(element)[i])
        for j in np.arange(0, len(characteristic_list)):
            if characteristic_list[j] == " ":
                blanks = np.append(blanks, j)
        atomic_symbol = str(element)[0:int(blanks[0])]
        return atomic_symbol

    
    def get_symbols_and_numbers(self, molecular_formula, seed_structure_directory, graph_tensor):
        seed_structure = seed_structure_directory + "/" + str(molecular_formula) + ".csv"
        molecular_details = open(seed_structure, "r")
        molecular_details = pd.read_csv(seed_structure)
        element_symbols = np.array(molecular_details['symbol'][0:len(graph_tensor[0]) - 1])
        atomic_numbers = np.array([])
        valence_blocks = np.array([])
        element_set = ptable.get_all_elements()
        for j in np.arange(0, len(element_symbols)):
            for i in np.arange(0, len(element_set)):
                if str.encode(self.find_atomic_symbol(element_set[i])) == str.encode(element_symbols[j]):
                    atomic_numbers = np.append(atomic_numbers, int(self.find_atomic_number(element_set[i])))
                    valence_blocks = np.append(valence_blocks, element_set[i].block)
        return element_symbols, atomic_numbers, valence_blocks

    
            
            


In [63]:
element = None
molecular_characteristics = find_orbitals_get_characteristics(molecular_formula = "La3O4", seed_structure_directory = "/users/haydenprescott/documents", element = element, graph_tensor = graph_tensor)
print(molecular_characteristics.get_symbols_and_numbers(molecular_characteristics.molecular_formula, molecular_characteristics.seed_structure_directory, molecular_characteristics.graph_tensor))
atomic_symbols = molecular_characteristics.find_atomic_symbol(element = ptable.get_all_elements()[56])
print(atomic_symbols)

(array(['La', 'O', 'O', 'La', 'O', 'O', 'La'], dtype=object), array([57.,  8.,  8., 57.,  8.,  8., 57.]), array(['d', 'p', 'p', 'd', 'p', 'p', 'd'], dtype='<U32'))
La


In [None]:

atomic_numbers = np.array([])
valence_blocks = np.array([])
element_set = ptable.get_all_elements()
for j in np.arange(0, len(element_symbols)):
    for i in np.arange(0, len(element_set)):
        if str.encode(self.find_atomic_symbol(element_set[i])) == str.encode(element_symbols[j]):
            atomic_numbers = np.append(atomic_numbers, int(self.find_atomic_number(element_set[i])))
            valence_blocks = np.append(valence_blocks, element_set[i].block)

In [64]:
# compute overlaps between atomic orbitals in basis and approximate energy eigenvalues for all free-atom (with correct valency) and bonded (molecular orbital) states using extended Huckel. Compute energy changes between pure and bonded states, and construct Kohn-Sham MO's as LCAO's of basis AO's with positive (constuctive) overlaps and reduced energies:

class EH_molecular_orbitals:
    def __init__(self, input_tensor, atomic_orbital_states, n_atoms, exponential_range, maximum_s_el, maximum_p_el, maximum_d_el, maximum_f_el):
        self.input_tensor = input_tensor
        self.atomic_orbital_states = atomic_orbital_states
        self.n_atoms = n_atoms
        self.exponential_range = exponential_range
        self.maximum_s_el = maximum_s_el
        self.maximum_p_el = maximum_p_el
        self.maximum_d_el = maximum_d_el
        self.maximum_f_el = maximum_f_el

    def construct_basis_AOs(self, input_tensor, atomic_orbital_states, n_atoms, exponential_range, maximum_s_el, maximum_p_el, maximum_d_el, maximum_f_el):
        s_AO_set = sympy.Array([])
        p_AO_set = sympy.Array([])
        d_AO_set = sympy.Array([])
        f_AO_set = sympy.Array([])
        s_ref_set = sympy.Array([])
        p_ref_set = sympy.Array([])
        d_ref_set = sympy.Array([])
        f_ref_set = sympy.Array([])
        s_exp_set = sympy.Array([])
        p_exp_set = sympy.Array([])
        d_exp_set = sympy.Array([])
        f_exp_set = sympy.Array([])
        exp_set = sympy.Array([])
        one_electron_wavefunctions = np.array([])
        gaussian_terms = []
        coefficient_sequence = []
        exp_set = []
        r = sympy.symbols("r", real = True)
        j = sympy.symbols("j", real = True)
        s_exponentials = 1
        p_exponentials = atomic_orbital_states["p"][1] - atomic_orbital_states["s"]
        d_exponentials = atomic_orbital_states["d"][1] - atomic_orbital_states["p"][1]
        f_exponentials = atomic_orbital_states["f"][0]
        i = 0
        for i in range(0, n_atoms):
            if len(input_tensor.transpose()[basis_coefficient_range[0]:basis_coefficient_range[1]]) > s_exponentials - 1:
                exp_start = exponential_range[s_exponentials - 1]
                s_coefficient_range = np.array([input_tensor.transpose()[basis_coefficient_range[0]:basis_coefficient_range[1]].transpose()[i][0]])
                print(s_coefficient_range)
                j = 0
                for j in range(0, len(s_coefficient_range)):
                    current_basis_coeff = s_coefficient_range[j]
                    if current_basis_coeff != 0:
                        current_s_AO = current_basis_coeff * sympy.exp(-input_tensor[i][exp_start] * r)
                        s_AO_set = np.append(s_AO_set, np.array([current_s_AO, i + 1]))
                        s_ref_set = np.append(s_ref_set, current_s_AO)
                        s_exp_set = np.append(s_exp_set, sympy.exp(-input_tensor[i][exp_start] * r))
                        gaussian_terms.append(current_s_AO)
                        coefficient_sequence.append(current_basis_coeff)
                        j += s_exponentials
            else:
                pass
            exp_range_vals = np.arange(exponential_range[0], exponential_range[1] + 1)
            if len(input_tensor.transpose()[basis_coefficient_range[0]:basis_coefficient_range[1]]) > s_exponentials * maximum_s_el: 
                p_coefficient_range = input_tensor.transpose()[basis_coefficient_range[0]:basis_coefficient_range[1]].transpose()[i][maximum_s_el * s_exponentials : maximum_s_el * s_exponentials + maximum_p_el * p_exponentials]
                print(p_coefficient_range)
                j = 0
                p_AO_contributions = 0
                while j <= len(p_coefficient_range):
                    current_basis_coeffs = p_coefficient_range[j: j + p_exponentials] 
                    k = 0
                    print(current_basis_coeffs)
                    for k in range(0, len(current_basis_coeffs)):
                        if current_basis_coeffs[k] != 0:
                            current_gaussian_term = current_basis_coeffs[k] * sympy.exp(-input_tensor[i][exp_range_vals][k + atomic_orbital_states["p"][0]] * r)
                            gaussian_terms.append(current_gaussian_term)
                            coefficient_sequence.append([current_basis_coeffs[k]])
                            p_exp_set = np.append(p_exp_set, sympy.exp(-input_tensor[i][exp_range_vals][k + atomic_orbital_states["p"][0]] * r))
                            p_AO_contributions = p_AO_contributions + current_gaussian_term
                            k += 1
                    j += p_exponentials
                    if j <= len(p_coefficient_range):
                        p_AO_set = np.append(p_AO_set, np.array([p_AO_contributions, i + 1]))  
                        p_ref_set = np.append(p_ref_set, p_AO_contributions)
            else:
                pass
            if len(input_tensor.transpose()[basis_coefficient_range[0]:basis_coefficient_range[1]]) > s_exponentials * maximum_s_el + p_exponentials * maximum_p_el:
                d_coefficient_range = input_tensor.transpose()[basis_coefficient_range[0]:basis_coefficient_range[1]].transpose()[i][maximum_s_el * s_exponentials + maximum_p_el * p_exponentials : maximum_s_el * s_exponentials + maximum_p_el * p_exponentials + maximum_d_el + d_exponentials]
                j = 0
                d_AO_contributions = 0
                while j <= len(d_coefficient_range):
                    current_basis_coeffs = d_coefficient_range[j: j + d_exponentials] 
                    k = 0
                    for k in range(0, len(current_basis_coeffs)):
                        if current_basis_coeffs[k] != 0:
                            current_gaussian_term = current_basis_coeffs[k] * sympy.exp(-input_tensor[i][exp_range_vals][k + atomic_orbital_states["d"][0]] * r)
                            gaussian_terms.append(current_gaussian_term)
                            coefficient_sequence.append([current_basis_coeffs[k]])
                            d_exp_set = np.append(d_exp_set, sympy.exp(-input_tensor[i][exp_range_vals][k + atomic_orbital_states["d"][0]] * r))
                            d_AO_contributions = d_AO_contributions + current_gaussian_term
                            k += 1
                    j += d_exponentials
                    if j <= len(d_coefficient_range):
                        d_AO_set = np.append(d_AO_set, np.array([d_AO_contributions, i + 1])) 
                        d_ref_set = np.append(d_ref_set, d_AO_contributions)
            else:
                pass
            if len(input_tensor.transpose()[basis_coefficient_range[0]:basis_coefficient_range[1]]) > s_exponentials * maximum_s_el + p_exponentials * maximum_p_el + d_exponentials * maximum_d_el:
                f_coefficient_range = input_tensor.transpose()[basis_coefficient_range[0]:basis_coefficient_range[1]].transpose()[i][s_exponentials * maximum_s_el + p_exponentials * maximum_p_el + d_exponentials * maximum_d_el : maximum_s_el * s_exponentials + maximum_p_el * p_exponentials + maximum_d_el * d_exponentials + maximum_f_el * f_exponentials]
                j = 0
                f_AO_contributions = 0
                while j <= len(f_coefficient_range):
                    current_basis_coeffs = d_coefficient_range[j: j + f_exponentials] 
                    k = 0
                    for k in range(0, len(current_basis_coeffs)):
                        if current_basis_coeffs[k] != 0:
                            current_gaussian_term = current_basis_coeffs[k] * sympy.exp(-input_tensor[i][exp_range_vals][k + atomic_orbital_states["f"][0]] * r)
                            gaussian_terms.append(current_gaussian_term)
                            coefficient_sequence.append([current_basis_coeffs[k]])
                            f_exp_set = np.append(f_exp_set, sympy.exp(-input_tensor[i][exp_range_vals][k + atomic_orbital_states["f"][0]] * r))
                            f_AO_contributions = f_AO_contributions + current_gaussian_term
                            k += 1
                    j += f_exponentials
                    if j <= len(f_coefficient_range):
                        f_AO_set = np.append(f_AO_set, np.array([f_AO_contributions, i + 1])) 
                        f_ref_set = np.append(f_ref_set, f_AO_contributions)
            else:
                pass
            i += 1
        Ci = sympy.symbols("Ci", real = True)
        one_electron_wavefunctions = np.append(one_electron_wavefunctions, s_AO_set)
        one_electron_wavefunctions = np.append(one_electron_wavefunctions, p_AO_set)
        one_electron_wavefunctions = np.append(one_electron_wavefunctions, d_AO_set)
        one_electron_wavefunctions = np.append(one_electron_wavefunctions, f_AO_set)
        exp_set = np.append(exp_set, s_exp_set)
        exp_set = np.append(exp_set, p_exp_set)
        exp_set = np.append(exp_set, d_exp_set)
        exp_set = np.append(exp_set, f_exp_set)
        one_electron_AO_eqns = np.array([])
        k = 0
        for k in range(0, len(s_ref_set) * s_exponentials):
            current_s_AO_eqn = Ci * exp_set[k]
            one_electron_AO_eqns = np.append(one_electron_AO_eqns, current_s_AO_eqn)
            k += 1
        p_gaussian_start_index = (len(s_ref_set) * s_exponentials) - 1
        p_gaussian_end_index = (len(s_ref_set) * s_exponentials + len(p_ref_set) * p_exponentials) - 1
        p_gaussian_count = p_gaussian_end_index - p_gaussian_start_index
        k = 0
        for k in range(0, int(p_gaussian_count / 2)):
            current_gaussian_fxns = list(gaussian_terms[p_gaussian_start_index + p_exponentials * k : p_gaussian_start_index + p_exponentials * k + p_exponentials])
            current_ref_fxns = list(gaussian_terms[p_gaussian_start_index + p_exponentials * k : p_gaussian_start_index + p_exponentials * k + p_exponentials + 1])
            current_basis_coefficients = coefficient_sequence[p_gaussian_start_index + p_exponentials * k : p_gaussian_start_index + p_exponentials * k + p_exponentials]
            current_exponentials = exp_set[p_gaussian_start_index + p_exponentials * k : p_gaussian_start_index + p_exponentials* k + p_exponentials]
            tracker = 1
            prev_gaussian = 0
            for tracker in range(1, len(current_ref_fxns)):
                sample_gaussian = random.choice(current_gaussian_fxns)
                if sample_gaussian != prev_gaussian:
                    gaussian_index = current_gaussian_fxns.index(sample_gaussian)
                    sample_gaussian_eqn = Ci * current_exponentials[gaussian_index]
                    current_gaussian_fxns[gaussian_index] = sample_gaussian_eqn
                    current_gaussian_eqn = sum(current_gaussian_fxns)
                    one_electron_AO_eqns = np.append(one_electron_AO_eqns, current_gaussian_eqn)
                    current_ref_fxns.pop(gaussian_index)
                    prev_gaussian = sample_gaussian
                    current_gaussian_fxns[gaussian_index] = sample_gaussian
                    tracker += 1
                else:
                    continue
                k += 1
        d_gaussian_start_index = (len(s_ref_set) * s_exponentials + len(p_ref_set) * p_exponentials) - 1
        d_gaussian_end_index = (len(s_ref_set) * s_exponentials + len(p_ref_set) * p_exponentials + len(d_ref_set) * d_exponentials) - 1
        d_gaussian_count = d_gaussian_end_index - d_gaussian_start_index
        k = 0
        for k in range(0, int(d_gaussian_count / 2)):
            current_gaussian_fxns = list(gaussian_terms[d_gaussian_start_index + d_exponentials * k : d_gaussian_start_index + d_exponentials * k + d_exponentials])
            current_ref_fxns = list(gaussian_terms[d_gaussian_start_index + d_exponentials * k : d_gaussian_start_index + d_exponentials * k + d_exponentials + 1])
            current_basis_coefficients = coefficient_sequence[d_gaussian_start_index + d_exponentials * k : d_gaussian_start_index + d_exponentials * k + d_exponentials]
            current_exponentials = exp_set[d_gaussian_start_index + d_exponentials * k : d_gaussian_start_index + d_exponentials* k + d_exponentials]
            tracker = 1
            prev_gaussian = 0
            for tracker in range(1, len(current_ref_fxns)):
                sample_gaussian = random.choice(current_gaussian_fxns)
                if sample_gaussian != prev_gaussian:
                    gaussian_index = current_gaussian_fxns.index(sample_gaussian)
                    sample_gaussian_eqn = Ci * current_exponentials[gaussian_index]
                    current_gaussian_fxns[gaussian_index] = sample_gaussian_eqn
                    current_gaussian_eqn = sum(current_gaussian_fxns)
                    one_electron_AO_eqns = np.append(one_electron_AO_eqns, current_gaussian_eqn)
                    current_ref_fxns.pop(gaussian_index)
                    prev_gaussian = sample_gaussian
                    current_gaussian_fxns[gaussian_index] = sample_gaussian
                    tracker += 1
                else:
                    continue
                k += 1
        f_gaussian_start_index = (len(s_ref_set) * s_exponentials + len(p_ref_set) * p_exponentials + len(d_ref_set) * d_exponentials) - 1
        f_gaussian_end_index = (len(s_ref_set) * s_exponentials + len(p_ref_set) * p_exponentials + len(d_ref_set) * d_exponentials + len(f_ref_set) * f_exponentials) - 1
        f_gaussian_count = f_gaussian_end_index - f_gaussian_start_index
        k = 0
        for k in range(0, int(f_gaussian_count / 2)):
            current_gaussian_fxns = list(gaussian_terms[f_gaussian_start_index + f_exponentials * k : f_gaussian_start_index + f_exponentials * k + f_exponentials])
            current_ref_fxns = list(gaussian_terms[f_gaussian_start_index + f_exponentials * k : f_gaussian_start_index + f_exponentials * k + f_exponentials + 1])
            current_basis_coefficients = coefficient_sequence[f_gaussian_start_index + f_exponentials * k : f_gaussian_start_index + f_exponentials * k + f_exponentials]
            current_exponentials = exp_set[f_gaussian_start_index + f_exponentials * k : f_gaussian_start_index + f_exponentials* k + f_exponentials]
            tracker = 1
            prev_gaussian = 0
            for tracker in range(1, len(current_ref_fxns)):
                sample_gaussian = random.choice(current_gaussian_fxns)
                if sample_gaussian != prev_gaussian:
                    gaussian_index = current_gaussian_fxns.index(sample_gaussian)
                    sample_gaussian_eqn = Ci * current_exponentials[gaussian_index]
                    current_gaussian_fxns[gaussian_index] = sample_gaussian_eqn
                    current_gaussian_eqn = sum(current_gaussian_fxns)
                    one_electron_AO_eqns = np.append(one_electron_AO_eqns, current_gaussian_eqn)
                    current_ref_fxns.pop(gaussian_index)
                    prev_gaussian = sample_gaussian
                    current_gaussian_fxns[gaussian_index] = sample_gaussian
                    tracker += 1
                else:
                    continue
                k += 1
                    
        return one_electron_wavefunctions, one_electron_AO_eqns




In [65]:
molecular_graph_details = molecular_graph(orbital_overlaps = np.array(["1_2", "2_1", "1_6", "6_1", "1_7", "7_1", "2_3", "3_2", "3_4", "4_3", "3_7", "7_3", "4_5", "5_4", "5_6", "6_5", "5_7", "7_5"]), n_atoms = 7, maximum_s_el = 1, maximum_p_el = 2, maximum_d_el = 0, maximum_f_el = 0)
initial_state_tensor = build_graph_tensor(csv_filepath, record_filepath, record_filename)[0]
variable_state_tensor = build_graph_tensor(csv_filepath, record_filepath, record_filename)[1]
bonding_states = {"s":0, "p":(1,2), "d":(3,7), "f":(8,)}
variable_state_tensor = variable_state_tensor.astype("float")
initial_state_floats = initial_state_tensor[1:].astype("float")
initial_state_labels = initial_state_tensor[0].astype("str")
basis_coefficient_range = (list(initial_state_labels).index("d1"), list(initial_state_labels).index("exp1"))
exponential_range = (list(initial_state_labels).index("exp1"), len(initial_state_labels) - 1)

EH_solver = EH_molecular_orbitals(input_tensor = initial_state_floats, atomic_orbital_states = bonding_states, n_atoms = 7, exponential_range = exponential_range, maximum_s_el = molecular_graph_details.maximum_s_el, maximum_p_el = molecular_graph_details.maximum_p_el, maximum_d_el = molecular_graph_details.maximum_d_el, maximum_f_el = molecular_graph_details.maximum_f_el)
print(EH_solver.construct_basis_AOs(input_tensor = EH_solver.input_tensor, atomic_orbital_states = EH_solver.atomic_orbital_states, n_atoms = EH_solver.n_atoms, exponential_range = EH_solver.exponential_range, maximum_s_el = EH_solver.maximum_s_el, maximum_p_el = EH_solver.maximum_p_el, maximum_d_el = EH_solver.maximum_d_el, maximum_f_el = EH_solver.maximum_f_el))


[0.097]
[5.50e-02 4.98e+02]
[5.50e-02 4.98e+02]
[]
[0.068]
[-0.19  1.18]
[-0.19  1.18]
[]
[590.]
[0.00024 0.0543 ]
[0.00024 0.0543 ]
[]
[0.0071]
[4.1e+00 4.4e-04]
[4.1e+00 4.4e-04]
[]
[-0.0048]
[ 0.022 -0.095]
[ 0.022 -0.095]
[]
[-0.00011]
[0.0091 0.228 ]
[0.0091 0.228 ]
[]
[45.]
[-0.088    0.00512]
[-0.088    0.00512]
[]
(array([0.097*exp(-4.62*r), 1, 0.068*exp(-10.98*r), 2, 590.0*exp(-3.88*r),
       3, 0.0071*exp(-2.85*r), 4, -0.0048*exp(-2.56*r), 5,
       -0.00011*exp(-3.19*r), 6, 45.0*exp(-4.73*r), 7,
       0.055*exp(-2.91*r) + 498.0*exp(-0.244*r), 1,
       1.18*exp(-3.15*r) - 0.19*exp(-2.8*r), 2,
       0.00024*exp(-2.98*r) + 0.0543*exp(-0.88*r), 3,
       4.1*exp(-3.25*r) + 0.00044*exp(-0.7*r), 4,
       0.022*exp(-9.58*r) - 0.095*exp(-2.98*r), 5,
       0.0091*exp(-7.43*r) + 0.228*exp(-0.97*r), 6,
       -0.088*exp(-2.9*r) + 0.00512*exp(-0.66*r), 7], dtype=object), array([Ci*exp(-4.62*r), Ci*exp(-10.98*r), Ci*exp(-3.88*r),
       Ci*exp(-2.85*r), Ci*exp(-2.56*r), Ci*exp(-3.1

In [22]:
print(initial_state_floats[basis_coefficient_range[0]:basis_coefficient_range[1]])
print(basis_coefficient_range)

[[ 5.40e+00  5.00e+00  8.00e+00  7.10e-03  4.10e+00  4.40e-04  2.85e+00
   3.25e+00  7.00e-01]
 [ 1.90e+00  3.00e+00  5.70e+01 -4.80e-03  2.20e-02 -9.50e-02  2.56e+00
   9.58e+00  2.98e+00]
 [ 2.70e+00  3.38e+00  5.70e+01 -1.10e-04  9.10e-03  2.28e-01  3.19e+00
   7.43e+00  9.70e-01]]
(3, 6)


NameError: name 'maximum_s_el' is not defined

In [71]:
mat = sympy.Matrix([[1,5,3,-3], [0,1,-1,8], [0,0,0,3], [0,0,1,1]])
print(mat.rref())

(Matrix([
[1, 0, 0, 0],
[0, 1, 0, 0],
[0, 0, 1, 0],
[0, 0, 0, 1]]), (0, 1, 2, 3))


In [73]:
help(sympy.Matrix.rref)

Help on function rref in module sympy.matrices.matrices:

rref(self, iszerofunc=<function _iszero at 0x28ed23010>, simplify=False, pivots=True, normalize_last=True)
    Return reduced row-echelon form of matrix and indices of pivot vars.
    
    Parameters
    
    iszerofunc : Function
        A function used for detecting whether an element can
        act as a pivot.  ``lambda x: x.is_zero`` is used by default.
    
    simplify : Function
        A function used to simplify elements when looking for a pivot.
        By default SymPy's ``simplify`` is used.
    
    pivots : True or False
        If ``True``, a tuple containing the row-reduced matrix and a tuple
        of pivot columns is returned.  If ``False`` just the row-reduced
        matrix is returned.
    
    normalize_last : True or False
        If ``True``, no pivots are normalized to `1` until after all
        entries above and below each pivot are zeroed.  This means the row
        reduction algorithm is fraction 