In [1]:
import sys
import warnings
warnings.filterwarnings("ignore")
import numpy as np
np.set_printoptions(threshold=sys.maxsize) #print out full arrays
import pandas as pd

randomstate = 42

from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole

import get_properties_functions_for_WI as gp

import sys

In [2]:
prefix = "pyrdz"

file_name = prefix + "_atom_map.xlsx"

atom_map_df = pd.read_excel(
    file_name, "Sheet1", index_col=0, header=0, engine="openpyxl"
)

display(atom_map_df.head())

df = atom_map_df  # df is what properties will be appended to, this creates a copy so that you have the original preserved

Unnamed: 0,log_name,C2,C3,C4,N1,N2,C1
0,pyrdz1_conf-1,C16,C17,C4,N5,N6,C7
1,pyrdz1_conf-2,C16,C17,C4,N5,N6,C7
2,pyrdz1_conf-3,C16,C17,C4,N5,N6,C7
3,pyrdz1_conf-4,C16,C17,C4,N5,N6,C7


In [3]:
# this box has functions to choose from
df = atom_map_df

# ---------------GoodVibes Engergies---------------
# uses the GoodVibes 2021 Branch (Jupyter Notebook Compatible)
# calculates the quasi harmonic corrected G(T) and single point corrected G(T) as well as other thermodynamic properties
# inputs: dataframe, temperature
df = gp.get_goodvibes_e(df, 298.15)

# ---------------Frontier Orbitals-----------------
# E(HOMO), E(LUMO), mu(chemical potential or negative of molecular electronegativity), eta(hardness/softness), omega(electrophilicity index)
df = gp.get_frontierorbs(df)

# ---------------Polarizability--------------------
# Exact polarizability
df = gp.get_polarizability(df)

# ---------------Dipole----------------------------
# Total dipole moment magnitude in Debye
df = gp.get_dipole(df)

# ---------------Volume----------------------------
# Molar volume
# requires the Gaussian keyword = "volume" in the .com file
df = gp.get_volume(df)

# ---------------SASA------------------------------
# Uses morfeus to calculat sovlent accessible surface area and the volume under the SASA
df = gp.get_SASA(df)

# ---------------NBO-------------------------------
# natural charge from NBO
# requires the Gaussian keyword = "pop=nbo7" in the .com file
nbo_list = ["C1", "C2", "C3", "C4", "N1", "N2"]
df = gp.get_nbo(df, nbo_list)

# ---------------NMR-------------------------------
# isotropic NMR shift
# requires the Gaussian keyword = "nmr=giao" in the .com file
nmr_list = ["C1", "C2", "C3", "C4", "N1", "N2"]
df = gp.get_nmr(df, nmr_list)

# ---------------Distance--------------------------
# distance between 2 atoms
dist_list_of_lists = [["N1", "C4"], ["N2", "C1"], ["N1", "N2"]]
df = gp.get_distance(df, dist_list_of_lists)

# ---------------Angle-----------------------------
# angle between 3 atoms
angle_list_of_lists = [["C4", "N1", "N2"], ["N1", "N2", "C1"]]
df = gp.get_angles(df, angle_list_of_lists)

# ---------------Dihedral--------------------------
# dihedral angle between 4 atoms
dihedral_list_of_lists = [
    ["C3", "C4", "N1", "N2"],
    ["C2", "C1", "N2", "N2"],
    ["C4", "N1", "N2", "C1"],
]
df = gp.get_dihedral(df, dihedral_list_of_lists)

# ---------------Vbur Scan-------------------------
# uses morfeus to calculate the buried volume at a series of radii (including hydrogens)
# inputs: dataframe, list of atoms, start_radius, end_radius, and step_size
# if you only want a single radius, put the same value for start_radius and end_radius (keep step_size > 0)
vbur_list = ["C1", "C2", "C3", "C4", "N1", "N2"]
df = gp.get_vbur_scan(df, vbur_list, 2, 4, 0.5)

# ---------------Sterimol morfeus------------------
# uses morfeus to calculate Sterimol L, B1, and B5 values
# NOTE: this is much faster than the corresponding DBSTEP function (recommendation: use as default/if you don't need Sterimol2Vec)
sterimol_list_of_lists = [["N1", "C4"], ["N2", "C1"], ["N1", "N2"]]
df = gp.get_sterimol_morfeus(df, sterimol_list_of_lists)

# ---------------Buried Sterimol-------------------
# uses morfeus to calculate Sterimol L, B1, and B5 values within a given sphere of radius r_buried
# atoms outside the sphere + 0.5 vdW radius are deleted and the Sterimol vectors are calculated
# for more information: https://kjelljorner.github.io/morfeus/sterimol.html
# inputs: dataframe, list of atom pairs, r_buried
sterimol_list_of_lists = [["N1", "C4"], ["N2", "C1"], ["N1", "N2"]]
df = gp.get_buried_sterimol(df, sterimol_list_of_lists, 5.5)

# ---------------Sterimol DBSTEP-------------------
# uses DBSTEP to calculate Sterimol L, B1, and B5 values
# default grid point spacing (0.05 Angstrom) is used (can use custom spacing or vdw radii in the get_properties_functions script)
# more info here: https://github.com/patonlab/DBSTEP
# NOTE: this takes longer than the morfeus function (recommendation: only use this if you need Sterimol2Vec)
sterimol_list_of_lists = [["N1", "C4"], ["N2", "C1"], ["N1", "N2"]]
df = gp.get_sterimol_dbstep(df, sterimol_list_of_lists)

# ---------------Sterimol2Vec----------------------
# uses DBSTEP to calculate Sterimol Bmin and Bmax values at intervals from 0 to end_radius, with a given step_size
# default grid point spacing (0.05 Angstrom) is used (can use custom spacing or vdw radii in the get_properties_functions script)
# more info here: https://github.com/patonlab/DBSTEP
# inputs: dataframe, list of atom pairs, end_radius, and step_size
sterimol2vec_list_of_lists = [["N1", "C4"], ["N2", "C1"], ["N1", "N2"]]
df = gp.get_sterimol2vec(df, sterimol2vec_list_of_lists, 1, 1.0)

# ---------------Pyramidalization------------------
# uses morfeus to calculate pyramidalization based on the 3 atoms in closest proximity to the defined atom
# collects values based on two definitions of pyramidalization
# details on these values can be found here: https://kjelljorner.github.io/morfeus/pyramidalization.html
pyr_list = ["C1", "C2", "C3", "C4", "N1", "N2"]
df = gp.get_pyramidalization(df, pyr_list)

# ---------------Plane Angle-----------------------
# plane angle between 2 planes (each defined by 6 atoms)
# planeangle_list_of_lists = [["N1", "C1", "C5"], ["C2", "C3", "C4"]]
# df = gp.get_planeangle(df, planeangle_list_of_lists)

# --------------LP energy - custom from first cell---------------
lp_list = ["N1", "N2"]
df = gp.get_one_lp_energy(df, lp_list)

# ---------------Time----------------------------------
# returns the total CPU time and total Wall time (not per subjob) because we are pioneers
# if used in summary df, will give the average (not Boltzmann average) in the Boltzmann average column
df = gp.get_time(df)

# ---------------ChelpG----------------------------
# ChelpG ESP charge
# requires the Gaussian keyword = "pop=chelpg" in the .com file
a_list = ["C1", "C2", "C3", "C4", "N1", "N2"]
df = gp.get_chelpg(df, a_list)

# ---------------Hirshfeld-------------------------
# Hirshfeld charge, CM5 charge, Hirshfeld atom dipole
# requires the Gaussian keyword = "pop=hirshfeld" in the .com file
a_list = ["C1", "C2", "C3", "C4", "N1", "N2"]
df = gp.get_hirshfeld(df, a_list)

pd.options.display.max_columns = None
display(df)

Goodvibes function has completed
Frontier orbitals function has completed
Polarizability function has completed
Dipole function has completed
Volume function has completed
SASA function has completed
NBO function has completed for ['C1', 'C2', 'C3', 'C4', 'N1', 'N2']
NMR function has completed for ['C1', 'C2', 'C3', 'C4', 'N1', 'N2']
Distance function has completed for [['N1', 'C4'], ['N2', 'C1'], ['N1', 'N2']]
Angles function has completed for [['C4', 'N1', 'N2'], ['N1', 'N2', 'C1']]
Dihedral function has completed for [['C3', 'C4', 'N1', 'N2'], ['C2', 'C1', 'N2', 'N2'], ['C4', 'N1', 'N2', 'C1']]
Vbur scan function has completed for ['C1', 'C2', 'C3', 'C4', 'N1', 'N2'] from 2 to 4
Morfeus Sterimol function has completed for [['N1', 'C4'], ['N2', 'C1'], ['N1', 'N2']]
Morfeus Buried Sterimol function has completed for [['N1', 'C4'], ['N2', 'C1'], ['N1', 'N2']]
   pyrdz1_conf-1.log / Bmin:  2.99 / Bmax:  9.35 / L:  4.20
   pyrdz1_conf-1.log / Bmin:  2.75 / Bmax:  7.86 / L:  5.95
   pyrdz

Unnamed: 0,log_name,C2,C3,C4,N1,N2,C1,E_spc (Hartree),ZPE(Hartree),H_spc(Hartree),T*S,T*qh_S,G(T)_spc(Hartree),qh_G(T)_spc(Hartree),T,HOMO,LUMO,μ,η,ω,polar_iso(Debye),polar_aniso(Debye),dipole(Debye),volume(Bohr_radius³/mol),SASA_surface_area(Å²),SASA_volume(Å³),SASA_sphericity,NBO_charge_C1,NBO_charge_C2,NBO_charge_C3,NBO_charge_C4,NBO_charge_N1,NBO_charge_N2,NMR_shift_C1,NMR_shift_C2,NMR_shift_C3,NMR_shift_C4,NMR_shift_N1,NMR_shift_N2,distance_N1_C4(Å),distance_N2_C1(Å),distance_N1_N2(Å),angle_C4_N1_N2(°),angle_N1_N2_C1(°),dihedral_C3_C4_N1_N2(°),dihedral_C2_C1_N2_N2(°),dihedral_C4_N1_N2_C1(°),%Vbur_C1_2.0Å,%Vbur_C2_2.0Å,%Vbur_C3_2.0Å,%Vbur_C4_2.0Å,%Vbur_N1_2.0Å,%Vbur_N2_2.0Å,%Vbur_C1_2.5Å,%Vbur_C2_2.5Å,%Vbur_C3_2.5Å,%Vbur_C4_2.5Å,%Vbur_N1_2.5Å,%Vbur_N2_2.5Å,%Vbur_C1_3.0Å,%Vbur_C2_3.0Å,%Vbur_C3_3.0Å,%Vbur_C4_3.0Å,%Vbur_N1_3.0Å,%Vbur_N2_3.0Å,%Vbur_C1_3.5Å,%Vbur_C2_3.5Å,%Vbur_C3_3.5Å,%Vbur_C4_3.5Å,%Vbur_N1_3.5Å,%Vbur_N2_3.5Å,%Vbur_C1_4.0Å,%Vbur_C2_4.0Å,%Vbur_C3_4.0Å,%Vbur_C4_4.0Å,%Vbur_N1_4.0Å,%Vbur_N2_4.0Å,Sterimol_L_N1_C4(Å)_morfeus,Sterimol_B1_N1_C4(Å)_morfeus,Sterimol_B5_N1_C4(Å)_morfeus,Sterimol_L_N2_C1(Å)_morfeus,Sterimol_B1_N2_C1(Å)_morfeus,Sterimol_B5_N2_C1(Å)_morfeus,Sterimol_L_N1_N2(Å)_morfeus,Sterimol_B1_N1_N2(Å)_morfeus,Sterimol_B5_N1_N2(Å)_morfeus,Buried_Sterimol_L_N1_C4_5.0(Å),Buried_Sterimol_B1_N1_C4_5.0(Å),Buried_Sterimol_B5_N1_C4_5.0(Å),Buried_Sterimol_L_N2_C1_5.0(Å),Buried_Sterimol_B1_N2_C1_5.0(Å),Buried_Sterimol_B5_N2_C1_5.0(Å),Buried_Sterimol_L_N1_N2_5.0(Å),Buried_Sterimol_B1_N1_N2_5.0(Å),Buried_Sterimol_B5_N1_N2_5.0(Å),Sterimol_B1_N1_C4(Å)_dbstep,Sterimol_B5_N1_C4(Å)_dbstep,Sterimol_L_N1_C4(Å)_dbstep,Sterimol_B1_N2_C1(Å)_dbstep,Sterimol_B5_N2_C1(Å)_dbstep,Sterimol_L_N2_C1(Å)_dbstep,Sterimol_B1_N1_N2(Å)_dbstep,Sterimol_B5_N1_N2(Å)_dbstep,Sterimol_L_N1_N2(Å)_dbstep,Sterimol_Bmin_N1_C4_0.0Å(Å),Sterimol_Bmax_N1_C4_0.0Å(Å),Sterimol_Bmin_N1_C4_1.0Å(Å),Sterimol_Bmax_N1_C4_1.0Å(Å),Sterimol_Bmin_N2_C1_0.0Å(Å),Sterimol_Bmax_N2_C1_0.0Å(Å),Sterimol_Bmin_N2_C1_1.0Å(Å),Sterimol_Bmax_N2_C1_1.0Å(Å),Sterimol_Bmin_N1_N2_0.0Å(Å),Sterimol_Bmax_N1_N2_0.0Å(Å),Sterimol_Bmin_N1_N2_1.0Å(Å),Sterimol_Bmax_N1_N2_1.0Å(Å),pyramidalization_Gavrish_C1(°),pyramidalization_Agranat-Radhakrishnan_C1,pyramidalization_Gavrish_C2(°),pyramidalization_Agranat-Radhakrishnan_C2,pyramidalization_Gavrish_C3(°),pyramidalization_Agranat-Radhakrishnan_C3,pyramidalization_Gavrish_C4(°),pyramidalization_Agranat-Radhakrishnan_C4,pyramidalization_Gavrish_N1(°),pyramidalization_Agranat-Radhakrishnan_N1,pyramidalization_Gavrish_N2(°),pyramidalization_Agranat-Radhakrishnan_N2,NBO_LP_occupancy_N1,NBO_LP_energy_N1,NBO_LP_occupancy_N2,NBO_LP_energy_N2,CPU_time_total(hours),Wall_time_total(hours),ChelpG_charge_C1,ChelpG_charge_C2,ChelpG_charge_C3,ChelpG_charge_C4,ChelpG_charge_N1,ChelpG_charge_N2,Hirsh_charge_C1,Hirsh_CM5_charge_C1,Hirsh_atom_dipole_C1,Hirsh_charge_C2,Hirsh_CM5_charge_C2,Hirsh_atom_dipole_C2,Hirsh_charge_C3,Hirsh_CM5_charge_C3,Hirsh_atom_dipole_C3,Hirsh_charge_C4,Hirsh_CM5_charge_C4,Hirsh_atom_dipole_C4,Hirsh_charge_N1,Hirsh_CM5_charge_N1,Hirsh_atom_dipole_N1,Hirsh_charge_N2,Hirsh_CM5_charge_N2,Hirsh_atom_dipole_N2
0,pyrdz1_conf-1,C16,C17,C4,N5,N6,C7,-723.266926,0.223195,-723.027121,0.062668,0.059724,-723.089789,-723.086846,298.15,-0.28789,-0.03793,-0.16291,0.24996,0.05309,200.979,205.946,2.7605,2558.739,467.943546,722.480854,0.832099,0.13315,-0.18589,-0.20864,0.1881,-0.2195,-0.20526,5.5557,39.1191,41.6092,-11.9697,-283.3194,-282.4306,1.3422,1.34655,1.32316,120.578,121.007,0.63,,0.265,95.75155,92.697572,92.41671,96.797521,80.878745,81.714876,86.734926,79.251856,77.427074,89.930329,72.068303,71.129053,76.032665,66.550581,64.177034,80.180088,63.97497,61.410533,64.506266,56.051994,54.637416,68.196166,57.357086,54.066693,53.587611,47.678217,47.185541,55.247191,50.853842,47.966064,4.653025,3.007906,9.35062,6.399445,2.724532,7.895892,10.772926,2.199199,4.76549,4.653025,2.925634,6.562661,5.987262,2.545027,5.763776,7.519437,2.058946,4.76549,2.988738,9.350134,4.2,2.749605,7.862888,5.95,2.182242,4.762877,10.35,2.4051,7.985299,2.709329,7.34932,2.175623,5.71358,1.69338,6.503076,1.5,4.39659,1.5,4.39659,0.119621,0.017926,0.039513,0.005944,0.031292,0.00471,0.016907,0.002539,10.901543,1.998024,10.862106,1.998023,1.93343,-0.41525,1.93343,-0.41525,6.9478,0.440417,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data
1,pyrdz1_conf-2,C16,C17,C4,N5,N6,C7,-723.266441,0.223123,-723.026679,0.062866,0.059829,-723.089545,-723.086508,298.15,-0.28805,-0.03885,-0.16345,0.2492,0.0536,201.175,210.665,2.9527,2016.375,466.31336,720.897059,0.833787,0.13716,-0.18765,-0.21319,0.19005,-0.21232,-0.20989,4.7069,38.8721,43.507,-11.8179,-273.7653,-279.2271,1.34124,1.34398,1.32668,120.411,121.11,0.61,,0.24,95.748321,92.671746,93.094654,96.629649,78.31547,81.656767,86.694231,79.27139,80.581781,89.645462,67.026957,71.135565,76.023354,66.564549,68.6783,79.838349,58.031324,60.95333,64.486466,56.80267,59.280074,67.882268,51.667909,52.391796,53.413429,49.058485,51.282121,55.102492,46.0648,45.792859,5.188345,3.024503,9.360483,6.399322,2.70855,7.893747,10.776268,1.744485,4.760526,5.188345,2.949658,6.564054,5.985414,2.530168,6.272281,7.521867,1.744485,4.760526,3.00185,9.352139,4.75,2.725231,7.862888,5.95,1.733669,4.759464,10.35,1.65,7.99015,2.703462,7.283715,2.159032,6.390814,1.992208,6.501731,1.5,4.405962,1.5,4.405962,0.123107,0.018451,0.039949,0.00601,0.031361,0.00472,0.016117,0.002421,10.916822,1.998216,10.852619,1.998211,1.93246,-0.41385,1.93246,-0.41385,6.68842,0.424417,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data
2,pyrdz1_conf-3,C16,C17,C4,N5,N6,C7,-723.262575,0.222897,-723.023027,0.062633,0.059752,-723.085661,-723.08278,298.15,-0.2907,-0.0304,-0.16055,0.2603,0.04951,197.83,193.223,4.5491,2416.027,472.013304,727.6385,0.828846,0.13218,-0.19344,-0.21083,0.18897,-0.21985,-0.17966,1.2936,40.8535,41.6658,-11.5883,-289.9734,-300.7194,1.34241,1.34483,1.32613,120.906,120.001,0.362,,0.488,95.990444,92.791193,92.37797,96.758781,80.994964,80.478435,87.9623,79.015822,77.428702,89.969397,72.187134,70.479555,77.513223,65.690182,64.136062,80.226646,64.288774,61.420776,65.531238,55.279771,54.658381,68.239843,57.714079,54.950731,54.324297,47.16498,47.069161,55.544349,51.367466,49.473574,4.660496,3.141164,9.636924,6.658315,2.913979,7.696913,10.794914,1.948517,4.429278,4.660496,3.074873,6.583035,5.883204,2.880466,5.786044,7.676185,1.948517,4.429278,3.12433,9.606378,4.25,2.908788,7.663224,6.25,1.93929,4.409649,10.35,2.570378,7.874802,2.609557,7.169554,1.852002,5.508176,1.954323,5.724072,1.52028,4.3909,1.52028,4.409649,0.096493,0.014507,0.096111,0.014458,0.067612,0.010173,0.020225,0.003038,10.871361,1.996322,10.954311,1.996342,1.93689,-0.41276,1.93689,-0.41276,6.47475,0.410611,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data
3,pyrdz1_conf-4,C16,C17,C4,N5,N6,C7,-723.262021,0.222845,-723.022504,0.062791,0.059842,-723.085295,-723.082346,298.15,-0.29077,-0.03141,-0.16109,0.25936,0.05003,197.971,196.952,4.7294,2014.878,470.603121,726.43616,0.830413,0.13597,-0.19507,-0.21517,0.19084,-0.21237,-0.18413,0.3576,40.6446,43.1976,-11.2698,-279.9666,-297.8096,1.34136,1.34202,1.32991,120.734,120.11,0.368,,0.472,95.993673,92.729855,93.004261,96.519886,78.360666,80.549458,87.9623,79.015822,80.570387,89.627556,67.124626,70.533273,77.469458,65.709736,68.625223,79.856973,58.312537,60.943087,65.532403,56.064806,59.309192,67.910804,51.981224,53.275834,54.166408,48.551456,51.15643,55.382192,46.538468,47.284464,5.185945,3.149059,9.64982,6.666239,2.90534,7.691694,10.799663,1.55,4.427446,5.185945,3.08592,6.592372,5.887237,2.870378,6.281237,7.681508,1.55,4.427446,3.137404,9.60833,4.75,2.910846,7.663224,6.25,1.531473,4.414748,10.35,1.657847,7.874802,2.627558,7.218379,1.829956,6.428841,2.306343,6.126377,1.52028,4.40284,1.52028,4.414748,0.096007,0.014435,0.097274,0.014633,0.068113,0.010249,0.022137,0.003325,10.887163,1.996447,10.944361,1.996459,1.93592,-0.41137,1.93592,-0.41137,6.07722,0.384972,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data


## Save collected properties to Excel

Helpful to save here in case the Notebook crashes or if you want to add more properties before post-processsing. Can be read in at 5.1.1.

In [4]:
writer = pd.ExcelWriter(prefix + "_extracted_properties.xlsx", engine="xlsxwriter")
df.to_excel(writer)
writer.close()

In [5]:
# save the pandas dataframe to a pickle file
df.to_pickle(prefix + "_extracted_properties.pkl")