In [4]:

# --- standard library ---
import os,re,sys,pickle,datetime,time,random,itertools,glob
from itertools import permutations
import warnings
# NOTE: this silences every warning for the whole kernel session, including
# deprecation and numerical warnings raised by the libraries imported below.
warnings.filterwarnings("ignore")
# --- numerics / spreadsheets ---
import numpy as np
np.set_printoptions(threshold=sys.maxsize) # print full arrays instead of summarising them with "..."
import openpyxl
from openpyxl import load_workbook
import pandas as pd
from pandas import ExcelWriter
import xlsxwriter

import math
# Not referenced in the cells visible here; presumably a shared random seed
# for later cells -- TODO confirm.
randomstate = 42

# --- cheminformatics ---
from rdkit import Chem
from rdkit.Chem.Draw import IPythonConsole

# --- property collection (GoodVibes, morfeus, and the local helper module) ---
import goodvibes.GoodVibes as gv
import goodvibes.thermo as thermo
import goodvibes.io as io
import goodvibes.pes as pes
from morfeus import ConeAngle
from morfeus import Sterimol
# Local module providing the gp.get_* property functions used below.
import get_properties_functions_for_WI as gp

# --- plotting ---
import matplotlib.pyplot as plt
from matplotlib.colors import ColorConverter

# --- notebook utilities ---
import ipywidgets as widgets
import importlib

In [2]:
# Load the atom map workbook. Judging from the displayed head, each row is one
# conformer log file (log_name) and the remaining columns map the generic ring
# labels (C1-C5, N1) to the atom labels used in that particular log file.
file_name = "pyrd_atom_map.xlsx"

atom_map_df = pd.read_excel(
    file_name, sheet_name="Sheet1", index_col=0, header=0, engine="openpyxl"
)

display(atom_map_df.head())

# df is what properties will be appended to. Use an explicit .copy(): a plain
# `df = atom_map_df` only binds a second name to the same object, so any
# in-place change to df would also change atom_map_df and the original would
# not be preserved.
df = atom_map_df.copy()

Unnamed: 0,log_name,C3,C4,C5,N1,C1,C2
0,pyrd10_conf-1,C10,C5,C6,N7,C8,C9
1,pyrd10_conf-10,C10,C5,C6,N7,C8,C9
2,pyrd10_conf-11,C10,C5,C6,N7,C8,C9
3,pyrd10_conf-12,C10,C9,C8,N7,C6,C5
4,pyrd10_conf-13,C10,C5,C6,N7,C8,C9


In [3]:
#this box has functions to choose from
#each section below appends one family of property columns to df; delete or comment out the sections you do not need
#start from a real copy of the atom map (plain assignment would only alias it), so re-running this cell
#always begins from the unmodified atom map and atom_map_df itself is never altered
df = atom_map_df.copy()

#---------------GoodVibes Energies----------------
#uses the GoodVibes 2021 Branch (Jupyter Notebook Compatible)
#calculates the quasi harmonic corrected G(T) and single point corrected G(T) as well as other thermodynamic properties
#inputs: dataframe, temperature
df = gp.get_goodvibes_e(df, 298.15)

#---------------Frontier Orbitals-----------------
#E(HOMO), E(LUMO), mu(chemical potential or negative of molecular electronegativity), eta(hardness/softness), omega(electrophilicity index)
df = gp.get_frontierorbs(df)

#---------------Polarizability--------------------
#Exact polarizability
df = gp.get_polarizability(df)

#---------------Dipole----------------------------
#Total dipole moment magnitude in Debye
df = gp.get_dipole(df)

#---------------Volume----------------------------
#Molar volume
#requires the Gaussian keyword = "volume" in the .com file
df = gp.get_volume(df)

#---------------SASA------------------------------
#Uses morfeus to calculate solvent accessible surface area and the volume under the SASA
df = gp.get_SASA(df)

#---------------NBO-------------------------------
#natural charge from NBO
#requires the Gaussian keyword = "pop=nbo7" in the .com file
nbo_list = ["C1", "C2", "C3", "C4", "C5", "N1"]
df = gp.get_nbo(df, nbo_list)

#---------------NMR-------------------------------
#isotropic NMR shift
#requires the Gaussian keyword = "nmr=giao" in the .com file
nmr_list = ["C1", "C2", "C3", "C4", "C5", "N1"]
df = gp.get_nmr(df, nmr_list)

#---------------Distance--------------------------
#distance between 2 atoms
dist_list_of_lists = [["N1", "C1"], ["N1", "C5"]]
df = gp.get_distance(df, dist_list_of_lists)

#---------------Angle-----------------------------
#angle between 3 atoms
angle_list_of_lists = [["C5", "N1", "C1"]]
df = gp.get_angles(df, angle_list_of_lists)

#---------------Dihedral--------------------------
#dihedral angle between 4 atoms
dihedral_list_of_lists = [["C4", "C5", "N1", "C1"]]
df = gp.get_dihedral(df, dihedral_list_of_lists)

#---------------Vbur Scan-------------------------
#uses morfeus to calculate the buried volume at a series of radii (including hydrogens)
#inputs: dataframe, list of atoms, start_radius, end_radius, and step_size
#if you only want a single radius, put the same value for start_radius and end_radius (keep step_size > 0)
vbur_list = ["C1", "C2", "C3", "C4", "C5", "N1"]
df = gp.get_vbur_scan(df, vbur_list, 2, 4, 0.5)

#---------------Sterimol morfeus------------------
#uses morfeus to calculate Sterimol L, B1, and B5 values
#NOTE: this is much faster than the corresponding DBSTEP function (recommendation: use as default/if you don't need Sterimol2Vec)
sterimol_list_of_lists = [["N1", "C1"], ["N1", "C5"]]
df = gp.get_sterimol_morfeus(df, sterimol_list_of_lists)

#---------------Buried Sterimol-------------------
#uses morfeus to calculate Sterimol L, B1, and B5 values within a given sphere of radius r_buried
#atoms outside the sphere + 0.5 vdW radius are deleted and the Sterimol vectors are calculated
#for more information: https://kjelljorner.github.io/morfeus/sterimol.html
#inputs: dataframe, list of atom pairs, r_buried
sterimol_list_of_lists = [["N1", "C1"], ["N1", "C5"]]
df = gp.get_buried_sterimol(df, sterimol_list_of_lists, 5.5)

#---------------Sterimol DBSTEP-------------------
#uses DBSTEP to calculate Sterimol L, B1, and B5 values
#default grid point spacing (0.05 Angstrom) is used (can use custom spacing or vdw radii in the get_properties_functions script)
#more info here: https://github.com/patonlab/DBSTEP
#NOTE: this takes longer than the morfeus function (recommendation: only use this if you need Sterimol2Vec)
sterimol_list_of_lists = [["N1", "C1"]]
df = gp.get_sterimol_dbstep(df, sterimol_list_of_lists)

#---------------Sterimol2Vec----------------------
#uses DBSTEP to calculate Sterimol Bmin and Bmax values at intervals from 0 to end_radius, with a given step_size
#default grid point spacing (0.05 Angstrom) is used (can use custom spacing or vdw radii in the get_properties_functions script)
#more info here: https://github.com/patonlab/DBSTEP
#inputs: dataframe, list of atom pairs, end_radius, and step_size
sterimol2vec_list_of_lists = [["N1", "C5"], ["N1", "C1"]]
df = gp.get_sterimol2vec(df, sterimol2vec_list_of_lists, 1, 1.0)

#---------------Pyramidalization------------------
#uses morfeus to calculate pyramidalization based on the 3 atoms in closest proximity to the defined atom
#collects values based on two definitions of pyramidalization
#details on these values can be found here: https://kjelljorner.github.io/morfeus/pyramidalization.html
pyr_list = ["C1", "C2", "C3", "C4", "C5", "N1"]
df = gp.get_pyramidalization(df, pyr_list)

#---------------Plane Angle-----------------------
#plane angle between 2 planes (each defined by 3 atoms)
planeangle_list_of_lists = [["N1", "C1", "C5"], ["C2", "C3", "C4"]]
df = gp.get_planeangle(df, planeangle_list_of_lists)

#---------------LP energy - custom from first cell---------------
#NOTE(review): "LP" presumably means lone pair (here on N1) -- confirm against gp.get_one_lp_energy
lp_list = ["N1"]
df = gp.get_one_lp_energy(df, lp_list)

#---------------Time------------------------------
#returns the total CPU time and total Wall time (not per subjob)
#if used in summary df, will give the average (not Boltzmann average) in the Boltzmann average column
df = gp.get_time(df)

#---------------ChelpG----------------------------
#ChelpG ESP charge
#requires the Gaussian keyword = "pop=chelpg" in the .com file
a_list = ['C1']
df = gp.get_chelpg(df, a_list)

#---------------Hirshfeld-------------------------
#Hirshfeld charge, CM5 charge, Hirshfeld atom dipole
#requires the Gaussian keyword = "pop=hirshfeld" in the .com file
a_list = ['C1']
df = gp.get_hirshfeld(df, a_list)

#show every column of the result; note this changes the pandas display option for the rest of the session
pd.options.display.max_columns = None
display(df)

Frontier orbitals function has completed
Polarizability function has completed
Dipole function has completed
Volume function has completed
SASA function has completed
NBO function has completed for ['C1', 'C2', 'C3', 'C4', 'C5', 'N1']
NMR function has completed for ['C1', 'C2', 'C3', 'C4', 'C5', 'N1']
****Unable to acquire distance for: pyrd10_conf-1.log
****Unable to acquire distance for: pyrd10_conf-10.log
****Unable to acquire distance for: pyrd10_conf-11.log
****Unable to acquire distance for: pyrd10_conf-12.log
****Unable to acquire distance for: pyrd10_conf-13.log
****Unable to acquire distance for: pyrd10_conf-2.log
****Unable to acquire distance for: pyrd10_conf-3.log
****Unable to acquire distance for: pyrd10_conf-4.log
****Unable to acquire distance for: pyrd10_conf-5.log
****Unable to acquire distance for: pyrd10_conf-6.log
****Unable to acquire distance for: pyrd10_conf-7.log
****Unable to acquire distance for: pyrd10_conf-8.log
****Unable to acquire distance for: pyrd10_co

Unnamed: 0,log_name,C3,C4,C5,N1,C1,C2,HOMO,LUMO,μ,η,ω,polar_iso(Debye),polar_aniso(Debye),dipole(Debye),volume(Bohr_radius³/mol),SASA_surface_area(Å²),SASA_volume(Å³),SASA_sphericity,NBO_charge_C1,NBO_charge_C2,NBO_charge_C3,NBO_charge_C4,NBO_charge_C5,NBO_charge_N1,NMR_shift_C1,NMR_shift_C2,NMR_shift_C3,NMR_shift_C4,NMR_shift_C5,NMR_shift_N1,%Vbur_C1_2.0Å,%Vbur_C2_2.0Å,%Vbur_C3_2.0Å,%Vbur_C4_2.0Å,%Vbur_C5_2.0Å,%Vbur_N1_2.0Å,%Vbur_C1_2.5Å,%Vbur_C2_2.5Å,%Vbur_C3_2.5Å,%Vbur_C4_2.5Å,%Vbur_C5_2.5Å,%Vbur_N1_2.5Å,%Vbur_C1_3.0Å,%Vbur_C2_3.0Å,%Vbur_C3_3.0Å,%Vbur_C4_3.0Å,%Vbur_C5_3.0Å,%Vbur_N1_3.0Å,%Vbur_C1_3.5Å,%Vbur_C2_3.5Å,%Vbur_C3_3.5Å,%Vbur_C4_3.5Å,%Vbur_C5_3.5Å,%Vbur_N1_3.5Å,%Vbur_C1_4.0Å,%Vbur_C2_4.0Å,%Vbur_C3_4.0Å,%Vbur_C4_4.0Å,%Vbur_C5_4.0Å,%Vbur_N1_4.0Å,Sterimol_L_N1_C1(Å)_morfeus,Sterimol_B1_N1_C1(Å)_morfeus,Sterimol_B5_N1_C1(Å)_morfeus,Sterimol_L_N1_C5(Å)_morfeus,Sterimol_B1_N1_C5(Å)_morfeus,Sterimol_B5_N1_C5(Å)_morfeus,Buried_Sterimol_L_N1_C1_5.0(Å),Buried_Sterimol_B1_N1_C1_5.0(Å),Buried_Sterimol_B5_N1_C1_5.0(Å),Buried_Sterimol_L_N1_C5_5.0(Å),Buried_Sterimol_B1_N1_C5_5.0(Å),Buried_Sterimol_B5_N1_C5_5.0(Å),pyramidalization_Gavrish_C1(°),pyramidalization_Agranat-Radhakrishnan_C1,pyramidalization_Gavrish_C2(°),pyramidalization_Agranat-Radhakrishnan_C2,pyramidalization_Gavrish_C3(°),pyramidalization_Agranat-Radhakrishnan_C3,pyramidalization_Gavrish_C4(°),pyramidalization_Agranat-Radhakrishnan_C4,pyramidalization_Gavrish_C5(°),pyramidalization_Agranat-Radhakrishnan_C5,pyramidalization_Gavrish_N1(°),pyramidalization_Agranat-Radhakrishnan_N1,NBO_charge_N1.1
0,pyrd10_conf-1,C10,C5,C6,N7,C8,C9,-0.31256,-0.0024,-0.15748,0.31016,0.03998,115.274,54.7674,3.9559,1402.36,332.622291,490.744031,0.904557,0.02305,-0.25205,-0.1787,-0.06744,0.0228,-0.41008,12.3385,41.5624,26.7586,27.2832,8.36,-155.3399,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data
1,pyrd10_conf-10,C10,C5,C6,N7,C8,C9,-0.31435,-0.00487,-0.15961,0.30948,0.04116,118.136,59.8668,2.5503,1556.174,351.152486,509.060557,0.878014,0.02423,-0.24936,-0.17638,-0.05899,0.03183,-0.40809,12.0333,41.7045,26.8954,26.2665,10.7585,-156.0897,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data
2,pyrd10_conf-11,C10,C5,C6,N7,C8,C9,-0.31568,-0.00515,-0.160415,0.31053,0.04143,117.866,77.3913,3.7713,1428.498,349.012508,505.510645,0.879286,0.02503,-0.24973,-0.16989,-0.05802,0.02177,-0.40802,12.5514,41.6143,25.798,28.669,14.6598,-155.5752,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data
3,pyrd10_conf-12,C10,C9,C8,N7,C6,C5,-0.30007,0.00107,-0.1495,0.30114,0.03711,112.983,40.5954,3.6925,1360.037,326.071908,486.759133,0.917727,0.03432,-0.0605,-0.17405,-0.24856,0.0249,-0.40999,9.6367,27.3206,26.3592,42.1065,11.976,-154.4481,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data
4,pyrd10_conf-13,C10,C5,C6,N7,C8,C9,-0.30025,0.00406,-0.148095,0.30431,0.03604,112.885,40.264,3.8187,1385.241,326.441895,487.458728,0.917565,0.02239,-0.24885,-0.17453,-0.06088,0.03665,-0.4094,12.7929,41.7467,25.0846,27.0522,9.5352,-153.2876,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data
5,pyrd10_conf-2,C10,C5,C6,N7,C8,C9,-0.31278,-0.00182,-0.1573,0.31096,0.03979,115.041,51.3399,1.2501,1319.042,332.252283,490.460294,0.905216,0.0228,-0.2518,-0.1853,-0.06855,0.02984,-0.41074,12.3992,42.6848,24.8273,27.5887,10.0154,-155.703,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data
6,pyrd10_conf-3,C10,C5,C6,N7,C8,C9,-0.31572,-0.00873,-0.162225,0.30699,0.04286,116.186,40.996,2.6361,1259.341,343.092394,502.027671,0.890345,0.02462,-0.2479,-0.17835,-0.06652,0.02655,-0.40826,11.608,41.3863,27.1871,26.9238,10.0046,-155.2305,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data
7,pyrd10_conf-4,C10,C5,C6,N7,C8,C9,-0.3121,-0.0021,-0.1571,0.31,0.03981,116.613,62.9578,1.1396,1376.219,345.432443,504.026205,0.886659,0.02277,-0.25013,-0.17611,-0.05856,0.03011,-0.41,12.7871,41.8104,26.4253,25.0232,10.2697,-155.3274,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data
8,pyrd10_conf-5,C10,C5,C6,N7,C8,C9,-0.312,-0.00211,-0.157055,0.30989,0.0398,116.685,66.1732,3.7145,1579.731,345.722444,504.417478,0.886373,0.02249,-0.25115,-0.17815,-0.05861,0.03258,-0.40865,12.8009,41.4801,26.6847,25.0692,9.6897,-156.541,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data
9,pyrd10_conf-6,C10,C5,C6,N7,C8,C9,-0.31553,-0.0072,-0.161365,0.30833,0.04223,116.244,41.7352,2.9331,1379.845,343.112399,502.232049,0.890534,0.02327,-0.25048,-0.18193,-0.06679,0.0319,-0.40647,12.2797,42.5817,25.8405,27.0598,9.7376,-157.1935,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data,no data


In [3]:
# Write the collected properties to an Excel workbook. The context manager
# saves and closes the file even if to_excel raises, so no half-written
# workbook handle is left open in the kernel.
with pd.ExcelWriter("pyrd_extra_properties.xlsx", engine="xlsxwriter") as writer:
    df.to_excel(writer)

Time function has completed
****no ChelpG ESP charge analysis found in: pyrd10_conf-1.log
****no ChelpG ESP charge analysis found in: pyrd10_conf-10.log
****no ChelpG ESP charge analysis found in: pyrd10_conf-11.log
****no ChelpG ESP charge analysis found in: pyrd10_conf-12.log
****no ChelpG ESP charge analysis found in: pyrd10_conf-13.log
****no ChelpG ESP charge analysis found in: pyrd10_conf-2.log
****no ChelpG ESP charge analysis found in: pyrd10_conf-3.log
****no ChelpG ESP charge analysis found in: pyrd10_conf-4.log
****no ChelpG ESP charge analysis found in: pyrd10_conf-5.log
****no ChelpG ESP charge analysis found in: pyrd10_conf-6.log
****no ChelpG ESP charge analysis found in: pyrd10_conf-7.log
****no ChelpG ESP charge analysis found in: pyrd10_conf-8.log
****no ChelpG ESP charge analysis found in: pyrd10_conf-9.log
****no ChelpG ESP charge analysis found in: pyrd11_conf-1.log
****no ChelpG ESP charge analysis found in: pyrd11_conf-2.log
****no ChelpG ESP charge analysis foun

Unnamed: 0,log_name,C3,C4,C5,N1,C1,C2,CPU_time_total(hours),Wall_time_total(hours),ChelpG_charge_C1,Hirsh_charge_C1,Hirsh_CM5_charge_C1,Hirsh_atom_dipole_C1
0,pyrd10_conf-1,C10,C5,C6,N7,C8,C9,2.27845,0.145194,no data,no data,no data,no data
1,pyrd10_conf-10,C10,C5,C6,N7,C8,C9,2.82839,0.179556,no data,no data,no data,no data
2,pyrd10_conf-11,C10,C5,C6,N7,C8,C9,1.406,0.090444,no data,no data,no data,no data
3,pyrd10_conf-12,C10,C9,C8,N7,C6,C5,2.31821,0.147722,no data,no data,no data,no data
4,pyrd10_conf-13,C10,C5,C6,N7,C8,C9,2.32036,0.148056,no data,no data,no data,no data
5,pyrd10_conf-2,C10,C5,C6,N7,C8,C9,2.33997,0.149111,no data,no data,no data,no data
6,pyrd10_conf-3,C10,C5,C6,N7,C8,C9,2.75702,0.175694,no data,no data,no data,no data
7,pyrd10_conf-4,C10,C5,C6,N7,C8,C9,2.61684,0.167639,no data,no data,no data,no data
8,pyrd10_conf-5,C10,C5,C6,N7,C8,C9,2.26366,0.144361,no data,no data,no data,no data
9,pyrd10_conf-6,C10,C5,C6,N7,C8,C9,2.27814,0.145306,no data,no data,no data,no data
