In [2]:

import numpy as np

from DataAnalysisClass import *
from clustering_work import *

# Basis-set names grouped by prefix: `aug-` vs `d-aug-`, each with its
# `pV*Z` family and its `pCV*Z` family.
single = [
    'aug-cc-pVDZ',
    'aug-cc-pVTZ',
    'aug-cc-pVQZ',
    'aug-cc-pV5Z',
    'aug-cc-pV6Z',
]
single_polarized = [
    'aug-cc-pCVDZ',
    'aug-cc-pCVTZ',
    'aug-cc-pCVQZ',
]
double = [
    'd-aug-cc-pVDZ',
    'd-aug-cc-pVTZ',
    'd-aug-cc-pVQZ',
    'd-aug-cc-pV5Z',
    'd-aug-cc-pV6Z',
]
double_polarized = [
    'd-aug-cc-pCVDZ',
    'd-aug-cc-pCVTZ',
    'd-aug-cc-pCVQZ',
]

# Flat list of every basis set, in the group order given above.
all_basis_sets = [*single, *single_polarized, *double, *double_polarized]


In [3]:

# Filesystem locations used by the rest of the notebook:
#   database_path -- root handed to PolarizabilityData
#   paper_path    -- directory the exported figures are written into
# NOTE(review): database_path is an absolute, machine-specific path.
database_path, paper_path = (
    Path('/home/ahurta92/data/august'),
    Path('response_paper_figures'),
)



In [4]:
import glob
from pathlib import Path

# Molecule names are taken from the .mol files in the molecules directory:
# the file name up to its first '.' is the molecule name.
# NOTE(review): this literal repeats the database root that `database_path`
# also holds; keep the two in sync if the data moves.
mol_files = glob.glob('/home/ahurta92/data/august/molecules/*.mol')

# 'LiH_s' is excluded. Filtering (rather than list.remove) keeps the cell from
# raising ValueError when that file is not present, and sorting makes the
# order independent of the filesystem's directory listing order.
mols = sorted(
    name
    for name in (Path(mol_file).name.split('.')[0] for mol_file in mol_files)
    if name != 'LiH_s'
)


In [5]:
class MRAComparedBasisDF(pd.DataFrame):
    """Basis-set rows with the matching MRA reference values attached.

    Rows of ``polar_data`` whose ``basis`` is not ``"MRA"`` are kept. For each
    name in ``values`` two columns are added:

    * ``<value>MRA`` -- the value from the ``basis == "MRA"`` rows that share
      the same ``index`` key;
    * ``<value>E`` -- the deviation from that reference, either in percent or
      as a plain signed difference.

    The frame is then passed through ``make_detailed_df`` (defined outside
    this class) before being handed to ``pandas.DataFrame.__init__``.
    """

    def __init__(self, polar_data, index, values: list, PercentError: bool, *args, **kwargs):
        """Build the comparison frame.

        Parameters
        ----------
        polar_data
            DataFrame with a ``basis`` column, the ``index`` columns and one
            column per entry of ``values``.
        index
            Column name(s) that pair a basis-set row with its MRA row.
        values : list
            Names of the columns to compare against MRA.
        PercentError : bool
            If true, ``<value>E`` is ``(value - MRA) / MRA * 100``; otherwise
            it is ``value - MRA``. A zero MRA reference is not guarded against
            in percent mode.
        *args, **kwargs
            Forwarded to ``pandas.DataFrame.__init__``.
        """
        basis_data = polar_data.query('basis!="MRA"').copy().set_index(index)
        # The MRA reference does not depend on `value`, so select and index it
        # once instead of once per compared column.
        mra_reference = polar_data.query('basis=="MRA"').set_index(index)

        for value in values:
            mra_column = f'{value}MRA'
            # Assignment aligns on the shared index, so every basis row picks
            # up the MRA value that has the same key.
            basis_data[mra_column] = mra_reference[value]
            error = basis_data[value] - basis_data[mra_column]
            if PercentError:
                error = error / basis_data[mra_column] * 100
            basis_data[f'{value}E'] = error

        basis_data = make_detailed_df(basis_data.reset_index())
        super().__init__(basis_data, *args, **kwargs)




In [6]:
# Build the polarizability database object for every molecule in `mols` and
# every basis set in `all_basis_sets`, rooted at `database_path`.
# NOTE(review): 'hf' / 'dipole' and overwrite=False are interpreted inside
# PolarizabilityData; presumably overwrite=False reuses existing results --
# confirm against the class.
august_database = PolarizabilityData(
    mols,
    'hf',
    'dipole',
    all_basis_sets,
    database_path,
    overwrite=False,
)


In [7]:
# Persist the database's data frames through PolarizabilityData.save_dfs().
# NOTE(review): destination and file format are decided inside save_dfs and
# are not visible from this notebook.
august_database.save_dfs()

In [8]:
# Compare every basis-set result against the MRA reference at the same
# (molecule, omega), storing percent errors for alpha and gamma.
basis_data = MRAComparedBasisDF(
    august_database.iso_data.copy(),
    index=['molecule', 'omega'],
    values=['alpha', 'gamma'],
    PercentError=True,
)
basis_data

In [36]:
# Independent copy of only the MRA reference rows of the isotropic data.
mra_data = (
    august_database.iso_data
    .query('basis=="MRA"')
    .copy()
)

In [9]:
import seaborn as sns


In [10]:
# Whether the facets share a y-axis. The original `sharey=False,` had a
# trailing comma, which bound the one-element tuple `(False,)` -- a truthy
# value -- instead of the boolean False.
sharey = False

subset_mols = ['NaH', 'H2O', 'HF', 'CH3SH', 'Ne', 'Ar']
vlevel = ['D', 'T', 'Q']

# NOTE(review): the filter uses the full `mols` list, not `subset_mols`
# defined just above; left unchanged -- confirm which one was intended.
pdata = basis_data.query('molecule.isin(@mols) and valence.isin(@vlevel)')

# One panel per `Type`, two panels per row; alpha error against valence level,
# coloured by `mol_system`.
g = sns.FacetGrid(
    data=pdata,
    col='Type',
    col_wrap=2,
    sharey=sharey,
    sharex=True,
    height=3,
    aspect=1.5,
)
g.map_dataframe(
    sns.stripplot,
    x='valence',
    y='alphaE',
    hue='mol_system',
    size=5,
    alpha=.75,
    palette='colorblind',
    dodge=True,
    legend=True,
)



In [78]:
# Imported before first use: the original cell called plt.subplots() several
# lines above this import.
import matplotlib.pyplot as plt

sns.set_style('dark')
subset_mols = ['SF2']
vlevel = ['T', 'Q', '5']
plot_data = basis_data.query('valence.isin(@vlevel) and molecule.isin(@subset_mols)').copy()
# Remove unused categories from valence.
plot_data['valence'] = plot_data['valence'].cat.remove_unused_categories()
mra_plot_data = mra_data.query('molecule.isin(@subset_mols)').copy()

fig, ax = plt.subplots(1, 1, figsize=(8, 6))

# MRA reference curve, drawn on `ax` explicitly. Rows are ordered by omega so
# the line runs left to right, as seaborn's lineplot does for the basis-set
# curves (it sorts by x by default).
mra_sorted = mra_plot_data.sort_values('omega')
ax.plot(mra_sorted.omega, mra_sorted.alpha, marker='*', color='k', label='MRA', markersize=30)

# One line per basis set, distinguished by both colour and line/marker style.
# (An earlier cubehelix palette assignment was dead code: it was overwritten
# by 'seismic' on the very next line.)
pal = 'seismic'
sns.lineplot(
    data=plot_data,
    x='omega',
    y='alpha',
    style='basis',
    hue='basis',
    markers=True,
    ax=ax,
    palette=pal,
    legend='brief',
)

ax.set_title('{} '.format(subset_mols[0]))
ax.set_ylabel('Polarizability')
ax.set_xlabel('Frequency')
# Move the legend to the right, outside the plot area.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5))

# add mra line




In [128]:
# Imported before first use: the original cell called plt.subplots() several
# lines above this import.
import matplotlib.pyplot as plt

sns.set_style('dark')
subset_mols = ['SF2']
vlevel = ['D', 'T', 'Q', '5']
# Short tick labels that replace the basis names on the x-axis.
# NOTE(review): they are applied positionally, so this list must match the
# number and order of basis sets that end up on the axis -- confirm.
basis_labels = ['s-D', 's-T', 's-Q', 's-5', 's-CD', 's-CT', 's-CQ', 'd-D', 'd-T', 'd-Q', 'd-CD', 'd-CT', 'd-CQ', 'd-5']
omegas = [0, 2, 4, 6, 8]
plot_data = basis_data.query('valence.isin(@vlevel) and molecule.isin(@subset_mols) and omega.isin(@omegas)').copy()
# Remove unused categories from valence.
plot_data['valence'] = plot_data['valence'].cat.remove_unused_categories()
# Treat omega as a categorical hue so each selected value gets its own colour.
plot_data['omega'] = plot_data['omega'].astype('category')
plot_data['omega'] = plot_data['omega'].cat.remove_unused_categories()
mra_plot_data = mra_data.query('molecule.isin(@subset_mols) and omega.isin(@omegas)').copy()

fig, ax = plt.subplots(1, 1, figsize=(11, 6))

pal = 'magma'

# Polarizability against basis set: one line (plus point markers) per omega.
sns.lineplot(data=plot_data, x='basis', y='alpha', markers=True, ax=ax, legend='brief', hue='omega', palette=pal, linewidth=1)
sns.scatterplot(data=plot_data, x='basis', y='alpha', ax=ax, hue='omega', palette=pal, legend=False)
# Rotate the x tick labels.
plt.xticks(rotation=45)

# Move the legend to the right, outside the plot area.
ax.legend(loc='center left', bbox_to_anchor=(1, 0.5), title='Frequency')

# Replace the x tick labels with the short basis labels.
ax.set_xticklabels(basis_labels)

ax.set_title('{} '.format(subset_mols[0]))
ax.set_ylabel('Polarizability')
ax.set_xlabel('')

# Dashed horizontal MRA reference per omega; the colour for position i in
# `omegas` is entry i of the same palette used for the hue.
mra_colors = sns.color_palette(pal, n_colors=len(omegas))

for i, om in enumerate(omegas):
    mra_om = mra_plot_data.query('omega==@om')
    if mra_om.empty:
        # No MRA row for this omega: report it instead of failing on .iloc[0].
        print(f'No MRA reference for omega={om}; reference line skipped')
        continue
    ax.axhline(mra_om.alpha.iloc[0], color=mra_colors[i], linestyle='--', label='MRA')

# Make the axes background patch partly transparent.
ax.patch.set_alpha(0.3)




In [240]:

sns.set_context('talk')
sns.set_style('dark')
subset_mols = ['SF2']
vlevel = ['D', 'T', 'Q', '5']
basis_labels = ['s-D', 's-T', 's-Q', 's-5', 's-CD', 's-CT', 's-CQ', 'd-D', 'd-T', 'd-Q', 'd-CD', 'd-CT', 'd-CQ', 'd-5']
omegas = [0, 2, 4, 6, 8]
plot_data = basis_data.query('valence.isin(@vlevel) and molecule.isin(@subset_mols) and omega.isin(@omegas)').copy()
# Remove unused categories from valence.
plot_data['valence'] = plot_data['valence'].cat.remove_unused_categories()
# Treat omega as a categorical hue so each selected value gets its own colour.
plot_data['omega'] = plot_data['omega'].astype('category')
plot_data['omega'] = plot_data['omega'].cat.remove_unused_categories()
mra_plot_data = mra_data.query('molecule.isin(@subset_mols) and omega.isin(@omegas)').copy()

pal = 'magma'
# One panel per `Type`, sharing the y-axis: polarizability against valence
# level, with a faint line and solid markers per omega.
g = sns.FacetGrid(data=plot_data, col='Type', sharey=True, sharex=False, height=6, aspect=.5, despine=False)
g.map_dataframe(sns.lineplot, x='valence', y='alpha', hue='omega', alpha=.5, palette=pal, lw=1, legend=True)
g.map_dataframe(sns.scatterplot, x='valence', y='alpha', hue='omega', alpha=.95, palette=pal, s=100)
g.set_titles('{col_name}')
g.set_ylabels('Polarizability')
g.set_xlabels('')

# MRA reference level per omega, resolved once (it does not depend on the
# panel). The colour for position i in `omegas` is entry i of the hue palette.
mra_colors = sns.color_palette(pal, n_colors=len(omegas))
mra_levels = []
for i, om in enumerate(omegas):
    mra_om = mra_plot_data.query('omega==@om')
    if mra_om.empty:
        # No MRA row for this omega: report it instead of failing on .iloc[0].
        print(f'No MRA reference for omega={om}; reference line skipped')
        continue
    mra_levels.append((mra_om.alpha.iloc[0], mra_colors[i]))

# Draw the dashed reference lines on every panel and fade its background.
for ax in g.axes.flat:
    for level, color in mra_levels:
        ax.axhline(level, color=color, linestyle='--', label='MRA', alpha=.7)
    ax.patch.set_alpha(0.3)

g.add_legend(title='Frequency', loc='center left', bbox_to_anchor=(1, 0.5))

g.figure.tight_layout()







In [223]:
# Output directory for the exported figures. It is created up front because
# saving a figure into a directory that does not exist fails.
paper_path = Path('response_paper_figures')
paper_path.mkdir(parents=True, exist_ok=True)



In [227]:
# Raster export of the current grid figure `g` at 1000 dpi, opaque background.
png_target = paper_path / 'toc_figure1.png'
g.savefig(png_target, transparent=False, dpi=1000)

In [239]:

# Vector (SVG) export of the current grid figure `g`, opaque background.
svg_target = paper_path / 'toc_figure1.svg'
g.savefig(svg_target, transparent=False)
