In [1]:
__author__="Dominic A. Sirianni"
__credits__=["Asim Alenaizan", "Trent M. Parker", "Daniel L. Cheney", "C. David Sherrill"]

__copyright__="(c) 2018, The Sherrill Group"
__license__="BSD-3-Clause"
__date__="2018-01-23"

In [2]:
# ==> Import Relevant Packages & Read Pickled Data <==
import numpy as np
import pandas as pd
idx = pd.IndexSlice
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
import matplotlib.lines as mlines
import matplotlib.markers as markers
import matplotlib.axes as axes

# Make plots appear inline
%matplotlib inline

# Read pickles and sort both axes of every frame.
# NOTE: unpickling can execute arbitrary code -- only load pickle files you trust.
def _read_sorted_pickle(path):
    """Load the pickled DataFrame at `path` and return it with rows and columns sorted.

    `sort_index` returns a new object, so its result must be kept.  The earlier
    chained form `df.sort_index(...).sort_index(..., inplace=True)` sorted a
    temporary copy in place and threw it away, leaving `df` exactly as loaded.
    """
    frame = pd.read_pickle(path)
    return frame.sort_index(axis=0, level=0).sort_index(axis=1, level=0)

## A21 LRMSD & dCOM
mt = _read_sorted_pickle('pickles/A21_LRMSD-dCOM.pkl')

## A21 LRMSD & dCOM Signed Error
a21_serr = _read_sorted_pickle('pickles/A21_LRMSD-dCOM-err.pkl')

## HBC6 & NBC7x IE Scans
scans = _read_sorted_pickle('pickles/HBC6-NBC7x_pes-scans.pkl')

## HBC6 & NBC7x Interpolated Minima
pes = _read_sorted_pickle('pickles/HBC6-NBC7x_pes-minima.pkl')

## HBC6 & NBC7x Signed Error in Interpolated Minima
pes_err = _read_sorted_pickle('pickles/HBC6-NBC7x_pes-minima-err.pkl')


In [28]:
# ==> Define stat functions <==
# Error functions
def serr(vals, refs):
    """Signed error: the element-wise difference `vals - refs`."""
    signed = vals - refs
    return signed

def aerr(vals, refs):
    """Absolute error: the magnitude of `vals - refs`."""
    difference = vals - refs
    return abs(difference)

# Averages
def mean(vals, dropouts=False):
    """Arithmetic mean of `vals`, computed as ``sum(vals) / len(vals)``.

    With `dropouts=True`, `vals` must offer `.quantile` and `.loc` (as a pandas
    Series does); only entries lying strictly inside the fences
    ``Q1 - 1.5*IQR`` and ``Q3 + 1.5*IQR`` are averaged.
    """
    if dropouts:
        lower_q = vals.quantile(0.25)
        upper_q = vals.quantile(0.75)
        spread = upper_q - lower_q
        fence_lo = lower_q - 1.5*spread
        fence_hi = upper_q + 1.5*spread
        inside = (vals > fence_lo) & (vals < fence_hi)
        vals = vals.loc[inside]
    total = sum(vals)
    return total / len(vals)

def mae(vals, refs):
    """Mean absolute error of `vals` against `refs`."""
    abs_err = abs(vals - refs)
    return mean(abs_err)

def mse(vals, refs):
    """Mean signed error of `vals` against `refs`."""
    signed_err = vals - refs
    return mean(signed_err)

def mape(vals, refs):
    """Mean absolute error relative to `refs`, as a percentage."""
    rel_abs_err = abs((vals - refs) / refs)
    return mean(rel_abs_err) * 100

def mspe(vals, refs):
    """Mean signed error relative to `refs`, as a percentage."""
    rel_err = (vals - refs) / refs
    return mean(rel_err) * 100

# RMS
def rms(vals):
    """Root mean square: ``sqrt(sum(vals**2) / len(vals))``.

    The builtin `sum` iterates over the first axis, so a 2-D array yields one
    value per column.
    """
    squares = vals ** 2
    mean_square = sum(squares) / len(vals)
    return mean_square ** 0.5

def rmse(vals, refs):
    """Root-mean-square of the signed error `vals - refs`."""
    residuals = vals - refs
    return rms(residuals)

# Deviation
def mad(vals):
    """Mean absolute deviation of `vals` about the value returned by `mean(vals)`."""
    centre = mean(vals)
    abs_dev = abs(vals - centre)
    return sum(abs_dev) / len(vals)

def msd(vals):
    """Mean signed deviation of `vals` about the value returned by `mean(vals)`."""
    centre = mean(vals)
    signed_dev = vals - centre
    return sum(signed_dev) / len(vals)

# Max & Min
def minse(vals, refs):
    """Smallest signed error `vals - refs` (builtin `min`)."""
    signed = vals - refs
    return min(signed)

def maxse(vals, refs):
    """Largest signed error `vals - refs` (builtin `max`)."""
    signed = vals - refs
    return max(signed)

def maxspe(vals, refs):
    """Largest signed error relative to `refs`, as a percentage."""
    relative = (vals - refs) / refs
    return max(relative) * 100

def minspe(vals, refs):
    """Smallest signed error relative to `refs`, as a percentage."""
    relative = (vals - refs) / refs
    return min(relative) * 100

def minae(vals, refs):
    """Smallest absolute error ``|vals - refs|`` (builtin `min`)."""
    magnitude = abs(vals - refs)
    return min(magnitude)

def maxae(vals, refs):
    """Largest absolute error ``|vals - refs|`` (builtin `max`)."""
    magnitude = abs(vals - refs)
    return max(magnitude)

def maxape(vals, refs):
    """Largest absolute error relative to `refs`, as a percentage."""
    rel_magnitude = abs((vals - refs) / refs)
    return max(rel_magnitude) * 100

def minape(vals, refs):
    """Smallest absolute error relative to `refs`, as a percentage."""
    rel_magnitude = abs((vals - refs) / refs)
    return min(rel_magnitude) * 100

# Distribution stats
def iqr(series):
    """Interquartile range: distance between the 0.75 and 0.25 quantiles of `series`."""
    upper = series.quantile(0.75)
    lower = series.quantile(0.25)
    return abs(upper - lower)

def Range(series):
    """Full range: distance between the 1.0 and 0.0 quantiles of `series`."""
    top = series.quantile(1)
    bottom = series.quantile(0)
    return abs(top - bottom)

# ==> Define subsets <==
# Basis set levels: the double- and triple-zeta lists, and their concatenation
dz = ['DZ', 'jaDZ', 'haDZ', 'aDZ']
tz = ['TZ', 'maTZ', 'jaTZ', 'haTZ', 'aTZ']
dtz = dz + tz
# Matching DZ/TZ pairs for each prefix ('' / 'ja' / 'ha' / 'a')
XZ, jaXZ, haXZ, aXZ = (['%sDZ' % prefix, '%sTZ' % prefix] for prefix in ('', 'ja', 'ha', 'a'))
stdbas = XZ + aXZ
# A21 summary stats & system subsets
a21_lrmsd = 'avgLRMSD maxLRMSD minLRMSD rmsRMSD'.split()
# mean / max / min of each error flavour, followed by the RMS error
a21_dcom = [agg + err for err in ('ae', 'ape', 'se', 'spe') for agg in ('m', 'max', 'min')] + ['rmse']
a24 = ['A24-{}'.format(n) for n in range(1, 22)]
a24_hb = a24[:5]
a24_mx = a24[5:13] + a24[15:16]
a24_dd = a24[13:15] + a24[16:]
# PES summary stats
hbc6 = 'fannfann faonfann faonfaon faoofann faoofaon faoofaoo'.split()
nbc7 = 'BzBz-S BzBz-T BzH2S BzMe MeMe PyPy-S2 PyPy-T3'.split()

In [38]:
# Display the 'hbc6' rows of the signed-error table for the interpolated minima,
# restricted to the 'CP' columns of 'df-b3lyp-d3' (every key on the middle column level).
pes_err.loc[idx['hbc6',:],idx['df-b3lyp-d3',:,'CP']]

Unnamed: 0_level_0,Unnamed: 1_level_0,df-b3lyp-d3,df-b3lyp-d3,df-b3lyp-d3,df-b3lyp-d3
Unnamed: 0_level_1,Unnamed: 1_level_1,DZ,TZ,aDZ,aTZ
Unnamed: 0_level_2,Unnamed: 1_level_2,CP,CP,CP,CP
hbc6,fannfann,-0.0129818,-0.0127987,-0.0157632,-0.0130179
hbc6,faonfann,0.00700886,-0.00338142,-0.00228405,-0.00372916
hbc6,faonfaon,0.0213273,0.00205474,0.0064786,0.00148573
hbc6,faoofann,,,,
hbc6,faoofaon,0.02044,0.00176761,0.012984,0.00238898
hbc6,faoofaoo,0.033378,-0.00356446,0.031051,0.000999728
hbc6,mae,0.0190272,0.00471338,0.0137122,0.0043243
hbc6,mape,,,,
hbc6,maxae,0.033378,0.0127987,0.031051,0.0130179
hbc6,maxape,0.935285,0.313165,0.87008,0.318528


## Manuscript Figures
### 3.1 Optimization of A21 Complexes
---

In [17]:
# Fill the ('all', 'rmsRMSD') row with rms() of the 'a24' rows of the 'LRMSD' columns.
# `.values` hands rms() a plain array rather than a labelled frame.
# NOTE(review): the left-hand side selects columns from 'df-b3lyp-d3' onward, while the
# right-hand side takes every first-level key -- confirm both select the same columns.
mt.loc[idx['all','rmsRMSD'], idx['df-b3lyp-d3':,:,'LRMSD']] = rms(mt.loc[idx['a24',:], idx[:,:,'LRMSD']].values)

In [35]:
# Emit LaTeX tables for the 'df-m05-2x' dCOM columns of the A21 signed-error frame:
# first the individual 'a24' rows, then the 'all' rows listed in `a21_dcom`.
print(a21_serr.loc[idx['a24',:], idx['df-m05-2x',:,'dCOM']].to_latex())
print(a21_serr.loc[idx['all',a21_dcom], idx['df-m05-2x',:,'dCOM']].to_latex())

\begin{tabular}{llllll}
\toprule
    &       & \multicolumn{4}{l}{df-m05-2x} \\
    &       &           DZ &           TZ &          aDZ &         aTZ \\
    &       &         dCOM &         dCOM &         dCOM &        dCOM \\
\midrule
a24 & A24-1 &  0.000832211 &    0.0131587 &  -0.00506048 & -0.00262976 \\
    & A24-10 &    -0.229429 &    -0.143693 &   -0.0808946 &  -0.0588027 \\
    & A24-11 &    -0.107156 &   -0.0436436 &   -0.0118335 &  0.00470912 \\
    & A24-12 &  -0.00131608 &  0.000245015 &  -0.00166849 &  -0.0037409 \\
    & A24-13 &    0.0750735 &  8.64191e-05 &   -0.0280445 &  -0.0172316 \\
    & A24-14 &    0.0265656 &    0.0388938 &  -0.00292802 &   0.0357613 \\
    & A24-15 &    0.0728047 &     0.070262 &   -0.0331581 &    0.079043 \\
    & A24-16 &    -0.111789 &    -0.110745 &    -0.104481 &   -0.116544 \\
    & A24-17 &    0.0483725 &    0.0473397 &   -0.0450176 &   0.0313989 \\
    & A24-18 &   -0.0032186 &    0.0234783 &   -0.0651501 & -0.00405631 \\
    & A24-19 &

In [None]:
# ==> Fig. 2 <==
# One panel per method.  For every basis set in `data` a pair of boxes is drawn:
# the A21 dCOM signed error (pink) at x-1 and the A21 LRMSD (light blue) at x.
data = stdbas
x = [3 * n for n in range(1, len(data)+1)]
# Raw strings keep the LaTeX backslashes without relying on invalid escape sequences.
titles = {'df-b3lyp-d3': 'DF-B3LYP-D3', 'df-wpbe-d3': r'DF-$\omega$PBE-D3', 'df-b97-d3': 'DF-B97-D3',
          'df-wb97x-d': r'DF-$\omega$B97X-D', 'df-m05-2x': 'DF-M05-2X'}

boxcolors = ['pink', 'lightblue']
dcom_patch = mpatches.Patch(color='pink', label=r'A21 $\Delta$COM Signed Error')
lrmsd_patch = mpatches.Patch(color='lightblue', label='A21 LRMSD')

# Boxplot & legend options
meanprops = {'marker': 's', 'markeredgecolor': 'k', 'markerfacecolor': 'k', 'label': 'blargh', #'markersize': 5
            }
boxprops = {'linewidth': 1.5}
medianprops = dict(linestyle='-', linewidth=1.5, color='k')
medianprops_dcom = dict(linestyle='-', linewidth=1.5, color='cyan')
medianprops_lrmsd = dict(linestyle='-', linewidth=1.5, color='m')
whiskerprops = dict(linestyle='-', linewidth=1.5, color='k')
k_square = mlines.Line2D([], [], color='k', marker='s', markersize=7, label='A21 Mean dCOM', linewidth=0)
# Whiskers reach the data extremes (0th and 100th percentiles).  The percentile
# pair replaces the string 'range', which current matplotlib no longer accepts.
full_range = [0, 100]

# Every first-level column key except the first one gets its own panel
methods = mt.columns.levels[0][1:]
n_panels = len(methods)
plt.rcParams['figure.figsize'] = [10, 5*n_panels]
f, axarr = plt.subplots(n_panels, 1, figsize=(10, 5*n_panels), sharex=True, squeeze=False)
axarr = axarr[:, 0]  # 1-D array of axes, also when there is a single panel

for k, m in enumerate(methods):
    panel = axarr[k]
    # Plot
    for pos, bas in zip(x, data):
        bp = panel.boxplot(a21_serr.loc[idx['a24'], idx[m,bas,'dCOM']].values, positions=[pos-1],
                           whis=full_range,
                           showmeans=True,
                           meanprops=meanprops,
                           medianprops=medianprops,
                           showfliers=False,
                           widths=0.5,
                           patch_artist=True)
        bx = panel.boxplot(mt.loc[idx['a24'], idx[m,bas,'LRMSD']].values, positions=[pos],
                           whis=full_range,
                           showmeans=True,
                           meanprops=meanprops,
                           medianprops=medianprops,
                           widths=0.5,
                           patch_artist=True)
        for patch in bp['boxes']:
            patch.set_facecolor(boxcolors[0])
        for patch in bx['boxes']:
            patch.set_facecolor(boxcolors[1])
    # Plot Options
    panel.set_xlim(x[0] - 1.5, x[-1] + 0.5)
    panel.set_ylim(-0.3, 0.3)
    panel.set_ylabel(r'A21 $\Delta$COM Signed Error & LRMSD ($\AA$)')
    panel.hlines(0, 0, x[-1] + 1, linestyle='--', linewidth=1, zorder=1)
    # Nested grey bands at +/-0.1, +/-0.05 and +/-0.01
    for half_width, alpha in ((0.1, 0.1), (0.05, 0.2), (0.01, 0.3)):
        panel.fill_between(np.arange(x[0]-4,x[-1]+2), -half_width, half_width, facecolor='grey', alpha=alpha)

# The x axis is shared, so ticks are set once on the bottom panel after all boxes are
# drawn (boxplot resets them).  The legend also goes on the bottom panel, which is
# where the implicit `plt.legend` call used to put it.
axarr[-1].set_xticks([pos-0.5 for pos in x])
axarr[-1].set_xticklabels(data)
axarr[-1].legend(handles=[dcom_patch, lrmsd_patch], loc='lower right', fontsize='x-large')

In [None]:
# ==> Fig. 3a <==
# One figure per basis set: LRMSD box per method, with the HB / MX / DD subset
# members scattered to the left of each box.
plt.rcParams["figure.figsize"] = [10,5]
methods = mt.columns.levels[0][1:]  # every first-level column key except the first
x = [3 * n for n in range(1, len(methods)+1)]
ticklabels = {'df-b3lyp-d3': 'DF-B3LYP-D3', 'df-b97-d3': 'DF-B97-D3',
          'df-m05-2x': 'DF-M05-2X'}
titles = {'DZ': 'cc-pVDZ', 'TZ': 'cc-pVTZ', 'aDZ': 'aug-cc-pVDZ', 'aTZ': 'aug-cc-pVTZ'}
# Boxplot & legend options
meanprops = {'marker': 's', 'markeredgecolor': 'k', 'markerfacecolor': 'k', 'label': 'blargh', #'markersize': 5
            }
k_square = mlines.Line2D([], [], color='k', marker='s', markersize=7, label='A21 Mean LRMSD', linewidth=0)

for b in stdbas:
    # Create the axes explicitly and hand them to pandas, so no separate empty
    # figure is left behind by a bare plt.figure() call.
    fig, ax = plt.subplots(figsize=(10,5))
    # whis=[0, 100] puts the whiskers at the data extremes (replaces the string 'range',
    # which current matplotlib no longer accepts).
    boxes = mt.loc[idx['a24'], idx[list(ticklabels.keys()),b,'LRMSD']].plot.box(
        ax=ax, whis=[0, 100], showmeans=True, positions=x, meanprops=meanprops)
    for pos, m in zip(x, methods):
        hb = ax.scatter([pos - 1.5]*len(a24_hb), mt.loc[idx['a24', a24_hb], idx[m,b,'LRMSD']].values, facecolors='none', edgecolors='r', label='HB Subset Members')
        mx = ax.scatter([pos - 1]*len(a24_mx), mt.loc[idx['a24', a24_mx], idx[m,b,'LRMSD']].values, facecolors='none', edgecolors='g', label='MX Subset Members')
        dd = ax.scatter([pos - 0.5]*len(a24_dd), mt.loc[idx['a24', a24_dd], idx[m,b,'LRMSD']].values, facecolors='none', edgecolors='b', label='DD Subset Members')
    ax.set_xlim(0.5, x[-1] + 0.5)
    if b == 'aTZ':
        ax.set_ylim(0, 0.55)
    # One label per tick: labels come from the same `methods` sequence as the
    # positions (the former `[1:-2]` slice always produced two labels too few).
    ax.set_xticks([pos-0.75 for pos in x])
    ax.set_xticklabels([ticklabels[m] for m in methods])
    ax.set_ylabel(r'LRMSD of Optimized Geometry ($\AA$)')
    ax.set_title(titles[b])
    # Scatter handles come from the last method drawn; colours are the same for all
    ax.legend(handles=[hb, mx, dd, k_square])
    # Nested grey bands from 0 up to 0.1, 0.05 and 0.01
    for upper, alpha in ((0.1, 0.1), (0.05, 0.2), (0.01, 0.3)):
        ax.fill_between(np.arange(x[0]-4,x[-1]+2), 0, upper, facecolor='grey', alpha=alpha)

In [None]:
# ==> Fig. 3b <==
# One figure per basis set: dCOM signed-error box per method, with the HB / MX / DD
# subset members scattered to the left of each box.

plt.rcParams["figure.figsize"] = [10,5]
methods = mt.columns.levels[0][1:]  # every first-level column key except the first
# One position per method.  This used to be sized from `len(stdbas) - 1`, which only
# happens to equal the number of methods.
x = [3 * n for n in range(1, len(methods)+1)]
ticklabels = {'df-b3lyp-d3': 'DF-B3LYP-D3', 'df-b97-d3': 'DF-B97-D3',
          'df-m05-2x': 'DF-M05-2X'}
titles = {'DZ': 'cc-pVDZ', 'TZ': 'cc-pVTZ', 'aDZ': 'aug-cc-pVDZ', 'aTZ': 'aug-cc-pVTZ'}
# Boxplot & legend options
meanprops = {'marker': 's', 'markeredgecolor': 'k', 'markerfacecolor': 'k', 'label': 'blargh', #'markersize': 5
            }
k_square = mlines.Line2D([], [], color='k', marker='s', markersize=7, label='A21 Mean LRMSD', linewidth=0)

for b in mt.columns.levels[1][1:]:
    # Plot -- explicit axes are passed to pandas so no separate empty figure is created
    fig, ax = plt.subplots(figsize=(10,5))
    # whis=[0, 100] puts the whiskers at the data extremes (replaces the string 'range',
    # which current matplotlib no longer accepts).
    bp = a21_serr.loc[idx['a24'], idx[:,b,'dCOM']].plot.box(ax=ax, whis=[0, 100], showmeans=True, positions=x,
                                                            meanprops=meanprops)
    for pos, m in zip(x, methods):
        hb = ax.scatter([pos - 1.5]*len(a24_hb), a21_serr.loc[idx['a24', a24_hb], idx[m,b,'dCOM']],
                        facecolors='none', edgecolors='r', label='HB Subset Members')
        mx = ax.scatter([pos - 1]*len(a24_mx), a21_serr.loc[idx['a24', a24_mx], idx[m,b,'dCOM']],
                        facecolors='none', edgecolors='g', label='MX Subset Members')
        dd = ax.scatter([pos - 0.5]*len(a24_dd), a21_serr.loc[idx['a24', a24_dd], idx[m,b,'dCOM']],
                        facecolors='none', edgecolors='b', label='DD Subset Members')

    # Plot Options
    ax.set_xlim(0.5, x[-1] + 0.5)
    if b == 'aTZ':
        ax.set_ylim(-0.15, 0.3)
    ax.set_xticks([pos-0.75 for pos in x])
    ax.set_xticklabels([ticklabels[m] for m in methods])
    ax.set_ylabel(r'A21 dCOM Signed Error ($\AA$)')
    ax.set_title(titles[b])
    ax.legend(handles=[hb, mx, dd, k_square])
    # Nested grey bands at +/-0.1, +/-0.05 and +/-0.01
    for half_width, alpha in ((0.1, 0.1), (0.05, 0.2), (0.01, 0.3)):
        ax.fill_between(np.arange(x[0]-4,x[-1]+2), -half_width, half_width, facecolor='grey', alpha=alpha)

### 3.2 Prediction of Optimal Intermolecular Separation in NBC7x and HBC6 Interaction Energy Scans
----

In [None]:
# ==> Fig. 4 <==
# Interaction-energy curves for one system/basis/CP combination, one line per method
# plus the reference curve, with a dashed vertical line at each interpolated minimum.
plt.rcParams['figure.figsize'] = [8,6]
mtdlabel = {'df-b3lyp-d3': 'DF-B3LYP-D3', 'df-b97-d3': 'DF-B97-D3', 'df-m05-2x': 'DF-M05-2X', 'REF': 'CCSD(T)/CBS'}
dbse_label = {'hbc6': 'HBC6', 'nbc10ext': 'NBC7x'}
system_id = {'faoofaoo': '1', 'faonfaon': '2', 'fannfann': '3', 
             'faoofaon': '4', 'faonfann': '5', 'faoofann': '6',
             'BzBz-S': '1', 'BzBz-T': '2', 'BzH2S': '4', 'BzMe': '5', 
             'MeMe': '6', 'PyPy-S2': '7', 'PyPy-T3': '8'
            }
bas_label = {'DZ': 'cc-pVDZ', 'aDZ': 'aug-cc-pVDZ', 'TZ': 'cc-pVTZ', 'aTZ': 'aug-cc-pVTZ'}

colors = ['r','b','g']
# NOTE: this rebinds `markers`, hiding the `matplotlib.markers` module imported under
# that name at the top of the notebook.
markers = ['s','>','*']

# Selection plotted in this figure
d = 'hbc6'
b = 'DZ'
cp = 'unCP'
s = 'fannfann'

# Two independent lists.  The chained form `minie = maxie = []` bound both names to
# the same list object.
minie = []
maxie = []
for j, m in enumerate(scans.columns.levels[0][:-3]):
    curve = scans.loc[idx[d,s], idx[m,b,cp]]
    # curve[0] goes on the x axis, curve[1] on the y axis
    plt.plot(curve[0], curve[1], color=colors[j], marker=markers[j], label=mtdlabel[m])
    pesmin = pes.loc[idx[d,s], idx[m,b,cp]]
    plt.vlines(pesmin, -30, 0, linestyle='--', color=colors[j], linewidth=2)
    # NOTE(review): min()/max() are taken over the whole `curve` object, not just
    # curve[1]; confirm the x values are not meant to be excluded from the y-limits.
    minie.append(curve.min())
    maxie.append(curve.max())

# Plot reference curve
ref = scans.loc[idx[d,s], idx['REF']].values[0]
minie.append(ref.min())
maxie.append(ref.max())
plt.plot(ref[0],ref[1],'ko-', label=mtdlabel['REF'])
refmin = pes.loc[idx[d,s], idx['REF']].values[0]
plt.vlines(refmin, -30, 0, linestyle='--', color='k', linewidth=2)

# Plot Options
plt.title('%s-%s: %s Curves with the %s basis' % (dbse_label[d], system_id[s], cp,
                                                  bas_label[b]))
plt.hlines(0,0,12,linestyle='-',linewidth=1)
plt.xlim(ref[0,0]-0.05, refmin+1)
plt.ylim(min(minie) - 1, max(maxie)+1)
plt.minorticks_on()
ax = plt.gca()
# NOTE(review): `bottom` is an x-axis side while axis='y' is requested here; confirm
# which minor ticks this call is meant to hide.
ax.tick_params(axis='y',which='minor',bottom='off')
# Raw strings keep the LaTeX backslash without relying on an invalid escape sequence
plt.xlabel(r'Intermolecular Separation, $R$ ($\AA$)',fontsize='xx-large')
plt.ylabel('Interaction Energy (kcal/mol)',fontsize='xx-large')
plt.legend(loc='best',fontsize='x-large')
# File name follows the selection above; for the values set here it evaluates to
# 'figs/hbc6-3_DZ-unCP_curves.pdf'.
plt.savefig('figs/%s-%s_%s-%s_curves.pdf' % (d, system_id[s], b, cp))

In [None]:
# ==> Fig. 5 <==
# 2x2 grid of grouped boxplots of the signed error in the interpolated minima:
# rows are databases (NBC7x, HBC6), columns are CP / unCP curves.  Within a panel,
# every basis set in `stdbas` gets three boxes, one per method in `m`.

m = mt.columns.levels[0][1:]
x = [3 * n for n in range(1, len(stdbas)+1)]
# Raw strings keep the LaTeX backslashes without relying on invalid escape sequences.
titles = {'df-b3lyp-d3': 'DF-B3LYP-D3', 'df-wpbe-d3': r'DF-$\omega$PBE-D3', 'df-b97-d3': 'DF-B97-D3',
          'df-wb97x-d': r'DF-$\omega$B97X-D', 'df-m05-2x': 'DF-M05-2X'}

boxcolors = ['xkcd:light red','lightblue','lightgreen']
b3lyp_patch = mpatches.Patch(color='xkcd:light red', label='DF-B3LYP-D3')
b97_patch = mpatches.Patch(color='lightblue', label='DF-B97-D3')
m05_patch = mpatches.Patch(color='lightgreen', label='DF-M05-2X')

# Boxplot & legend options
meanprops = {'marker': 's', 'markeredgecolor': 'k', 'markerfacecolor': 'k', 'label': 'blargh', #'markersize': 5
            }
boxprops = {'linewidth': 1.5}
medianprops = dict(linestyle='-', linewidth=1.5, color='k')
medianprops_dcom = dict(linestyle='-', linewidth=1.5, color='cyan')
medianprops_lrmsd = dict(linestyle='-', linewidth=1.5, color='m')
whiskerprops = dict(linestyle='-', linewidth=1.5, color='k')
k_square = mlines.Line2D([], [], color='k', marker='s', markersize=7, linewidth=0)

plt.rcParams['figure.figsize'] = [20,10]
f, axarr = plt.subplots(2, 2, figsize=(20,10), sharex=True)


def _draw_method_boxes(ax, dbse, systems, cp, drop_missing=False):
    """Draw the grouped error boxes of one panel.

    For each basis set in `stdbas` (anchored at the matching entry of `x`), the first
    three methods in `m` are drawn at offsets -0.65, 0 and +0.65 and filled with the
    matching entry of `boxcolors`.

    ax           -- axes to draw on
    dbse         -- first-level row key of `pes_err` (database)
    systems      -- second-level row keys to include
    cp           -- third-level column key ('CP' or 'unCP')
    drop_missing -- drop NaN entries before plotting
    """
    for pos, bas in zip(x, stdbas):
        for method, shift, color in zip(m, (-0.65, 0.0, 0.65), boxcolors):
            errors = pes_err.loc[idx[dbse, systems], idx[method, bas, cp]]
            if drop_missing:
                errors = errors.dropna()
            # whis=[0, 100] puts the whiskers at the data extremes, so there are no
            # fliers to show; it replaces the string 'range', which current
            # matplotlib no longer accepts.
            box = ax.boxplot(errors.values,
                             positions=[pos + shift],
                             whis=[0, 100],
                             showmeans=True,
                             meanprops=meanprops,
                             medianprops=medianprops,
                             widths=0.5,
                             patch_artist=True)
            for patch in box['boxes']:
                patch.set_facecolor(color)


# (axes, database key, systems, CP key, drop NaNs?, title, y-limits)
panels = [
    (axarr[0,0], 'nbc10ext', nbc7, 'CP',   False, '(a) NBC7x: CP Curves',   (-0.16, 0.25)),
    (axarr[0,1], 'nbc10ext', nbc7, 'unCP', False, '(b) NBC7x: unCP Curves', (-0.16, 0.25)),
    (axarr[1,0], 'hbc6',     hbc6, 'CP',   True,  '(c) HBC6: CP Curves',    (-0.13, 0.15)),
    (axarr[1,1], 'hbc6',     hbc6, 'unCP', True,  '(d) HBC6: unCP Curves',  (-0.13, 0.15)),
]

# ==> Plot <==
for panel, dbse, systems, cp, drop_missing, panel_title, ylimits in panels:
    _draw_method_boxes(panel, dbse, systems, cp, drop_missing)

# ==> Plot Options <==
# The x axis is shared, so ticks and limits only need to be set once.  Ticks go on the
# last panel, the axes the implicit `plt.xticks` call used to act on.
axarr[1,1].set_xticks(list(x))
axarr[1,1].set_xticklabels(stdbas)
axarr[0,0].set_xlim(x[0] - 1.5, x[-1] + 1.5)

for panel, dbse, systems, cp, drop_missing, panel_title, ylimits in panels:
    panel.set_title(panel_title)
    panel.set_ylabel(r'$\Delta$COM Signed Error ($\AA$)',fontsize='x-large')
    panel.set_ylim(*ylimits)
    panel.hlines(0, 0, x[-1] + 5, linestyle='--', linewidth=1, zorder=1)
    # Nested grey bands at +/-0.1, +/-0.05 and +/-0.01
    for half_width, alpha in ((0.1, 0.1), (0.05, 0.2), (0.01, 0.3)):
        panel.fill_between(np.arange(x[0]-4,x[-1]+5), -half_width, half_width, facecolor='grey', alpha=alpha)

# Single legend on the last panel (where the implicit `plt.legend` call used to put it)
axarr[1,1].legend(handles=[b3lyp_patch, b97_patch, m05_patch], loc='upper center', fontsize='x-large', ncol=3)

## Supporting Information
---

In [None]:
# ==> Fig. S-2 -- S-4 <==
# One figure per method: LRMSD box per basis set, with the HB / MX / DD subset members
# scattered to the left of each box.
x = [3 * n for n in range(1, len(stdbas)+1)]
# Raw strings keep the LaTeX backslashes without relying on invalid escape sequences.
titles = {'df-b3lyp-d3': 'DF-B3LYP-D3', 'df-wpbe-d3': r'DF-$\omega$PBE-D3', 'df-b97-d3': 'DF-B97-D3',
          'df-wb97x-d': r'DF-$\omega$B97X-D', 'df-m05-2x': 'DF-M05-2X'}

# Boxplot & legend options
meanprops = {'marker': 's', 'markeredgecolor': 'k', 'markerfacecolor': 'k', 'label': 'blargh', #'markersize': 5
            }
k_square = mlines.Line2D([], [], color='k', marker='s', markersize=7, label='A21 Mean LRMSD', linewidth=0)

# Basis keys to plot; computed once because `mt` does not change inside the loop
bases = mt.reindex(columns=stdbas, level=1).columns.levels[1]

for m in mt.columns.levels[0][1:]:
    fig, ax = plt.subplots(figsize=(10,5))
    for pos, b in zip(x, bases):
        # whis=[0, 100] puts the whiskers at the data extremes (replaces the string
        # 'range', which current matplotlib no longer accepts).
        bx = ax.boxplot(mt.loc[idx['a24'], idx[m,b,'LRMSD']], positions=[pos], whis=[0, 100], showmeans=True, meanprops=meanprops, widths=0.5)
        hb = ax.scatter([pos - 1.5]*len(a24_hb), mt.loc[idx['a24', a24_hb], idx[m,b,'LRMSD']].values, facecolors='none', edgecolors='r', label='HB Subset Members')
        mx = ax.scatter([pos - 1]*len(a24_mx), mt.loc[idx['a24', a24_mx], idx[m,b,'LRMSD']].values, facecolors='none', edgecolors='g', label='MX Subset Members')
        dd = ax.scatter([pos - 0.5]*len(a24_dd), mt.loc[idx['a24', a24_dd], idx[m,b,'LRMSD']].values, facecolors='none', edgecolors='b', label='DD Subset Members')
    ax.set_xlim(0.5, x[-1] + 0.5)
    ax.set_xticks([pos-0.75 for pos in x])
    ax.set_xticklabels(stdbas)
    ax.set_ylabel(r'LRMSD of Optimized Geometry ($\AA$)')
    ax.set_title(titles[m])
    ax.legend(handles=[hb, mx, dd, k_square])
    # Nested grey bands from 0 up to 0.1, 0.05 and 0.01
    for upper, alpha in ((0.1, 0.1), (0.05, 0.2), (0.01, 0.3)):
        ax.fill_between(np.arange(x[0]-4,x[-1]+2), 0, upper, facecolor='grey', alpha=alpha)

In [None]:
# ==> Fig. S-5 -- S-7 <==
# One figure per method: dCOM signed-error box per basis set, with the HB / MX / DD
# subset members scattered to the left of each box.
x = [3 * n for n in range(1, len(stdbas)+1)]
# Raw strings keep the LaTeX backslashes without relying on invalid escape sequences.
titles = {'df-b3lyp-d3': 'DF-B3LYP-D3', 'df-wpbe-d3': r'DF-$\omega$PBE-D3', 'df-b97-d3': 'DF-B97-D3',
          'df-wb97x-d': r'DF-$\omega$B97X-D', 'df-m05-2x': 'DF-M05-2X'}


# Boxplot & legend options
meanprops = {'marker': 's', 'markeredgecolor': 'k', 'markerfacecolor': 'k', 'label': 'blargh', #'markersize': 5
            }
boxprops = {'linewidth': 1.5}
medianprops = dict(linestyle='-', linewidth=1.5, color='cyan')
whiskerprops = dict(linestyle='-', linewidth=1.5, color='k')
k_square = mlines.Line2D([], [], color='k', marker='s', markersize=7, label='A21 Mean dCOM', linewidth=0)

plt.rcParams["figure.figsize"] = [10,5]
for m in mt.columns.levels[0][1:]:
    # Plot -- explicit axes are passed to pandas so no separate empty figure is created
    fig, ax = plt.subplots(figsize=(10,5))
    # whis=[0, 100] puts the whiskers at the data extremes (replaces the string 'range',
    # which current matplotlib no longer accepts).
    bp = a21_serr.loc[idx['a24'], idx[m,:,'dCOM']].plot.box(ax=ax, whis=[0, 100], showmeans=True, positions=x,
                                                            meanprops=meanprops)
    for pos, b in zip(x, mt.columns.levels[1][1:]):
        hb = ax.scatter([pos - 1.5]*len(a24_hb), a21_serr.loc[idx['a24', a24_hb], idx[m,b,'dCOM']],
                        facecolors='none', edgecolors='r', label='HB Subset Members')
        mx = ax.scatter([pos - 1]*len(a24_mx), a21_serr.loc[idx['a24', a24_mx], idx[m,b,'dCOM']],
                        facecolors='none', edgecolors='g', label='MX Subset Members')
        dd = ax.scatter([pos - 0.5]*len(a24_dd), a21_serr.loc[idx['a24', a24_dd], idx[m,b,'dCOM']],
                        facecolors='none', edgecolors='b', label='DD Subset Members')
    # Plot options
    ax.set_xlim(1, x[-1] + 0.5)
    ax.set_xticks([pos-0.75 for pos in x])
    ax.set_xticklabels(stdbas)
    ax.set_ylabel(r'A21 dCOM Signed Error ($\AA$)')
    ax.hlines(0, 0, x[-1] + 1, linestyle='--', linewidth=1, zorder=1)
    ax.set_title(titles[m])
    ax.legend(handles=[hb, mx, dd, k_square], loc='best', ncol=2)
    # Nested grey bands at +/-0.1, +/-0.05 and +/-0.01
    for half_width, alpha in ((0.1, 0.1), (0.05, 0.2), (0.01, 0.3)):
        ax.fill_between(np.arange(x[0]-4,x[-1]+2), -half_width, half_width, facecolor='grey', alpha=alpha)

In [None]:
# ==> Fig. S-8 -- S-111 <==
# One figure per (database, system, basis set, CP treatment): an interaction-energy
# curve per method plus the reference curve, with a dashed vertical line at each
# interpolated minimum.
plt.rcParams['figure.figsize'] = [8,6]
mtdlabel = {'df-b3lyp-d3': 'DF-B3LYP-D3', 'df-b97-d3': 'DF-B97-D3', 'df-m05-2x': 'DF-M05-2X', 'REF': 'CCSD(T)/CBS'}
dbse_label = {'hbc6': 'HBC6', 'nbc10ext': 'NBC7x'}
system_id = {'hbc6':{'fannfann': '3', 'faonfann': '5', 'faonfaon': '2', 
                     'faoofann': '6', 'faoofaon': '4', 'faoofaoo': '1',
                    },
             'nbc10ext':{'BzBz-S': '1', 'BzBz-T': '2', 'BzH2S': '4', 'BzMe': '5', 
                         'MeMe': '6', 'PyPy-S2': '7', 'PyPy-T3': '8'
                        }
            }
bas_label = {'DZ': 'cc-pVDZ', 'aDZ': 'aug-cc-pVDZ', 'TZ': 'cc-pVTZ', 'aTZ': 'aug-cc-pVTZ'}

colors = ['r','b','g']
# NOTE: this rebinds `markers`, hiding the `matplotlib.markers` module imported under
# that name at the top of the notebook.
markers = ['s','>','*']


# Plot DFT curves
for d, systems in system_id.items():
    # Same default figure size for every figure below (set once per database
    # instead of once per figure)
    plt.rcParams["figure.figsize"] = [10,5]
    # The 'hbc6' legends use two columns, all others the default single column
    legend_cols = 2 if d == 'hbc6' else 1
    for s, system_number in systems.items():
        # The reference curve and its minimum depend only on (d, s)
        ref = scans.loc[idx[d,s], idx['REF']].values[0]
        refmin = pes.loc[idx[d,s], idx['REF']].values[0]
        for b in stdbas:
            for cp in ['CP', 'unCP']:
                minie = []
                maxie = []
                fig = plt.figure(figsize=(10,5))
                for j, m in enumerate(scans.columns.levels[0][:-3]):
                    curve = scans.loc[idx[d,s], idx[m,b,cp]]
                    # curve[0] goes on the x axis, curve[1] on the y axis
                    plt.plot(curve[0], curve[1], color=colors[j], marker=markers[j], label=mtdlabel[m])
                    pesmin = pes.loc[idx[d,s], idx[m,b,cp]]
                    plt.vlines(pesmin, -30, 0, linestyle='--', color=colors[j], linewidth=2)
                    # NOTE(review): min()/max() are taken over the whole `curve` object,
                    # not just curve[1]; confirm this is intended for the y-limits.
                    minie.append(curve.min())
                    maxie.append(curve.max())

                # Plot reference curve
                minie.append(ref.min())
                maxie.append(ref.max())
                plt.plot(ref[0],ref[1],'ko-', label=mtdlabel['REF'])
                plt.vlines(refmin, -30, 0, linestyle='--', color='k', linewidth=2)

                # Plot Options
                plt.title('%s-%s: %s Curves with the %s basis' % (dbse_label[d], system_number, cp,
                                                                  bas_label[b]))
                plt.hlines(0,0,12,linestyle='-',linewidth=1)
                plt.xlim(ref[0,0]-0.05, refmin+1)
                plt.ylim(min(minie) - 1, max(maxie)+1)
                # Raw string keeps the LaTeX backslash without an invalid escape sequence
                plt.xlabel(r'Intermolecular Separation, $R$ ($\AA$)',fontsize='xx-large')
                plt.ylabel('Interaction Energy (kcal/mol)',fontsize='xx-large')
                if d in ('hbc6', 'nbc10ext'):
                    plt.legend(loc='best',fontsize='x-large',ncol=legend_cols)
                # Saving is disabled.  `system_id` is nested by database, so the system
                # number is system_id[d][s] (available here as `system_number`).
                #plt.savefig('figs/curves/%s-%s-%s-%s_curves.pdf' % (dbse_label[d], system_number, b, cp), 
                #            transparent=True, bbox_inches='tight', pad_inches=0.0)

## Additional Figures

In [None]:
# ==> LRMSD Boxplots: Grouped by Basis Set (subplots) <==
# One panel per basis set in `stdbas`; within a panel, one LRMSD box per method with
# the HB / MX / DD subset members scattered to its left.
methods = mt.columns.levels[0][1:]  # every first-level column key except the first
x = [3 * n for n in range(1, len(methods)+1)]
# Raw strings keep the LaTeX backslashes without relying on invalid escape sequences.
xticklabels = {'df-b3lyp-d3': 'DF-B3LYP-D3', 'df-wpbe-d3': r'DF-$\omega$PBE-D3', 'df-b97-d3': 'DF-B97-D3',
          'df-wb97x-d': r'DF-$\omega$B97X-D', 'df-m05-2x': 'DF-M05-2X'}
titles = {'DZ': 'cc-pVDZ', 'TZ': 'cc-pVTZ', 'aDZ': 'aug-cc-pVDZ', 'aTZ': 'aug-cc-pVTZ'}

# Boxplot & legend options
meanprops = {'marker': 's', 'markeredgecolor': 'k', 'markerfacecolor': 'k', 'label': 'blargh', #'markersize': 5
            }
k_square = mlines.Line2D([], [], color='k', marker='s', markersize=7, label='A21 Mean LRMSD', linewidth=0)

n_panels = len(stdbas)
plt.rcParams["figure.figsize"] = [10, 5*n_panels]
f, axarr = plt.subplots(n_panels, 1, sharex=True, figsize=(10, 5*n_panels), squeeze=False)
axarr = axarr[:, 0]  # 1-D array of axes, also when there is a single panel

for j, b in enumerate(stdbas):
    panel = axarr[j]
    for pos, m in zip(x, methods):
        # whis=[0, 100] puts the whiskers at the data extremes (replaces the string
        # 'range', which current matplotlib no longer accepts).
        bx = panel.boxplot(mt.loc[idx['a24'], idx[m,b,'LRMSD']], positions=[pos], whis=[0, 100], showmeans=True, meanprops=meanprops, widths=0.5)
        hb = panel.scatter([pos - 1.5]*len(a24_hb), mt.loc[idx['a24', a24_hb], idx[m,b,'LRMSD']].values, facecolors='none', edgecolors='r', label='HB Subset Members')
        mx = panel.scatter([pos - 1]*len(a24_mx), mt.loc[idx['a24', a24_mx], idx[m,b,'LRMSD']].values, facecolors='none', edgecolors='g', label='MX Subset Members')
        dd = panel.scatter([pos - 0.5]*len(a24_dd), mt.loc[idx['a24', a24_dd], idx[m,b,'LRMSD']].values, facecolors='none', edgecolors='b', label='DD Subset Members')
    panel.set_xlim(x[0] - 2, x[-1] + 0.5)
    if b == 'aTZ':
        panel.set_ylim(0, 0.6)
    panel.set_ylabel(r'LRMSD of Optimized Geometry ($\AA$)')
    panel.set_title(titles[b])
    # Nested grey bands from 0 up to 0.1, 0.05 and 0.01
    for upper, alpha in ((0.1, 0.1), (0.05, 0.2), (0.01, 0.3)):
        panel.fill_between(np.arange(x[0]-4,x[-1]+2), 0, upper, facecolor='grey', alpha=alpha)

# The x axis is shared: ticks are set once on the bottom panel after all boxes are drawn
# (boxplot resets them).  Labels come from the same `methods` sequence as the positions;
# the former `[1:-2]` slice always produced two labels too few.
axarr[-1].set_xticks([pos-0.75 for pos in x])
axarr[-1].set_xticklabels([xticklabels[m] for m in methods])#, rotation=45)
# Legend on the bottom panel (where the implicit `plt.legend` call used to put it)
axarr[-1].legend(handles=[hb, mx, dd, k_square], loc='best')

In [None]:
# ==> LRMSD Boxplots: Grouped by Method (subplots) <==
# One panel per method; within a panel, one LRMSD box per basis set with the
# HB / MX / DD subset members scattered to its left.
x = [3 * n for n in range(1, len(stdbas)+1)]
# Raw strings keep the LaTeX backslashes without relying on invalid escape sequences.
titles = {'df-b3lyp-d3': 'DF-B3LYP-D3', 'df-wpbe-d3': r'DF-$\omega$PBE-D3', 'df-b97-d3': 'DF-B97-D3',
          'df-wb97x-d': r'DF-$\omega$B97X-D', 'df-m05-2x': 'DF-M05-2X'}

# Boxplot & legend options
meanprops = {'marker': 's', 'markeredgecolor': 'k', 'markerfacecolor': 'k', 'label': 'blargh', #'markersize': 5
            }
k_square = mlines.Line2D([], [], color='k', marker='s', markersize=7, label='A21 Mean LRMSD', linewidth=0)

methods = mt.columns.levels[0][1:]  # every first-level column key except the first
# Basis keys to plot; computed once because `mt` does not change inside the loop
bases = mt.reindex(columns=stdbas, level=1).columns.levels[1]

n_panels = len(methods)
plt.rcParams["figure.figsize"] = [10, 5*n_panels]
f, axarr = plt.subplots(n_panels, 1, sharex=True, figsize=(10, 5*n_panels), squeeze=False)
axarr = axarr[:, 0]  # 1-D array of axes, also when there is a single panel

for j, m in enumerate(methods):
    panel = axarr[j]
    for pos, b in zip(x, bases):
        # whis=[0, 100] puts the whiskers at the data extremes (replaces the string
        # 'range', which current matplotlib no longer accepts).
        bx = panel.boxplot(mt.loc[idx['a24'], idx[m,b,'LRMSD']], positions=[pos], whis=[0, 100], showmeans=True, meanprops=meanprops, widths=0.5)
        hb = panel.scatter([pos - 1.5]*len(a24_hb), mt.loc[idx['a24',a24_hb], idx[m,b,'LRMSD']].values, facecolors='none', edgecolors='r', label='HB Subset Members')
        mx = panel.scatter([pos - 1]*len(a24_mx), mt.loc[idx['a24',a24_mx], idx[m,b,'LRMSD']].values, facecolors='none', edgecolors='g', label='MX Subset Members')
        dd = panel.scatter([pos - 0.5]*len(a24_dd), mt.loc[idx['a24',a24_dd], idx[m,b,'LRMSD']].values, facecolors='none', edgecolors='b', label='DD Subset Members')
    panel.set_xlim(0.5, x[-1] + 0.5)
    if m == 'df-m05-2x':
        panel.set_ylim(0, 0.7)
    panel.set_ylabel(r'LRMSD of Optimized Geometry ($\AA$)')
    panel.set_title(titles[m])
    # Nested grey bands from 0 up to 0.1, 0.05 and 0.01
    for upper, alpha in ((0.1, 0.1), (0.05, 0.2), (0.01, 0.3)):
        panel.fill_between(np.arange(x[0]-4,x[-1]+2), 0, upper, facecolor='grey', alpha=alpha)

# The x axis is shared: ticks are set once on the bottom panel after all boxes are drawn
# (boxplot resets them).  The legend also goes on the bottom panel, where the implicit
# `plt.legend` call used to put it.
axarr[-1].set_xticks([pos-0.75 for pos in x])
axarr[-1].set_xticklabels(stdbas, rotation=45)
axarr[-1].legend(handles=[hb, mx, dd, k_square], loc='best')

In [None]:
# ==> Plotting LRMSD & dCOM SE: Box & Whisker (indiv plots) <==
# Shared configuration for the per-method figures: x positions, titles, colours and
# boxplot styling dictionaries.

# Column keys to plot, in x order (concatenation of the XZ and aXZ lists).
data = XZ + aXZ
# Slot positions: 3, 6, 9, ... (one slot per entry of `data`).
x = [3 * k for k in range(1, len(data) + 1)]
# Figure titles keyed by method; raw strings keep the LaTeX backslashes literal.
titles = {'df-b3lyp-d3': 'DF-B3LYP-D3', 'df-wpbe-d3': r'DF-$\omega$PBE-D3', 'df-b97-d3': 'DF-B97-D3',
          'df-wb97x-d': r'DF-$\omega$B97X-D', 'df-m05-2x': 'DF-M05-2X'}

# Box fill colours: [0] is used for the dCOM boxes, [1] for the LRMSD boxes.
boxcolors = ['pink', 'lightblue']
# Legend proxies take their colours from `boxcolors` so they cannot drift from the fills.
dcom_patch = mpatches.Patch(color=boxcolors[0], label='A21 dCOM Signed Error')
lrmsd_patch = mpatches.Patch(color=boxcolors[1], label='A21 LRMSD')

# Boxplot & legend options
meanprops = {'marker': 's', 'markeredgecolor': 'k', 'markerfacecolor': 'k', 'label': 'blargh'}
medianprops = dict(linestyle='-', linewidth=1.5, color='k')
# NOTE(review): the styling dictionaries below and `k_square` are not referenced by the
# plotting loop that follows in this cell; kept in case other cells rely on them.
boxprops = {'linewidth': 1.5}
medianprops_dcom = dict(linestyle='-', linewidth=1.5, color='cyan')
medianprops_lrmsd = dict(linestyle='-', linewidth=1.5, color='m')
whiskerprops = dict(linestyle='-', linewidth=1.5, color='k')
k_square = mlines.Line2D([], [], color='k', marker='s', markersize=7, label='A21 Mean dCOM', linewidth=0)

# Keyword arguments shared by both boxplot series.
# whis=[0, 100] puts the whiskers at the data min/max. It is spelled as percentiles because
# the equivalent whis='range' string is not accepted by current Matplotlib.
box_kwargs = dict(whis=[0, 100], showmeans=True, meanprops=meanprops,
                  medianprops=medianprops, widths=0.5, patch_artist=True)
# x extent of the grey reference bands (identical for every figure).
band_x = np.arange(x[0]-4, x[-1]+2)
plt.rcParams['figure.figsize'] = [10,5]

# One figure per key in column level 0 (first entry skipped). Each slot holds a dCOM
# signed-error box at x[i]-1 and an LRMSD box at x[i].
for m in mt.columns.levels[0][1:]:
    # Plot
    fig, ax = plt.subplots(figsize=(10,5))
    for i, b in enumerate(data):
        # dCOM signed error comes from `a21_serr`; outlier markers are suppressed.
        bp = ax.boxplot(a21_serr.loc[idx['a24'], idx[m, b, 'dCOM']].values, positions=[x[i]-1],
                        showfliers=False, **box_kwargs)
        # LRMSD comes from `mt`.
        bx = ax.boxplot(mt.loc[idx['a24'], idx[m, b, 'LRMSD']].values, positions=[x[i]],
                        **box_kwargs)
        for patch in bp['boxes']:
            patch.set_facecolor(boxcolors[0])
        for patch in bx['boxes']:
            patch.set_facecolor(boxcolors[1])
    # Plot Options
    # Ticks sit midway between the two boxes of each slot.
    ax.set_xticks([pos - 0.5 for pos in x])
    ax.set_xticklabels(data)
    ax.set_xlim(x[0] - 1.5, x[-1] + 0.5)
    if m == 'df-m05-2x':
        # Only the 'df-m05-2x' figure gets a fixed y range; the others autoscale.
        ax.set_ylim(-0.4, 0.7)
    ax.set_ylabel(r'A21 $\Delta$COM Signed Error & LRMSD ($\AA$)')
    # Dashed zero line as the reference for the signed error.
    ax.hlines(0, 0, x[-1] + 1, linestyle='--', linewidth=1, zorder=1)
    ax.set_title(titles[m])
    ax.legend(handles=[dcom_patch, lrmsd_patch], loc='lower center', ncol=2)
    # Nested grey bands at +/-0.1, +/-0.05 and +/-0.01; overlapping alphas darken toward 0.
    for half_width, band_alpha in ((0.1, 0.1), (0.05, 0.2), (0.01, 0.3)):
        ax.fill_between(band_x, -half_width, half_width, facecolor='grey', alpha=band_alpha)