In [None]:
import matplotlib.pyplot as plt
import numpy as np
from sklearn.linear_model import LinearRegression

In [None]:
def qvib(vibs,kB,T):
    """Return the vibrational partition function of a set of modes.

    The result is the product, over every mode energy e in ``vibs``, of
    exp(-e/(2*kB*T)) / (1 - exp(-e/(kB*T))).

    vibs : array or list of vibrational mode energies (units of eV)
    kB   : Boltzmann's constant (units of eV/K)
    T    : temperature (units of K)
    """
    partition = 1
    for mode_energy in vibs:
        # numerator: exp(-e/(2*kB*T)); denominator: 1 - exp(-e/(kB*T))
        numerator = np.exp(-mode_energy/(2*kB*T))
        denominator = 1 - np.exp(-mode_energy/(kB*T))
        partition = partition*(numerator/denominator)
    return partition

In [None]:
def read_vib_file(filename):
    """Read a vibrational-mode file and return the mode energies in eV.

    The file is expected to start with a title line (ignored) followed by
    whitespace-separated columns; the third column holds the vibrational
    modes in meV. Blank lines (for example a trailing empty line) are
    skipped instead of raising IndexError.

    filename : path of the text file to parse
    returns  : list of floats, one per data line, in eV
    raises   : IndexError if a non-blank data line has fewer than three
               columns, ValueError if the third column is not numeric
    """
    vibs = []
    with open(filename) as f:
        next(f, None) # skip the title line; the default keeps an empty file from raising
        for line in f:
            columns = line.split()
            if not columns:
                continue # blank line: nothing to parse
            vibs.append(float(columns[2])/1000) # meV -> eV
    return vibs # output units are eV

In [None]:
# ---- physical constants and temperature grid ----
kB = 8.617333262*10**(-5) # Boltzmann's constant, eV/K
T = np.linspace(start=200,stop=1000,num=500) # Temperature, K

# ---- user inputs ----
ML_sites = 16 # number of sites for 1 ML on model surface
spec_number = [1,8,12] # number of adsorbates on model surface for each model with calculated vib modes
atom_label = "O" # adsorbate info
surface_label = "Pt111" # surface info
# common prefix of the vibrational-mode input files, e.g. "vibs_O_Pt111_"
system_label = f"vibs_{atom_label}_{surface_label}_"

# ---- plotting defaults ----
plt.rcParams['font.size'] = 12

# Vibrational free energy, -kB*T*ln(qvib), for every coverage model at every temperature.
# The values are stored in one flat list: all temperatures of the first model,
# then all temperatures of the second model, and so on.
print("--------------------Fitting T-dependence at Constant Coverage--------------------")
Avib_plot = []
for n_adsorbates in spec_number:
    # input file name is <system_label><number of adsorbates>mol.txt
    filename = f"{system_label}{n_adsorbates}mol.txt"
    vibs = read_vib_file(filename)
    Avib_plot.extend(-kB*temperature*np.log(qvib(vibs,kB,temperature)) for temperature in T)

# second name for the flat list; Avib_plot itself is rebound to a 2-D array further down
Avib_plot_save = Avib_plot

# Fit the temperature dependence of the vibrational free energy at each coverage
# with a second-order polynomial: Avib(T) ~ c0 + c1*T + c2*T**2
Avib_plot = np.array(Avib_plot).reshape(len(spec_number),len(T)) # one row per coverage model
xfit = [[temperature,temperature**2] for temperature in T] # regression features [T, T**2]
formx = xfit[-1] # one feature row; later code uses len(formx)+1 as the number of fitted coefficients

coeffs = [] # per coverage model: [c0, c1, c2]
RMSE = []   # per coverage model: RMSE of the polynomial fit
for model_data in Avib_plot:
    lm = LinearRegression()
    model = lm.fit(xfit,model_data)
    c0, c1, c2 = model.intercept_, model.coef_[0], model.coef_[1]
    coeffs.append([c0,c1,c2])

    # evaluate the fitted polynomial on the temperature grid and measure its error
    yreg = [c0 + c1*temperature + c2*temperature**2 for temperature in T]
    sq_diff = [(reference - fitted)**2 for reference, fitted in zip(model_data,yreg)]
    aRMSE = (sum(sq_diff)/len(xfit))**(1/2)
    RMSE.append(aRMSE)

    # solid line: computed free energy; dotted black line: polynomial fit
    plt.plot(T,model_data,'-',linewidth=3)
    plt.plot(T,yreg,'k:',linewidth=3)

plt.ylabel("Vib. Free Energy (eV)",fontsize=16)
plt.xlabel("Temperature (K)",fontsize=16)
plt.tight_layout()
plt.show()

# Print the T-fit results: one row per polynomial coefficient, one column per coverage model.
n_models = len(coeffs)  # number of coverage models that were fitted
n_orders = len(formx)+1 # intercept plus one coefficient per regression feature

# p_print is laid out order by order: all zero-order values, then first-order, then second-order
p_print = []
for jj in range(0,n_orders):
    for ii in range(0,n_models):
        p_print.append("{:.6e}".format(coeffs[ii][jj]))

# RMSE holds one value per coverage model, so it is formatted by iterating over RMSE itself
# (not over the polynomial order); this stays correct when the number of models is not 3
rmse_print = ["{:.6e}".format(model_rmse) for model_rmse in RMSE]

# slice widths follow the number of coverage models instead of a hard-coded 3
print("T-fit Zero-Order Coefficient (eV):        ",*p_print[:n_models],sep = '    ')
print("T-fit First-Order Coefficient (eV/K):     ",*p_print[n_models:2*n_models],sep = '    ')
print("T-fit Second-Order Coefficient (eV/K²):   ",*p_print[2*n_models:],sep = '    ')
print("T-fit RMSE (eV):                          ",*rmse_print,sep = '    ')

# Second-level regression: each T-fit coefficient (zeroth, first, second order) is fitted
# linearly against coverage, i.e. number of surface species divided by ML_sites.
print()
print("--------------------Fitting T-dependent Slopes and Intercepts from above to Coverage--------------------")
spec_number = [n_adsorbates/ML_sites for n_adsorbates in spec_number] # from here on: coverage in ML
xfit = np.array(spec_number).reshape(-1, 1) # column vector of coverages for the regression
n_coeffs = len(formx)+1 # number of T-fit coefficients per coverage model
fit_label = ["Zeroth","First","Second"]
unit_label = ["(eV)","(eV/K)","(eV/K²)"]
fig, axes = plt.subplots(nrows=1,ncols=n_coeffs,figsize=(12, 4))
tfit_coeffs = [] # per T-fit coefficient: [intercept, slope] of its coverage fit
RMSE_save = []   # per T-fit coefficient: RMSE of its coverage fit
for jj in range(0,n_coeffs):
    ax = axes[jj]
    # values of the jj-th T-fit coefficient across all coverage models
    model_data = [model_coeffs[jj] for model_coeffs in coeffs]
    model = lm.fit(xfit,model_data) # refits the estimator object left over from the T-fit loop
    # each row of xfit is a 1-element array, so yreg, sq_diff and RMSE hold 1-element arrays
    yreg = [model.intercept_ + model.coef_[0]*coverage for coverage in xfit]
    sq_diff = [(value - fitted)**2 for value, fitted in zip(model_data,yreg)]
    RMSE = (sum(sq_diff)/len(xfit))**(1/2)

    # markers: T-fit coefficients; bars: +/- 2*RMSE; dotted black line: linear coverage fit
    ax.plot(spec_number,model_data,'o',markersize=9)
    ax.errorbar(spec_number,model_data,yerr=2*RMSE[0],fmt='k.',markersize=0)
    ax.plot(spec_number,yreg,'k:',linewidth=3)
    ax.set_xlabel(f"{atom_label}* Coverage (ML)",fontsize=16)
    ax.set_ylabel(f"{fit_label[jj]} Coefficient {unit_label[jj]}",fontsize=16)
    ax.ticklabel_format(axis='y',style='scientific',scilimits=(0,0))

    tfit_coeffs.append([model.intercept_,model.coef_[0]])
    RMSE_save.append(RMSE[0])

plt.tight_layout()
plt.show()

# Print the coverage-fit results: one column per T-fit coefficient (zeroth, first, second order).
# tfit_coeffs and RMSE_save each hold one entry per T-fit coefficient, so the loops run over
# len(tfit_coeffs) rather than over the number of coverage models; the two counts only
# coincide when exactly three coverage models are supplied.
n_tfit = len(tfit_coeffs)

# p_print is laid out as all coverage-fit intercepts first, then all coverage-fit slopes
p_print = []
for jj in range(0,2):
    for ii in range(0,n_tfit):
        p_print.append("{:.6e}".format(tfit_coeffs[ii][jj]))
rmse_print = ["{:.6e}".format(fit_rmse) for fit_rmse in RMSE_save]

print("Coverage-fit Zero-Order Coefficient:    ",*p_print[:n_tfit],sep = '    ')
print("Coverage-fit First-Order Coefficient:    ",*p_print[n_tfit:2*n_tfit],sep = '    ')
print("Coverage-fit RMSE:                       ",*rmse_print,sep = '    ')

# Overall error of the two-level regression model Avib = f(T, coverage), compared against
# the directly calculated values stored in Avib_plot_save.
print()
print("--------------------Calculate RMSE over all Temperature and Coverage Values--------------------")
lowest, highest = min(Avib_plot_save), max(Avib_plot_save)
plt.plot([lowest,highest],[lowest,highest],'k:',linewidth=3) # y = x reference line for the parity plot
Avib_fits = []
count = 0 # running offset into the flat Avib_plot_save list
for coverage in spec_number: # spec_number holds coverages (ML) at this point
    # polynomial coefficients in T predicted by the coverage fits at this coverage
    c0 = tfit_coeffs[0][1]*coverage + tfit_coeffs[0][0]
    c1 = tfit_coeffs[1][1]*coverage + tfit_coeffs[1][0]
    c2 = tfit_coeffs[2][1]*coverage + tfit_coeffs[2][0]
    yfit = [c0 + c1*temperature + c2*temperature**2 for temperature in T]
    # matching block of directly calculated values: len(T) consecutive entries per coverage
    yDFT = Avib_plot_save[count:count+len(T)]
    Avib_fits.extend(yfit)
    count = count + len(T)
    plt.plot(yfit,yDFT,'+',markeredgewidth=2)

plt.ylabel("DFT Vib. Free Energy (eV)",fontsize=16)
plt.xlabel("Regression Vib. Free Energy (eV)",fontsize=16)
plt.show()

# root-mean-square deviation over every (coverage, temperature) pair
sq_diff = [(fitted - reference)**2 for fitted, reference in zip(Avib_fits,Avib_plot_save)]
RMSE = (sum(sq_diff)/len(sq_diff))**(1/2)
print()
print("Overall RMSE from multi-level regression (eV): ",RMSE)