In [35]:
import json
import logging
import os
import pickle
from collections import Counter

import ase.io
import numpy as np
import pandas as pd
import torch
from ase.build import add_adsorbate, molecule, surface
from ase.constraints import FixAtoms
from ocpmodels.common.relaxation.ase_utils import OCPCalculator
from pymatgen.core.surface import generate_all_slabs, SlabGenerator
from pymatgen.ext.matproj import MPRester
from pymatgen.io.ase import AseAtomsAdaptor

In [36]:
# Model config and checkpoint handed to the calculator; both attempts below
# use exactly the same pair.
config_yml_path = "ocp/configs/is2re/all/dimenet_plus_plus/dpp.yml"
checkpoint_path = "dimenetpp_all.pt"

try:
    # Define the calculator
    calc = OCPCalculator(config_yml=config_yml_path, checkpoint=checkpoint_path)
except Exception as e:
    # The first construction failed: show the error, then try exactly once more
    # with identical arguments. A second failure propagates to the caller.
    print(e)
    calc = OCPCalculator(config_yml=config_yml_path, checkpoint=checkpoint_path)

In [37]:
# Bare expression as the last line of the cell: the notebook displays the
# calculator's repr, confirming that `calc` was created by the previous cell.
calc

<ocpmodels.common.relaxation.ase_utils.OCPCalculator at 0x7fc09a9f8160>

In [38]:
_LOG = logging.getLogger(__name__)

# Atom-index range tagged 1 ("surface") on every adslab.
# NOTE(review): this range is hard-coded; it presumably matches 27-atom slabs
# whose last nine atoms form the top layer -- confirm against the stored slabs.
_SURFACE_ATOMS = slice(18, 27)


def _reaction_record(r, calc):
    """Build the record dict for one reaction entry.

    Returns None when the reaction does not have exactly one product (such
    entries are filtered out on purpose). Any other problem -- missing key,
    malformed JSON, adsorbate name unknown to `molecule`, calculator error --
    is raised to the caller.
    """
    ## retrieve slab and adsorbate
    ase_slab = r['reactionSystems']['star']
    d_prod = json.loads(r['products'])
    if len(d_prod) != 1:
        return None
    (prod_name, pval), = d_prod.items()
    pkey = prod_name.replace('star', '')

    ## additional info -- parsed before the calculator runs so that malformed
    ## entries are rejected without paying for an inference call
    sc = r['surfaceComposition']
    facet = r['facet']
    reaction_energy = r['reactionEnergy']
    nre = reaction_energy / pval

    eqn = r['Equation']
    ae = r['activationEnergy']
    cc = r['chemicalComposition']
    d_cvr = json.loads(r['coverages'])
    dft_code = r['dftCode']
    dft_func = r['dftFunctional']
    pubid = r['pubId']
    d_reactants = json.loads(r['reactants'])
    sites = json.loads(r['sites'])
    username = r['username']

    ## slab + adsorbate
    adslab = ase_slab.copy()
    n_slab_atoms = len(adslab)
    add_adsorbate(adslab, molecule(pkey), 3, offset=(1, 1))

    ## set additional info
    # Tags: 0 = fixed slab atoms, 1 = surface atoms, 2 = adsorbate atoms.
    # add_adsorbate appends the adsorbate after the slab atoms, so the
    # adsorbate starts at index len(slab) rather than at a fixed index.
    tags = np.zeros(len(adslab))
    tags[_SURFACE_ATOMS] = 1
    tags[n_slab_atoms:] = 2
    adslab.set_tags(tags)
    fixed = FixAtoms(indices=[atom.index for atom in adslab if atom.tag == 0])
    adslab.set_constraint(fixed)
    adslab.center(vacuum=13.0, axis=2)
    adslab.set_pbc(True)

    ## calculator, energy, embeddings
    adslab.calc = calc
    # The energy value itself is not stored; the call is kept because the
    # embeddings are read from the model right after it.
    # NOTE(review): `embs` is presumably filled by the forward pass that this
    # call triggers -- confirm against the model code.
    adslab.get_potential_energy()
    embs = calc.trainer.model.module.embs[0]
    mean_ads_embs = torch.mean(embs, dim=0)
    min_ads_embs = torch.min(embs, dim=0).values
    max_ads_embs = torch.max(embs, dim=0).values
    sum_ads_embs = torch.sum(embs, dim=0)

    return {
        'ase_slab': ase_slab,
        'd_prod': d_prod,
        'pkey': pkey,
        'mean_ads_embs': mean_ads_embs,
        'min_ads_embs': min_ads_embs,
        'max_ads_embs': max_ads_embs,
        'sum_ads_embs': sum_ads_embs,
        # Flat feature vector, in the order min | mean | max | sum.
        'ads_embs': [float(x) for x in min_ads_embs] + [float(x) for x in mean_ads_embs]
                    + [float(x) for x in max_ads_embs] + [float(x) for x in sum_ads_embs],

        'sc': sc,
        'facet': facet,
        'pval': pval,
        're': reaction_energy,
        'nre': nre,

        'eqn': eqn,
        'ae': ae,
        'cc': cc,
        'd_cvr': d_cvr,
        'dft_code': dft_code,
        'dft_func': dft_func,
        'pubid': pubid,
        'd_reactants': d_reactants,
        'sites': sites,
        'username': username,
    }


def _records_to_frame(records):
    """Turn reaction records into the feature table: columns e0..eN-1 plus 'nre'."""
    keys = ['sc', 'facet', 'pkey']
    meta = pd.DataFrame({name: [rec[name] for rec in records] for name in keys + ['nre']})
    embs = pd.DataFrame([rec['ads_embs'] for rec in records], index=meta.index)
    # Column count follows the actual vector length instead of assuming 256*4.
    emb_names = ['e' + str(i) for i in range(embs.shape[1])]
    embs.columns = emb_names

    frame = pd.concat([meta, embs], axis=1)
    # After sorting by 'nre', first() keeps the lowest-'nre' row of each
    # (sc, facet, pkey) group.
    frame = frame.sort_values(by='nre')
    frame = frame.groupby(keys).first().reset_index()
    return frame[emb_names + ['nre']]


def gen_data(loaded_reactions, calc, curdir, pid):
    """Build and save the embedding dataset for one publication.

    Parameters
    ----------
    loaded_reactions : iterable of dict-like reaction entries. Each usable entry
        provides the keys read in `_reaction_record` ('reactionSystems',
        'products', 'reactionEnergy', ...).
    calc : calculator assigned to each slab+adsorbate structure; must expose
        `trainer.model.module.embs` after an energy evaluation.
    curdir : str prefix prepended verbatim to the output paths.
    pid : publication id used in the output file names and in log messages.

    Returns
    -------
    pandas.DataFrame with one row per (sc, facet, pkey) group, holding the
    embedding columns and 'nre', or None when no reaction produced a record.

    Side effects
    ------------
    When at least one record exists, writes
    `{curdir}v2_script_10/exp12/dict_{pid}.pickle` (all records) and
    `{curdir}v2_script_10/exp12/df_{pid}.pickle` (the returned frame).

    Reactions that raise are skipped (best effort) but no longer silently:
    each is logged at DEBUG level and one WARNING per call summarises how many
    were skipped, grouped by exception type.
    """
    list_rinfo = []
    failures = Counter()
    n_seen = 0
    for ir, r in enumerate(loaded_reactions):
        n_seen += 1
        try:
            rinfo = _reaction_record(r, calc)
        except Exception as exc:
            failures[type(exc).__name__] += 1
            _LOG.debug("%s: skipping reaction %d: %r", pid, ir, exc)
            continue
        if rinfo is not None:
            list_rinfo.append(rinfo)

    if failures:
        breakdown = ', '.join(f'{name}={count}' for name, count in sorted(failures.items()))
        _LOG.warning("%s: skipped %d of %d reactions (%s)",
                     pid, sum(failures.values()), n_seen, breakdown)

    if not list_rinfo:
        return None

    with open(f'{curdir}v2_script_10/exp12/dict_{pid}.pickle', 'wb') as f:
        pickle.dump(list_rinfo, f)

    df = _records_to_frame(list_rinfo)
    df.to_pickle(f'{curdir}v2_script_10/exp12/df_{pid}.pickle')
    return df

In [39]:
# Publication ids to process, in processing order. Each id selects the input
# file `reactions_{pid}.pickle` and names the output files written for it.
list_pid = [
    'LogadottirAmmonia2003',
    'HonkalaAmmonia2005',
    'CatappTrends2008',
    'JiangTrends2009',
    'WangUniversal2011',
    'GrabowDescriptor-Based2011',
    'StudtCO2012',
    'BehrensThe2012',
    'FerrinHydrogen2012',
    'MedfordElementary2012',
    'MontoyaInsights2013',
    'MedfordAssessing2014',
    'TangNickel--silver2014',
    'FalsigOn2014',
    'TsaiTuning2014',
    'TsaiActive2014',
    'YooTheoretical2014',
    'MedfordThermochemistry2014',
    'ChanMolybdenum2014',
    'Unpublished',
    'MontoyaThe2015',
    'SeitzA2016',
    'HoffmannFramework2016',
    'YangIntrinsic2016',
    'GauthierSolvation2017',
    'mgfieldslanders2018',
    'MichalLixCoO22017',
    'RolingConfigurational2017',
    'RolingBimetallic2017',
    'FesterEdge2017',
    'BukasORR2017',
    'BoesAdsorption2018',
    'BajdichWO32018',
    'SchumannSelectivity2018',
    'BackPrediction2018',
    'DickensElectronic2018',
    'SandbergStrongly2018',
    'ChenUnderstanding2018',
    'BackSingle2018',
    'PatelTheoretical2018',
    'HansenFirst2018',
    'ClarkInfluence2018',
    'SniderRevealing2018',
    'Park2D2019',
    'ZhaoTrends2019',
    'Schlexer2019pH',
    'SkafteSelective2019',
    'SharadaAdsorption2019',
    'GauthierImplications2019',
    'StricklerSystematic2019',
    'JuUnraveling2019',
    'ZhaoImproved2019',
    'GauthierFacile2019',
    'MeffordInterpreting2019',
    'TangFrom2020',
    'FloresActive2020',
    'HubertAcidic2020',
    'TangModeling2020',
    'SanchezCatalyst2020',
    'PengRole2020',
    'GrewalHighly2019',
    'LeeEpitaxial2020',
    'BaeumerTuning2020',
    'ZhengOrigin2020',
    'LandersUnderstanding2020',
    'WangTheory-aided2020',
    'PatelGeneralizable2021',
    'JiangModelling2021',
    'WangAchieving2021',
    'Gauthierrole2021',
    'TangExploring2021',
    'CamposEfficient2021',
    'HalldinAssessing2021',
    'CraigHigh-throughput2021',
    'Jia-ChengAtomistic2021',
    'StreibelMicrokinetic2021',
    'ShiLong-term2021',
    'PengTrends2022',
    'LiuCatalytic2022',
    'LiScreening2021',
    'AraComputational2022',
    'ComerUnraveling2022',
    'LungerCation-dependent2022',
    'SainiElectronic2022',
    'TettehCompressively2022',
    'KaiData-driven2022',
    'HossainInvestigation2022',
    'KoshyInvestigation2022',
    'WeiInsights2022',
    'RaoResolving2022',
    # 'MamunHighT2019',
]

In [40]:
# Prefix prepended verbatim to every data path; '' means paths are relative to
# the working directory.
curdir = '' ## '/curdir/' ## ''
for pid in list_pid:
    reactions_path = f'{curdir}v2_script_10/exp12/reactions_{pid}.pickle'
    with open(reactions_path, 'rb') as f:
        loaded_reactions = pickle.load(f)
    num_sample = len(loaded_reactions)
    df = gen_data(loaded_reactions, calc, curdir, pid)
    # One line per publication: id, number of reactions loaded and, when a
    # dataset was produced, its shape.
    report = (pid, num_sample) if df is None else (pid, num_sample, df.shape)
    print(*report)

LogadottirAmmonia2003 12
HonkalaAmmonia2005 1
CatappTrends2008 23
JiangTrends2009 21
WangUniversal2011 440
GrabowDescriptor-Based2011 4
StudtCO2012 1249
BehrensThe2012 10
FerrinHydrogen2012 90
MedfordElementary2012 12
MontoyaInsights2013 6
MedfordAssessing2014 24
TangNickel--silver2014 5
FalsigOn2014 219
TsaiTuning2014 2
TsaiActive2014 12
YooTheoretical2014 40
MedfordThermochemistry2014 19
ChanMolybdenum2014 13
Unpublished 27
MontoyaThe2015 248
SeitzA2016 24
HoffmannFramework2016 144
YangIntrinsic2016 24
GauthierSolvation2017 4
mgfieldslanders2018 49
MichalLixCoO22017 24
RolingConfigurational2017 104
RolingBimetallic2017 3850
FesterEdge2017 16
BukasORR2017 4
BoesAdsorption2018 9496 (49, 1025)
BajdichWO32018 250 (84, 1025)
SchumannSelectivity2018 170
BackPrediction2018 67
DickensElectronic2018 901
SandbergStrongly2018 56
ChenUnderstanding2018 6
BackSingle2018 141
PatelTheoretical2018 28
HansenFirst2018 845
ClarkInfluence2018 36
SniderRevealing2018 44
Park2D2019 9
ZhaoTrends2019 129
Schl