# Worflow to train the ML model

This workflow performs the following tasks:
- download the substrate structures from materials project
- generate the adsorbate structures (pymatgen)
- generate the adsorption structures
- write the CRYSTAL inputs
- run the adsorbate+substrate CRYSTAL calculations
- run the adsorbate and substrate CRYSTAL calculations (GHOST atoms)
- calculate the E_ads (BSSE corrected)

In [1]:
# crystal_functions imports
from file_readwrite import write_cry_input, write_cry_gui
from file_readwrite import Crystal_input, Crystal_output
from adsorb import sub_ads_indices
from calculate import cry_ads_energy
from execute import runcry

# pymatgen imports
from pymatgen.core.structure import Molecule, Structure, Lattice
from pymatgen.core.surface import SlabGenerator, generate_all_slabs
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer
from pymatgen.analysis.adsorption import AdsorbateSiteFinder
from pymatgen.ext.matproj import MPRester

# other imports
import shutil
import pandas as pd
import numpy as np

### Make adsorbates
Make the adsorbate structures using the Molecule class in pymatgen

In [2]:
O = Molecule('O',[[0.0, 0.0, 0.0]])
CO = Molecule('CO',[[0.0, 0.0, 0.0],[ 0.0, 0.0, 1.128]])
H2O = Molecule('HHO',[[0.76,0.00,0.50],[0.76, 0.00,-0.50],[0.0, 0.0, 0.0]])
adsorbates = [O,CO,H2O]
adsorbates = [O] #for testing purpouses

### Make substrates

#### Bulk
Download the bulk structures from the Materials Project

In [3]:
with MPRester("My-ID") as m:    
    cu = m.get_structure_by_material_id("mp-30")
    mgo = m.get_structure_by_material_id("mp-1265")
    rutile = m.get_structure_by_material_id("mp-2657")
materials = [cu,mgo,rutile]
materials = [cu] #for testing purpouses 

Ensure the conventional cell is used

In [4]:
bulks = []
substrates = []
for material in materials:
    bulks.append(SpacegroupAnalyzer(material).get_conventional_standard_structure())

#### Slab
Generate the slabs using the pymatgen SlabGenerator function.

In [5]:
substrates =[]
for bulk in bulks:
    slabs = generate_all_slabs(bulk, max_index=1, min_slab_size=2., min_vacuum_size=10.0, 
                                   center_slab=False, symmetrize=True, in_unit_planes=False) 
    substrates.extend(slabs)

### Adsorb
Place the adsorbate on both surfaces of the slab (symmetric sites)

In [6]:
sub_composition = []
ads_composition = []
miller_indices = []    
n_layers = []
sub_ads_structures = []
for substrate in substrates:
    for adsorbate in adsorbates:
        adsorption_structures = AdsorbateSiteFinder(substrate).adsorb_both_surfaces(adsorbate,repeat=[1,1,1])
        sites = AdsorbateSiteFinder(substrate).find_adsorption_sites()    
        for i,adsorption_structure in enumerate(adsorption_structures):
            miller_indices.append(adsorption_structure.miller_index)
            n_layers.append(len(np.unique(substrate.cart_coords[:,2])))  
            sub_composition.append(substrate.composition.reduced_formula)
            ads_composition.append(str(adsorbate.composition.hill_formula).replace(" ", ""))
            sub_ads_structures.append(adsorption_structure)

### Prepare inputs (geometry optimisation)
Define CRYSTAL input parameters.

In [12]:
geom_block = ['Adsorption tests\n','EXTERNAL\n','EXTPRT\n']
bs_block = ['BASISSET\n', 'STO-3G\n']
func_block = ['DFT\n', 'B3LYP\n', 'XXLGRID\n', 'ENDDFT\n']
scf_block = [['TOLINTEG\n', '5 5 5 5 10\n'],
             ['SHRINK\n', '6 12\n'],
             ['MAXCYCLE\n', '20\n'],
             ['FMIXING\n', '70\n'],
             'DIIS\n',
             'ENDSCF\n']

#### Write inputs
Write the inputs to file.

In [27]:
file_names = []
for i,structure in enumerate(sub_ads_structures):
    input_name = 'data/'+str(sub_composition[i]).replace(" ", "")+'_'+str(ads_composition[i]).replace(" ", "")+'_' \
               +''.join(str(x) for x in structure.miller_index)+'_'+str(i+1)+'.d12'
    file_names.append(input_name[:-4])
    write_cry_input(input_name,crystal_blocks=[geom_block,bs_block,func_block,scf_block],external_obj=structure)

### Run the calculations
Use the crystal_functions runcry function to execute CRYSTAL (please ensure the path to your runcry17 is defined in execute.runcry()

In [14]:
for cry_input in file_names:
    runcry(cry_input)

### Read the optimised geometry
Use the crystal_functions Crystal_output class to extract the final energy.

In [29]:
E_full_system = []
for cry_input in file_names:
    cry_output = Crystal_output(cry_input+'.out')
    if cry_output.converged == True:
        E_full_system.append(cry_output.final_energy()) 

### Prepare inputs for the BSSE calculations
Generate the inputs for the BSSE calculation. The indices of the GHOST atoms are obtained from the pymatgen.core.structure.Slab object ('adsorbate' and 'substrate').

In [18]:
for i,cry_input in enumerate(file_names):
    indices = sub_ads_indices(sub_ads_structures[i])
    #Substrate        
    bsse_sub_inp = Crystal_input(cry_input+'.d12')
    bsse_sub_inp.add_ghost(indices['adsorbate'])
    bsse_sub_inp.opt_to_sp()
    bsse_sub_inp_name = cry_input+'_BSSE_sub.d12'
    write_cry_input(bsse_sub_inp_name,bsse_sub_inp)

    shutil.copy(cry_input+'.gui',cry_input+'_BSSE_sub.gui')

    #Adsorbate
    bsse_ads_inp = Crystal_input(cry_input+'.d12')
    bsse_ads_inp.add_ghost(indices['substrate'])
    bsse_sub_inp.opt_to_sp()
    bsse_ads_inp_name = cry_input+'_BSSE_ads.d12'
    write_cry_input(bsse_ads_inp_name,bsse_ads_inp)

    shutil.copy(cry_input+'.gui',cry_input+'_BSSE_ads.gui')

### Run the BSSE calculations
Use the crystal_functions runcry function to execute CRYSTAL (please ensure the path to your runcry17 is defined in execute.runcry()

In [33]:
E_sub_BSSE = []
E_ads_BSSE = []
for i,cry_input in enumerate(file_names):
    runcry(cry_input+'_BSSE_sub')
    cry_BSSE_sub_output = Crystal_output(cry_input+'_BSSE_sub.out')
    if cry_BSSE_sub_output.converged == True:
        E_sub_BSSE.append(cry_BSSE_sub_output.final_energy())
    
    
    runcry(cry_input+'_BSSE_ads')
    cry_BSSE_ads_output = Crystal_output(cry_input+'_BSSE_ads.out')
    if cry_BSSE_ads_output.converged == True:
        E_ads_BSSE.append(cry_BSSE_ads_output.final_energy())

### Calculate the adsorption energy
Use the crystal_functions cry_ads_energy function to get the adsorption energy (BSSE corrected).

In [35]:
E_adsorption = []
for i in range(len(file_names)):
    E_adsorption.append(cry_ads_energy(E_full_system[i],E_sub_BSSE[i],E_ads_BSSE[i]))

### Create Dataframe

In [39]:
df = pd.DataFrame(list(zip(file_names, sub_composition, n_layers, ads_composition,  miller_indices, E_adsorption,
                           E_sub_BSSE,E_ads_BSSE,E_full_system)),
               columns =['File name', 'Substrate','N layers','Adsorbate','Miller Indices','E adsorption (BSSE)',
                         'E sub (ads ghost)','E ads (ads ghost)','E full system'])
display(df)
print('*The data presented in this table is for testing the Jupyter Notebook only. The calculations were not performed')
print('at an acceptable level of accuracy and were not converged.')

Unnamed: 0,File name,Substrate,N layers,Adsorbate,Miller Indices,E adsorption (BSSE),E sub (ads ghost),E ads (ads ghost),E full system
0,data/Cu_O_100_1,Cu,1,O,"(1, 1, 1)",2409.283222,-47341.648962,-50263.955891,-95196.321631
1,data/Cu_O_100_2,Cu,1,O,"(1, 1, 1)",-11638.113883,-83509.646796,-82921.005588,-178068.766266
2,data/Cu_O_100_3,Cu,1,O,"(1, 1, 1)",632.897215,-44125.322377,-4025.968968,-47518.39413
3,data/Cu_O_100_4,Cu,2,O,"(1, 1, 0)",12274.686627,-87421.473073,-92809.964171,-167956.750617
4,data/Cu_O_100_5,Cu,2,O,"(1, 1, 0)",13841.18653,-89071.485326,-92285.435147,-167515.733942
5,data/Cu_O_100_6,Cu,2,O,"(1, 1, 0)",-9386.445718,-86475.931476,-87026.095877,-182888.473072
6,data/Cu_O_100_7,Cu,2,O,"(1, 1, 0)",-11638.113883,-83509.646796,-82921.005588,-178068.766266
7,data/Cu_O_100_8,Cu,2,O,"(1, 0, 0)",-10107.443535,-86290.160382,-86199.967101,-182597.571017
8,data/Cu_O_100_9,Cu,2,O,"(1, 0, 0)",1442.160954,-88251.935834,-4025.50184,-90835.27672
9,data/Cu_O_100_10,Cu,2,O,"(1, 0, 0)",-10796.145444,-86251.217692,-86443.87404,-183491.237176


*The data presented in this table is for testing the Jupyter Notebook only. The calculations were not performed
at an acceptable level of accuracy and were not converged.
