In [2]:
import os
import shutil
from pathlib import Path

def shorten_outcar(filepath_in, filepath_out):
    """Write a shortened copy of the VASP OUTCAR at ``filepath_in`` to ``filepath_out``.

    Three pieces of the input are kept, in this order:

    1. everything up to and including the line after the first ``POSCAR =`` line;
    2. from 15 lines before the first ``k-points in units of`` line up to (but
       not including) the line 2 above the first ``k-point   1 :`` line;
    3. everything from 9 lines after the last
       ``aborting loop because EDIFF is reached`` line to the end of the file.

    An "Iteration FINAL" banner and ten "OUTCAR has been truncated" notices are
    written between pieces 2 and 3.

    Args:
        filepath_in: Path of the OUTCAR to read (decoded as ISO-8859-1).
        filepath_out: Path of the file to create or overwrite. It is written
            with the platform default encoding.

    Raises:
        ValueError: If the input file is empty (nothing is written).

    Warns:
        UserWarning: If a marker line is missing. The search then falls back to
            the last line (forward searches) or to line index 1 (backward
            search), so an output file is still written but may not be
            truncated as intended.
    """
    import warnings

    with open(filepath_in, 'r', encoding="ISO-8859-1") as f:
        lines = f.readlines()
    if not lines:
        raise ValueError(f"{filepath_in} is empty; nothing to shorten")

    def locate(marker, from_end=False):
        """Return the index of the first (or last) line containing ``marker``."""
        order = range(len(lines) - 1, -1, -1) if from_end else range(len(lines))
        for idx in order:
            if marker in lines[idx]:
                return idx
        warnings.warn(
            f"marker {marker!r} not found in {filepath_in}; "
            "the shortened file may be incomplete or untruncated"
        )
        return 1 if from_end else len(lines) - 1

    idx_start = locate('POSCAR =')
    idx_middle_start = locate('k-points in units of')
    idx_middle_end = locate('k-point   1 :')
    idx_end = locate('aborting loop because EDIFF is reached', from_end=True)

    start_outcar = lines[:idx_start + 2]
    # Clamp at 0: a negative slice bound would silently count from the end of the file.
    middle_outcar = lines[max(idx_middle_start - 15, 0):max(idx_middle_end - 2, 0)]
    end_outcar = lines[idx_end + 9:]

    # Write the shortened OUTCAR
    with open(filepath_out, 'w') as f:
        f.writelines(start_outcar)
        f.writelines(middle_outcar)
        f.write("\n--------------------------------------- Iteration      FINAL  ---------------------------------------\n\n\n")
        for _ in range(10):
            f.write("--------------------OUTCAR has been truncated-------------------------\n\n\n")
        f.writelines(end_outcar)
# Structure labels used in the original data directories -> folder names used
# in the curated output tree.
structure_convert = {
    "AD_SLAB": "Molecule-Surface",
    "SLAB_CP": "Surface",
    "AD_CP": "Molecule",
    "SLAB_FS": "Surface",
    "AD_FS": "Molecule",
    "adsorbate_slab": "Molecule-Surface",
    "adsorbate": "Molecule",
    "slab": "Surface",
}

# Per crystal: largest cluster index to process for each basis set, in the
# order the basis sets are enumerated by the transfer loops.
crystal_skzcam_cluster_size = {
    "MgO": [7, 5, 3],
    "r-TiO2": [9, 7, 4],
    "a-TiO2": [9, 7, 4],
}

# Descriptive adsorbate configuration name -> short label.
adsorbate_config = {
    "NO Vertical-Hollow": "NO_1",
    "NO Vertical-Mg": "NO_2",
    "NO Bent-Bridge": "NO_3",
    "NO Bent-Mg": "NO_4",
    "NO Bent-O": "NO_5",
    "NO Dimer": "NO_6",
    "CO2 Physisorbed": "CO2_2",
    "CO2 Chemisorbed": "CO2_3",
    "CH3OH Tilted": "CH3OH_1",
    "CH3OH Parallel": "CH3OH_2",
    "CH3OH Tetramer": "CH3OH_4",
    "H2O Monomer": "H2O",
    "H2O Tetramer": "H2O_4",
    "N2O Parallel": "N2O_1",
    "N2O Tilted": "N2O_2",
    "CO2 Parallel": "CO2_1",
    "CO2 Tilted": "CO2_2",
    "CH4 Monolayer": "CH4_ML",
    "C2H6 Monolayer": "C2H6_ML",
}

# Adsorbate systems listed for each surface.
molecule_surface_systems = {
    "MgO": [
        "CH4", "CH4 Monolayer",
        "C2H6", "C2H6 Monolayer",
        "CO", "C6H6",
        "N2O Parallel", "N2O Tilted",
        "NO Vertical-Hollow", "NO Vertical-Mg", "NO Bent-Bridge",
        "NO Bent-Mg", "NO Bent-O", "NO Dimer",
        "H2O Monomer", "H2O Tetramer",
        "CH3OH Tilted", "CH3OH Parallel", "CH3OH Tetramer",
        "NH3",
        "CO2 Physisorbed", "CO2 Chemisorbed",
    ],
    "r-TiO2": ["CH4", "CO2 Parallel", "CO2 Tilted", "H2O", "CH3OH"],
    "a-TiO2": ["H2O", "NH3"],
}

# Exchange-correlation functional folders available for each crystal.
crystal_xc_func_ensemble = {
    "MgO": [
        "01_PBE-D2-Ne",
        "02_revPBE-D4",
        "03_vdW-DF",
        "04_rev-vdW-DF2",
        "05_PBE0-D4",
        "06_B3LYP-D2-Ne",
    ],
    "r-TiO2": [
        "01_PBE-U-TSHI",
        "02_revPBE-D4",
        "03_vdW-DF",
        "04_rev-vdW-DF2",
        "05_R2SCAN-rVV10",
        "06_HSE06-D4",
    ],
    "a-TiO2": [
        "01_PBE-U-TSHI",
        "02_revPBE-D4",
        "03_vdW-DF",
        "04_rev-vdW-DF2",
        "05_R2SCAN-rVV10",
        "06_HSE06-D4",
    ],
}

# Crystal names used in the original data -> labels used in the curated tree
# (and as keys of the tables above).
crystal_convert = {
    "MgO": "MgO",
    "TiO2_rutile": "r-TiO2",
    "TiO2_anatase": "a-TiO2",
}


In [11]:
# Transfer CO2 Conformational calculations

def shorten_dfcc(filepath_in, filepath_out):
    """Write a shortened copy of ``{filepath_in}/mrcc.out`` to ``filepath_out``.

    The output consists of:

    1. the head of the file, up to and including the 9 lines that follow the
       first line containing ``ghost=serialno`` or ``qmmm=Amber``;
    2. ten "Output has been truncated" notices;
    3. the tail of the file, starting one line before the last line that
       contains `` Executing ccsd...``.

    Args:
        filepath_in: Directory that contains ``mrcc.out`` (decoded as ISO-8859-1).
        filepath_out: Path of the file to create or overwrite. It is written
            with the platform default encoding.

    Raises:
        ValueError: If ``mrcc.out`` is empty (nothing is written).

    Warns:
        UserWarning: If a marker line is missing. The search then falls back to
            the last line (forward search) or to line index 1 (backward
            search), so an output file is still written but may not be
            truncated as intended.
    """
    import warnings

    source_file = f'{filepath_in}/mrcc.out'
    with open(source_file, 'r', encoding="ISO-8859-1") as f:
        lines = f.readlines()
    if not lines:
        raise ValueError(f"{source_file} is empty; nothing to shorten")

    def locate(*markers, from_end=False):
        """Return the index of the first (or last) line containing any of ``markers``."""
        order = range(len(lines) - 1, -1, -1) if from_end else range(len(lines))
        for idx in order:
            if any(marker in lines[idx] for marker in markers):
                return idx
        warnings.warn(
            f"none of the markers {markers!r} found in {source_file}; "
            "the shortened file may be incomplete or untruncated"
        )
        return 1 if from_end else len(lines) - 1

    idx_start = locate('ghost=serialno', 'qmmm=Amber')
    idx_end = locate(' Executing ccsd...', from_end=True)

    start_outcar = lines[:idx_start + 10]
    # Clamp at 0: a negative slice start would silently count from the end of the file.
    end_outcar = lines[max(idx_end - 1, 0):]

    # Write the shortened output
    with open(filepath_out, 'w') as f:
        f.writelines(start_outcar)
        for _ in range(10):
            f.write("--------------------Output has been truncated-------------------------\n\n\n")
        f.writelines(end_outcar)

# Short tags -> descriptive labels; only the 'CO2_3' entry is looked up below.
molecule_conv_dict = {
    'CO2_3': 'CO2 Chemisorbed',
    'NO_6': 'NO Dimer',
    '_B3LYP': '06_B3LYP-D2-Ne',
    '': '02_revPBE-D4',
}

# Shorten every mrcc.out of the CO2 conformational-energy runs (one per
# functional, basis set and structure) into the Data/07-Econf tree.
for molecule in ['CO2_3']:
    molecule_label = molecule_conv_dict[molecule].replace(' ', '_')
    for xc_func in crystal_xc_func_ensemble['MgO']:
        xc_func_label = xc_func
        for basis in ['TZ', 'QZ']:
            for structure in ['RS', 'FS']:
                if structure == 'FS':
                    structure_label = 'Molecule-Surface'
                else:
                    structure_label = 'Molecule'
                in_path = f'Data_original/cWFT/24_11_24-CO2_Conf/{xc_func}/{structure}/{basis}'
                out_dir = f'Data/07-Econf/MgO/{molecule_label}/07_CCSDT/{xc_func_label}/{structure_label}'
                Path(out_dir).mkdir(parents=True, exist_ok=True)
                out_path = f'{out_dir}/CCSDT_aV{basis}.mrcc.out'
                shorten_dfcc(in_path, out_path)




In [7]:
# Transfer CO2 different XC functionals
# Transferring MP2 data

def shorten_orca(filepath_in, filepath_out):
    """Write a shortened copy of ``{filepath_in}/orca.out`` to ``filepath_out``.

    The output consists of, in order:

    1. the input echo: from the line before the first ``INPUT FILE`` header
       through the line after the first ``END OF INPUT`` line;
    2. the SCF energy block: from the line before the first
       ``TOTAL SCF ENERGY`` line through the line after the first
       ``Virial Ratio       :`` line;
    3. ten "Output has been truncated" notices;
    4. the last ``RI-MP2 CORRELATION ENERGY:`` line with one line either side;
    5. the last ``FINAL SINGLE POINT ENERGY`` line with the 7 lines before it
       and the line after it;
    6. the last line of the file.

    Args:
        filepath_in: Directory that contains ``orca.out`` (decoded as ISO-8859-1).
        filepath_out: Path of the file to create or overwrite. It is written
            with the platform default encoding.

    Raises:
        ValueError: If ``orca.out`` is empty (nothing is written).

    Warns:
        UserWarning: If a marker line is missing. The search then falls back to
            the last line (forward searches) or to line index 1 (backward
            searches), so an output file is still written but the affected
            section will not contain the intended lines.
    """
    import warnings

    source_file = f'{filepath_in}/orca.out'
    with open(source_file, 'r', encoding="ISO-8859-1") as f:
        lines = f.readlines()
    if not lines:
        raise ValueError(f"{source_file} is empty; nothing to shorten")

    def locate(marker, from_end=False):
        """Return the index of the first (or last) line containing ``marker``."""
        order = range(len(lines) - 1, -1, -1) if from_end else range(len(lines))
        for idx in order:
            if marker in lines[idx]:
                return idx
        warnings.warn(
            f"marker {marker!r} not found in {source_file}; "
            "the shortened file may be incomplete"
        )
        return 1 if from_end else len(lines) - 1

    def window(idx, before, after):
        """Lines from ``idx - before`` through ``idx + after``, clamped at the file start."""
        # Without the clamp a negative start would silently count from the end of the file.
        return lines[max(idx - before, 0):idx + after + 1]

    idx_start_start = locate('                                       INPUT FILE')
    idx_start_end = locate('END OF INPUT')
    idx_middle_start = locate('TOTAL SCF ENERGY')
    idx_middle_end = locate('Virial Ratio       :')
    idx_end_start = locate('RI-MP2 CORRELATION ENERGY:', from_end=True)
    idx_end_end = locate('FINAL SINGLE POINT ENERGY', from_end=True)

    start_outcar = lines[max(idx_start_start - 1, 0):idx_start_end + 2]
    middle_outcar = lines[max(idx_middle_start - 1, 0):idx_middle_end + 2]
    end_outcar = window(idx_end_start, 1, 1) + window(idx_end_end, 7, 1)

    # Write the shortened output
    with open(filepath_out, 'w') as f:
        f.writelines(start_outcar)
        f.writelines(middle_outcar)
        for _ in range(10):
            f.write("--------------------Output has been truncated-------------------------\n\n\n")
        f.writelines(end_outcar)
        f.write(lines[-1])

# Structure labels used in the original data directories -> folder names used
# in the curated output tree. The full table is repeated here (rather than
# only the three labels this cell uses) so that re-assigning the name does not
# remove the 'SLAB_FS' / 'AD_FS' / 'adsorbate*' / 'slab' entries that other
# cells look up.
structure_convert = {
    'AD_SLAB': 'Molecule-Surface',
    'SLAB_CP': 'Surface',
    'AD_CP': 'Molecule',
    'SLAB_FS': 'Surface',
    'AD_FS': 'Molecule',
    'adsorbate_slab': 'Molecule-Surface',
    'adsorbate': 'Molecule',
    'slab': 'Surface'
}

# Per crystal: largest cluster index to transfer for the DZ, TZ and QZ basis
# sets (same order as the basis loop below).
crystal_skzcam_cluster_size = {
    'MgO': [7,5,3],
    'r-TiO2': [9,7,4],
    'a-TiO2': [9,7,4]
}

# Descriptive adsorbate configuration name -> short label.
adsorbate_config = {
    'NO Vertical-Hollow': 'NO_1',
    'NO Vertical-Mg': 'NO_2',
    'NO Bent-Bridge': 'NO_3',
    'NO Bent-Mg': 'NO_4',
    'NO Bent-O': 'NO_5',
    'NO Dimer': 'NO_6',
    'CO2 Physisorbed': 'CO2_2',
    'CO2 Chemisorbed': 'CO2_3',
    'CH3OH Tilted': 'CH3OH_1',
    'CH3OH Parallel': 'CH3OH_2',
    'CH3OH Tetramer': 'CH3OH_4',
    'H2O Monomer': 'H2O',
    'H2O Tetramer': 'H2O_4',
    'N2O Parallel': 'N2O_1',
    'N2O Tilted': 'N2O_2',
    'CO2 Parallel': 'CO2_1',
    'CO2 Tilted': 'CO2_2',
    'CH4 Monolayer': 'CH4_ML',
    'C2H6 Monolayer': 'C2H6_ML',
}

# Adsorbate systems listed for each surface.
molecule_surface_systems = {
    'MgO': ['CH4', 'CH4 Monolayer', 'C2H6', 'C2H6 Monolayer', 'CO', 'C6H6', 'N2O Parallel', 'N2O Tilted', 'NO Vertical-Hollow', 'NO Vertical-Mg', 'NO Bent-Bridge', 'NO Bent-Mg', 'NO Bent-O', 'NO Dimer', 'H2O Monomer', 'H2O Tetramer', 'CH3OH Tilted', 'CH3OH Parallel', 'CH3OH Tetramer', 'NH3', 'CO2 Physisorbed', 'CO2 Chemisorbed'],
    'r-TiO2': ['CH4','CO2 Parallel','CO2 Tilted','H2O','CH3OH'],
    'a-TiO2': ['H2O','NH3']
}

# Shorten the MP2 ORCA outputs for every functional / cluster / structure /
# basis combination and copy the matching point-charge file next to them.
for crystal in ['MgO']:
    # Both lookup tables are keyed by the converted crystal label.
    crystal_label = crystal_convert[crystal]
    cluster_limits = crystal_skzcam_cluster_size[crystal_label]
    for molecule in ['CO2 Chemisorbed']: #molecule_surface_systems[crystal_convert[crystal]]:
        for functional in crystal_xc_func_ensemble[crystal_label]:
            for skzcam_cluster_size in range(1, cluster_limits[0]+1):
                for structure in ['AD_SLAB', 'SLAB_CP','AD_CP']:
                    # NOTE(review): these two labels are computed but not used in the paths below.
                    if molecule in adsorbate_config:
                        molecule_label_in = adsorbate_config[molecule]
                    else:
                        molecule_label_in = molecule
                    molecule_label_out = molecule.replace(' ', '_')
                    for basis_idx, basis in enumerate(['DZ', 'TZ', 'QZ']):
                        # Skip clusters beyond the limit listed for this basis set.
                        if skzcam_cluster_size > cluster_limits[basis_idx]:
                            continue

                        in_path = f'Data_original/DFT/CO2_XC_Comparison/MP2/{functional}/{skzcam_cluster_size}/{basis}/{structure}'
                        out_path = f'Data/Miscellaneous/Error_Validation_CO2_MgO/{functional}/Cluster_{skzcam_cluster_size}/{structure_convert[structure]}'
                        Path(out_path).mkdir(parents=True, exist_ok=True)
                        shorten_orca(f'{in_path}', f'{out_path}/MP2_awCV{basis}.orca.out')
                        # The destination does not depend on the basis set, so each basis
                        # overwrites the orca.pointcharges copied by the previous one.
                        shutil.copy(f'{in_path}/orca.bq',f'{out_path}/orca.pointcharges')


In [9]:
# Let's copy the CCSD(T) calculations

# Helpers below shorten the ORCA (orca_cc.out, orca_mp2.out) and MRCC (mrcc.out) output files

def shorten_dlpnocc(filepath_in, filepath_out):
    """Write a shortened copy of ``{filepath_in}/orca_cc.out`` to ``filepath_out``.

    The output consists of, in order:

    1. the input echo: from the line before the first ``INPUT FILE`` header
       through the line after the first ``END OF INPUT`` line;
    2. the SCF energy block: from the line before the first
       ``TOTAL SCF ENERGY`` line through the line after the first
       ``Virial Ratio       :`` line;
    3. ten "Output has been truncated" notices;
    4. a "CCSD Data" header followed by the 6 lines after the last
       ``The CCSD iterations have converged`` line;
    5. a "CCSD(T) Data" header followed by the last
       ``Final correlation energy`` line and the 2 lines after it.

    The last line of the input file is also printed to standard output.

    Args:
        filepath_in: Directory that contains ``orca_cc.out`` (decoded as ISO-8859-1).
        filepath_out: Path of the file to create or overwrite. It is written
            with the platform default encoding.

    Raises:
        ValueError: If ``orca_cc.out`` is empty (nothing is written).

    Warns:
        UserWarning: If a marker line is missing. The search then falls back to
            the last line (forward searches) or to line index 1 (backward
            searches), so an output file is still written but the affected
            section will not contain the intended lines.
    """
    import warnings

    source_file = f'{filepath_in}/orca_cc.out'
    with open(source_file, 'r', encoding="ISO-8859-1") as f:
        lines = f.readlines()
    if not lines:
        raise ValueError(f"{source_file} is empty; nothing to shorten")

    def locate(marker, from_end=False):
        """Return the index of the first (or last) line containing ``marker``."""
        order = range(len(lines) - 1, -1, -1) if from_end else range(len(lines))
        for idx in order:
            if marker in lines[idx]:
                return idx
        warnings.warn(
            f"marker {marker!r} not found in {source_file}; "
            "the shortened file may be incomplete"
        )
        return 1 if from_end else len(lines) - 1

    idx_start_start = locate('                                       INPUT FILE')
    idx_start_end = locate('END OF INPUT')
    idx_middle_start = locate('TOTAL SCF ENERGY')
    idx_middle_end = locate('Virial Ratio       :')
    idx_end_start = locate('The CCSD iterations have converged', from_end=True)
    idx_end_end = locate('Final correlation energy', from_end=True)

    # Clamp at 0: a negative slice start would silently count from the end of the file.
    start_outcar = lines[max(idx_start_start - 1, 0):idx_start_end + 2]
    middle_outcar = lines[max(idx_middle_start - 1, 0):idx_middle_end + 2]
    end_outcar = (
        ["\n-------- CCSD Data ------------\n"]
        + lines[idx_end_start + 1:idx_end_start + 7]
        + ["\n-------- CCSD(T) Data ------------\n"]
        + lines[idx_end_end:idx_end_end + 3]
    )
    # Echo the last line of the input file to the notebook output.
    print(lines[-1])

    # Write the shortened output
    with open(filepath_out, 'w') as f:
        f.writelines(start_outcar)
        f.writelines(middle_outcar)
        for _ in range(10):
            f.write("--------------------Output has been truncated-------------------------\n\n\n")
        f.writelines(end_outcar)


def shorten_dlpnomp2(filepath_in, filepath_out):
    """Write a shortened copy of ``{filepath_in}/orca_mp2.out`` to ``filepath_out``.

    The output consists of, in order:

    1. the input echo: from the line before the first ``INPUT FILE`` header
       through the line after the first ``END OF INPUT`` line;
    2. the SCF energy block: from the line before the first
       ``TOTAL SCF ENERGY`` line through the line after the first
       ``Virial Ratio       :`` line;
    3. ten "Output has been truncated" notices;
    4. the last ``DLPNO-MP2 CORRELATION ENERGY:`` line with one line either side.

    Args:
        filepath_in: Directory that contains ``orca_mp2.out`` (decoded as ISO-8859-1).
        filepath_out: Path of the file to create or overwrite. It is written
            with the platform default encoding.

    Raises:
        ValueError: If ``orca_mp2.out`` is empty (nothing is written).

    Warns:
        UserWarning: If a marker line is missing. The search then falls back to
            the last line (forward searches) or to line index 1 (backward
            search), so an output file is still written but the affected
            section will not contain the intended lines.
    """
    import warnings

    source_file = f'{filepath_in}/orca_mp2.out'
    with open(source_file, 'r', encoding="ISO-8859-1") as f:
        lines = f.readlines()
    if not lines:
        raise ValueError(f"{source_file} is empty; nothing to shorten")

    def locate(marker, from_end=False):
        """Return the index of the first (or last) line containing ``marker``."""
        order = range(len(lines) - 1, -1, -1) if from_end else range(len(lines))
        for idx in order:
            if marker in lines[idx]:
                return idx
        warnings.warn(
            f"marker {marker!r} not found in {source_file}; "
            "the shortened file may be incomplete"
        )
        return 1 if from_end else len(lines) - 1

    idx_start_start = locate('                                       INPUT FILE')
    idx_start_end = locate('END OF INPUT')
    idx_middle_start = locate('TOTAL SCF ENERGY')
    idx_middle_end = locate('Virial Ratio       :')
    idx_end_start = locate('DLPNO-MP2 CORRELATION ENERGY:', from_end=True)

    # Clamp at 0: a negative slice start would silently count from the end of the file.
    start_outcar = lines[max(idx_start_start - 1, 0):idx_start_end + 2]
    middle_outcar = lines[max(idx_middle_start - 1, 0):idx_middle_end + 2]
    end_outcar = lines[max(idx_end_start - 1, 0):idx_end_start + 2]

    # Write the shortened output
    with open(filepath_out, 'w') as f:
        f.writelines(start_outcar)
        f.writelines(middle_outcar)
        for _ in range(10):
            f.write("--------------------Output has been truncated-------------------------\n\n\n")
        f.writelines(end_outcar)

def shorten_lnocc(filepath_in, filepath_out):
    """Write a shortened copy of ``{filepath_in}/mrcc.out`` to ``filepath_out``.

    The output consists of:

    1. the head of the file, up to and including the 9 lines that follow the
       first line containing ``ghost=serialno``;
    2. ten "Output has been truncated" notices;
    3. the tail of the file, starting one line before the last line that
       contains ``Final local CC results:``.

    Args:
        filepath_in: Directory that contains ``mrcc.out`` (decoded as ISO-8859-1).
        filepath_out: Path of the file to create or overwrite. It is written
            with the platform default encoding.

    Raises:
        ValueError: If ``mrcc.out`` is empty (nothing is written).

    Warns:
        UserWarning: If a marker line is missing. The search then falls back to
            the last line (forward search) or to line index 1 (backward
            search), so an output file is still written but may not be
            truncated as intended.
    """
    import warnings

    source_file = f'{filepath_in}/mrcc.out'
    with open(source_file, 'r', encoding="ISO-8859-1") as f:
        lines = f.readlines()
    if not lines:
        raise ValueError(f"{source_file} is empty; nothing to shorten")

    def locate(marker, from_end=False):
        """Return the index of the first (or last) line containing ``marker``."""
        order = range(len(lines) - 1, -1, -1) if from_end else range(len(lines))
        for idx in order:
            if marker in lines[idx]:
                return idx
        warnings.warn(
            f"marker {marker!r} not found in {source_file}; "
            "the shortened file may be incomplete or untruncated"
        )
        return 1 if from_end else len(lines) - 1

    idx_start = locate('ghost=serialno')
    idx_end = locate('Final local CC results:', from_end=True)

    start_outcar = lines[:idx_start + 10]
    # Clamp at 0: a negative slice start would silently count from the end of the file.
    end_outcar = lines[max(idx_end - 1, 0):]

    # Write the shortened output
    with open(filepath_out, 'w') as f:
        f.writelines(start_outcar)
        for _ in range(10):
            f.write("--------------------Output has been truncated-------------------------\n\n\n")
        f.writelines(end_outcar)


def shorten_dfcc(filepath_in, filepath_out):
    """Write a shortened copy of ``{filepath_in}/mrcc.out`` to ``filepath_out``.

    The output consists of:

    1. the head of the file, up to and including the 9 lines that follow the
       first line containing ``ghost=serialno`` or ``qmmm=Amber``;
    2. ten "Output has been truncated" notices;
    3. the tail of the file, starting one line before the last line that
       contains `` Executing ccsd...``.

    Args:
        filepath_in: Directory that contains ``mrcc.out`` (decoded as ISO-8859-1).
        filepath_out: Path of the file to create or overwrite. It is written
            with the platform default encoding.

    Raises:
        ValueError: If ``mrcc.out`` is empty (nothing is written).

    Warns:
        UserWarning: If a marker line is missing. The search then falls back to
            the last line (forward search) or to line index 1 (backward
            search), so an output file is still written but may not be
            truncated as intended.
    """
    import warnings

    source_file = f'{filepath_in}/mrcc.out'
    with open(source_file, 'r', encoding="ISO-8859-1") as f:
        lines = f.readlines()
    if not lines:
        raise ValueError(f"{source_file} is empty; nothing to shorten")

    def locate(*markers, from_end=False):
        """Return the index of the first (or last) line containing any of ``markers``."""
        order = range(len(lines) - 1, -1, -1) if from_end else range(len(lines))
        for idx in order:
            if any(marker in lines[idx] for marker in markers):
                return idx
        warnings.warn(
            f"none of the markers {markers!r} found in {source_file}; "
            "the shortened file may be incomplete or untruncated"
        )
        return 1 if from_end else len(lines) - 1

    idx_start = locate('ghost=serialno', 'qmmm=Amber')
    idx_end = locate(' Executing ccsd...', from_end=True)

    start_outcar = lines[:idx_start + 10]
    # Clamp at 0: a negative slice start would silently count from the end of the file.
    end_outcar = lines[max(idx_end - 1, 0):]

    # Write the shortened output
    with open(filepath_out, 'w') as f:
        f.writelines(start_outcar)
        for _ in range(10):
            f.write("--------------------Output has been truncated-------------------------\n\n\n")
        f.writelines(end_outcar)



# Structure labels used in the original data directories -> folder names used
# in the curated output tree. The full table is repeated here (rather than
# only the three labels this cell uses) so that re-assigning the name does not
# remove the 'SLAB_FS' / 'AD_FS' / 'adsorbate*' / 'slab' entries that other
# cells look up.
structure_convert = {
    'AD_SLAB': 'Molecule-Surface',
    'SLAB_CP': 'Surface',
    'AD_CP': 'Molecule',
    'SLAB_FS': 'Surface',
    'AD_FS': 'Molecule',
    'adsorbate_slab': 'Molecule-Surface',
    'adsorbate': 'Molecule',
    'slab': 'Surface'
}

# Cluster-size table for the CCSD(T) transfer. Only the first entry is read
# below (as the upper bound of the cluster loop).
# NOTE(review): this re-assigns the name used by the MP2 transfer cell with
# two-element lists; re-run that cell's table before re-running its loop.
crystal_skzcam_cluster_size = {
    'MgO': [3,1],
    'r-TiO2': [4,1],
    'a-TiO2': [4,1]
}

# Shorten the local CCSD(T) MRCC outputs for every functional / cluster /
# structure / basis combination and copy the matching MINP input next to them.
for crystal in ['MgO']:
    # Both lookup tables are keyed by the converted crystal label.
    crystal_label = crystal_convert[crystal]
    for molecule in ['CO2 Chemisorbed']: #molecule_surface_systems[crystal_convert[crystal]]:
        for functional in crystal_xc_func_ensemble[crystal_label]:
            for skzcam_cluster_size in range(1, crystal_skzcam_cluster_size[crystal_label][0]+1):
                for structure in ['AD_SLAB', 'SLAB_CP','AD_CP']:
                    # NOTE(review): these two labels are computed but not used in the paths below.
                    if molecule in adsorbate_config:
                        molecule_label_in = adsorbate_config[molecule]
                    else:
                        molecule_label_in = molecule
                    molecule_label_out = molecule.replace(' ', '_')
                    for basis_idx, basis in enumerate(['DZ', 'TZ']): #, 'QZ']):
                        # NOTE(review): a fixed limit of 3 is applied to every basis set;
                        # the per-basis limit in crystal_skzcam_cluster_size[...][basis_idx]
                        # is not consulted here - confirm this is intended.
                        if skzcam_cluster_size > 3:
                            continue
                        in_path = f'Data_original/DFT/CO2_XC_Comparison/CCSDT/{functional}/{skzcam_cluster_size}/{basis}_Local/{structure}'
                        out_path = f'Data/Miscellaneous/Error_Validation_CO2_MgO/{functional}/Cluster_{skzcam_cluster_size}/{structure_convert[structure]}'
                        Path(out_path).mkdir(parents=True, exist_ok=True)
                        shorten_lnocc(f'{in_path}', f'{out_path}/LNOCCSDT_awCV{basis}.mrcc.out')
                        # The destination does not depend on the basis set, so each basis
                        # overwrites the LNOCCSDT.mrcc.input copied by the previous one.
                        shutil.copy(f'{in_path}/MINP',f'{out_path}/LNOCCSDT.mrcc.input')




In [None]:

# Shorten the unit-cell OUTCAR of each selected crystal / functional pair.
for crystal in ['TiO2_rutile']: #crystal_convert:
    # The original data folders are named after the polymorph only (text after the underscore).
    polymorph = crystal.split("_")[1]
    for xc_func in ["06_HSE06-D4"]: #crystal_xc_func_ensemble[crystal_convert[crystal]]:
        out_dir = f'Data/01-Unit_Cell/{crystal_convert[crystal]}/{xc_func}'
        # Make a directory for each crystal and xc_func
        Path(out_dir).mkdir(parents=True, exist_ok=True)
        shorten_outcar(
            f'Data_original/DFT/TiO2_DFT/01-UC/{polymorph}/{xc_func}/OUTCAR',
            f'{out_dir}/OUTCAR',
        )


In [None]:
# Transfer TiO2 

# Shorten the surface OUTCAR of each selected crystal / functional pair.
for crystal in ['TiO2_rutile']: #crystal_convert:
    for xc_func in ["06_HSE06-D4"]: #crystal_xc_func_ensemble[crystal_convert[crystal]]:
        in_path = f'Data_original/DFT/TiO2_DFT/02-Surface/{crystal}/{xc_func}'
        out_path = f'Data/02-Surface/{crystal_convert[crystal]}/{xc_func}'

        # Make a directory for each crystal and xc_func
        Path(out_path).mkdir(parents=True, exist_ok=True)
        shorten_outcar(f'{in_path}/OUTCAR', f'{out_path}/OUTCAR')

In [2]:
# Transfer Calculation cost calculations

# Shorten the two VASP OUTCARs (PBE and DFT) stored for every
# molecule / structure pair of the cost benchmark.
for mol_surface in ['CO2','H2O']:
    for structure in ['AD_SLAB', 'SLAB_FS','AD_FS']:
        in_path = f'Data_original/autoSKZCAM_Cost/VASP/{mol_surface}/{structure}'
        out_path = f'Data/Miscellaneous/autoSKZCAM_Cost/Periodic_DFT/{mol_surface}/{structure_convert[structure]}'
        Path(out_path).mkdir(parents=True, exist_ok=True)
        # (name in the original data, name in the curated tree)
        for outcar_in, outcar_out in (('OUTCAR_PBE', 'OUTCAR_GGA'), ('OUTCAR_DFT', 'OUTCAR_hybrid')):
            shorten_outcar(f'{in_path}/{outcar_in}', f'{out_path}/{outcar_out}')

In [3]:
def shorten_lnocc(filepath_in, filepath_out):
    """Write a shortened copy of the MRCC output file ``filepath_in`` to ``filepath_out``.

    Unlike the directory-based variant defined in another cell, ``filepath_in``
    here is the path of the output file itself.

    The output consists of:

    1. the head of the file, up to and including the 9 lines that follow the
       first line containing ``ghost=serialno``;
    2. ten "Output has been truncated" notices;
    3. the tail of the file, starting one line before the last line that
       contains ``Final local CC results:``.

    Args:
        filepath_in: Path of the MRCC output file to read (decoded as ISO-8859-1).
        filepath_out: Path of the file to create or overwrite. It is written
            with the platform default encoding.

    Raises:
        ValueError: If the input file is empty (nothing is written).

    Warns:
        UserWarning: If a marker line is missing. The search then falls back to
            the last line (forward search) or to line index 1 (backward
            search), so an output file is still written but may not be
            truncated as intended.
    """
    import warnings

    with open(f'{filepath_in}', 'r', encoding="ISO-8859-1") as f:
        lines = f.readlines()
    if not lines:
        raise ValueError(f"{filepath_in} is empty; nothing to shorten")

    def locate(marker, from_end=False):
        """Return the index of the first (or last) line containing ``marker``."""
        order = range(len(lines) - 1, -1, -1) if from_end else range(len(lines))
        for idx in order:
            if marker in lines[idx]:
                return idx
        warnings.warn(
            f"marker {marker!r} not found in {filepath_in}; "
            "the shortened file may be incomplete or untruncated"
        )
        return 1 if from_end else len(lines) - 1

    idx_start = locate('ghost=serialno')
    idx_end = locate('Final local CC results:', from_end=True)

    start_outcar = lines[:idx_start + 10]
    # Clamp at 0: a negative slice start would silently count from the end of the file.
    end_outcar = lines[max(idx_end - 1, 0):]

    # Write the shortened output
    with open(filepath_out, 'w') as f:
        f.writelines(start_outcar)
        for _ in range(10):
            f.write("--------------------Output has been truncated-------------------------\n\n\n")
        f.writelines(end_outcar)

def shorten_orca(filepath_in, filepath_out):
    """Write a shortened copy of the ORCA output file ``filepath_in`` to ``filepath_out``.

    Unlike the directory-based variant defined in another cell, ``filepath_in``
    here is the path of the output file itself.

    The output consists of, in order:

    1. the input echo: from the line before the first ``INPUT FILE`` header
       through the line after the first ``END OF INPUT`` line;
    2. the SCF energy block: from the line before the first
       ``TOTAL SCF ENERGY`` line through the line after the first
       ``Virial Ratio       :`` line;
    3. ten "Output has been truncated" notices;
    4. the last ``RI-MP2 CORRELATION ENERGY:`` line with one line either side;
    5. the last ``FINAL SINGLE POINT ENERGY`` line with the 7 lines before it
       and the line after it;
    6. the last line of the file.

    Args:
        filepath_in: Path of the ORCA output file to read (decoded as ISO-8859-1).
        filepath_out: Path of the file to create or overwrite. It is written
            with the platform default encoding.

    Raises:
        ValueError: If the input file is empty (nothing is written).

    Warns:
        UserWarning: If a marker line is missing. The search then falls back to
            the last line (forward searches) or to line index 1 (backward
            searches), so an output file is still written but the affected
            section will not contain the intended lines.
    """
    import warnings

    with open(f'{filepath_in}', 'r', encoding="ISO-8859-1") as f:
        lines = f.readlines()
    if not lines:
        raise ValueError(f"{filepath_in} is empty; nothing to shorten")

    def locate(marker, from_end=False):
        """Return the index of the first (or last) line containing ``marker``."""
        order = range(len(lines) - 1, -1, -1) if from_end else range(len(lines))
        for idx in order:
            if marker in lines[idx]:
                return idx
        warnings.warn(
            f"marker {marker!r} not found in {filepath_in}; "
            "the shortened file may be incomplete"
        )
        return 1 if from_end else len(lines) - 1

    def window(idx, before, after):
        """Lines from ``idx - before`` through ``idx + after``, clamped at the file start."""
        # Without the clamp a negative start would silently count from the end of the file.
        return lines[max(idx - before, 0):idx + after + 1]

    idx_start_start = locate('                                       INPUT FILE')
    idx_start_end = locate('END OF INPUT')
    idx_middle_start = locate('TOTAL SCF ENERGY')
    idx_middle_end = locate('Virial Ratio       :')
    idx_end_start = locate('RI-MP2 CORRELATION ENERGY:', from_end=True)
    idx_end_end = locate('FINAL SINGLE POINT ENERGY', from_end=True)

    start_outcar = lines[max(idx_start_start - 1, 0):idx_start_end + 2]
    middle_outcar = lines[max(idx_middle_start - 1, 0):idx_middle_end + 2]
    end_outcar = window(idx_end_start, 1, 1) + window(idx_end_end, 7, 1)

    # Write the shortened output
    with open(filepath_out, 'w') as f:
        f.writelines(start_outcar)
        f.writelines(middle_outcar)
        for _ in range(10):
            f.write("--------------------Output has been truncated-------------------------\n\n\n")
        f.writelines(end_outcar)
        f.write(lines[-1])

# Shorten whichever ORCA / MRCC outputs exist for every molecule, cluster,
# structure and basis set of the autoSKZCAM cost benchmark.
for mol_surface in ['CO2','H2O']:
    for cluster_num in range(1,8):
        for structure in ['adsorbate', 'adsorbate_slab','slab']:
            in_path = f'Data_original/autoSKZCAM_Cost/{mol_surface}/{cluster_num}/{structure}'
            out_path = f'Data/Miscellaneous/autoSKZCAM_Cost/autoSKZCAM/{mol_surface}/{cluster_num}/{structure_convert[structure]}'
            Path(out_path).mkdir(parents=True, exist_ok=True)
            for basis_set in ['def2-SVP','aVDZ','aVTZ','aVQZ','awCVDZ','awCVTZ','awCVQZ']:
                # ORCA first, then MRCC; a file that does not exist is skipped.
                for prefix, shorten in (('orca', shorten_orca), ('mrcc', shorten_lnocc)):
                    file_name = f'{prefix}_{basis_set}.out'
                    if os.path.exists(f'{in_path}/{file_name}'):
                        shorten(f'{in_path}/{file_name}', f'{out_path}/{file_name}')

In [2]:
# Transfer DFT functionals Eint calculations



# Getting the 

# Functional folders of the DFT comparison; the two-digit prefix is used below
# to decide how each functional's files are handled.
dft_benchmark_functionals = ["01_PBE-D30", "02_PBE-MBDFI", "03_rev-vdW-DF2", "04_SCAN-rVV10", "05_R2SCAN-D4", "06_PBE0-TSHI",  "07_HSE06-D4"]

# "<surface> <molecule>" systems covered by the DFT comparison.
dft_benchmark_systems = ['MgO CH4', 'MgO C2H6', 'MgO CO', 'MgO CO2_2', 'MgO CO2_3', 'MgO H2O','MgO N2O_1','MgO N2O_2', 'MgO NH3','TiO2_rutile CH4','TiO2_rutile CH3OH','TiO2_rutile CO2_1','TiO2_rutile CO2_2','TiO2_rutile H2O','TiO2_anatase H2O','TiO2_anatase NH3']

# Placeholder table (every entry is 50); it is not read by the transfer loop below.
# NOTE(review): its functional labels differ from dft_benchmark_functionals.
dft_xc_eint_dict = {x1: {z:50 for z in ['01_PBE-D30', '02_PBE-MBDFI', '03_rev-vdW-DF2', '04_R2SCAN-rVV10', '05_PBE0-TSHI','06_HSE06-D4','07_RPA','08_RPA-rSE']} for x1 in dft_benchmark_systems}

# "<surface> <molecule>" system -> molecule folder name in the curated tree.
mol_surface_to_mol = {
    'MgO CH4': 'CH4',
    'MgO C2H6': 'C2H6',
    'MgO CO': 'CO',
    'MgO CO2_2': 'CO2_Physisorbed',
    'MgO CO2_3': 'CO2_Chemisorbed',
    'MgO N2O_1': 'N2O_Parallel',
    'MgO N2O_2': 'N2O_Tilted',
    'MgO H2O': 'H2O_Monomer',
    'MgO NH3': 'NH3',
    'MgO CH4_ML': 'CH4_Monolayer',
    'MgO C2H6_ML': 'C2H6_Monolayer',
    'TiO2_rutile CH4': 'CH4',
    'TiO2_rutile CH3OH': 'CH3OH',
    'TiO2_rutile CO2_1': 'CO2_Parallel',
    'TiO2_rutile CO2_2': 'CO2_Tilted',
    'TiO2_rutile H2O': 'H2O',
    'TiO2_anatase H2O': 'H2O',
    'TiO2_anatase NH3': 'NH3'
}

for mol_surface in dft_benchmark_systems:
    molecule = mol_surface.split()[-1]
    surface  = mol_surface.split()[0]
    for functional in dft_benchmark_functionals:
        # Compare the numeric prefix exactly: a plain substring test for '06'
        # would also match '07_HSE06-D4'.
        functional_idx = functional.split('_')[0]

        for structure in ['AD_SLAB', 'SLAB_FS','AD_FS']:
            in_path = f'Data_original/DFT/DFT_Compare/{surface}/{molecule}/{functional}/{structure}'
            out_path = f'Data/Miscellaneous/DFT_Comparison/{crystal_convert[surface]}/{mol_surface_to_mol[mol_surface]}/{functional}/{structure_convert[structure]}'
            Path(out_path).mkdir(parents=True, exist_ok=True)

            if functional_idx in ('06', '07'):
                # These functionals have three numbered OUTCARs per structure.
                shorten_outcar(f'{in_path}/OUTCAR_01', f'{out_path}/OUTCAR_GGA_tight')
                shorten_outcar(f'{in_path}/OUTCAR_02', f'{out_path}/OUTCAR_GGA_normal')
                shorten_outcar(f'{in_path}/OUTCAR_03', f'{out_path}/OUTCAR')
            else:
                shorten_outcar(f'{in_path}/OUTCAR', f'{out_path}/OUTCAR')

            # Append the content of D4_EDISP to the shortened OUTCAR(s) of the
            # two functionals whose label ends in -D4.
            if functional_idx == '05':
                d4_targets = ['OUTCAR']
            elif functional_idx == '07':
                d4_targets = ['OUTCAR_GGA_tight', 'OUTCAR_GGA_normal', 'OUTCAR']
            else:
                d4_targets = []

            if d4_targets:
                with open(f'{in_path}/D4_EDISP', 'r') as f:
                    lines = f.readlines()
                for d4_target in d4_targets:
                    with open(f'{out_path}/{d4_target}', 'a') as f:
                        f.write('\n\n')
                        f.writelines(lines)

    # Do RPA analysis if MgO

    if surface == 'MgO' and molecule in ['CH4','C2H6','CO','CO2_2','CO2_3','H2O','N2O_1','NH3']:
        for structure in ['AD_SLAB', 'SLAB_FS','AD_FS']:
            # NOTE(review): this root contains '04-Int_Ene', unlike the in_path of
            # the functional loop above - confirm both locations are correct.
            in_path = f'Data_original/DFT/04-Int_Ene/DFT_Compare/{surface}/{molecule}/07-RPA/{structure}'
            rpa_root = f'Data/Miscellaneous/DFT_Comparison/{crystal_convert[surface]}/{mol_surface_to_mol[mol_surface]}'

            # 08_RPA receives the EXX and RPA outputs.
            out_path = f'{rpa_root}/08_RPA/{structure_convert[structure]}'
            Path(out_path).mkdir(parents=True, exist_ok=True)
            shutil.copy(f'{in_path}/OUT.EXX.run.encut.550.k.2',f'{out_path}/OUTCAR_EXX')
            shutil.copy(f'{in_path}/OUT.RPA.run.encut.550.k.2',f'{out_path}/OUTCAR_RPA')

            # 09_RPA-rSE receives the RPA and rSE outputs.
            out_path = f'{rpa_root}/09_RPA-rSE/{structure_convert[structure]}'
            Path(out_path).mkdir(parents=True, exist_ok=True)
            shutil.copy(f'{in_path}/OUT.RPA.run.encut.550.k.2',f'{out_path}/OUTCAR_RPA')
            shutil.copy(f'{in_path}/OUT.rSE.run.encut.550.k.2',f'{out_path}/OUTCAR_rSE')




In [None]:



# Molecules considered on each surface.
systems_studied = {
    'MgO': ['CH4','C2H6','N2O_1','N2O_2','CO','CO2_2','CO2_3','H2O','NH3'],
    'TiO2_rutile': ['CH4','CH3OH','CO2_1','CO2_2','H2O'],
    'TiO2_anatase': ['H2O','NH3']
}

# surface -> molecule -> functional -> 0; the table is initialised here and
# not filled in by the loop below.
dft_benchmark_dict = {
    surface: {
        molecule: dict.fromkeys(dft_benchmark_functionals, 0)
        for molecule in systems_studied[surface]
    }
    for surface in systems_studied
}

# Print, for the two functionals with numbered OUTCARs, the value returned by
# calculate_eint (scaled by 1000) for each of OUTCAR_01 .. OUTCAR_03.
# NOTE(review): calculate_eint is not defined in the cells shown here.
for surface, surface_molecules in systems_studied.items():
    for molecule in surface_molecules:
        molecule_label = molecule.replace(' ', '_')
        for dft_func in ["06_PBE0-TSHI",  "07_HSE06-D4"]:
            xc_func_eint = []
            for calc_num in range(1, 4):
                file_path = f'Data_original/DFT/DFT_Compare/{surface}/{molecule_label}/{dft_func}'
                eint = calculate_eint(
                    file_path,
                    code='vasp',
                    structure_labels=["AD_SLAB", "SLAB_FS", "AD_FS"],
                    vasp_outcar_label=f"OUTCAR_{calc_num:02d}",
                )
                xc_func_eint.append(eint*1000)
            print(surface, molecule, dft_func, xc_func_eint)

In [10]:
def shorten_fnocc(filepath_in, filepath_out):
    """Write a truncated copy of ``{filepath_in}/mrcc.out`` to ``filepath_out``.

    The shortened file consists of, in order:
      * every line before the first line containing ``ghost=serialno``;
      * ten "Output has been truncated" banner blocks;
      * everything from two lines below the last line containing
        ``Results in full basis:`` up to the end of the file.

    Args:
        filepath_in: Directory that contains ``mrcc.out``.
        filepath_out: Path of the shortened file (overwritten if it exists).

    Raises:
        ValueError: If either marker is missing from the input. Nothing is
            written in that case, so an incomplete calculation cannot be
            silently turned into a malformed "shortened" file.
    """
    start_marker = 'ghost=serialno'
    end_marker = 'Results in full basis:'
    source = f'{filepath_in}/mrcc.out'

    with open(source, 'r', encoding="ISO-8859-1") as f:
        lines = f.readlines()

    # First occurrence of the start marker, scanning from the top.
    idx_start = next((idx for idx, line in enumerate(lines) if start_marker in line), None)
    # Last occurrence of the end marker, scanning from the bottom (line 0 included).
    idx_end = next((idx for idx in range(len(lines) - 1, -1, -1) if end_marker in lines[idx]), None)

    for marker, idx in ((start_marker, idx_start), (end_marker, idx_end)):
        if idx is None:
            raise ValueError(f'marker {marker!r} not found in {source}')

    head = lines[:idx_start]
    tail = lines[idx_end + 2:]

    with open(filepath_out, 'w') as f:
        f.writelines(head)
        for _ in range(10):
            f.write("--------------------Output has been truncated-------------------------\n\n\n")
        f.writelines(tail)

def shorten_revpbe(filepath_in, filepath_out):
    """Write a truncated copy of ``{filepath_in}/orca.out`` to ``filepath_out``.

    Three windows of the input are kept. Each window starts one line above its
    opening marker and ends one line below its closing marker:
      * input echo: first ``INPUT FILE`` heading .. first ``END OF INPUT``;
      * SCF summary: first ``TOTAL SCF ENERGY`` .. first ``Virial Ratio       :``;
      * final block: last ``Dispersion correction`` .. last
        ``FINAL SINGLE POINT ENERGY``.
    Ten "Output has been truncated" banner blocks are written between the SCF
    summary and the final block.

    Args:
        filepath_in: Directory that contains ``orca.out``.
        filepath_out: Path of the shortened file (overwritten if it exists).

    Raises:
        ValueError: If any of the six markers is missing from the input.
            Nothing is written in that case.
    """
    source = f'{filepath_in}/orca.out'
    with open(source, 'r', encoding="ISO-8859-1") as f:
        lines = f.readlines()

    def locate(marker, from_end=False):
        # Index of the first (or, with from_end, the last) line containing marker.
        order = range(len(lines) - 1, -1, -1) if from_end else range(len(lines))
        for idx in order:
            if marker in lines[idx]:
                return idx
        raise ValueError(f'marker {marker!r} not found in {source}')

    def window(first, last):
        # Clamp the start so a marker on line 0 does not turn into index -1.
        return lines[max(first - 1, 0):last + 2]

    idx_start_start = locate('                                       INPUT FILE')
    idx_start_end = locate('END OF INPUT')
    idx_middle_start = locate('TOTAL SCF ENERGY')
    idx_middle_end = locate('Virial Ratio       :')
    idx_end_start = locate('Dispersion correction', from_end=True)
    idx_end_end = locate('FINAL SINGLE POINT ENERGY', from_end=True)

    input_echo = window(idx_start_start, idx_start_end)
    scf_summary = window(idx_middle_start, idx_middle_end)
    final_block = window(idx_end_start, idx_end_end)

    with open(filepath_out, 'w') as f:
        f.writelines(input_echo)
        f.writelines(scf_summary)
        for _ in range(10):
            f.write("--------------------Output has been truncated-------------------------\n\n\n")
        f.writelines(final_block)

# Output folder name for each of the three structures of a two-body term.
structure_conv_dict = {
    0: 'Dimer',
    1: 'Monomer_A',
    2: 'Monomer_B'
}

# Highest two-body term index transferred for each monolayer.
twob_term_counts = {'CH4': 68, 'C2H6': 72}

for molecule, max_num in twob_term_counts.items():
    for twob_term in range(1, max_num + 1):
        for structure_idx in structure_conv_dict:
            revpbe_dir = f'Data/08-Ecoh/MgO/{molecule}_Monolayer/02_revPBE-D4_2B/02_revPBE-D4/{twob_term:02d}/{structure_conv_dict[structure_idx]}'
            ccsdt_dir = f'Data/08-Ecoh/MgO/{molecule}_Monolayer/07_CCSDT_2B/02_revPBE-D4/{twob_term:02d}/{structure_conv_dict[structure_idx]}'
            for out_dir in (revpbe_dir, ccsdt_dir):
                Path(out_dir).mkdir(parents=True, exist_ok=True)

            # (shortening routine, input directory, output file), processed in this order.
            transfers = (
                (
                    shorten_revpbe,
                    f'Data_original/cWFT/CCSDT/MgO/{molecule}_ML/MBE_Corr/{twob_term}_revPBE-D4/{twob_term}/{structure_idx}',
                    f'{revpbe_dir}/revPBE-D4_QZVPD.orca.out',
                ),
                (
                    shorten_fnocc,
                    f'Data_original/cWFT/CCSDT/MgO/{molecule}_ML/MBE_Corr/{twob_term}_FNOCC/{twob_term}/TZ/{structure_idx}',
                    f'{ccsdt_dir}/FNOCCSDT_aVTZ.mrcc.out',
                ),
                (
                    shorten_fnocc,
                    f'Data_original/cWFT/CCSDT/MgO/{molecule}_ML/MBE_Corr/{twob_term}_FNOCC/{twob_term}/QZ/{structure_idx}',
                    f'{ccsdt_dir}/FNOCCSDT_aVQZ.mrcc.out',
                ),
            )
            for shorten, in_path, out_path in transfers:
                shorten(in_path, out_path)


In [11]:
# Output folder name for each numbered structure of a tetramer calculation.
structure_conv_dict = {
    1: 'Cluster_Monomer_1',
    2: 'Cluster_Monomer_2',
    3: 'Cluster_Monomer_3',
    4: 'Cluster_Monomer_4',
    5: 'Cluster'
}

for molecule in ('H2O', 'CH3OH'):
    for structure_idx in structure_conv_dict:
        out_dir = f'Data/08-Ecoh/MgO/{molecule}_Tetramer/07_CCSDT/02_revPBE-D4/{structure_conv_dict[structure_idx]}'
        Path(out_dir).mkdir(parents=True, exist_ok=True)

        # (input directory, output file) for the TZ run followed by the QZ run.
        transfers = (
            (
                f'Data_original/cWFT/CCSDT/MgO/{molecule}_4/Cluster_Corr/{structure_idx}_FNOCC/TZ/',
                f'{out_dir}/FNOCCSDT_aVTZ.mrcc.out',
            ),
            (
                f'Data_original/cWFT/CCSDT/MgO/{molecule}_4/Cluster_Corr/{structure_idx}_FNOCC/QZ',
                f'{out_dir}/FNOCCSDT_aVQZ.mrcc.out',
            ),
        )
        for in_path, out_path in transfers:
            shorten_fnocc(in_path, out_path)

In [26]:
def shorten_dfcc(filepath_in, filepath_out):
    """Write a truncated copy of ``{filepath_in}/mrcc.out`` to ``filepath_out``.

    The shortened file consists of, in order:
      * the top of the file up to and including nine lines below the first
        line containing ``ghost=serialno`` or ``qmmm=Amber``;
      * ten "Output has been truncated" banner blocks;
      * everything from one line above the last line containing
        `` Executing ccsd...`` up to the end of the file.

    Args:
        filepath_in: Directory that contains ``mrcc.out``.
        filepath_out: Path of the shortened file (overwritten if it exists).

    Raises:
        ValueError: If no start marker or no end marker is found. Nothing is
            written in that case.
    """
    start_markers = ('ghost=serialno', 'qmmm=Amber')
    end_marker = ' Executing ccsd...'
    source = f'{filepath_in}/mrcc.out'

    with open(source, 'r', encoding="ISO-8859-1") as f:
        lines = f.readlines()

    # First line carrying either start marker, scanning from the top.
    idx_start = next(
        (idx for idx, line in enumerate(lines) if any(marker in line for marker in start_markers)),
        None,
    )
    if idx_start is None:
        raise ValueError(f'none of the markers {start_markers!r} found in {source}')

    # Last line carrying the end marker, scanning from the bottom (line 0 included).
    idx_end = next((idx for idx in range(len(lines) - 1, -1, -1) if end_marker in lines[idx]), None)
    if idx_end is None:
        raise ValueError(f'marker {end_marker!r} not found in {source}')

    head = lines[:idx_start + 10]
    # Clamp so a marker on line 0 does not turn into the slice start -1.
    tail = lines[max(idx_end - 1, 0):]

    with open(filepath_out, 'w') as f:
        f.writelines(head)
        for _ in range(10):
            f.write("--------------------Output has been truncated-------------------------\n\n\n")
        f.writelines(tail)

# Maps input-side tags to output folder names. Both system tags and
# functional suffixes (including the empty suffix) are looked up here.
molecule_conv_dict = {
    'CO2_3': 'CO2 Chemisorbed',
    'NO_6': 'NO Dimer',
    '_B3LYP': '06_B3LYP-D2-Ne',
    '': '02_revPBE-D4'
}

for molecule in ('CO2_3',):
    molecule_label = molecule_conv_dict[molecule].replace(' ', '_')
    for xc_func in ('', '_B3LYP'):
        xc_func_label = molecule_conv_dict[xc_func]
        for basis in ('TZ', 'QZ'):
            for structure in ('RS', 'FS'):
                if structure == 'FS':
                    structure_label = 'Molecule-Surface'
                else:
                    structure_label = 'Molecule'

                # CO2_3 nests structure above basis; any other system nests basis above structure.
                if molecule == 'CO2_3':
                    in_path = f'Data_original/cWFT/CCSDT/MgO/{molecule}{xc_func}/Conform_Corr/{structure}/{basis}'
                else:
                    in_path = f'Data_original/cWFT/CCSDT/MgO/{molecule}{xc_func}/Conform_Corr/{basis}/{structure}'

                out_dir = f'Data/07-Econf/MgO/{molecule_label}/07_CCSDT/{xc_func_label}/{structure_label}'
                Path(out_dir).mkdir(parents=True, exist_ok=True)
                out_path = f'{out_dir}/CCSDT_aV{basis}.mrcc.out'
                shorten_dfcc(in_path, out_path)




In [29]:
# Both TiO2 entries carry the same functional folders; each gets its own list copy.
_tio2_functionals = (
    "01_PBE-U-TSHI",
    "02_revPBE-D4",
    "03_vdW-DF",
    "04_rev-vdW-DF2",
    "05_R2SCAN-rVV10",
    "06_HSE06-D4",
)

# Functional folder names per crystal, keyed by the output-side crystal label.
crystal_xc_func_ensemble = {
    'MgO': [
        "01_PBE-D2-Ne",
        "02_revPBE-D4",
        "03_vdW-DF",
        "04_rev-vdW-DF2",
        "05_PBE0-D4",
        "06_B3LYP-D2-Ne",
    ],
    'r-TiO2': list(_tio2_functionals),
    'a-TiO2': list(_tio2_functionals),
}

# Input-side crystal folder name -> output-side crystal label.
crystal_convert = {
    'MgO': 'MgO',
    'TiO2_rutile': 'r-TiO2',
    'TiO2_anatase': 'a-TiO2',
}


In [5]:

# Shorten the OUTCAR of every crystal/functional pair into Data/01-Unit_Cell.
for crystal, crystal_label in crystal_convert.items():
    for xc_func in crystal_xc_func_ensemble[crystal_label]:
        out_path = f'Data/01-Unit_Cell/{crystal_label}/{xc_func}'
        Path(out_path).mkdir(parents=True, exist_ok=True)
        shorten_outcar(f'Data_original/DFT/00-UC/{crystal}/{xc_func}/OUTCAR', f'{out_path}/OUTCAR')


In [6]:
# Shorten the OUTCAR of every crystal/functional pair into Data/02-Surface.
for crystal, crystal_label in crystal_convert.items():
    for xc_func in crystal_xc_func_ensemble[crystal_label]:
        out_path = f'Data/02-Surface/{crystal_label}/{xc_func}'
        Path(out_path).mkdir(parents=True, exist_ok=True)

        # MgO inputs sit one level deeper, in a folder named after the functional again.
        if crystal == 'MgO':
            in_path = f'Data_original/DFT/02-Surf/{crystal}/{xc_func}/{xc_func}'
        else:
            in_path = f'Data_original/DFT/02-Surf/{crystal}/{xc_func}'

        shorten_outcar(f'{in_path}/OUTCAR', f'{out_path}/OUTCAR')

In [7]:
# Same transfer as the plain surface cell, plus the extra MgO "_8x8" entry
# taken from SLAB_LARGE_PV/SP_pv.
for crystal, crystal_label in crystal_convert.items():
    for xc_func in crystal_xc_func_ensemble[crystal_label]:
        is_mgo = crystal == 'MgO'

        # (input directory, output directory) pairs, processed in order.
        if is_mgo:
            transfers = [
                (f'Data_original/DFT/02-Surf/{crystal}/{xc_func}/{xc_func}', f'Data/02-Surface/{crystal_label}/{xc_func}'),
                (f'Data_original/DFT/02-Surf/{crystal}/SLAB_LARGE_PV/SP_pv/{xc_func}', f'Data/02-Surface/{crystal_label}_8x8/{xc_func}'),
            ]
        else:
            transfers = [
                (f'Data_original/DFT/02-Surf/{crystal}/{xc_func}', f'Data/02-Surface/{crystal_label}/{xc_func}'),
            ]

        for in_path, out_path in transfers:
            Path(out_path).mkdir(parents=True, exist_ok=True)
            shorten_outcar(f'{in_path}/OUTCAR', f'{out_path}/OUTCAR')

In [10]:
# Molecule folder names written under Data/03-Molecule for each crystal label.
molecule_systems = {
    'MgO': [
        'CH4', 'C2H6', 'C6H6', 'CO', 'CO2', 'CH3OH', 'N2O', 'H2O', 'NH3',
        'NO', 'NO_Dimer', 'CH3OH_Tetramer', 'H2O_Tetramer',
    ],
    'r-TiO2': ['CO2', 'CH4', 'H2O', 'CH3OH'],
    'a-TiO2': ['H2O', 'NH3'],
}

for crystal, crystal_label in crystal_convert.items():
    for molecule in molecule_systems[crystal_label]:
        # Input folder below Data_original/DFT/01-Mol. The branches are ordered
        # so that the most specific rule wins.
        if 'NO_Dimer' in molecule:
            source_dir = f'{crystal}/N2O2'
        elif '_Tetramer' in molecule:
            source_dir = f'{crystal}/{molecule.split("_")[0]}_4'
        elif molecule == 'C6H6':
            source_dir = f'{crystal}/{molecule}_4'
        elif 'TiO2' in crystal:
            # Both TiO2 crystals read from one common TiO2 folder.
            source_dir = f'TiO2/{molecule}'
        else:
            source_dir = f'{crystal}/{molecule}'

        for xc_func in crystal_xc_func_ensemble[crystal_label]:
            in_path = f'Data_original/DFT/01-Mol/{source_dir}/{xc_func}'
            out_path = f'Data/03-Molecule/{crystal_label}/{molecule}/{xc_func}'
            Path(out_path).mkdir(parents=True, exist_ok=True)
            shorten_outcar(f'{in_path}/OUTCAR', f'{out_path}/OUTCAR')

In [15]:
# Conformational-correction OUTCARs of chemisorbed CO2 on MgO, for every
# ordered pair of functionals.
mgo_functionals = crystal_xc_func_ensemble[crystal_convert['MgO']]

for xc_func1 in mgo_functionals:
    for xc_func2 in mgo_functionals:
        # Input structure folder -> output folder name.
        for structure, structure_label in (('AD_FS', 'Molecule-Surface'), ('AD_RS', 'Molecule')):
            in_path = f'Data_original/DFT/04-Int_Ene/CO2_3_Conformational_correction/CO2_3/{xc_func1}/{xc_func2}/{structure}'
            out_path = f'Data/07-Econf/MgO/CO2_Chemisorbed/{xc_func1}/{xc_func2}/{structure_label}'
            Path(out_path).mkdir(parents=True, exist_ok=True)
            shorten_outcar(f'{in_path}/OUTCAR', f'{out_path}/OUTCAR')

In [7]:
# NO dimer molecule OUTCARs: read from the N2O2 folder, written to NO_Dimer,
# once per MgO functional.
mgo_functionals = crystal_xc_func_ensemble[crystal_convert['MgO']]

for xc_func1 in mgo_functionals:
    out_path = f'Data/03-Molecule/MgO/NO_Dimer/{xc_func1}'
    Path(out_path).mkdir(parents=True, exist_ok=True)

    in_path = f'Data_original/DFT/01-Mol/MgO/N2O2/{xc_func1}'
    shorten_outcar(f'{in_path}/OUTCAR', f'{out_path}/OUTCAR')

In [17]:
# Conformational-correction OUTCARs of the NO dimer (NO_6) on MgO, for every
# ordered pair of functionals.
mgo_functionals = crystal_xc_func_ensemble[crystal_convert['MgO']]

for xc_func1 in mgo_functionals:
    for xc_func2 in mgo_functionals:
        # Input structure folder -> output folder name.
        for structure, structure_label in (('AD_FS', 'Molecule-Surface'), ('AD_RS', 'Molecule')):
            in_path = f'Data_original/DFT/04-Int_Ene/NO_6_Conformational_correction/{xc_func1}/{xc_func2}/{structure}'
            out_path = f'Data/07-Econf/MgO/NO_Dimer/{xc_func1}/{xc_func2}/{structure_label}'
            Path(out_path).mkdir(parents=True, exist_ok=True)
            shorten_outcar(f'{in_path}/OUTCAR', f'{out_path}/OUTCAR')

In [7]:
# Cohesive-correction OUTCARs of the tetramer clusters: the full cluster
# first, then its four monomers.
mgo_functionals = crystal_xc_func_ensemble[crystal_convert['MgO']]

for molecule in ('CH3OH_4', 'H2O_4'):
    tetramer_label = molecule.split("_")[0] + "_Tetramer"
    for xc_func1 in mgo_functionals:
        for xc_func2 in mgo_functionals:
            source_root = f'Data_original/DFT/04-Int_Ene/MgO/{molecule}/{xc_func1}/{xc_func2}'
            target_root = f'Data/08-Ecoh/MgO/{tetramer_label}/{xc_func1}/{xc_func2}'

            # (input folder, output folder) names relative to the two roots.
            transfers = [('AD_FS', 'Cluster')]
            for monomer_idx in ('1', '2', '3', '4'):
                transfers.append((f'AD_FS_{monomer_idx}', f'Cluster-Monomer-{monomer_idx}'))

            for source_name, target_name in transfers:
                in_path = f'{source_root}/{source_name}'
                out_path = f'{target_root}/{target_name}'
                Path(out_path).mkdir(parents=True, exist_ok=True)
                shorten_outcar(f'{in_path}/OUTCAR', f'{out_path}/OUTCAR')

In [8]:
# Cohesive-correction OUTCARs of the CH4 and C2H6 monolayers.
mgo_functionals = crystal_xc_func_ensemble[crystal_convert['MgO']]
# First-level functionals that are printed but not transferred.
skipped_functionals = ('05_PBE0-D4', '06_B3LYP-D2-Ne')

for molecule in ('CH4', 'C2H6'):
    monolayer_label = molecule.split("_")[0] + "_Monolayer"
    for xc_func1 in mgo_functionals:
        print(xc_func1)
        if xc_func1 in skipped_functionals:
            continue
        for xc_func2 in mgo_functionals:
            target_root = f'Data/08-Ecoh/MgO/{monolayer_label}/{xc_func1}/{xc_func2}'

            # (input directory, output folder name): the three ML_ALL structures
            # first, then monomers 1-4, which are read from outside ML_ALL.
            transfers = [
                (f'Data_original/DFT/07-Ecoh/ML_ALL/{molecule}/{xc_func1}/{xc_func2}/ALL_AD_FS', 'Monolayer'),
                (f'Data_original/DFT/07-Ecoh/ML_ALL/{molecule}/{xc_func1}/{xc_func2}/ALL_AD_SLAB', 'Monolayer-Surface'),
                (f'Data_original/DFT/07-Ecoh/ML_ALL/{molecule}/{xc_func1}/{xc_func2}/ALL_SLAB_FS', 'Surface'),
            ]
            for monomer_idx in ('1', '2', '3', '4'):
                transfers.append((
                    f'Data_original/DFT/07-Ecoh/{molecule}/{xc_func1}/{xc_func2}/{monomer_idx}_AD_FS',
                    f'Monolayer-Monomer-{monomer_idx}',
                ))

            for in_path, target_name in transfers:
                out_path = f'{target_root}/{target_name}'
                Path(out_path).mkdir(parents=True, exist_ok=True)
                shorten_outcar(f'{in_path}/OUTCAR', f'{out_path}/OUTCAR')

01_PBE-D2-Ne
02_revPBE-D4
03_vdW-DF
04_rev-vdW-DF2
05_PBE0-D4
06_B3LYP-D2-Ne
01_PBE-D2-Ne
02_revPBE-D4
03_vdW-DF
04_rev-vdW-DF2
05_PBE0-D4
06_B3LYP-D2-Ne


In [12]:
# Cohesive-correction OUTCARs of the NO dimer (NO_6): the dimer and its monomer.
mgo_functionals = crystal_xc_func_ensemble[crystal_convert['MgO']]

for molecule in ('NO_6',):
    dimer_label = molecule.split("_")[0] + "_Dimer"
    for xc_func1 in mgo_functionals:
        for xc_func2 in mgo_functionals:
            target_root = f'Data/08-Ecoh/MgO/{dimer_label}/{xc_func1}/{xc_func2}'

            # (input file, output directory). The monomer input lives in a folder
            # per xc_func2 and carries xc_func1 in its file name.
            transfers = (
                (f'Data_original/DFT/07-Ecoh/{molecule}/{xc_func1}/{xc_func2}/AD_RS/OUTCAR', f'{target_root}/Dimer'),
                (f'Data_original/DFT/07-Ecoh/{molecule}/AD_RS_Monomer/{xc_func2}/OUTCAR_{xc_func1}', f'{target_root}/Monomer'),
            )
            for source_file, out_path in transfers:
                Path(out_path).mkdir(parents=True, exist_ok=True)
                shorten_outcar(source_file, f'{out_path}/OUTCAR')


            # for monomer_idx in ['1','2','3','4']:
            #     in_path = f'Data_original/DFT/07-Ecoh/{molecule}/{xc_func1}/{xc_func2}/{monomer_idx}_AD_FS'
            #     out_path = f'Data/08-Ecoh/MgO/{molecule.split("_")[0]+"_Monolayer"}/{xc_func1}/{xc_func2}/Monolayer_Monomer_{monomer_idx}'
                
            #     Path(out_path).mkdir(parents=True, exist_ok=True)
            #     shorten_outcar(f'{in_path}/OUTCAR', f'{out_path}/OUTCAR')

In [22]:
# # Transferring the monolayer cohesive correction calculations
# for molecule in ['CH4_ML','C2H6_ML']:
#     for xc_func1 in crystal_xc_func_ensemble[crystal_convert['MgO']]:
#         for xc_func2 in crystal_xc_func_ensemble[crystal_convert['MgO']]:
#             in_path = f'Data_original/DFT/04-Int_Ene/MgO/{molecule}/{xc_func1}/{xc_func2}/ALL_AD_FS'
#             out_path = f'Data/08-Ecoh/MgO/{molecule.split("_")[0]+"_Monolayer"}/{xc_func1}/{xc_func2}/Monolayer'
            
#             Path(out_path).mkdir(parents=True, exist_ok=True)
#             shorten_outcar(f'{in_path}/OUTCAR', f'{out_path}/OUTCAR')

#             for monomer_idx in ['1','2','3','4']:
#                 in_path = f'Data_original/DFT/04-Int_Ene/MgO/{molecule}/{xc_func1}/{xc_func2}/{monomer_idx}_AD_FS'
#                 out_path = f'Data/08-Ecoh/MgO/{molecule.split("_")[0]+"_Monolayer"}/{xc_func1}/{xc_func2}/Monolayer_Monomer_{monomer_idx}'
                
#                 Path(out_path).mkdir(parents=True, exist_ok=True)
#                 shorten_outcar(f'{in_path}/OUTCAR', f'{out_path}/OUTCAR')

In [64]:
# Descriptive system name -> folder name used in Data_original.
# Systems missing from this table are stored under their own name.
adsorbate_config = {
    # NO
    'NO Vertical-Hollow': 'NO_1',
    'NO Vertical-Mg': 'NO_2',
    'NO Bent-Bridge': 'NO_3',
    'NO Bent-Mg': 'NO_4',
    'NO Bent-O': 'NO_5',
    'NO Dimer': 'NO_6',
    # CO2 (Physisorbed / Chemisorbed labels)
    'CO2 Physisorbed': 'CO2_2',
    'CO2 Chemisorbed': 'CO2_3',
    # CH3OH
    'CH3OH Tilted': 'CH3OH_1',
    'CH3OH Parallel': 'CH3OH_2',
    'CH3OH Tetramer': 'CH3OH_4',
    # H2O
    'H2O Monomer': 'H2O',
    'H2O Tetramer': 'H2O_4',
    # N2O
    'N2O Parallel': 'N2O_1',
    'N2O Tilted': 'N2O_2',
    # CO2 (Parallel / Tilted labels)
    'CO2 Parallel': 'CO2_1',
    'CO2 Tilted': 'CO2_2',
    # Monolayers
    'CH4 Monolayer': 'CH4_ML',
    'C2H6 Monolayer': 'C2H6_ML',
}

# Molecule-surface systems per crystal label, in processing order.
molecule_surface_systems = {
    'MgO': [
        'CH4', 'CH4 Monolayer',
        'C2H6', 'C2H6 Monolayer',
        'CO', 'C6H6',
        'N2O Parallel', 'N2O Tilted',
        'NO Vertical-Hollow', 'NO Vertical-Mg', 'NO Bent-Bridge', 'NO Bent-Mg', 'NO Bent-O', 'NO Dimer',
        'H2O Monomer', 'H2O Tetramer',
        'CH3OH Tilted', 'CH3OH Parallel', 'CH3OH Tetramer',
        'NH3',
        'CO2 Physisorbed', 'CO2 Chemisorbed',
    ],
    'r-TiO2': ['CH4', 'CO2 Parallel', 'CO2 Tilted', 'H2O', 'CH3OH'],
    'a-TiO2': ['H2O', 'NH3'],
}

# Shorten the OUTCAR of every molecule-surface system into Data/04-Molecule_Surface.
for crystal, crystal_label in crystal_convert.items():
    for molecule in molecule_surface_systems[crystal_label]:
        # Input folder name; systems without an adsorbate_config entry use their own name.
        source_name = adsorbate_config.get(molecule, molecule)
        for xc_func in crystal_xc_func_ensemble[crystal_label]:
            molecule_label = molecule.replace(' ', '_')
            in_path = f'Data_original/DFT/03-Mol_Surf/{crystal}/{source_name}/{xc_func}'
            out_path = f'Data/04-Molecule_Surface/{crystal_label}/{molecule_label}/{xc_func}'
            Path(out_path).mkdir(parents=True, exist_ok=True)
            shorten_outcar(f'{in_path}/OUTCAR', f'{out_path}/OUTCAR')

In [75]:
# Input structure folder -> output folder name for the interaction-energy runs.
structure_convert = {
    'AD_SLAB': 'Molecule-Surface',
    'SLAB_FS': 'Surface',
    'AD_FS': 'Molecule',
}

for crystal, crystal_label in crystal_convert.items():
    functionals = crystal_xc_func_ensemble[crystal_label]
    for molecule in molecule_surface_systems[crystal_label]:
        molecule_label = molecule.replace(' ', '_')
        # Input folder name; systems without an adsorbate_config entry use their own name.
        source_name = adsorbate_config.get(molecule, molecule)
        # Monolayer systems use prefixed structure folders (see below).
        is_monolayer = molecule in adsorbate_config and 'Monolayer' in molecule

        for xc_func1 in functionals:
            for xc_func2 in functionals:
                for structure, structure_label in structure_convert.items():
                    if not is_monolayer:
                        structure_dir = structure
                    elif structure == 'SLAB_FS':
                        # The bare slab of a monolayer is read from the ALL_ folder ...
                        structure_dir = f'ALL_{structure}'
                    else:
                        # ... the other two structures from the 1_ folder.
                        structure_dir = f'1_{structure}'

                    in_path = f'Data_original/DFT/04-Int_Ene/{crystal}/{source_name}/{xc_func1}/{xc_func2}/{structure_dir}'
                    out_path = f'Data/05a-Eint_and_Erlx_DFT/{crystal_label}/{molecule_label}/{xc_func1}/{xc_func2}/{structure_label}'
                    Path(out_path).mkdir(parents=True, exist_ok=True)
                    shorten_outcar(f'{in_path}/OUTCAR', f'{out_path}/OUTCAR')

In [6]:
# Transferring MP2 data

def shorten_orca(filepath_in, filepath_out):
    """Write a truncated copy of ``{filepath_in}/orca.out`` to ``filepath_out``.

    The shortened file consists of, in order:
      * input echo: one line above the first ``INPUT FILE`` heading through
        one line below the first ``END OF INPUT``;
      * SCF summary: one line above the first ``TOTAL SCF ENERGY`` through one
        line below the first ``Virial Ratio       :``;
      * ten "Output has been truncated" banner blocks;
      * the last ``RI-MP2 CORRELATION ENERGY:`` line with one line of context
        on either side;
      * seven lines above the last ``FINAL SINGLE POINT ENERGY`` through one
        line below it;
      * the very last line of the input file.

    Args:
        filepath_in: Directory that contains ``orca.out``.
        filepath_out: Path of the shortened file (overwritten if it exists).

    Raises:
        ValueError: If any of the six markers is missing from the input.
            Nothing is written in that case.
    """
    source = f'{filepath_in}/orca.out'
    with open(source, 'r', encoding="ISO-8859-1") as f:
        lines = f.readlines()

    def locate(marker, from_end=False):
        # Index of the first (or, with from_end, the last) line containing marker.
        order = range(len(lines) - 1, -1, -1) if from_end else range(len(lines))
        for idx in order:
            if marker in lines[idx]:
                return idx
        raise ValueError(f'marker {marker!r} not found in {source}')

    def window(first, last, before=1):
        # Clamp the start so a marker near the top cannot produce a negative index.
        return lines[max(first - before, 0):last + 2]

    idx_start_start = locate('                                       INPUT FILE')
    idx_start_end = locate('END OF INPUT')
    idx_middle_start = locate('TOTAL SCF ENERGY')
    idx_middle_end = locate('Virial Ratio       :')
    idx_end_start = locate('RI-MP2 CORRELATION ENERGY:', from_end=True)
    idx_end_end = locate('FINAL SINGLE POINT ENERGY', from_end=True)

    input_echo = window(idx_start_start, idx_start_end)
    scf_summary = window(idx_middle_start, idx_middle_end)
    mp2_energy = window(idx_end_start, idx_end_start)
    final_energy = window(idx_end_end, idx_end_end, before=7)

    with open(filepath_out, 'w') as f:
        f.writelines(input_echo)
        f.writelines(scf_summary)
        for _ in range(10):
            f.write("--------------------Output has been truncated-------------------------\n\n\n")
        f.writelines(mp2_energy)
        f.writelines(final_energy)
        # Repeat the closing line of the original output.
        f.write(lines[-1])

# Input structure folder -> output folder name for the cluster calculations.
structure_convert = {
    'AD_SLAB': 'Molecule-Surface',
    'SLAB_CP': 'Surface',
    'AD_CP': 'Molecule',
}

# Three cluster-size limits per crystal label; the loops index them as [0], [1], [2].
crystal_skzcam_cluster_size = {
    'MgO': [7, 5, 3],
    'r-TiO2': [9, 7, 4],
    'a-TiO2': [9, 7, 4],
}

# Descriptive system name -> folder name used in Data_original.
# Systems missing from this table are stored under their own name.
adsorbate_config = {
    # NO
    'NO Vertical-Hollow': 'NO_1',
    'NO Vertical-Mg': 'NO_2',
    'NO Bent-Bridge': 'NO_3',
    'NO Bent-Mg': 'NO_4',
    'NO Bent-O': 'NO_5',
    'NO Dimer': 'NO_6',
    # CO2 (Physisorbed / Chemisorbed labels)
    'CO2 Physisorbed': 'CO2_2',
    'CO2 Chemisorbed': 'CO2_3',
    # CH3OH
    'CH3OH Tilted': 'CH3OH_1',
    'CH3OH Parallel': 'CH3OH_2',
    'CH3OH Tetramer': 'CH3OH_4',
    # H2O
    'H2O Monomer': 'H2O',
    'H2O Tetramer': 'H2O_4',
    # N2O
    'N2O Parallel': 'N2O_1',
    'N2O Tilted': 'N2O_2',
    # CO2 (Parallel / Tilted labels)
    'CO2 Parallel': 'CO2_1',
    'CO2 Tilted': 'CO2_2',
    # Monolayers
    'CH4 Monolayer': 'CH4_ML',
    'C2H6 Monolayer': 'C2H6_ML',
}

# Molecule-surface systems per crystal label, in processing order.
molecule_surface_systems = {
    'MgO': [
        'CH4', 'CH4 Monolayer',
        'C2H6', 'C2H6 Monolayer',
        'CO', 'C6H6',
        'N2O Parallel', 'N2O Tilted',
        'NO Vertical-Hollow', 'NO Vertical-Mg', 'NO Bent-Bridge', 'NO Bent-Mg', 'NO Bent-O', 'NO Dimer',
        'H2O Monomer', 'H2O Tetramer',
        'CH3OH Tilted', 'CH3OH Parallel', 'CH3OH Tetramer',
        'NH3',
        'CO2 Physisorbed', 'CO2 Chemisorbed',
    ],
    'r-TiO2': ['CH4', 'CO2 Parallel', 'CO2 Tilted', 'H2O', 'CH3OH'],
    'a-TiO2': ['H2O', 'NH3'],
}

for crystal in crystal_convert:
    for molecule in molecule_surface_systems[crystal_convert[crystal]]:
        for skzcam_cluster_size in range(1, crystal_skzcam_cluster_size[crystal_convert[crystal]][0]+1):
            for structure in ['AD_SLAB', 'SLAB_CP','AD_CP']:
                if molecule in adsorbate_config:
                    molecule_label_in = adsorbate_config[molecule]
                else:
                    molecule_label_in = molecule
                molecule_label_out = molecule.replace(' ', '_')

                if 'NO' in molecule and molecule != 'NO Dimer':
                    if crystal == 'MgO':
                        in_path = f'Data_original/cWFT/MP2/{crystal}/{molecule_label_in}/{skzcam_cluster_size}/DZ/{structure}'
                        out_path = f'Data/05b-Eint_SKZCAM/{crystal_convert[crystal]}/{molecule_label_out}/Cluster_{skzcam_cluster_size}/{structure_convert[structure]}'                    
                        Path(out_path).mkdir(parents=True, exist_ok=True)
                        shorten_orca(f'{in_path}', f'{out_path}/MP2_aVDZ.orca.out')
                        shutil.copy(f'{in_path}/orca.bq',f'{out_path}/orca.pointcharges')
                    if crystal == 'MgO' and skzcam_cluster_size < crystal_skzcam_cluster_size[crystal_convert[crystal]][1]+1:
                        in_path = f'Data_original/cWFT/MP2/{crystal}/{molecule_label_in}/{skzcam_cluster_size}/TZ/{structure}'
                        out_path = f'Data/05b-Eint_SKZCAM/{crystal_convert[crystal]}/{molecule_label_out}/Cluster_{skzcam_cluster_size}/{structure_convert[structure]}'                    
                        Path(out_path).mkdir(parents=True, exist_ok=True)
                        shorten_orca(f'{in_path}', f'{out_path}/MP2_aVTZ.orca.out')
                    if crystal == 'MgO' and skzcam_cluster_size < crystal_skzcam_cluster_size[crystal_convert[crystal]][2]+1:
                        in_path = f'Data_original/cWFT/MP2/{crystal}/{molecule_label_in}/{skzcam_cluster_size}/QZ/{structure}'
                        out_path = f'Data/05b-Eint_SKZCAM/{crystal_convert[crystal]}/{molecule_label_out}/Cluster_{skzcam_cluster_size}/{structure_convert[structure]}'                    
                        Path(out_path).mkdir(parents=True, exist_ok=True)
                        shorten_orca(f'{in_path}', f'{out_path}/MP2_aVQZ.orca.out')    
                    if crystal == 'MgO' and skzcam_cluster_size < crystal_skzcam_cluster_size[crystal_convert[crystal]][2]+1:
                        in_path = f'Data_original/cWFT/MP2/{crystal}/{molecule_label_in}/{skzcam_cluster_size}/DZ_Mg_Hecore/{structure}'
                        out_path = f'Data/05b-Eint_SKZCAM/{crystal_convert[crystal]}/{molecule_label_out}/Cluster_{skzcam_cluster_size}/{structure_convert[structure]}'                    
                        Path(out_path).mkdir(parents=True, exist_ok=True)
                        shorten_orca(f'{in_path}', f'{out_path}/MP2_awCVDZ.orca.out')    
                    if crystal == 'MgO' and skzcam_cluster_size < crystal_skzcam_cluster_size[crystal_convert[crystal]][2]+1:
                        in_path = f'Data_original/cWFT/MP2/{crystal}/{molecule_label_in}/{skzcam_cluster_size}/TZ_Mg_Hecore/{structure}'
                        out_path = f'Data/05b-Eint_SKZCAM/{crystal_convert[crystal]}/{molecule_label_out}/Cluster_{skzcam_cluster_size}/{structure_convert[structure]}'                    
                        Path(out_path).mkdir(parents=True, exist_ok=True)
                        shorten_orca(f'{in_path}', f'{out_path}/MP2_awCVTZ.orca.out')
                if 'NO' in molecule and molecule == 'NO Dimer':
                    if crystal == 'MgO':
                        in_path = f'Data_original/cWFT/MP2/{crystal}/{molecule_label_in}_B3LYP/{skzcam_cluster_size}/DZ/{structure}'
                        out_path = f'Data/05b-Eint_SKZCAM/{crystal_convert[crystal]}/{molecule_label_out}/Cluster_{skzcam_cluster_size}/{structure_convert[structure]}'                    
                        Path(out_path).mkdir(parents=True, exist_ok=True)
                        shorten_orca(f'{in_path}', f'{out_path}/MP2_awCVDZ.orca.out')
                        shutil.copy(f'{in_path}/orca.bq',f'{out_path}/orca.pointcharges')
                    if crystal == 'MgO' and skzcam_cluster_size < crystal_skzcam_cluster_size[crystal_convert[crystal]][1]+1:
                        in_path = f'Data_original/cWFT/MP2/{crystal}/{molecule_label_in}_B3LYP/{skzcam_cluster_size}/TZ/{structure}'
                        out_path = f'Data/05b-Eint_SKZCAM/{crystal_convert[crystal]}/{molecule_label_out}/Cluster_{skzcam_cluster_size}/{structure_convert[structure]}'                    
                        Path(out_path).mkdir(parents=True, exist_ok=True)
                        shorten_orca(f'{in_path}', f'{out_path}/MP2_awCVTZ.orca.out')
                    if crystal == 'MgO' and skzcam_cluster_size < crystal_skzcam_cluster_size[crystal_convert[crystal]][2]+1:
                        in_path = f'Data_original/cWFT/MP2/{crystal}/{molecule_label_in}_B3LYP/{skzcam_cluster_size}/QZ/{structure}'
                        out_path = f'Data/05b-Eint_SKZCAM/{crystal_convert[crystal]}/{molecule_label_out}/Cluster_{skzcam_cluster_size}/{structure_convert[structure]}'                    
                        Path(out_path).mkdir(parents=True, exist_ok=True)
                        shorten_orca(f'{in_path}', f'{out_path}/MP2_awCVQZ.orca.out')                          


                else:
                    if crystal == 'MgO':
                        in_path = f'Data_original/cWFT/MP2/{crystal}/{molecule_label_in}/{skzcam_cluster_size}/DZ/{structure}'
                        out_path = f'Data/05b-Eint_SKZCAM/{crystal_convert[crystal]}/{molecule_label_out}/Cluster_{skzcam_cluster_size}/{structure_convert[structure]}'                    
                        Path(out_path).mkdir(parents=True, exist_ok=True)
                        shorten_orca(f'{in_path}', f'{out_path}/MP2_awCVDZ.orca.out')
                        shutil.copy(f'{in_path}/orca.bq',f'{out_path}/orca.pointcharges')
                    if crystal == 'MgO' and skzcam_cluster_size < crystal_skzcam_cluster_size[crystal_convert[crystal]][1]+1:
                        in_path = f'Data_original/cWFT/MP2/{crystal}/{molecule_label_in}/{skzcam_cluster_size}/TZ/{structure}'
                        out_path = f'Data/05b-Eint_SKZCAM/{crystal_convert[crystal]}/{molecule_label_out}/Cluster_{skzcam_cluster_size}/{structure_convert[structure]}'                    
                        Path(out_path).mkdir(parents=True, exist_ok=True)
                        shorten_orca(f'{in_path}', f'{out_path}/MP2_awCVTZ.orca.out')
                    if crystal == 'MgO' and skzcam_cluster_size < crystal_skzcam_cluster_size[crystal_convert[crystal]][2]+1:
                        in_path = f'Data_original/cWFT/MP2/{crystal}/{molecule_label_in}/{skzcam_cluster_size}/QZ/{structure}'
                        out_path = f'Data/05b-Eint_SKZCAM/{crystal_convert[crystal]}/{molecule_label_out}/Cluster_{skzcam_cluster_size}/{structure_convert[structure]}'                    
                        Path(out_path).mkdir(parents=True, exist_ok=True)
                        shorten_orca(f'{in_path}', f'{out_path}/MP2_awCVQZ.orca.out')                

                if skzcam_cluster_size == 1:
                    tio2_skzcam_cluster_size = '-2'
                elif skzcam_cluster_size == 2:
                    tio2_skzcam_cluster_size = '-1'
                else:
                    tio2_skzcam_cluster_size = skzcam_cluster_size - 2

                
                if 'TiO2' in crystal:
                    in_path = f'Data_original/cWFT/MP2/{crystal}/{molecule_label_in}/{skzcam_cluster_size}/DZ/{structure}'
                    out_path = f'Data/05b-Eint_SKZCAM/{crystal_convert[crystal]}/{molecule_label_out}/Cluster_{tio2_skzcam_cluster_size}/{structure_convert[structure]}'                    
                    Path(out_path).mkdir(parents=True, exist_ok=True)
                    shorten_orca(f'{in_path}', f'{out_path}/MP2_aVDZ.orca.out')
                    shutil.copy(f'{in_path}/orca.bq',f'{out_path}/orca.pointcharges')

                if 'TiO2' in crystal and skzcam_cluster_size < crystal_skzcam_cluster_size[crystal_convert[crystal]][1]+1:
                    in_path = f'Data_original/cWFT/MP2/{crystal}/{molecule_label_in}/{skzcam_cluster_size}/TZ/{structure}'
                    out_path = f'Data/05b-Eint_SKZCAM/{crystal_convert[crystal]}/{molecule_label_out}/Cluster_{tio2_skzcam_cluster_size}/{structure_convert[structure]}'                    
                    Path(out_path).mkdir(parents=True, exist_ok=True)
                    shorten_orca(f'{in_path}', f'{out_path}/MP2_aVTZ.orca.out')


                if 'TiO2' in crystal and skzcam_cluster_size < crystal_skzcam_cluster_size[crystal_convert[crystal]][2]+1:
                    in_path = f'Data_original/cWFT/MP2/{crystal}/{molecule_label_in}/{skzcam_cluster_size}/QZ/{structure}'
                    out_path = f'Data/05b-Eint_SKZCAM/{crystal_convert[crystal]}/{molecule_label_out}/Cluster_{tio2_skzcam_cluster_size}/{structure_convert[structure]}'                    
                    Path(out_path).mkdir(parents=True, exist_ok=True)
                    shorten_orca(f'{in_path}', f'{out_path}/MP2_aVQZ.orca.out')

                if 'TiO2' in crystal and skzcam_cluster_size < crystal_skzcam_cluster_size[crystal_convert[crystal]][2]+1:
                    in_path = f'Data_original/cWFT/MP2/{crystal}/{molecule_label_in}/{skzcam_cluster_size}/TZ_Ti_Necore/{structure}'
                    out_path = f'Data/05b-Eint_SKZCAM/{crystal_convert[crystal]}/{molecule_label_out}/Cluster_{tio2_skzcam_cluster_size}/{structure_convert[structure]}'                    
                    Path(out_path).mkdir(parents=True, exist_ok=True)
                    shorten_orca(f'{in_path}', f'{out_path}/MP2_awCVTZ.orca.out')
                    shutil.copy(f'{in_path}/orca.bq',f'{out_path}/orca.pointcharges')

                if 'TiO2' in crystal and skzcam_cluster_size < crystal_skzcam_cluster_size[crystal_convert[crystal]][2]+1:
                    in_path = f'Data_original/cWFT/MP2/{crystal}/{molecule_label_in}/{skzcam_cluster_size}/TZ_Ti_Necore/{structure}'
                    out_path = f'Data/05b-Eint_SKZCAM/{crystal_convert[crystal]}/{molecule_label_out}/Cluster_{tio2_skzcam_cluster_size}/{structure_convert[structure]}'                    
                    Path(out_path).mkdir(parents=True, exist_ok=True)
                    shorten_orca(f'{in_path}', f'{out_path}/MP2_awCVQZ.orca.out')
                    shutil.copy(f'{in_path}/orca.bq',f'{out_path}/orca.pointcharges')

In [5]:
# Let's copy the CCSD(T) calculations

# Helpers that shorten the ORCA and MRCC output files before they are copied

def shorten_dlpnocc(filepath_in, filepath_out):
    """Write a condensed copy of the ORCA coupled-cluster output ``orca_cc.out``.

    The shortened file is made of, in order:

    * the echoed input: from one line before the first ``INPUT FILE`` banner
      to one line after the first ``END OF INPUT`` line;
    * the SCF summary: from one line before the first ``TOTAL SCF ENERGY``
      line to one line after the first ``Virial Ratio`` line;
    * ten "Output has been truncated" banners;
    * a "CCSD Data" section holding the six lines that follow the last
      ``The CCSD iterations have converged`` line;
    * a "CCSD(T) Data" section holding three lines starting at the last
      ``Final correlation energy`` line.

    Parameters
    ----------
    filepath_in : str
        Directory that contains ``orca_cc.out``.
    filepath_out : str
        Path of the shortened file to write (its directory must exist).

    Raises
    ------
    ValueError
        If ``orca_cc.out`` is empty.

    Notes
    -----
    The file is read *and written* as ISO-8859-1 so that every byte of the
    retained lines is reproduced unchanged.  When a marker is missing a
    warning is emitted and the historical fallback index is used (last line
    for forward searches, line 1 for backward searches).
    """
    import warnings  # function-scope import: only needed for the missing-marker notice

    source_file = f'{filepath_in}/orca_cc.out'
    with open(source_file, 'r', encoding="ISO-8859-1") as f:
        lines = f.readlines()
    if not lines:
        raise ValueError(f'{source_file} is empty; there is nothing to shorten')

    def find_line(marker, last=False):
        """Index of the first (or, with ``last=True``, the last) line containing ``marker``."""
        # The backward scan stops before line 0, exactly like the original loop.
        candidates = range(len(lines) - 1, 0, -1) if last else range(len(lines))
        for idx in candidates:
            if marker in lines[idx]:
                return idx
        fallback = 1 if last else len(lines) - 1
        warnings.warn(
            f'{marker.strip()!r} not found in {source_file}; using line index {fallback}',
            stacklevel=3,
        )
        return fallback

    idx_start_start = find_line('                                       INPUT FILE')
    idx_start_end = find_line('END OF INPUT')
    idx_middle_start = find_line('TOTAL SCF ENERGY')
    idx_middle_end = find_line('Virial Ratio       :')
    idx_end_start = find_line('The CCSD iterations have converged', last=True)
    idx_end_end = find_line('Final correlation energy', last=True)

    # max(..., 0): a marker on the very first line must not turn the slice
    # start into -1 (which would select from the end of the file).
    input_section = lines[max(idx_start_start - 1, 0):idx_start_end + 2]
    scf_section = lines[max(idx_middle_start - 1, 0):idx_middle_end + 2]
    energy_section = (
        ["\n-------- CCSD Data ------------\n"]
        + lines[idx_end_start + 1:idx_end_start + 7]
        + ["\n-------- CCSD(T) Data ------------\n"]
        + lines[idx_end_end:idx_end_end + 3]
    )

    # Write the shortened output with the same encoding it was read with.
    with open(filepath_out, 'w', encoding="ISO-8859-1") as f:
        f.writelines(input_section)
        f.writelines(scf_section)
        for dummy_idx in range(10):
            f.write("--------------------Output has been truncated-------------------------\n\n\n")
        f.writelines(energy_section)


def shorten_dlpnomp2(filepath_in, filepath_out):
    """Write a condensed copy of the ORCA MP2 output ``orca_mp2.out``.

    The shortened file is made of, in order:

    * the echoed input: from one line before the first ``INPUT FILE`` banner
      to one line after the first ``END OF INPUT`` line;
    * the SCF summary: from one line before the first ``TOTAL SCF ENERGY``
      line to one line after the first ``Virial Ratio`` line;
    * ten "Output has been truncated" banners;
    * three lines centred on the last ``DLPNO-MP2 CORRELATION ENERGY:`` line.

    Parameters
    ----------
    filepath_in : str
        Directory that contains ``orca_mp2.out``.
    filepath_out : str
        Path of the shortened file to write (its directory must exist).

    Raises
    ------
    ValueError
        If ``orca_mp2.out`` is empty.

    Notes
    -----
    The file is read *and written* as ISO-8859-1 so that every byte of the
    retained lines is reproduced unchanged.  When a marker is missing a
    warning is emitted and the historical fallback index is used (last line
    for forward searches, line 1 for the backward search).
    """
    import warnings  # function-scope import: only needed for the missing-marker notice

    source_file = f'{filepath_in}/orca_mp2.out'
    with open(source_file, 'r', encoding="ISO-8859-1") as f:
        lines = f.readlines()
    if not lines:
        raise ValueError(f'{source_file} is empty; there is nothing to shorten')

    def find_line(marker, last=False):
        """Index of the first (or, with ``last=True``, the last) line containing ``marker``."""
        # The backward scan stops before line 0, exactly like the original loop.
        candidates = range(len(lines) - 1, 0, -1) if last else range(len(lines))
        for idx in candidates:
            if marker in lines[idx]:
                return idx
        fallback = 1 if last else len(lines) - 1
        warnings.warn(
            f'{marker.strip()!r} not found in {source_file}; using line index {fallback}',
            stacklevel=3,
        )
        return fallback

    idx_start_start = find_line('                                       INPUT FILE')
    idx_start_end = find_line('END OF INPUT')
    idx_middle_start = find_line('TOTAL SCF ENERGY')
    idx_middle_end = find_line('Virial Ratio       :')
    idx_end_start = find_line('DLPNO-MP2 CORRELATION ENERGY:', last=True)

    # max(..., 0): a marker on the very first line must not turn the slice
    # start into -1 (which would select from the end of the file).
    input_section = lines[max(idx_start_start - 1, 0):idx_start_end + 2]
    scf_section = lines[max(idx_middle_start - 1, 0):idx_middle_end + 2]
    # idx_end_start is always >= 1 (see find_line), so no clamp is needed here.
    energy_section = lines[idx_end_start - 1:idx_end_start + 2]

    # Write the shortened output with the same encoding it was read with.
    with open(filepath_out, 'w', encoding="ISO-8859-1") as f:
        f.writelines(input_section)
        f.writelines(scf_section)
        for dummy_idx in range(10):
            f.write("--------------------Output has been truncated-------------------------\n\n\n")
        f.writelines(energy_section)

def shorten_lnocc(filepath_in, filepath_out):
    """Write a condensed copy of the MRCC local coupled-cluster output ``mrcc.out``.

    The shortened file is made of, in order:

    * everything from the top of the file up to nine lines after the first
      line containing ``ghost=serialno``;
    * ten "Output has been truncated" banners;
    * everything from one line before the last ``Final local CC results:``
      line to the end of the file.

    If the two kept ranges overlap (short files), the shared lines appear in
    both sections, as before.

    Parameters
    ----------
    filepath_in : str
        Directory that contains ``mrcc.out``.
    filepath_out : str
        Path of the shortened file to write (its directory must exist).

    Raises
    ------
    ValueError
        If ``mrcc.out`` is empty.

    Notes
    -----
    The file is read *and written* as ISO-8859-1 so that every byte of the
    retained lines is reproduced unchanged.  When a marker is missing a
    warning is emitted and the historical fallback index is used (last line
    for the forward search, line 1 for the backward search).
    """
    import warnings  # function-scope import: only needed for the missing-marker notice

    source_file = f'{filepath_in}/mrcc.out'
    with open(source_file, 'r', encoding="ISO-8859-1") as f:
        lines = f.readlines()
    if not lines:
        raise ValueError(f'{source_file} is empty; there is nothing to shorten')

    # Forward search: first line that carries the input keyword.
    idx_start = next(
        (idx for idx, line in enumerate(lines) if 'ghost=serialno' in line),
        None,
    )
    if idx_start is None:
        idx_start = len(lines) - 1
        warnings.warn(
            f"'ghost=serialno' not found in {source_file}; using line index {idx_start}",
            stacklevel=2,
        )

    # Backward search: last results header.  Line 0 is never examined, so
    # idx_end >= 1 and the ``idx_end - 1`` slice start below cannot go negative.
    idx_end = next(
        (idx for idx in range(len(lines) - 1, 0, -1) if 'Final local CC results:' in lines[idx]),
        None,
    )
    if idx_end is None:
        idx_end = 1
        warnings.warn(
            f"'Final local CC results:' not found in {source_file}; using line index {idx_end}",
            stacklevel=2,
        )

    head_section = lines[:idx_start + 10]
    results_section = lines[idx_end - 1:]

    # Write the shortened output with the same encoding it was read with.
    with open(filepath_out, 'w', encoding="ISO-8859-1") as f:
        f.writelines(head_section)
        for dummy_idx in range(10):
            f.write("--------------------Output has been truncated-------------------------\n\n\n")
        f.writelines(results_section)


def shorten_dfcc(filepath_in, filepath_out):
    """Write a condensed copy of the MRCC canonical coupled-cluster output ``mrcc.out``.

    The shortened file is made of, in order:

    * everything from the top of the file up to nine lines after the first
      line containing ``ghost=serialno`` or ``qmmm=Amber``;
    * ten "Output has been truncated" banners;
    * everything from one line before the last `` Executing ccsd...`` line
      to the end of the file.

    If the two kept ranges overlap (short files), the shared lines appear in
    both sections, as before.

    Parameters
    ----------
    filepath_in : str
        Directory that contains ``mrcc.out``.
    filepath_out : str
        Path of the shortened file to write (its directory must exist).

    Raises
    ------
    ValueError
        If ``mrcc.out`` is empty.

    Notes
    -----
    The file is read *and written* as ISO-8859-1 so that every byte of the
    retained lines is reproduced unchanged.  When a marker is missing a
    warning is emitted and the historical fallback index is used (last line
    for the forward search, line 1 for the backward search).
    """
    import warnings  # function-scope import: only needed for the missing-marker notice

    source_file = f'{filepath_in}/mrcc.out'
    with open(source_file, 'r', encoding="ISO-8859-1") as f:
        lines = f.readlines()
    if not lines:
        raise ValueError(f'{source_file} is empty; there is nothing to shorten')

    # Forward search: first line that carries either input keyword.
    start_markers = ('ghost=serialno', 'qmmm=Amber')
    idx_start = next(
        (idx for idx, line in enumerate(lines) if any(marker in line for marker in start_markers)),
        None,
    )
    if idx_start is None:
        idx_start = len(lines) - 1
        warnings.warn(
            f"none of {start_markers} found in {source_file}; using line index {idx_start}",
            stacklevel=2,
        )

    # Backward search: last CCSD launch line.  Line 0 is never examined, so
    # idx_end >= 1 and the ``idx_end - 1`` slice start below cannot go negative.
    idx_end = next(
        (idx for idx in range(len(lines) - 1, 0, -1) if ' Executing ccsd...' in lines[idx]),
        None,
    )
    if idx_end is None:
        idx_end = 1
        warnings.warn(
            f"' Executing ccsd...' not found in {source_file}; using line index {idx_end}",
            stacklevel=2,
        )

    head_section = lines[:idx_start + 10]
    results_section = lines[idx_end - 1:]

    # Write the shortened output with the same encoding it was read with.
    with open(filepath_out, 'w', encoding="ISO-8859-1") as f:
        f.writelines(head_section)
        for dummy_idx in range(10):
            f.write("--------------------Output has been truncated-------------------------\n\n\n")
        f.writelines(results_section)



# Output folder name for each structure label of the CCSD(T) input tree.
structure_convert = dict(
    AD_SLAB='Molecule-Surface',
    SLAB_CP='Surface',
    AD_CP='Molecule',
)

# Per-crystal cluster counts. Entry [0] is the upper bound of the cluster loop
# that follows; entry [1] is not read in this cell.
crystal_skzcam_cluster_size = dict([
    ('MgO', [3, 1]),
    ('r-TiO2', [4, 1]),
    ('a-TiO2', [4, 1]),
])

# Molecules on MgO for which the canonical CCSD(T) outputs of the smallest
# cluster are copied as well.
MGO_CANONICAL_MOLECULES = ['CH4', 'C2H6', 'CO', 'N2O Parallel', 'N2O Tilted', 'NO Dimer', 'H2O Monomer', 'CH3OH Tilted', 'CH3OH Parallel', 'NH3', 'CO2 Physisorbed']


def _export_shortened(shorten, in_path, out_path, out_name, extra_copies=()):
    """Shorten one calculation output into ``out_path`` and copy auxiliary files.

    Parameters
    ----------
    shorten : callable
        One of the ``shorten_*`` helpers; called as ``shorten(in_path, target)``.
    in_path : str
        Directory holding the original calculation.
    out_path : str
        Destination directory (created, with parents, if missing).
    out_name : str
        File name of the shortened output inside ``out_path``.
    extra_copies : iterable of (str, str)
        ``(source name in in_path, destination name in out_path)`` pairs that
        are copied verbatim after the shortened output has been written.
    """
    Path(out_path).mkdir(parents=True, exist_ok=True)
    shorten(f'{in_path}', f'{out_path}/{out_name}')
    for src_name, dst_name in extra_copies:
        shutil.copy(f'{in_path}/{src_name}', f'{out_path}/{dst_name}')


# Transfer the CCSD(T) outputs.  The two outer loops are currently restricted
# to one crystal and one molecule; the full iterables are kept as comments.
for crystal in ['MgO']:  # crystal_convert:
    for molecule in ['NO Dimer']:  # molecule_surface_systems[crystal_convert[crystal]]:
        n_clusters = crystal_skzcam_cluster_size[crystal_convert[crystal]][0]
        for skzcam_cluster_size in range(1, n_clusters + 1):
            for structure in ['AD_SLAB', 'SLAB_CP', 'AD_CP']:
                # Directory label in Data_original vs. folder label in Data.
                molecule_label_in = adsorbate_config.get(molecule, molecule)
                molecule_label_out = molecule.replace(' ', '_')

                ccsdt_root = f'Data_original/cWFT/CCSDT/{crystal}'
                out_root = f'Data/05b-Eint_SKZCAM/{crystal_convert[crystal]}/{molecule_label_out}'
                out_path = f'{out_root}/Cluster_{skzcam_cluster_size}/{structure_convert[structure]}'

                is_no_monomer = 'NO' in molecule and molecule != 'NO Dimer'
                is_no_dimer = molecule == 'NO Dimer'

                # The three cases are mutually exclusive.  Previously the
                # generic branch hung off the NO-dimer test only, so it also
                # ran for the NO monomers after their ORCA outputs had been
                # handled and looked for mrcc.out in the ORCA directories.
                if is_no_monomer:
                    # NO monomers: ORCA outputs, smallest cluster only.
                    if skzcam_cluster_size > 1:
                        continue
                    if crystal == 'MgO':
                        local_dir = f'{ccsdt_root}/{molecule_label_in}/{skzcam_cluster_size}_Local'
                        _export_shortened(shorten_dlpnocc, f'{local_dir}/DZ/{structure}', out_path,
                                          'DLPNOCCSDT_aVDZ.orca.out', [('orca.bq', 'orca.pointcharges')])
                        _export_shortened(shorten_dlpnocc, f'{local_dir}/TZ/{structure}', out_path,
                                          'DLPNOCCSDT_aVTZ.orca.out')
                        _export_shortened(shorten_dlpnomp2, f'{local_dir}/DZ/{structure}', out_path,
                                          'DLPNOMP2_aVDZ.orca.out')
                        _export_shortened(shorten_dlpnomp2, f'{local_dir}/TZ/{structure}', out_path,
                                          'DLPNOMP2_aVTZ.orca.out')
                else:
                    # NO dimer and all other molecules: MRCC outputs.  The NO
                    # dimer is read from the "_B3LYP" directories and its
                    # third cluster is skipped.
                    if is_no_dimer and skzcam_cluster_size == 3:
                        continue
                    if crystal == 'MgO':
                        source_label = f'{molecule_label_in}_B3LYP' if is_no_dimer else molecule_label_in
                        for basis in ('DZ', 'TZ'):
                            _export_shortened(
                                shorten_lnocc,
                                f'{ccsdt_root}/{source_label}/{skzcam_cluster_size}_Local/{basis}/{structure}',
                                out_path,
                                f'LNOCCSDT_awCV{basis}.mrcc.out',
                                [('MINP', 'LNOCCSDT.mrcc.input')],
                            )
                        if skzcam_cluster_size == 1 and molecule in MGO_CANONICAL_MOLECULES:
                            for basis in ('DZ', 'TZ'):
                                _export_shortened(
                                    shorten_dfcc,
                                    f'{ccsdt_root}/{source_label}/{skzcam_cluster_size}_Canonical/{basis}/{structure}',
                                    out_path,
                                    f'CCSDT_awCV{basis}.mrcc.out',
                                    [('MINP', 'CCSDT.mrcc.input')],
                                )

                # TiO2 output folders are numbered -2, -1, 1, 2, ... for
                # input clusters 1, 2, 3, 4, ...
                tio2_skzcam_cluster_size = {1: '-2', 2: '-1'}.get(skzcam_cluster_size, skzcam_cluster_size - 2)

                if 'TiO2' in crystal:
                    tio2_in_root = f'{ccsdt_root}/{molecule_label_in}'
                    tio2_out_path = f'{out_root}/Cluster_{tio2_skzcam_cluster_size}/{structure_convert[structure]}'
                    for basis in ('DZ', 'TZ'):
                        _export_shortened(
                            shorten_lnocc,
                            f'{tio2_in_root}/{skzcam_cluster_size}_Local/{basis}/{structure}',
                            tio2_out_path,
                            f'LNOCCSDT_aV{basis}.mrcc.out',
                            [('MINP', 'LNOCCSDT.mrcc.input')],
                        )
                    if tio2_skzcam_cluster_size == '-2':
                        _export_shortened(
                            shorten_dfcc,
                            f'{tio2_in_root}/{skzcam_cluster_size}_Canonical/DZ/{structure}',
                            tio2_out_path,
                            'CCSDT_aVDZ.mrcc.out',
                            [('MINP', 'CCSDT.mrcc.input')],
                        )
                        # NOTE(review): unlike every other DZ/TZ pair in this
                        # cell, the TZ input (MINP) is not copied here, so the
                        # DZ input is the one kept - confirm this is intended.
                        _export_shortened(
                            shorten_dfcc,
                            f'{tio2_in_root}/{skzcam_cluster_size}_Canonical/TZ/{structure}',
                            tio2_out_path,
                            'CCSDT_aVTZ.mrcc.out',
                        )
            




In [3]:
# Display the mapping from molecule names to the directory labels used under Data_original.
adsorbate_config

{'NO Vertical-Hollow': 'NO_1',
 'NO Vertical-Mg': 'NO_2',
 'NO Bent-Bridge': 'NO_3',
 'NO Bent-Mg': 'NO_4',
 'NO Bent-O': 'NO_5',
 'NO Dimer': 'NO_6',
 'CO2 Physisorbed': 'CO2_2',
 'CO2 Chemisorbed': 'CO2_3',
 'CH3OH Tilted': 'CH3OH_1',
 'CH3OH Parallel': 'CH3OH_2',
 'CH3OH Tetramer': 'CH3OH_4',
 'H2O Monomer': 'H2O',
 'H2O Tetramer': 'H2O_4',
 'N2O Parallel': 'N2O_1',
 'N2O Tilted': 'N2O_2',
 'CO2 Parallel': 'CO2_1',
 'CO2 Tilted': 'CO2_2',
 'CH4 Monolayer': 'CH4_ML',
 'C2H6 Monolayer': 'C2H6_ML'}

In [4]:
# Copy the vibrational output

# Output folder name for each structure label of the vibrational input tree.
structure_convert = dict([
    ('AD_SLAB', 'Molecule-Surface'),
    ('SLAB_FS', 'Surface'),
    ('AD', 'Molecule'),
])

for crystal in crystal_convert:
    for molecule in molecule_surface_systems[crystal_convert[crystal]]:
        # Directory label of this molecule under Data_original/DFT/05-Vib;
        # chemisorbed CO2 is read from its own "CO2_3_AD" directory.
        if molecule == 'CO2 Chemisorbed':
            vib_label = 'CO2_3_AD'
        else:
            vib_label = adsorbate_config.get(molecule, molecule)
        molecule_label = molecule.replace(' ', '_')

        for xc_func in crystal_xc_func_ensemble[crystal_convert[crystal]]:
            # Functionals whose label starts with 05 or 06 are not copied.
            if xc_func[:2] == '05' or xc_func[:2] == '06':
                continue

            for structure in ('AD', 'AD_SLAB'):
                in_path = f'Data_original/DFT/05-Vib/{crystal}/{vib_label}/{xc_func}/{structure}'
                out_path = f'Data/06-Etherm_and_EZPV_DFT/{crystal_convert[crystal]}/{molecule_label}/{xc_func}/{structure_convert[structure]}'
                Path(out_path).mkdir(parents=True, exist_ok=True)
                shorten_outcar(f'{in_path}/OUTCAR', f'{out_path}/OUTCAR')

In [125]:
# Copy the 2L test calculations.
# Copy the vibrational output

# Output folder name for each structure label of the 2L test input tree.
structure_convert = dict(zip(
    ('AD_SLAB', 'SLAB', 'AD'),
    ('Molecule-Surface', 'Surface', 'Molecule'),
))

for crystal in ['MgO']:  # crystal_convert:
    for molecule in ['CO', 'CO2 Physisorbed', 'CO2 Chemisorbed', 'H2O Monomer']:  # molecule_surface_systems[crystal_convert[crystal]]:
        for xc_func in crystal_xc_func_ensemble[crystal_convert[crystal]]:
            # Functionals whose label starts with 05 or 06 are not copied.
            if xc_func[:2] == '05' or xc_func[:2] == '06':
                continue

            for structure in ['AD', 'AD_SLAB', 'SLAB']:
                vib_label = adsorbate_config[molecule] if molecule in adsorbate_config else molecule
                # The isolated molecule ('AD') is read from the regular
                # vibrational tree; the other structures from "2L_Test".
                test_subdir = '' if structure == 'AD' else '2L_Test/'
                in_path = f'Data_original/DFT/05-Vib/{crystal}/{test_subdir}{vib_label}/{xc_func}/{structure}'

                molecule_label = molecule.replace(' ', '_')
                out_path = f'Data/Miscellaneous/Etherm_and_EZPV_Surface_Test/{crystal_convert[crystal]}/{molecule_label}/{xc_func}/{structure_convert[structure]}'
                Path(out_path).mkdir(parents=True, exist_ok=True)
                shorten_outcar(f'{in_path}/OUTCAR', f'{out_path}/OUTCAR')

In [6]:
# Transfer Dissociated Structure Calculations

# Cluster size (number of molecules) -> name used in the output folder.
system_to_idx = dict(enumerate(['Monomer', 'Dimer', 'Trimer', 'Tetramer'], start=1))

for molecule in ('H2O', 'CH3OH'):
    # Only the tetramer is copied for H2O; all four sizes for CH3OH.
    cluster_sizes = [4] if molecule == 'H2O' else [1, 2, 3, 4]
    for system_idx in cluster_sizes:
        system = system_to_idx[system_idx]
        for system_type in ('Dissociated', 'Molecular'):
            in_path = f'Data_original/DFT/Dissociation_Tests/{molecule}/{system_idx}_{system_type.lower()}_correct'
            out_path = f'Data/Miscellaneous/Dissociation_Tests/{molecule}/{system}_{system_type}'
            Path(out_path).mkdir(parents=True, exist_ok=True)
            shorten_outcar(f'{in_path}/OUTCAR', f'{out_path}/OUTCAR')

    # Reference calculations copied once per molecule: the surface (always
    # read from the CH3OH directory) and the isolated molecule.
    reference_calculations = [
        ('Data_original/DFT/Dissociation_Tests/CH3OH/surface', 'Surface'),
        (f'Data_original/DFT/01-Mol/MgO/{molecule}/02_revPBE-D4', 'Molecule'),
    ]
    for in_path, reference_label in reference_calculations:
        out_path = f'Data/Miscellaneous/Dissociation_Tests/{molecule}/{reference_label}'
        Path(out_path).mkdir(parents=True, exist_ok=True)
        shorten_outcar(f'{in_path}/OUTCAR', f'{out_path}/OUTCAR')


In [2]:
# Transfer DFT functionals Eint calculations
#
# For every adsorbate-surface system and exchange-correlation functional, write a
# shortened OUTCAR of the three structures needed for an interaction energy
# (adsorbate+slab, slab, adsorbate) into Data/Miscellaneous/DFT_Comparison.
# For selected MgO systems the EXX / RPA / rSE output files are copied as well.

# Adsorbate-surface systems, written as '<surface> <molecule>'.
mol_surface_systems = [
    'MgO CH4', 'MgO C2H6', 'MgO CO', 'MgO CO2_2', 'MgO CO2_3', 'MgO H2O',
    'MgO N2O_1', 'MgO N2O_2', 'MgO NH3',
    'TiO2_rutile CH4', 'TiO2_rutile CH3OH', 'TiO2_rutile CO2_1', 'TiO2_rutile CO2_2',
    'TiO2_rutile H2O',
    'TiO2_anatase H2O', 'TiO2_anatase NH3',
]

# Functionals whose OUTCARs are shortened; the folder name is identical on input and output.
dft_functionals = ['01_PBE-D30', '02_PBE-MBDFI', '03_rev-vdW-DF2', '04_R2SCAN-rVV10', '05_PBE0-TSHI', '06_HSE06-D4']

# Structures transferred for every system (keys of structure_convert).
eint_structures = ['AD_SLAB', 'SLAB_FS', 'AD_FS']

# Per-system, per-method table with every entry initialised to 50.
# Nothing in this cell updates it.
dft_xc_eint_dict = {
    x1: {z: 50 for z in dft_functionals + ['07_RPA', '08_RPA-rSE']}
    for x1 in mol_surface_systems
}

# System key -> molecule folder name used in the output tree.
mol_surface_to_mol = {
    'MgO CH4': 'CH4',
    'MgO C2H6': 'C2H6',
    'MgO CO': 'CO',
    'MgO CO2_2': 'CO2_Physisorbed',
    'MgO CO2_3': 'CO2_Chemisorbed',
    'MgO N2O_1': 'N2O_Parallel',
    'MgO N2O_2': 'N2O_Tilted',
    'MgO H2O': 'H2O_Monomer',
    'MgO NH3': 'NH3',
    'MgO CH4_ML': 'CH4_Monolayer',
    'MgO C2H6_ML': 'C2H6_Monolayer',
    'TiO2_rutile CH4': 'CH4',
    'TiO2_rutile CH3OH': 'CH3OH',
    'TiO2_rutile CO2_1': 'CO2_Parallel',
    'TiO2_rutile CO2_2': 'CO2_Tilted',
    'TiO2_rutile H2O': 'H2O',
    'TiO2_anatase H2O': 'H2O',
    'TiO2_anatase NH3': 'NH3'
}

# MgO molecules for which the RPA output files are copied.
# NOTE(review): 'N2O_2' is not in this list — presumably no RPA data exists for it; confirm.
rpa_molecules = ['CH4', 'C2H6', 'CO', 'CO2_2', 'CO2_3', 'H2O', 'N2O_1', 'NH3']

# Output method folder -> tags of the source files 'OUT.<tag>.run.encut.550.k.2'
# that are copied into it as 'OUTCAR_<tag>'. The RPA file goes into both folders.
rpa_outputs = {
    '07_RPA': ('EXX', 'RPA'),
    '08_RPA-rSE': ('RPA', 'rSE'),
}

for mol_surface in mol_surface_systems:
    molecule = mol_surface.split()[-1]
    surface = mol_surface.split()[0]

    for functional in dft_functionals:
        # Best effort: not every system/functional combination has to exist.
        # A failure skips the remaining structures of this functional, but is
        # reported instead of being silently swallowed. `Exception` (rather than
        # a bare except) lets KeyboardInterrupt / SystemExit propagate.
        try:
            for structure in eint_structures:
                in_path = f'Data_original/DFT/04-Int_Ene/DFT_Compare/{surface}/{molecule}/{functional}/{structure}'
                out_path = f'Data/Miscellaneous/DFT_Comparison/{crystal_convert[surface]}/{mol_surface_to_mol[mol_surface]}/{functional}/{structure_convert[structure]}'
                Path(out_path).mkdir(parents=True, exist_ok=True)
                shorten_outcar(f'{in_path}/OUTCAR', f'{out_path}/OUTCAR')

                # For the '06' functional the content of the separate D4_EDISP file
                # is appended to the shortened OUTCAR.
                if functional.startswith('06'):
                    with open(f'{in_path}/D4_EDISP', 'r') as f:
                        d4_lines = f.readlines()
                    with open(f'{out_path}/OUTCAR', 'a') as f:
                        f.write('\n\n')
                        f.writelines(d4_lines)
        except Exception as err:
            print(f'Skipping {mol_surface} / {functional}: {type(err).__name__}: {err}')

    # Copy the RPA-related output files (MgO only). Errors here are not caught.
    if surface == 'MgO' and molecule in rpa_molecules:
        for structure in eint_structures:
            in_path = f'Data_original/DFT/04-Int_Ene/DFT_Compare/{surface}/{molecule}/07-RPA/{structure}'
            for method, tags in rpa_outputs.items():
                out_path = f'Data/Miscellaneous/DFT_Comparison/{crystal_convert[surface]}/{mol_surface_to_mol[mol_surface]}/{method}/{structure_convert[structure]}'
                Path(out_path).mkdir(parents=True, exist_ok=True)
                for tag in tags:
                    shutil.copy(f'{in_path}/OUT.{tag}.run.encut.550.k.2', f'{out_path}/OUTCAR_{tag}')

# Interaction energies are not evaluated in this cell. Earlier, now removed, code
# filled dft_xc_eint_dict via find_energy(...) as
#   [E(AD_SLAB) - E(SLAB_FS) - E(AD_FS)] * 1000
# taken from the EXX file ('07_RPA') or the rSE file ('08_RPA-rSE'), plus the
# same difference taken from the RPA file.
