# Get all active sites for slabs
---

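For every phase-2 slab that has not been processed yet, enumerate all active sites and the subset of unique active sites, merge the result with the previously saved table, and pickle the combined table to `out_data/df_active_sites.pickle`.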

# Import Modules

In [1]:
import os
print(os.getcwd())
import sys

import pickle


import numpy as np
import pandas as pd

from ase import io

# # from tqdm import tqdm
from tqdm.notebook import tqdm

# # #########################################################
from proj_data import metal_atom_symbol

# #########################################################
from methods import (
    get_df_slab,
    get_structure_coord_df,
    get_df_coord,
    get_df_active_sites,
    )

# #########################################################
from local_methods import (
    mean_O_metal_coord,
    get_all_active_sites,
    get_unique_active_sites,
    get_unique_active_sites_temp,
    )

/mnt/f/Dropbox/01_norskov/00_git_repos/PROJ_IrOx_OER/workflow/enumerate_adsorption


# Read Data

In [2]:
# #########################################################
# Slab table, indexed by slab_id for the per-slab lookups further down
df_slab = get_df_slab().set_index("slab_id")

# #########################################################
# Previously computed active sites; an empty frame stands in when the
# loader returns None so later membership checks and concat still work
df_active_sites_prev = get_df_active_sites()
df_active_sites_prev = (
    pd.DataFrame() if df_active_sites_prev is None else df_active_sites_prev
    )

# Create Directories

In [3]:
# Ensure the output directory exists. `exist_ok=True` replaces the separate
# existence check, so there is no window between "check" and "create" and the
# cell can be re-run safely.
directory = "out_data"
os.makedirs(directory, exist_ok=True)

In [4]:
# Slab ids that have no entry in the previously saved active-site table.
# Only the index labels are needed, so iterate over the index directly instead
# of building a Series for every row with iterrows(). Order of `df_slab` is kept.
slab_ids_to_proc = [
    slab_id_i
    for slab_id_i in df_slab.index
    if slab_id_i not in df_active_sites_prev.index
    ]

In [5]:
# Rows of the slabs still to be processed, restricted to those with phase == 2
df_slab_i = (
    df_slab
    .loc[slab_ids_to_proc]
    .loc[lambda df: df.phase == 2]
    )

In [6]:
# #########################################################
# Enumerate the active sites of every slab still to be processed.
# One record (dict) is collected per slab; the table is built once after
# the loop and then merged with the rows loaded from previous runs.
# #########################################################
data_dict_list = []
# #########################################################
iterator = tqdm(df_slab_i.index, desc="1st loop")
for i_cnt, slab_id in enumerate(iterator):
    print(i_cnt, slab_id)
    # #####################################################
    data_dict_i = dict()
    # #####################################################
    row_i = df_slab.loc[slab_id]
    # #####################################################
    slab = row_i.slab_final
    slab_id = row_i.name
    bulk_id = row_i.bulk_id
    facet = row_i.facet
    num_atoms = row_i.num_atoms
    # #####################################################

    # #################################################
    # NOTE(review): presumably the per-atom coordination table of the
    # slab; confirm against `methods.get_df_coord`
    df_coord_slab_i = get_df_coord(
        slab_id=slab_id,
        mode="slab",
        slab=slab,
        )

    # #################################################
    # Every candidate active site of the slab
    active_sites = get_all_active_sites(
        slab=slab,
        slab_id=slab_id,
        bulk_id=bulk_id,
        df_coord_slab_i=df_coord_slab_i,
        )

    # #################################################
    # Reduce the candidates to the unique ones. The `_temp` variant is
    # used in place of `get_unique_active_sites`.
    active_sites_unique = get_unique_active_sites_temp(
        slab=slab,
        active_sites=active_sites,
        bulk_id=bulk_id,
        slab_id=slab_id,
        facet=facet,
        metal_atom_symbol=metal_atom_symbol,
        df_coord_slab_i=df_coord_slab_i,
        create_heatmap_plot=True,
        )


    # #################################################
    data_dict_i["active_sites"] = active_sites
    data_dict_i["num_active_sites"] = len(active_sites)
    data_dict_i["active_sites_unique"] = active_sites_unique
    data_dict_i["num_active_sites_unique"] = len(active_sites_unique)
    data_dict_i["slab_id"] = slab_id
    data_dict_i["bulk_id"] = bulk_id
    data_dict_i["facet"] = facet
    data_dict_i["num_atoms"] = num_atoms
    # #####################################################
    data_dict_list.append(data_dict_i)


# #########################################################
# Table of the slabs processed in this run. When nothing new was processed
# `data_dict_list` is empty, the frame has no "slab_id" column and
# `set_index("slab_id")` would raise a KeyError, so the index is only set
# when there are rows.
df_active_sites_new = pd.DataFrame(data_dict_list)
if not df_active_sites_new.empty:
    df_active_sites_new = df_active_sites_new.set_index("slab_id", drop=False)

# Combine the rows from this run with those loaded from previous runs
df_active_sites = pd.concat([
    df_active_sites_new,
    df_active_sites_prev,
    ])

HBox(children=(FloatProgress(value=0.0, description='1st loop', max=157.0, style=ProgressStyle(description_wid…

0 patowami_63
rdf_heat_maps_1/nu7hbg6rnt__001__patowami_63_rdf_diff_heat_map
1 bigawati_47
rdf_heat_maps_1/nu7hbg6rnt__1-10__bigawati_47_rdf_diff_heat_map
2 pafosope_20
rdf_heat_maps_1/73nhvjxyxf__002__pafosope_20_rdf_diff_heat_map
3 dugihabe_70
rdf_heat_maps_1/73nhvjxyxf__020__dugihabe_70_rdf_diff_heat_map
4 momaposi_60
rdf_heat_maps_1/73nhvjxyxf__111__momaposi_60_rdf_diff_heat_map
5 diwarise_06
rdf_heat_maps_1/73nhvjxyxf__100__diwarise_06_rdf_diff_heat_map
6 pisewoku_94
rdf_heat_maps_1/nd919pnr6q__100__pisewoku_94_rdf_diff_heat_map
7 romudini_21
rdf_heat_maps_1/nd919pnr6q__2-21__romudini_21_rdf_diff_heat_map
8 bofahisa_20
rdf_heat_maps_1/nd919pnr6q__002__bofahisa_20_rdf_diff_heat_map
9 likeniri_51
rdf_heat_maps_1/nd919pnr6q__111__likeniri_51_rdf_diff_heat_map
10 sugohule_17
rdf_heat_maps_1/nd919pnr6q__010__sugohule_17_rdf_diff_heat_map
11 kopomemu_66
rdf_heat_maps_1/zo9p9en4n3__110__kopomemu_66_rdf_diff_heat_map
12 bihetofu_24
rdf_heat_maps_1/zo9p9en4n3__011__bihetofu_24_rdf_diff_hea

# Post-process active site dataframe

In [7]:
from misc_modules.pandas_methods import reorder_df_columns

# Desired column order handed to `reorder_df_columns`.
# NOTE(review): presumably the listed columns are moved to the front and the
# unlisted ones (e.g. the `*_unique` columns) are kept after them; confirm
# against the helper.
columns_list = [
    "bulk_id",
    "slab_id",
    "facet",
    "num_atoms",
    "num_active_sites",
    "active_sites",
    ]

df_active_sites = reorder_df_columns(columns_list, df_active_sites)

# Combining previous `df_active_sites` and the rows processed during current run

In [8]:
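# NOTE: the concatenation with `df_active_sites_prev` is performed at the end
# of the main processing cell above, so it is not repeated here.
# df_active_sites = df_active_sites = pd.concat([
#     df_active_sites,
#     df_active_sites_prev,
#     ])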

# Summary of data objects

In [9]:
# Totals over the combined table (rows from this run plus previous runs)
num_sites_total = df_active_sites["num_active_sites"].sum()
print("Number of active sites:", num_sites_total)

num_sites_unique_total = df_active_sites["num_active_sites_unique"].sum()
print("Number of unique active sites", num_sites_unique_total)

Number of active sites: 3708
Number of unique active sites 1674


In [10]:
# Pickling data ###########################################
# Write the combined active-site table to out_data/df_active_sites.pickle.
# `exist_ok=True` makes directory creation safe to re-run without a separate
# existence check.
directory = "out_data"
os.makedirs(directory, exist_ok=True)
with open(os.path.join(directory, "df_active_sites.pickle"), "wb") as fle:
    pickle.dump(df_active_sites, fle)
# #########################################################

In [11]:
# Sanity check: (rows, columns) of the combined active-site table
print(df_active_sites.shape)

(503, 8)


In [12]:
# NOTE(review): unconditional failure. This appears to be a deliberate stop so
# that "Run All" does not continue into the cells below; confirm before removing.
assert False

AssertionError: 

In [None]:
# Load the per-slab `df_rdf_ij` pickles into a dict keyed by slab_id.
# Files are expected at out_data/df_rdf_ij/<bulk_id>__<facet>__<slab_id>.pickle
#
# `pickle` and `os` are imported once in the import cell at the top of the
# notebook; they are no longer re-imported on every iteration.
#
# NOTE: pickle.load can execute arbitrary code embedded in the file. Only load
# files that were written by this project.
df_rdf_ij_dict = dict()
for index_i, row_i in df_active_sites.iterrows():
    file_name_i = "__".join(
        [row_i.bulk_id, row_i.facet, row_i.slab_id]) + ".pickle"
    path_i = os.path.join(
        "out_data/df_rdf_ij", file_name_i)

    # #########################################################
    with open(path_i, "rb") as fle:
        df_rdf_ij_i = pickle.load(fle)
    # #########################################################

    df_rdf_ij_dict[row_i.slab_id] = df_rdf_ij_i

In [None]:
# Non-zero entries of `df_rdf_ij_i`, flattened into a plain list.
# NOTE: `df_rdf_ij_i` is whatever frame the loading loop assigned last, so only
# that single slab's values are collected here.
rdf_ij_list = [
    value
    for value in df_rdf_ij_i.values.flatten()
    if value != 0.
    ]