In [1]:
# ase: adsorption site ensemble
def translate_ase_to_features(row):
    """Count, per metal, how many centre / surface / bulk positions it occupies.

    Parameters
    ----------
    row : pandas.Series
        One row of the site-ensemble table. Only entries whose value is a
        plain ``str`` and whose label starts with ``centre_``, ``surf_`` or
        ``bulk_`` are counted; everything else is ignored.

    Returns
    -------
    dict
        Keys ``M_<metal>`` (centre), ``B_<metal>`` (bulk) and ``S_<metal>``
        (surface) for Ag, Au, Cu, Pd and Pt, each mapped to an integer count.

    Raises
    ------
    KeyError
        If a counted position holds a string that is not one of the five metals.
    """
    # Column-name prefix -> feature tag
    prefix_to_tag = (('centre_', 'M'), ('surf_', 'S'), ('bulk_', 'B'))

    # Start every counter at zero (a plain dict, keyed M_/B_/S_ per metal)
    counts = {}
    for element in ('Ag', 'Au', 'Cu', 'Pd', 'Pt'):
        for tag in ('M', 'B', 'S'):
            counts[f'{tag}_{element}'] = 0

    for label in row.index:
        occupant = row[label]
        # Skip anything that is not a plain string (numbers, NaN, ...)
        if type(occupant) is not str:
            continue

        for prefix, tag in prefix_to_tag:
            if label.startswith(prefix):
                counts[f'{tag}_{occupant}'] += 1
                break

    return counts

# Templates for the adsorption-site ensembles. Each template lists the
# positions of one site type (central atom, surface neighbours, bulk
# neighbours); every position starts out unassigned (None).
# Neighbours are further split into CN=6 (corner, "c") and CN=8 (edge, "e").
ads_site_ensm_dic = {
    'site_centralat_cn6': dict.fromkeys([
        'centre_c',
        'surf_e_1', 'surf_e_2', 'surf_e_3', 'surf_e_4', 'surf_e_5',
        'bulk_1',
    ]),
    'site_centralat_cn8': dict.fromkeys([
        'centre_e',
        'surf_e_1', 'surf_e_2', 'surf_e_3', 'surf_e_4',
        'surf_c_5', 'surf_c_6',
        'bulk_1', 'bulk_2',
    ]),
}

In [2]:
import random
import pandas as pd

n_total = 10000
# There are 12 corner sites among the 42 surface sites
n_corner = int(n_total * (12 / 42))

# Dedicated, seeded generator: the Monte-Carlo sample is reproducible on
# "Restart & Run All" and the global `random` state is left untouched.
rng_seed = 42
rng = random.Random(rng_seed)

# Ag + Au + Cu always sum to max_p (scanned in increments of `step`);
# the remaining 100 - max_p is split equally between Pd and Pt.
min_p = 0
max_p = 60
step = 5


def _sample_site_ensemble(site_label, formulation, rng):
    """Draw one randomly decorated adsorption-site ensemble.

    Parameters
    ----------
    site_label : str
        Key of the template to use in ``ads_site_ensm_dic``.
    formulation : dict
        Metal symbol -> percentage. The percentages are used as sampling
        weights, so a metal with 0 % is never drawn.
    rng : random.Random
        Random generator used for the draw.

    Returns
    -------
    dict
        One entry per template position holding the drawn metal symbol,
        followed by one entry per metal holding its percentage.
    """
    positions = list(ads_site_ensm_dic[site_label])
    metals = list(formulation)
    weights = [formulation[metal] for metal in metals]
    # Weighted draw with replacement, one metal per position. Unlike manually
    # accumulated float intervals, this always assigns a metal to every position.
    drawn = rng.choices(metals, weights=weights, k=len(positions))
    site_ensemble = dict(zip(positions, drawn))
    site_ensemble.update(formulation)
    return site_ensemble


# Pd and Pt do not depend on the loop variables
Pd_p = (100 - max_p) / 2
Pt_p = (100 - max_p) / 2

list_adssiteens = []
for Ag_p in range(min_p, max_p + 1, step):
    for Au_p in range(min_p, max_p - Ag_p + 1, step):
        Cu_p = max_p - Ag_p - Au_p
        if not 0 <= Cu_p <= max_p:
            continue
        formulation = {'Ag': Ag_p, 'Au': Au_p, 'Cu': Cu_p, 'Pd': Pd_p, 'Pt': Pt_p}

        for i in range(n_total):
            # The first n_corner samples are corner-centred (CN=6) sites,
            # the rest are edge-centred (CN=8) sites.
            site_label = 'site_centralat_cn6' if i < n_corner else 'site_centralat_cn8'
            list_adssiteens.append(_sample_site_ensemble(site_label, formulation, rng))

column_order = ['Ag', 'Au', 'Cu', 'Pd', 'Pt', 'centre_c', 'centre_e', 'surf_e_1', 'surf_e_2', 'surf_e_3', 'surf_e_4', 'surf_e_5', 'surf_c_5', 'surf_c_6', 'bulk_1', 'bulk_2']
# Positions that do not exist for a given site type are left as NaN
df_ase_pdpt_20 = pd.DataFrame(list_adssiteens)[column_order]

# Translate the previous dataframe into the features dataframe:
# composition columns followed by the per-metal M_/B_/S_ occupation counts.
_df = pd.DataFrame(df_ase_pdpt_20.apply(translate_ase_to_features, axis=1).tolist())
df_ase_pdpt_20_features = pd.concat(
    [df_ase_pdpt_20[['Ag', 'Au', 'Cu', 'Pd', 'Pt']].copy(), _df],
    axis=1,
)

# Use the Features df to predict the new Ads E
#features_to_use = []
#for col in df_ase_pdpt_20_features.columns:
#    if 'bonding_' in col or 'neigh_' in col:
#        features_to_use.append(col)

#df_ase_pdpt_20_features['E_ads'] = model.predict(df_ase_pdpt_20_features[X_train.columns])

# Define the composition columns
#composition_cols = ['Ag', 'Au', 'Cu', 'Pd', 'Pt']

# Create a boolean mask for E_ads between -0.6 and -0.4
#mask = df_ase_pdpt_20_features['E_ads'].between(-0.6, -0.4)

# Group by the full composition tuple and compute the percentage in the range
#summary_df = (
#    df_ase_pdpt_20_features
#    .groupby(composition_cols)
#    .apply(lambda grp: grp.loc[mask, 'E_ads'].count() / len(grp) * 100)
#    .reset_index(name='Percentage E_ads in [-0.6, -0.4] (%)')
#)

#summary_df

In [3]:
# Sorted names of the composition columns plus every column containing 'M_'
feat_col = sorted(
    name
    for name in df_ase_pdpt_20_features.columns
    if name in ('Au', 'Ag', 'Cu', 'Pd', 'Pt') or 'M_' in name
)

# Fix the column order: composition first, then the centre (M_), surface (S_)
# and bulk (B_) counts, each listed per metal.
df_ase_pdpt_20_features = df_ase_pdpt_20_features.loc[
    :,
    ['Ag', 'Au', 'Cu', 'Pd', 'Pt']
    + [
        f'{tag}_{metal}'
        for tag in ('M', 'S', 'B')
        for metal in ('Ag', 'Au', 'Cu', 'Pd', 'Pt')
    ],
]
df_ase_pdpt_20_features

Unnamed: 0,Ag,Au,Cu,Pd,Pt,M_Ag,M_Au,M_Cu,M_Pd,M_Pt,S_Ag,S_Au,S_Cu,S_Pd,S_Pt,B_Ag,B_Au,B_Cu,B_Pd,B_Pt
0,0,0,60,20.0,20.0,0,0,1,0,0,0,0,3,2,0,0,0,1,0,0
1,0,0,60,20.0,20.0,0,0,0,0,1,0,0,3,0,2,0,0,0,0,1
2,0,0,60,20.0,20.0,0,0,1,0,0,0,0,2,1,2,0,0,0,1,0
3,0,0,60,20.0,20.0,0,0,1,0,0,0,0,2,2,1,0,0,1,0,0
4,0,0,60,20.0,20.0,0,0,1,0,0,0,0,3,1,1,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
909995,60,0,0,20.0,20.0,0,0,0,1,0,3,0,0,2,1,0,0,0,0,2
909996,60,0,0,20.0,20.0,0,0,0,0,1,4,0,0,2,0,2,0,0,0,0
909997,60,0,0,20.0,20.0,1,0,0,0,0,3,0,0,0,3,1,0,0,1,0
909998,60,0,0,20.0,20.0,1,0,0,0,0,3,0,0,3,0,2,0,0,0,0


In [4]:
# Persist the feature table as a pickle file; the path is relative, so it is
# written to the current working directory.
df_ase_pdpt_20_features.to_pickle('monte-carlo_motifs_Pd_Pt_20.pkl')