<a href="https://colab.research.google.com/github/ZiqiTang427/HEA4HER/blob/main/HEA_descriptor_and_screening_.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
import itertools

def calculate_comprehensive_hea_parameters(
    elements_to_exclude=('Si',),
    max_delta=8.5,
    input_filename='HEA_Elemental_Properties.csv',
    output_filename='HEA_Comprehensive_Analysis.csv',
    n_elements=5,
):
    """
    Screen equiatomic alloy combinations and save their descriptors to a CSV file.

    Reads per-element properties from ``input_filename``, enumerates every
    ``n_elements``-element combination of the available elements, keeps the
    combinations whose atomic-size mismatch ``delta`` does not exceed
    ``max_delta`` and writes one row of descriptors per kept alloy to
    ``output_filename``.

    Descriptors computed for a kept combination (equal fraction c = 1/n_elements):
      * delta (%)  : 100 * sqrt(sum(c * (1 - r_i / r_bar)**2)) over 'AtomicRadius_pm'
      * VEC        : composition-weighted sum of the 'VEC' column
      * delta_chi  : sqrt(sum(c * (chi_i - chi_bar)**2)) over 'Electronegativity'
      * D_H_Geometric_Mean : exp(c * sum(log(D_H))); NaN unless every D_H is > 0
      * mean of 'OH-Binding energy', 'H-Binding energy' and 'E'
      * mean / std / min / max of 'Work Function' (pandas sample std)
      * Predicted_Structure : 'BCC' if VEC < 6.87, 'FCC' if VEC > 8, else 'BCC+FCC'

    Parameters
    ----------
    elements_to_exclude : iterable of str
        Element symbols left out of the combinations (default: only 'Si').
    max_delta : float
        Upper bound (inclusive) on ``delta (%)`` for an alloy to be kept.
    input_filename : str
        CSV with an 'Element' column plus the property columns listed above.
    output_filename : str
        Destination CSV for the results.
    n_elements : int
        Number of elements per combination (default 5).

    Returns
    -------
    None
        Results are written to ``output_filename``. If the input file does not
        exist, a message is printed and nothing is written.
    """
    try:
        df_props = pd.read_csv(input_filename)
        print(f"‚úÖ Successfully loaded data from '{input_filename}'.")
    except FileNotFoundError:
        print(f"‚ùå ERROR: FILE NOT FOUND: '{input_filename}'")
        print("Please ensure the file has been uploaded to this session before running.")
        return

    # --- Data Cleaning and Preparation ---
    # Header cells may carry stray whitespace; normalise before looking columns up.
    df_props.columns = df_props.columns.str.strip()
    df_props = df_props.set_index('Element')

    numeric_cols = [
        'AtomicRadius_pm', 'MeltingTemp_K', 'Electronegativity', 'VEC',
        'H-Binding energy', 'Work Function', 'E',
        'OH-Binding energy', 'D_H'
    ]

    print("üîé Cleaning and preparing data...")
    for col in numeric_cols:
        if col in df_props.columns:
            # Non-numeric cells become NaN instead of aborting the run.
            if not pd.api.types.is_numeric_dtype(df_props[col]):
                df_props[col] = pd.to_numeric(df_props[col], errors='coerce')
        else:
            print(f"‚ö†Ô∏è Warning: Column '{col}' not found. It will be skipped.")
            # A missing column is filled with NaN so later lookups still work.
            df_props[col] = np.nan

    # Elements lacking any of these properties cannot be screened at all.
    essential_cols = ['AtomicRadius_pm', 'MeltingTemp_K', 'Electronegativity', 'VEC']
    df_props = df_props.dropna(subset=essential_cols)

    # --- Combination Generation ---
    available_elements = df_props.index.tolist()
    elements_for_combination = [elem for elem in available_elements if elem not in elements_to_exclude]
    print(f"üî¨ Using {len(elements_for_combination)} elements for alloy generation.")

    element_combinations = list(itertools.combinations(elements_for_combination, n_elements))
    print(f"Found {len(element_combinations):,} possible combinations to analyze.")

    # Fixed column order; also guarantees a header row when no alloy qualifies,
    # so the output file can always be read back.
    result_columns = [
        'Alloy', 'Predicted_Structure', 'D_H_Geometric_Mean',
        'OH_Binding_Energy_Avg_eV', 'Work_Function_Mean_eV', 'Work_Function_StdDev',
        'Work_Function_Min_eV', 'Work_Function_Max_eV', 'Electrode_Potential_Avg_V',
        'H-Binding_Energy_eV', 'VEC', 'delta (%)', 'delta_chi',
    ]

    # --- Calculation Loop ---
    c_i = 1.0 / n_elements  # equiatomic fraction, identical for every combination
    results_list = []
    print("üß† Calculating parameters and applying filters...")
    for combo in element_combinations:
        combo_props = df_props.loc[list(combo)]

        radii = combo_props['AtomicRadius_pm']
        r_bar = (radii * c_i).sum()
        delta = np.sqrt((c_i * (1 - radii / r_bar)**2).sum()) * 100

        # Cheap size-mismatch filter first; the other descriptors are only
        # computed for combinations that pass it.
        if delta > max_delta:
            continue

        vec = (combo_props['VEC'] * c_i).sum()

        # Geometric mean of D_H; stays NaN when any value is missing or not
        # strictly positive (the logarithm would be undefined).
        d_h_values = combo_props['D_H']
        d_h_geometric_mean = np.nan
        if (d_h_values > 0).all():
            d_h_geometric_mean = np.exp(c_i * np.log(d_h_values).sum())

        chi = combo_props['Electronegativity']
        chi_bar = (chi * c_i).sum()
        delta_chi = np.sqrt((c_i * (chi - chi_bar)**2).sum())

        work_function = combo_props['Work Function']
        structure = 'BCC' if vec < 6.87 else ('FCC' if vec > 8 else 'BCC+FCC')

        results_list.append({
            'Alloy': ''.join(sorted(combo)),
            'Predicted_Structure': structure,
            'D_H_Geometric_Mean': d_h_geometric_mean,
            'OH_Binding_Energy_Avg_eV': combo_props['OH-Binding energy'].mean(),
            'Work_Function_Mean_eV': work_function.mean(),
            'Work_Function_StdDev': work_function.std(),
            'Work_Function_Min_eV': work_function.min(),
            'Work_Function_Max_eV': work_function.max(),
            'Electrode_Potential_Avg_V': combo_props['E'].mean(),
            'H-Binding_Energy_eV': combo_props['H-Binding energy'].mean(),
            'VEC': vec,
            'delta (%)': delta,
            'delta_chi': delta_chi,
        })

    # --- Save Results ---
    df_results = pd.DataFrame(results_list, columns=result_columns)
    df_results.to_csv(output_filename, index=False)

    print("\n" + "="*50)
    print("üéâ Analysis Complete! üéâ")
    print(f"Found {len(df_results):,} alloys that meet the criteria.")
    print(f"Results saved to '{output_filename}'")
    print("="*50)

# Run the entire analysis: loads the elemental-properties CSV, screens the
# alloy combinations and writes the results CSV (the call itself returns nothing).
calculate_comprehensive_hea_parameters()

‚úÖ Successfully loaded data from 'HEA_Elemental_Properties.csv'.
üîé Cleaning and preparing data...
üî¨ Using 39 elements for alloy generation.
Found 575,757 possible combinations to analyze.
üß† Calculating parameters and applying filters...

üéâ Analysis Complete! üéâ
Found 175,721 alloys that meet the criteria.
Results saved to 'HEA_Comprehensive_Analysis.csv'


In [2]:
# Atomic number of every element from H (1) to Lr (103).
# The symbols are listed one period per line in order of increasing atomic
# number, so each symbol's position in the sequence is its atomic number.
atomic_no = {
    symbol: z
    for z, symbol in enumerate(
        """
        H He
        Li Be B C N O F Ne
        Na Mg Al Si P S Cl Ar
        K Ca Sc Ti V Cr Mn Fe Co Ni Cu Zn Ga Ge As Se Br Kr
        Rb Sr Y Zr Nb Mo Tc Ru Rh Pd Ag Cd In Sn Sb Te I Xe
        Cs Ba La Ce Pr Nd Pm Sm Eu Gd Tb Dy Ho Er Tm Yb Lu Hf Ta W Re Os Ir Pt Au Hg Tl Pb Bi Po At Rn
        Fr Ra Ac Th Pa U Np Pu Am Cm Bk Cf Es Fm Md No Lr
        """.split(),
        start=1,
    )
}

# Group number used as a descriptor for each element (H ... Lr).
# Each row pairs a run of symbols with the group numbers assigned to them.
# NOTE(review): the lanthanides and actinides are numbered consecutively
# (La/Ac = 3 up to Lu/Lr = 17) instead of all being placed in one group;
# confirm this is the intended convention.
group_no = {
    symbol: group
    for symbols, groups in (
        ("H He", (1, 18)),
        ("Li Be B C N O F Ne", (1, 2, 13, 14, 15, 16, 17, 18)),
        ("Na Mg Al Si P S Cl Ar", (1, 2, 13, 14, 15, 16, 17, 18)),
        ("K Ca Sc Ti V Cr Mn Fe Co Ni Cu Zn Ga Ge As Se Br Kr",
         (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)),
        ("Rb Sr Y Zr Nb Mo Tc Ru Rh Pd Ag Cd In Sn Sb Te I Xe",
         (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)),
        ("Cs Ba", (1, 2)),
        ("La Ce Pr Nd Pm Sm Eu Gd Tb Dy Ho Er Tm Yb Lu",
         (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)),
        ("Hf Ta W Re Os Ir Pt Au Hg Tl Pb Bi Po At Rn",
         (4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18)),
        ("Fr Ra", (1, 2)),
        ("Ac Th Pa U Np Pu Am Cm Bk Cf Es Fm Md No Lr",
         (3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17)),
    )
    for symbol, group in zip(symbols.split(), groups)
}


# Valence value used as a descriptor for each element (H ... Lr).
# Each row pairs a run of symbols with the values assigned to them.
# NOTE(review): Pd is stored as 9 while its neighbours Rh and Ag are 1;
# confirm this against the data source.
valence = {
    symbol: count
    for symbols, counts in (
        ("H He", (1, 2)),
        ("Li Be B C N O F Ne", (1, 2, 3, 4, 5, 6, 7, 8)),
        ("Na Mg Al Si P S Cl Ar", (1, 2, 3, 4, 5, 6, 7, 8)),
        ("K Ca", (1, 2)),
        ("Sc Ti V Cr Mn Fe Co Ni Cu Zn", (2, 2, 2, 1, 2, 2, 2, 2, 1, 2)),
        ("Ga Ge As Se Br Kr", (3, 4, 5, 6, 7, 8)),
        ("Rb Sr", (1, 2)),
        ("Y Zr Nb Mo Tc Ru Rh Pd Ag Cd", (2, 2, 1, 1, 2, 1, 1, 9, 1, 2)),
        ("In Sn Sb Te I Xe", (3, 4, 5, 6, 7, 8)),
        ("Cs Ba", (1, 2)),
        ("La Ce Pr Nd Pm Sm Eu Gd Tb Dy Ho Er Tm Yb Lu",
         (2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2)),
        ("Hf Ta W Re Os Ir Pt Au Hg", (2, 2, 2, 2, 2, 2, 1, 1, 2)),
        ("Tl Pb Bi Po At Rn", (3, 4, 5, 6, 7, 8)),
        ("Fr Ra", (1, 2)),
        ("Ac Th Pa U Np Pu Am Cm Bk Cf Es Fm Md No Lr",
         (2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3)),
    )
    for symbol, count in zip(symbols.split(), counts)
}


# Electronegativity of each element (H ... Lr).
# Elements without a value (He, Ne, Ar, Kr) are stored as NaN rather than a
# placeholder string, so numeric code and pd.notna()/pd.isna() checks treat
# them as missing instead of receiving text.
electronegativity = {
    'H': 2.2, 'He': float('nan'), 'Li': 0.98, 'Be': 1.57, 'B': 2.04, 'C': 2.55, 'N': 3.04, 'O': 3.44, 'F': 3.98, 'Ne': float('nan'), 'Na': 0.93, 'Mg': 1.31, 'Al': 1.61, 'Si': 1.9,
    'P': 2.19, 'S': 2.58, 'Cl': 3.16, 'Ar': float('nan'), 'K': 0.82, 'Ca': 1, 'Sc': 1.36, 'Ti': 1.54, 'V': 1.63, 'Cr': 1.66, 'Mn': 1.55, 'Fe': 1.83, 'Co': 1.88, 'Ni': 1.91,
    'Cu': 1.9, 'Zn': 1.65, 'Ga': 1.81, 'Ge': 2.01, 'As': 2.18, 'Se': 2.55, 'Br': 2.96, 'Kr': float('nan'), 'Rb': 0.82, 'Sr': 0.95, 'Y': 1.22, 'Zr': 1.33, 'Nb': 1.6, 'Mo': 2.16,
    'Tc': 2.1, 'Ru': 2.2, 'Rh': 2.28, 'Pd': 2.2, 'Ag': 1.93, 'Cd': 1.69, 'In': 1.78, 'Sn': 1.96, 'Sb': 2.05, 'Te': 2.1, 'I': 2.66, 'Xe': 2.6, 'Cs': 0.79, 'Ba': 0.89,
    'La': 1.1, 'Ce': 1.12, 'Pr': 1.13, 'Nd': 1.14, 'Pm': 1.13, 'Sm': 1.17, 'Eu': 1.2, 'Gd': 1.2, 'Tb': 1.1, 'Dy': 1.22, 'Ho': 1.23, 'Er': 1.24, 'Tm': 1.25, 'Yb': 1.1,
    'Lu': 1, 'Hf': 1.3, 'Ta': 1.5, 'W': 1.7, 'Re': 1.9, 'Os': 2.2, 'Ir': 2.2, 'Pt': 2.2, 'Au': 2.4, 'Hg': 1.9, 'Tl': 1.8, 'Pb': 1.8, 'Bi': 1.9, 'Po': 2, 'At': 2.2, 'Rn': 2.2, 'Fr': 0.7,
    'Ra': 0.9, 'Ac': 1.1, 'Th': 1.3, 'Pa': 1.5, 'U': 1.7, 'Np': 1.3, 'Pu': 1.3, 'Am': 1.3, 'Cm': 1.3, 'Bk': 1.3, 'Cf': 1.3, 'Es': 1.3, 'Fm': 1.3, 'Md': 1.3, 'No': 1.3, 'Lr': 1.3
}

# Covalent radius of each element (H ... Lr).
# NOTE(review): the magnitudes look like ångströms — confirm against the source.
# Elements without a value (Bk ... Lr) are stored as NaN rather than a
# placeholder string, so numeric code and pd.notna()/pd.isna() checks treat
# them as missing instead of receiving text.
covalent_radius = {
    'H': 0.31, 'He': 0.28, 'Li': 1.28, 'Be': 0.96, 'B': 0.84, 'C': 0.76, 'N': 0.71, 'O': 0.66, 'F': 0.57, 'Ne': 0.58, 'Na': 1.66, 'Mg': 1.41, 'Al': 1.21, 'Si': 1.11, 'P': 1.07,
    'S': 1.05, 'Cl': 1.02, 'Ar': 1.06, 'K': 2.03, 'Ca': 1.76, 'Sc': 1.7, 'Ti': 1.6, 'V': 1.53, 'Cr': 1.39, 'Mn': 1.39, 'Fe': 1.32, 'Co': 1.26, 'Ni': 1.24, 'Cu': 1.32, 'Zn': 1.22,
    'Ga': 1.22, 'Ge': 1.2, 'As': 1.19, 'Se': 1.2, 'Br': 1.2, 'Kr': 1.16, 'Rb': 2.2, 'Sr': 1.95, 'Y': 1.9, 'Zr': 1.75, 'Nb': 1.64, 'Mo': 1.54, 'Tc': 1.47, 'Ru': 1.46, 'Rh': 1.42,
    'Pd': 1.39, 'Ag': 1.45, 'Cd': 1.44, 'In': 1.42, 'Sn': 1.39, 'Sb': 1.39, 'Te': 1.38, 'I': 1.39, 'Xe': 1.4, 'Cs': 2.44, 'Ba': 2.15, 'La': 2.07, 'Ce': 2.04, 'Pr': 2.03, 'Nd': 2.01,
    'Pm': 1.99, 'Sm': 1.98, 'Eu': 1.98, 'Gd': 1.96, 'Tb': 1.94, 'Dy': 1.92, 'Ho': 1.92, 'Er': 1.89, 'Tm': 1.9, 'Yb': 1.87, 'Lu': 1.87, 'Hf': 1.75, 'Ta': 1.7, 'W': 1.62, 'Re': 1.51,
    'Os': 1.44, 'Ir': 1.41, 'Pt': 1.36, 'Au': 1.36, 'Hg': 1.32, 'Tl': 1.45, 'Pb': 1.46, 'Bi': 1.48, 'Po': 1.4, 'At': 1.5, 'Rn': 1.5, 'Fr': 2.6, 'Ra': 2.21, 'Ac': 2.15, 'Th': 2.06,
    'Pa': 2, 'U': 1.96, 'Np': 1.9, 'Pu': 1.87, 'Am': 1.8, 'Cm': 1.69, 'Bk': float('nan'), 'Cf': float('nan'), 'Es': float('nan'), 'Fm': float('nan'), 'Md': float('nan'), 'No': float('nan'), 'Lr': float('nan')
}

# Ionization energy of each element (H ... Lr).
# NOTE(review): the magnitudes look like first ionization energies in eV —
# confirm against the source.
# The one element without a value (At) is stored as NaN rather than a
# placeholder string, so numeric code and pd.notna()/pd.isna() checks treat
# it as missing instead of receiving text.
ionization_energy = {
    'H': 13.59844, 'He': 24.58741, 'Li': 5.39172, 'Be': 9.3227, 'B': 8.29803, 'C': 11.2603, 'N': 14.53414, 'O': 13.61806, 'F': 17.42282, 'Ne': 21.5646, 'Na': 5.13908, 'Mg': 7.64624,
    'Al': 5.98577, 'Si': 8.15169, 'P': 10.48669, 'S': 10.36001, 'Cl': 12.96764, 'Ar': 15.75962, 'K': 4.34066, 'Ca': 6.11316, 'Sc': 6.5615, 'Ti': 6.8281, 'V': 6.7462, 'Cr': 6.7665,
    'Mn': 7.43402, 'Fe': 7.9024, 'Co': 7.881, 'Ni': 7.6398, 'Cu': 7.72638, 'Zn': 9.3942, 'Ga': 5.9993, 'Ge': 7.8994, 'As': 9.7886, 'Se': 9.75238, 'Br': 11.81381, 'Kr': 13.99961,
    'Rb': 4.17713, 'Sr': 5.6949, 'Y': 6.2171, 'Zr': 6.6339, 'Nb': 6.75885, 'Mo': 7.09243, 'Tc': 7.28, 'Ru': 7.3605, 'Rh': 7.4589, 'Pd': 8.3369, 'Ag': 7.5762, 'Cd': 8.9938, 'In': 5.78636,
    'Sn': 7.3439, 'Sb': 8.6084, 'Te': 9.0096, 'I': 10.45126, 'Xe': 12.1298, 'Cs': 3.8939, 'Ba': 5.2117, 'La': 5.5769, 'Ce': 5.5387, 'Pr': 5.473, 'Nd': 5.525, 'Pm': 5.582, 'Sm': 5.6436,
    'Eu': 5.6704, 'Gd': 6.1501, 'Tb': 5.8638, 'Dy': 5.9389, 'Ho': 6.0215, 'Er': 6.1077, 'Tm': 6.18431, 'Yb': 6.25416, 'Lu': 5.4259, 'Hf': 6.82507, 'Ta': 7.5496, 'W': 7.864, 'Re': 7.8335,
    'Os': 8.4382, 'Ir': 8.967, 'Pt': 8.9587, 'Au': 9.2255, 'Hg': 10.4375, 'Tl': 6.1082, 'Pb': 7.41666, 'Bi': 7.2856, 'Po': 8.417, 'At': float('nan'), 'Rn': 10.7485, 'Fr': 4.0727, 'Ra': 5.2784,
    'Ac': 5.17, 'Th': 6.3067, 'Pa': 5.89, 'U': 6.19405, 'Np': 6.2657, 'Pu': 6.0262, 'Am': 5.9738, 'Cm': 5.9915, 'Bk': 6.1979, 'Cf': 6.2817, 'Es': 6.42, 'Fm': 6.5, 'Md': 6.58, 'No': 6.65,
    'Lr': 4.9
}



# Polarizability of each element (H ... Lr).
# NOTE(review): units are not stated here — confirm against the source.
# The one element without a value (Lr) is stored as NaN rather than a
# placeholder string, so numeric code and pd.notna()/pd.isna() checks treat
# it as missing instead of receiving text.
polarizability = {
    'H': 0.666793, 'He': 0.204956, 'Li': 24.3, 'Be': 5.6, 'B': 3.03, 'C': 1.76, 'N': 1.1, 'O': 0.802, 'F': 0.557, 'Ne': 0.3956, 'Na': 24.11, 'Mg': 10.6, 'Al': 6.8, 'Si': 5.38, 'P': 3.63,
    'S': 2.9, 'Cl': 2.18, 'Ar': 1.6411, 'K': 43.4, 'Ca': 22.8, 'Sc': 17.8, 'Ti': 14.6, 'V': 12.4, 'Cr': 11.6, 'Mn': 9.4, 'Fe': 8.4, 'Co': 7.5, 'Ni': 6.8, 'Cu': 6.2, 'Zn': 5.75, 'Ga': 8.12,
    'Ge': 6.07, 'As': 4.31, 'Se': 3.77, 'Br': 3.05, 'Kr': 2.4844, 'Rb': 47.3, 'Sr': 27.6, 'Y': 22.7, 'Zr': 17.9, 'Nb': 15.7, 'Mo': 12.8, 'Tc': 11.4, 'Ru': 9.6, 'Rh': 8.6, 'Pd': 4.8,
    'Ag': 7.2, 'Cd': 7.36, 'In': 10.2, 'Sn': 7.7, 'Sb': 6.6, 'Te': 5.5, 'I': 5.35, 'Xe': 4.044, 'Cs': 59.42, 'Ba': 39.7, 'La': 31.1, 'Ce': 29.6, 'Pr': 28.2, 'Nd': 31.4, 'Pm': 30.1,
    'Sm': 28.8, 'Eu': 27.7, 'Gd': 23.5, 'Tb': 25.5, 'Dy': 24.5, 'Ho': 23.6, 'Er': 22.7, 'Tm': 21.8, 'Yb': 21, 'Lu': 21.9, 'Hf': 16.2, 'Ta': 13.1, 'W': 11.1, 'Re': 9.7, 'Os': 8.5,
    'Ir': 7.6, 'Pt': 6.5, 'Au': 5.8, 'Hg': 5.02, 'Tl': 7.6, 'Pb': 6.8, 'Bi': 7.4, 'Po': 6.8, 'At': 6, 'Rn': 5.3, 'Fr': 47.1, 'Ra': 38.3, 'Ac': 32.1, 'Th': 32.1, 'Pa': 25.4, 'U': 24.9, 'Np': 24.8,
    'Pu': 24.5, 'Am': 23.3, 'Cm': 23, 'Bk': 22.7, 'Cf': 20.5, 'Es': 19.7, 'Fm': 23.8, 'Md': 18.2, 'No': 17.5, 'Lr': float('nan')
}


In [3]:
import re
import pandas as pd

# Load the screening results produced by calculate_comprehensive_hea_parameters().
# That function saves 'HEA_Comprehensive_Analysis.csv' relative to the current
# working directory, so the same relative name is used here; the previous
# absolute '/content/...' path only matched when the notebook ran from Colab's
# default directory.
df_analysis = pd.read_csv('HEA_Comprehensive_Analysis.csv')

# Function to split the alloy string into a list of elements
def split_alloy(alloy_string, symbols=None):
  """Split a concatenated alloy name such as 'FeCoNi' into element symbols.

  The string is scanned left to right. At each position a two-character
  symbol is preferred over a one-character symbol (so 'Co' is cobalt, not
  carbon followed by 'o'). Characters that do not start a known symbol
  (digits, punctuation, whitespace, unknown letters) are skipped.

  Parameters
  ----------
  alloy_string : str
      Concatenated element symbols. Any non-string value (for example the
      NaN pandas uses for an empty CSV cell) yields an empty list instead of
      raising.
  symbols : container of str, optional
      Recognised element symbols. Defaults to the notebook-level ``atomic_no``
      table, looked up when the function is called.

  Returns
  -------
  list of str
      Element symbols in their order of appearance.
  """
  if symbols is None:
    symbols = atomic_no
  if not isinstance(alloy_string, str):
    return []

  elements = []
  i = 0
  length = len(alloy_string)
  while i < length:
    pair = alloy_string[i:i+2]
    if len(pair) == 2 and pair in symbols:
      # Longest match first: consume a two-character symbol.
      elements.append(pair)
      i += 2
    elif alloy_string[i] in symbols:
      elements.append(alloy_string[i])
      i += 1
    else:
      # Not the start of any known symbol: skip this character.
      i += 1
  return elements


# Parse every alloy name into its list of element symbols
df_analysis['Elements'] = df_analysis['Alloy'].map(split_alloy)

display(df_analysis.head())

Unnamed: 0,Alloy,Predicted_Structure,D_H_Geometric_Mean,OH_Binding_Energy_Avg_eV,Work_Function_Mean_eV,Work_Function_StdDev,Work_Function_Min_eV,Work_Function_Max_eV,Electrode_Potential_Avg_V,H-Binding_Energy_eV,VEC,delta (%),delta_chi,Elements
0,MnScTiVZn,BCC,1.37973e-10,-0.006,4.1,0.347059,3.5,4.33,-0.8764,0.046,6.2,8.026352,0.102489,"[Mn, Sc, Ti, V, Zn]"
1,MnNbScTiV,BCC,1.37973e-09,-0.18,4.106,0.35097,3.5,4.33,-0.9438,-0.154,4.8,7.888414,0.093936,"[Mn, Nb, Sc, Ti, V]"
2,MnMoScTiV,BCC,8.705506e-10,-0.036,4.178,0.428976,3.5,4.66,-0.764,-0.114,5.0,8.257422,0.270806,"[Mn, Mo, Sc, Ti, V]"
3,MnRhScTiV,BCC,1.201124e-10,0.27,4.242,0.530679,3.5,4.98,-0.5724,-0.054,5.6,8.428613,0.316569,"[Mn, Rh, Sc, Ti, V]"
4,MnPdScTiV,BCC,2.186724e-10,0.376,4.328,0.690847,3.5,5.41,-0.534,-0.054,5.8,8.159554,0.285979,"[Mn, Pd, Sc, Ti, V]"


In [4]:
# Elemental lookup tables to summarise per alloy. Each key is the prefix of the
# generated '<key>_mean' and '<key>_sigma' columns. The tables are referenced
# directly instead of being looked up by name through globals().
property_tables = {
    'atomic_no': atomic_no,
    'group_no': group_no,
    'valence': valence,
    'electronegativity': electronegativity,
    'covalent_radius': covalent_radius,
    'ionization_energy': ionization_energy,
    'polarizability': polarizability,
}

# Names of the summarised properties, in column-creation order.
properties = list(property_tables)


def _property_mean_and_sigma(elements, table):
    """Return (mean, sample standard deviation) of ``table`` values for ``elements``.

    Elements that are absent from ``table`` or whose entry is not a usable
    number are ignored. Both statistics are NaN when no usable value remains,
    and the standard deviation is NaN when only one remains.
    """
    values = []
    for el in elements:
        value = table.get(el)
        # Placeholder strings such as 'n.a' pass pd.notna(), so a numeric type
        # is required as well before a value is accepted.
        if isinstance(value, (int, float)) and pd.notna(value):
            values.append(value)
    series = pd.Series(values, dtype=float)
    return series.mean(), series.std()


# Create new columns for mean and standard deviation of each property.
# One pass per property yields both statistics for every alloy.
for prop, table in property_tables.items():
    stats = [_property_mean_and_sigma(elements, table) for elements in df_analysis['Elements']]
    df_analysis[f'{prop}_mean'] = [mean for mean, _ in stats]
    df_analysis[f'{prop}_sigma'] = [sigma for _, sigma in stats]

display(df_analysis.head())

Unnamed: 0,Alloy,Predicted_Structure,D_H_Geometric_Mean,OH_Binding_Energy_Avg_eV,Work_Function_Mean_eV,Work_Function_StdDev,Work_Function_Min_eV,Work_Function_Max_eV,Electrode_Potential_Avg_V,H-Binding_Energy_eV,...,valence_mean,valence_sigma,electronegativity_mean,electronegativity_sigma,covalent_radius_mean,covalent_radius_sigma,ionization_energy_mean,ionization_energy_sigma,polarizability_mean,polarizability_sigma
0,MnScTiVZn,BCC,1.37973e-10,-0.006,4.1,0.347059,3.5,4.33,-0.8764,0.046,...,2.0,0.0,1.546,0.114586,1.488,0.187537,7.392804,1.165691,11.99,4.647096
1,MnNbScTiV,BCC,1.37973e-09,-0.18,4.106,0.35097,3.5,4.33,-0.9438,-0.154,...,1.8,0.447214,1.536,0.105024,1.572,0.119038,6.865734,0.332655,13.98,3.217452
2,MnMoScTiV,BCC,8.705506e-10,-0.036,4.178,0.428976,3.5,4.66,-0.764,-0.114,...,1.8,0.447214,1.648,0.302771,1.552,0.113004,6.93245,0.339245,13.4,3.088689
3,MnRhScTiV,BCC,1.201124e-10,0.27,4.242,0.530679,3.5,4.98,-0.5724,-0.054,...,1.8,0.447214,1.672,0.353935,1.528,0.127945,7.005744,0.413837,12.56,3.785234
4,MnPdScTiV,BCC,2.186724e-10,0.376,4.328,0.690847,3.5,5.41,-0.534,-0.054,...,3.4,3.130495,1.656,0.319734,1.522,0.134796,7.181344,0.724136,11.8,4.973932


In [6]:
import pandas as pd

# Load the experimental records from 'HEA literature data.csv'.
# NOTE(review): this is an absolute Colab path; adjust it if the notebook is
# run from another environment.
df_literature = pd.read_csv('/content/HEA literature data.csv')

# Parse each 'Alloy Composition' string into its list of element symbols.
# split_alloy is referenced directly: globals().get('split_alloy') would quietly
# hand None to apply() if the defining cell had not been run, hiding the real
# cause behind a less helpful error.
df_literature['Elements'] = df_literature['Alloy Composition'].apply(split_alloy)



In [7]:
# Build an order-independent key for each alloy: its element symbols sorted
# and concatenated, so 'PtCoNiRuIr' and 'CoIrNiPtRu' share the same key.
df_literature['Sorted_Elements'] = df_literature['Elements'].apply(lambda x: ''.join(sorted(x)))
df_analysis['Sorted_Elements'] = df_analysis['Elements'].apply(lambda x: ''.join(sorted(x)))

# The inner join below keeps only literature alloys that also exist in the
# screened set. Report the ones that will be left out so the loss is visible
# instead of silent.
unmatched_mask = ~df_literature['Sorted_Elements'].isin(df_analysis['Sorted_Elements'])
if unmatched_mask.any():
    print(f"{unmatched_mask.sum()} of {len(df_literature)} literature alloys have no match in the screened set and are dropped:")
    print(df_literature.loc[unmatched_mask, 'Alloy Composition'].tolist())

# Merge the two dataframes based on the sorted elements
df_merged = pd.merge(df_literature, df_analysis, on='Sorted_Elements', how='inner')

# Drop the auxiliary columns ('Elements' exists in both inputs, hence the
# _x/_y suffixes added by the merge).
df_merged = df_merged.drop(columns=['Sorted_Elements', 'Elements_x', 'Elements_y'])

# Display the first few rows of the merged dataframe
display(df_merged.head())

Unnamed: 0,Alloy Composition,Tafel slope,Overpotential,Alloy,Predicted_Structure,D_H_Geometric_Mean,OH_Binding_Energy_Avg_eV,Work_Function_Mean_eV,Work_Function_StdDev,Work_Function_Min_eV,...,valence_mean,valence_sigma,electronegativity_mean,electronegativity_sigma,covalent_radius_mean,covalent_radius_sigma,ionization_energy_mean,ionization_energy_sigma,polarizability_mean,polarizability_sigma
0,PtCoNiRuIr,34.2,18.0,CoIrNiPtRu,FCC,3.017088e-13,1.894,5.242,0.412698,4.71,...,1.6,0.547723,2.078,0.167392,1.346,0.094763,8.1614,0.754455,7.6,1.210372
1,FeCoNiCuMo,55.78,37.590769,CoCuFeMoNi,FCC,2.511886e-12,1.694,4.884,0.216979,4.66,...,1.6,0.547723,1.936,0.128957,1.336,0.119499,7.648402,0.329346,8.34,2.6245
2,IrPdPtRhRu,31.0,16.0,IrPdPtRhRu,FCC,1.04564e-12,1.85,5.28,0.421485,4.71,...,2.8,3.49285,2.216,0.035777,1.408,0.037014,8.2164,0.780273,7.42,1.86333
3,PtPdCoNiCu,20.15,12.0,CoCuNiPdPt,FCC,1e-13,2.258,5.206,0.311577,4.82,...,3.0,3.391165,2.018,0.166493,1.314,0.063875,8.108556,0.545999,6.36,0.996494
4,FeCoCrNiMo,150.0,261.0,CoCrFeMoNi,BCC+FCC,1.245731e-10,1.388,4.82,0.278927,4.5,...,1.6,0.547723,1.888,0.180194,1.35,0.121244,7.456426,0.50529,9.42,2.634768


In [8]:
# Save the merged literature + descriptor table; index=False leaves the
# DataFrame's row index out of the file.
df_merged.to_csv('merged_hea_data.csv', index=False)