In [1]:
from utils.FeatureCalculator import FeatureCalculator
import numpy as np
import pandas as pd
import os

# display the current working directory
display("Current working directory: {0}".format(os.getcwd()))

'Current working directory: /nethome/yuxiang.wu/CCA-representation-ML/Dataset_Cleaned'

In [2]:
# Define a list of component elements and their corresponding fractions
compo_elem = ["Ni", "Cr", "Mo", "Ti", "Fe"]
ele_frac = np.array([43.8, 38.3, 2.44, 1.04, 0])

# Create a dictionary mapping each element to its corresponding fraction,
ele_frac_dict = {elem: frac for elem, frac in zip(
    compo_elem, ele_frac)}

# Prepare data in the format required for FeatureCalculator - a list of tuples,
# where each tuple contains a list of elements and their corresponding fractions
compositions = [(list(ele_frac_dict.keys()), list(ele_frac_dict.values()))]

print(compositions)

# Create a FeatureCalculator object with the prepared compositions
calculator = FeatureCalculator(compositions)

# Calculate the features using the FeatureCalculator object
features = calculator.calculate_features()

print(features)

[(['Ni', 'Cr', 'Mo', 'Ti', 'Fe'], [43.8, 38.3, 2.44, 1.04, 0.0])]
[array([ 1.07257700e+02,  9.14284921e+00,  1.68265280e+05,  1.53842608e+06,
       -5.74967136e+04,  2.54891799e+06, -3.07385957e+02,  1.54108000e+02,
        1.40898674e+03,  6.86600000e+02,  6.27750827e+03,  1.46876000e+13,
        1.34286583e+05])]


In [2]:
import pandas as pd
from utils.FeatureCalculator import FeatureCalculator

# Define constants and load data
feature_names = ["a", "delta_a", "Tm", "sigma_Tm", "Hmix", "sigma_Hmix", "ideal_S",
                 "elec_nega", "sigma_elec_nega", "VEC", "sigma_VEC", "bulk_modulus", "sigma_bulk_modulus"]

data_file_names = [
    "LiteratureDataset_Corrosion_YW_v3.xlsx",
    "LiteratureDataset_Hardness_YW_v3.xlsx",
    "MultiTaskModel_NiCrCoVFe_KW99_at_pct.xlsx",
    "MultiTaskModel_NiCrCoVFe_KW99_wt_pct.xlsx",
    "MultiTaskModel_NiCrMoTiFe_KW130_at_pct.xlsx",
    "MultiTaskModel_NiCrMoTiFe_KW130_wt_pct.xlsx",
    "MultiTaskModel_NiCrMoTiFe_KW131_at_pct.xlsx",
    "MultiTaskModel_NiCrMoTiFe_KW131_wt_pct.xlsx"]

element_columns = [
    ['Fe', 'Cr', 'Ni', 'Mo', 'W', 'N', 'Nb', 'C', 'Si',
        'Mn', 'Cu', 'Al', 'V', 'Ta', 'Ti', 'Co', 'Mg', 'Y'],
    ['Fe', 'Cr', 'Ni', 'Mo', 'W', 'N', 'Nb', 'C', 'Si', 'Mn',
        'Cu', 'Al', 'V', 'Ta', 'Ti', 'Co', 'Mg', 'Y', 'Zr', 'Hf'],
    ['Ni', 'Cr', 'Co', 'V', 'Fe'],
    ['Ni', 'Cr', 'Co', 'V', 'Fe'],
    ['Ni', 'Cr', 'Mo', 'Ti', 'Fe'],
    ['Ni', 'Cr', 'Mo', 'Ti', 'Fe'],
    ['Ni', 'Cr', 'Mo', 'Ti', 'Fe'],
    ['Ni', 'Cr', 'Mo', 'Ti', 'Fe']
]

df_header_list = [2, 2, 0, 0, 0, 0, 0, 0]

features_dfs = []
# Iterate over each data file and corresponding element column
for i in range(len(data_file_names)):
    # Load data from excel
    data_df = pd.read_excel(data_file_names[i], header=df_header_list[i])
    element_fractions = data_df[element_columns[i]].fillna(0)

    # Prepare compositions and calculate features
    compositions = [(element_columns[i], element_fraction)
                    for element_fraction in element_fractions.values]
    feature_calculator = FeatureCalculator(compositions)
    calculated_features = feature_calculator.calculate_features()

    # Create DataFrame of features
    features_df = pd.DataFrame(calculated_features, columns=feature_names)
    features_dfs.append(features_df)

    if i == 0:  # corrosion dataset
        df_C_compo, df_C_specific_testing, df_C_specific_features, df_C_output = element_fractions, data_df[[
            'TestTemperature_C', 'ChlorideIonConcentration', 'pH', 'ScanRate_mVs']], features_df, data_df['AvgPittingPotential_mV']

    if i == 1:  # hardness dataset
        df_H_compo, df_H_specific_features, df_H_output = element_fractions, features_df, data_df[
            'converted HV']


display
# # Display first few rows of each features DataFrame
# for df in features_dfs:
#     display(df.head())

In [None]:
# # Define column names for the composition dataframe
# column_compo_H =
# column_compo_C =

# # Import the Hardness dataset, starting from the third row (header=2) because the first two rows are presumably not relevant
# df_H = pd.read_excel(data_path + 'Hardness_database_YW_v3.xlsx', header=2)

# # Extract relevant columns from the Hardness dataframe, and fill in any missing values with zero
# df_H_compo = df_H[column_compo_H].fillna(0)

# # Import the Corrosion dataset, starting from the third row (header=2) because the first two rows are presumably not relevant
# df_C = pd.read_excel(data_path + 'Corrosion_database_YW_v3.xlsx', header=2)

# # Extract relevant columns from the Corrosion dataframe, and fill in any missing values with zero
# df_C_compo = df_C[column_compo_C].fillna(0)

# # Display the first row of both dataframes
# display(df_H_compo.head(1), df_H_compo.shape,
#         df_C_compo.head(1), df_C_compo.shape)