In [1]:
from utils.FeatureCalculator import FeatureCalculator
import numpy as np
import pandas as pd
import os

# Show the directory the kernel is running in; the workbooks loaded later
# are opened by bare file name, i.e. relative to this location.
display("Current working directory: " + os.getcwd())

'Current working directory: /nethome/yuxiang.wu/CCA-representation-ML/Dataset_Cleaned'

In [2]:
# Elements of one example alloy and the amount of each, in matching order
compo_elem = ["Ni", "Cr", "Mo", "Ti", "Fe"]
ele_frac = np.array([43.8, 38.3, 2.44, 1.04, 0])

# Element -> amount lookup built by pairing the two sequences position by position
ele_frac_dict = dict(zip(compo_elem, ele_frac))

# FeatureCalculator is given a list with one (elements, amounts) tuple per
# composition; here that list holds the single example defined above.
# The amounts are handed over exactly as listed (no rescaling is done here).
compositions = [([*ele_frac_dict], [*ele_frac_dict.values()])]

print(compositions)

# Build the calculator for the prepared compositions
calculator = FeatureCalculator(compositions)

# Compute the features for every composition handed to the calculator
features = calculator.calculate_features()

print(features)

[(['Ni', 'Cr', 'Mo', 'Ti', 'Fe'], [43.8, 38.3, 2.44, 1.04, 0.0])]
[array([ 1.07257700e+02,  9.14284921e+00,  1.68265280e+05,  1.53842608e+06,
       -5.74967136e+04,  2.54891799e+06, -3.07385957e+02,  1.54108000e+02,
        1.40898674e+03,  6.86600000e+02,  6.27750827e+03,  1.46876000e+13,
        1.34286583e+05])]


In [6]:
import pandas as pd
from utils.FeatureCalculator import FeatureCalculator

# Define constants and load data
# Column labels applied to the values returned by
# FeatureCalculator.calculate_features() when building the feature DataFrames.
feature_names = ["a", "delta_a", "Tm", "sigma_Tm", "Hmix", "sigma_Hmix", "ideal_S",
                 "elec_nega", "sigma_elec_nega", "VEC", "sigma_VEC", "bulk_modulus", "sigma_bulk_modulus"]

# Excel workbooks to process; opened by bare name, i.e. relative to the
# current working directory.
data_file_names = [
    "LiteratureDataset_Corrosion_YW_v3.xlsx",
    "LiteratureDataset_Hardness_YW_v3.xlsx",
    "MultiTaskModel_NiCrCoVFe_KW99_at_pct.xlsx",
    "MultiTaskModel_NiCrCoVFe_KW99_wt_pct.xlsx",
    "MultiTaskModel_NiCrMoTiFe_KW130_at_pct.xlsx",
    "MultiTaskModel_NiCrMoTiFe_KW130_wt_pct.xlsx",
    "MultiTaskModel_NiCrMoTiFe_KW131_at_pct.xlsx",
    "MultiTaskModel_NiCrMoTiFe_KW131_wt_pct.xlsx"]

# Element columns to read from each workbook; entry k belongs to
# data_file_names[k].
element_columns = [
    ['Fe', 'Cr', 'Ni', 'Mo', 'W', 'N', 'Nb', 'C', 'Si',
        'Mn', 'Cu', 'Al', 'V', 'Ta', 'Ti', 'Co', 'Mg', 'Y'],
    ['Fe', 'Cr', 'Ni', 'Mo', 'W', 'N', 'Nb', 'C', 'Si', 'Mn',
        'Cu', 'Al', 'V', 'Ta', 'Ti', 'Co', 'Mg', 'Y', 'Zr', 'Hf'],
    ['Ni', 'Cr', 'Co', 'V', 'Fe'],
    ['Ni', 'Cr', 'Co', 'V', 'Fe'],
    ['Ni', 'Cr', 'Mo', 'Ti', 'Fe'],
    ['Ni', 'Cr', 'Mo', 'Ti', 'Fe'],
    ['Ni', 'Cr', 'Mo', 'Ti', 'Fe'],
    ['Ni', 'Cr', 'Mo', 'Ti', 'Fe']
]

# Value passed as `header=` to pd.read_excel for each workbook; entry k
# belongs to data_file_names[k].
df_header_list = [2, 2, 0, 0, 0, 0, 0, 0]

# The three lists above are consumed in lockstep. Fail early with a clear
# message instead of an IndexError part-way through (or silently skipping
# trailing entries) if they ever get out of sync.
if not (len(data_file_names) == len(element_columns) == len(df_header_list)):
    raise ValueError(
        "data_file_names, element_columns and df_header_list must have the "
        "same length, got {0}, {1} and {2}".format(
            len(data_file_names), len(element_columns), len(df_header_list)))

# One feature DataFrame per workbook, in the order of data_file_names
features_dfs = []
for i, (file_name, elements, header_row) in enumerate(
        zip(data_file_names, element_columns, df_header_list)):
    # Load data from excel
    data_df = pd.read_excel(file_name, header=header_row)

    # Report absent element columns together with the workbook they were
    # expected in; the plain pandas KeyError does not name the file.
    missing_elements = [el for el in elements if el not in data_df.columns]
    if missing_elements:
        raise KeyError("{0}: missing element column(s) {1}".format(
            file_name, missing_elements))

    # Empty cells are treated as "element not present"
    element_fractions = data_df[elements].fillna(0)

    # Show the extracted composition table for the first workbook only
    if i == 0:
        display(element_fractions)

    # Prepare compositions and calculate features: one (elements, row) tuple
    # per row of the composition table.
    # NOTE(review): the row values are passed on exactly as stored in the
    # workbook (the file names suggest at.% / wt.% figures). Previously
    # recorded results for this cell (e.g. Tm of order 1e5, VEC of order 1e2,
    # delta_a ~9.9 on every row) suggest FeatureCalculator does not normalise
    # its input - confirm whether it expects fractions summing to 1 and, if
    # so, divide each row by its sum before this point.
    compositions = [(elements, element_fraction)
                    for element_fraction in element_fractions.to_numpy()]
    feature_calculator = FeatureCalculator(compositions)
    calculated_features = feature_calculator.calculate_features()

    # Create DataFrame of features
    features_df = pd.DataFrame(calculated_features, columns=feature_names)
    features_dfs.append(features_df)

# # Display first few rows of each features DataFrame
# for df in features_dfs:
#     display(df.head())

Unnamed: 0,Fe,Cr,Ni,Mo,W,N,Nb,C,Si,Mn,Cu,Al,V,Ta,Ti,Co,Mg,Y
0,69.7700,18.000,10.0,0.0,0.0,0.0,0.0,0.03,1.0000,1.000,0.2000,0.000,0.0,0.0,0.0,0.0,0.00,0.0
1,69.7700,18.000,10.0,0.0,0.0,0.0,0.0,0.03,1.0000,1.000,0.2000,0.000,0.0,0.0,0.0,0.0,0.00,0.0
2,69.7700,18.000,10.0,0.0,0.0,0.0,0.0,0.03,1.0000,1.000,0.2000,0.000,0.0,0.0,0.0,0.0,0.00,0.0
3,69.7700,18.000,10.0,0.0,0.0,0.0,0.0,0.03,1.0000,1.000,0.2000,0.000,0.0,0.0,0.0,0.0,0.00,0.0
4,69.7700,18.000,10.0,0.0,0.0,0.0,0.0,0.03,1.0000,1.000,0.2000,0.000,0.0,0.0,0.0,0.0,0.00,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
707,0.0007,0.000,0.0,0.0,0.0,0.0,0.0,0.00,0.0016,0.000,0.0039,99.990,0.0,0.0,0.0,0.0,0.00,0.0
708,0.0007,0.000,0.0,0.0,0.0,0.0,0.0,0.00,0.0016,0.000,0.0039,99.990,0.0,0.0,0.0,0.0,0.00,0.0
709,0.4800,0.002,0.0,0.0,0.0,0.0,0.0,0.00,0.0800,0.004,0.1000,99.400,0.0,0.0,0.0,0.0,0.00,0.0
710,0.0020,0.000,0.0,0.0,0.0,0.0,0.0,0.00,0.0020,1.300,0.0060,98.689,0.0,0.0,0.0,0.0,0.00,0.0


Unnamed: 0,a,delta_a,Tm,sigma_Tm,Hmix,sigma_Hmix,ideal_S,elec_nega,sigma_elec_nega,VEC,sigma_VEC,bulk_modulus,sigma_bulk_modulus
0,124.30836,9.9,186465.714,1846011.0,-29725.004,1441756.0,-370.81335,180.5656,1787.599615,779.48,7716.860233,16789890000000.0,166219.942589
1,124.30836,9.9,186465.714,1846011.0,-29725.004,1441756.0,-370.81335,180.5656,1787.599615,779.48,7716.860233,16789890000000.0,166219.942589
2,124.30836,9.9,186465.714,1846011.0,-29725.004,1441756.0,-370.81335,180.5656,1787.599615,779.48,7716.860233,16789890000000.0,166219.942589
3,124.30836,9.9,186465.714,1846011.0,-29725.004,1441756.0,-370.81335,180.5656,1787.599615,779.48,7716.860233,16789890000000.0,166219.942589
4,124.30836,9.9,186465.714,1846011.0,-29725.004,1441756.0,-370.81335,180.5656,1787.599615,779.48,7716.860233,16789890000000.0,166219.942589


Unnamed: 0,a,delta_a,Tm,sigma_Tm,Hmix,sigma_Hmix,ideal_S,elec_nega,sigma_elec_nega,VEC,sigma_VEC,bulk_modulus,sigma_bulk_modulus
0,126.69848,9.900001,179579.93,1777843.0,-42038.7456,2657898.0,-299.679129,177.0884,1753.175704,806.21,7981.49141,16254600000000.0,160920.693702
1,126.65624,9.900001,176387.2343,1746237.0,-89152.608,5598099.0,-302.819622,180.1732,1783.715028,775.19,7674.410959,16255040000000.0,160925.193114
2,127.43953,9.899496,185512.88,1836485.0,-75215.4288,4852893.0,-281.454191,174.9043,1731.464782,759.06,7514.329935,16213700000000.0,160507.57255
3,126.65624,9.900001,176387.2343,1746237.0,-89152.608,5598099.0,-302.819622,180.1732,1783.715028,775.19,7674.410959,16255040000000.0,160925.193114
4,126.65624,9.900001,176387.2343,1746237.0,-89152.608,5598099.0,-302.819622,180.1732,1783.715028,775.19,7674.410959,16255040000000.0,160925.193114


Unnamed: 0,a,delta_a,Tm,sigma_Tm,Hmix,sigma_Hmix,ideal_S,elec_nega,sigma_elec_nega,VEC,sigma_VEC,bulk_modulus,sigma_bulk_modulus
0,124.75847,9.898485,189820.66,1878938.0,-62675.4376,3508344.0,-344.238787,181.0626,1792.245782,830.89,8224.573016,17182400000000.0,170079.752
1,124.92665,9.902525,191175.1,1893117.0,-64749.2584,3704706.0,-337.667331,180.5,1787.406086,818.81,8108.307737,17144800000000.0,169776.830929
2,124.98248,9.901515,191666.59,1897791.0,-65938.3416,3855988.0,-329.745843,180.1019,1783.282025,810.61,8026.288558,17118600000000.0,169500.098549
3,124.9977,9.897475,192019.25,1900507.0,-67847.02,4048174.0,-321.738771,179.6045,1777.631373,801.75,7935.322005,17087000000000.0,169118.175763
4,125.22892,9.90101,192350.06,1904461.0,-70502.192,4290892.0,-314.953022,179.5316,1777.544538,796.0,7881.22556,17096200000000.0,169269.672175


Unnamed: 0,a,delta_a,Tm,sigma_Tm,Hmix,sigma_Hmix,ideal_S,elec_nega,sigma_elec_nega,VEC,sigma_VEC,bulk_modulus,sigma_bulk_modulus
0,124.778843,9.9,188718.357573,1868313.0,-61020.587842,3393591.0,-345.477555,181.761829,1799.44247,841.438517,8330.261438,17239450000000.0,170670.628171
1,124.842929,9.9,189890.063373,1879913.0,-63222.042883,3601207.0,-338.17,181.071302,1792.606263,828.92021,8206.33088,17189450000000.0,170175.546253
2,124.918543,9.9,190414.584469,1885106.0,-64471.54312,3760735.0,-330.133581,180.712902,1789.058102,820.898563,8126.916683,17167060000000.0,169953.878916
3,125.026464,9.9,190912.654444,1890036.0,-66440.704458,3962493.0,-322.329031,180.36528,1785.61664,812.728833,8046.036472,17149740000000.0,169782.441998
4,125.160641,9.9,191106.445106,1891955.0,-68806.371931,4184529.0,-315.271101,180.170497,1783.688286,806.438509,7983.762215,17147440000000.0,169759.729336


Unnamed: 0,a,delta_a,Tm,sigma_Tm,Hmix,sigma_Hmix,ideal_S,elec_nega,sigma_elec_nega,VEC,sigma_VEC,bulk_modulus,sigma_bulk_modulus
0,125.17991,9.90101,193808.87,1918905.0,-66318.952,3744998.0,-346.212175,180.8982,1791.075343,807.82,7998.256042,17172600000000.0,170026.144038
1,125.40275,9.90303,195817.01,1939184.0,-67966.0224,3887557.0,-341.846987,180.46,1787.101211,793.3,7856.09602,17143200000000.0,169769.68849
2,125.62846,9.900505,197514.01,1955491.0,-71252.0328,4126571.0,-336.030083,180.1622,1783.697309,780.12,7723.605184,17118900000000.0,169485.84648
3,126.02125,9.901011,199270.07,1972977.0,-75406.0696,4432524.0,-329.77861,180.362,1785.766567,769.9,7622.811294,17141400000000.0,169717.288954
4,126.41164,9.897476,200804.35,1987459.0,-82322.8424,4896293.0,-323.835886,180.1228,1782.761529,756.68,7489.245834,17099300000000.0,169240.035547


Unnamed: 0,a,delta_a,Tm,sigma_Tm,Hmix,sigma_Hmix,ideal_S,elec_nega,sigma_elec_nega,VEC,sigma_VEC,bulk_modulus,sigma_bulk_modulus
0,125.312524,9.9,194359.993767,1924166.0,-65304.99337,3702854.0,-343.250201,182.119747,1802.985997,814.217573,8060.775817,17328470000000.0,171551.901918
1,125.532355,9.9,196781.619396,1948140.0,-66918.189724,3856461.0,-337.886795,181.81817,1800.000438,798.603442,7906.196528,17327020000000.0,171537.618301
2,125.879574,9.900001,199298.48674,1973058.0,-69786.293232,4085803.0,-331.732003,181.902802,1800.838363,784.697185,7768.52515,17361990000000.0,171883.854044
3,126.337531,9.900001,202022.535171,2000026.0,-72987.722966,4343567.0,-325.259881,182.468869,1806.442509,772.808492,7650.827546,17450620000000.0,172761.258678
4,126.852525,9.900001,204505.951458,2024612.0,-78458.588261,4728691.0,-319.806701,182.713109,1808.860559,759.401957,7518.103637,17485070000000.0,173102.326427


Unnamed: 0,a,delta_a,Tm,sigma_Tm,Hmix,sigma_Hmix,ideal_S,elec_nega,sigma_elec_nega,VEC,sigma_VEC,bulk_modulus,sigma_bulk_modulus
0,125.2522,9.904039,194524.78,1926583.0,-66107.3936,3743217.0,-346.161112,180.643,1789.095818,802.6,7949.004158,17152600000000.0,169880.082845
1,125.26869,9.895454,196346.76,1942942.0,-67714.6488,3882266.0,-339.765474,179.9108,1780.299551,785.38,7771.714415,17096500000000.0,169177.704971
2,125.72912,9.90202,198293.54,1963509.0,-71150.92,4134968.0,-334.869308,180.0394,1782.754264,774.84,7672.504248,17112200000000.0,169445.439206
3,126.09847,9.90303,199991.11,1980520.0,-74560.512,4389477.0,-329.433334,180.0714,1783.253009,763.74,7563.36337,17114600000000.0,169486.50804
4,126.63184,9.90303,201943.56,1999856.0,-80960.6528,4838894.0,-322.86388,180.3858,1786.366603,752.68,7453.836654,17140000000000.0,169738.079605


Unnamed: 0,a,delta_a,Tm,sigma_Tm,Hmix,sigma_Hmix,ideal_S,elec_nega,sigma_elec_nega,VEC,sigma_VEC,bulk_modulus,sigma_bulk_modulus
0,125.308551,9.9,194925.230159,1929762.0,-65164.565094,3706477.0,-342.746471,181.760924,1799.433655,808.684574,8005.999297,17297920000000.0,171249.455524
1,125.602242,9.9,197761.339261,1957840.0,-66922.194757,3877342.0,-336.153724,181.609967,1797.939246,791.643919,7837.297375,17317480000000.0,171443.142519
2,125.960864,9.900001,200205.481868,1982037.0,-69617.122677,4093274.0,-330.223063,181.806806,1799.888019,778.903781,7711.170467,17363540000000.0,171899.180874
3,126.366217,9.900001,202683.784907,2006573.0,-72144.716982,4302185.0,-324.544236,182.1343,1803.130281,766.458373,7587.961239,17420890000000.0,172466.983711
4,126.97549,9.900001,205805.76526,2037481.0,-76830.069222,4650611.0,-318.356218,182.926183,1810.970024,753.866177,7463.299118,17531650000000.0,173563.566703


In [None]:
# # Define column names for the composition dataframe
# column_compo_H =
# column_compo_C =

# # Import the Hardness dataset, using the third row of the sheet as the column header (header=2); the two rows above it are skipped
# df_H = pd.read_excel(data_path + 'Hardness_database_YW_v3.xlsx', header=2)

# # Extract relevant columns from the Hardness dataframe, and fill in any missing values with zero
# df_H_compo = df_H[column_compo_H].fillna(0)

# # Import the Corrosion dataset, using the third row of the sheet as the column header (header=2); the two rows above it are skipped
# df_C = pd.read_excel(data_path + 'Corrosion_database_YW_v3.xlsx', header=2)

# # Extract relevant columns from the Corrosion dataframe, and fill in any missing values with zero
# df_C_compo = df_C[column_compo_C].fillna(0)

# # Display the first row of both dataframes
# display(df_H_compo.head(1), df_H_compo.shape,
#         df_C_compo.head(1), df_C_compo.shape)