In [1]:
import pandas as pd
import numpy as np
from pymatgen.core import Composition

# No warnings about setting value on copy of slice
pd.options.mode.chained_assignment = None

# Display up to 60 columns of a dataframe
pd.set_option('display.max_columns', 60)

# Matplotlib visualization
import matplotlib.pyplot as plt
from matplotlib import rcParams
%matplotlib inline

# Internal ipython tool for setting figure size
from IPython.core.pylabtools import figsize

# Seaborn for visualization
import seaborn as sns

# Splitting data into training and testing
from sklearn.model_selection import train_test_split
import warnings
warnings.filterwarnings('ignore')
# 全局设置字体及大小，设置公式字体即可，若要修改刻度字体，可在此修改全局字体
config = {
    "mathtext.fontset":'stix',
    "font.family":'serif',
    "font.serif": ['Times New Roman'],
    "font.size": 24,# 字号，大家自行调节
    'axes.unicode_minus': False # 处理负号，即-号
}
rcParams.update(config)
plt.rcParams['axes.unicode_minus'] = False  # 用来正常显示负号
large = 22; med = 16; small = 12
params = {'axes.titlesize': large,
          'legend.fontsize': med,
          'figure.figsize': (8, 6),
          'axes.labelsize': med,
          'axes.titlesize': med,
          'xtick.labelsize': med,
          'ytick.labelsize': med,
          'figure.titlesize': large}
plt.rcParams.update(params)
plt.rcParams['figure.dpi'] = 300 #分辨率

In [2]:
a_data = pd.read_csv('./data/a.csv',index_col=0)
b_data = pd.read_csv('./data/b.csv',index_col=0)
a_data = a_data[a_data['spacegroup_number']==227]
b_data = b_data[b_data['spacegroup_number']==227]

# Get formula

In [3]:
formula = []
for i,row in a_data.iterrows():
    A1 = row['A1']
    A2 = row['A2']
    a1 = row['a1']
    a2 = row['a2']
    B = row['B']
    if A1 != '0' and A2 != '0':
        formula.append(A1+str(a1)+A2+str(a2)+B+'2'+'O4')
    elif A1 == '0' and A2 != '0':
        formula.append(A2+str(a2)+B+'2'+'O4')
    elif A2 == '0' and A1 != '0':
        formula.append(A1+str(a1)+B+'2'+'O4')
a_data['formula'] = formula

In [4]:
a_data['formula'] = a_data['formula'].apply(lambda x:Composition(x).reduced_formula)
a_data

Unnamed: 0,A1,A2,a1,a2,B,C,spacegroup_number,formula
0,Na,Ag,0.1,0.9,Mn,O,227,Na0.1Mn2Ag0.9O4
1,Na,Al,0.1,0.9,Mn,O,227,Na0.1Mn2Al0.9O4
2,Na,Ba,0.1,0.9,Mn,O,227,Ba0.9Na0.1Mn2O4
3,Na,Be,0.1,0.9,Mn,O,227,Na0.1Mn2Be0.9O4
4,Na,Ca,0.1,0.9,Mn,O,227,Na0.1Ca0.9Mn2O4
...,...,...,...,...,...,...,...,...
137,Li,0,1.0,0.0,Ni,O,227,Li(NiO2)2
150,Na,0,1.0,0.0,Sn,O,227,Na(SnO2)2
162,Mn,0,1.0,0.0,Rh,O,227,Mn(RhO2)2
166,Li,0,1.0,0.0,V,O,227,LiV2O4


In [5]:
a_data = a_data.drop_duplicates(subset="formula",keep='first')
a_data

Unnamed: 0,A1,A2,a1,a2,B,C,spacegroup_number,formula
0,Na,Ag,0.1,0.9,Mn,O,227,Na0.1Mn2Ag0.9O4
1,Na,Al,0.1,0.9,Mn,O,227,Na0.1Mn2Al0.9O4
2,Na,Ba,0.1,0.9,Mn,O,227,Ba0.9Na0.1Mn2O4
3,Na,Be,0.1,0.9,Mn,O,227,Na0.1Mn2Be0.9O4
4,Na,Ca,0.1,0.9,Mn,O,227,Na0.1Ca0.9Mn2O4
...,...,...,...,...,...,...,...,...
131,Li,0,1.0,0.0,Mo,O,227,Li(MoO2)2
136,Eu,0,1.0,0.0,La,O,227,La2EuO4
137,Li,0,1.0,0.0,Ni,O,227,Li(NiO2)2
150,Na,0,1.0,0.0,Sn,O,227,Na(SnO2)2


In [6]:
formula = []
for i,row in b_data.iterrows():
    B1 = row['B1']
    B2 = row['B2']
    b1 = row['b1']
    b2 = row['b2']
    A = row['A']
    if str(B1) != '0' and str(B2) != '0':
        formula.append(A+B1+str(b1)+B2+str(b2)+'O4')
    elif str(B1) == '0' and str(B2) != '0':
        formula.append(A+B2+str(b2)+'O4')
    elif str(B2) == '0' and str(B1) != '0':
        formula.append(A+B1+str(b1)+'O4')
b_data['formula'] = formula

In [7]:
b_data['formula'] = b_data['formula'].apply(lambda x:Composition(x).reduced_formula)
b_data

Unnamed: 0,A,B1,B2,b1,b2,C,spacegroup_number,formula
0,Na,Mn,Ag,0.1,1.9,O,227,Na1Mn0.1Ag1.9O4
1,Na,Mn,Al,0.1,1.9,O,227,Na1Mn0.1Al1.9O4
2,Na,Mn,Bi,0.1,1.9,O,227,Na1Mn0.1Bi1.9O4
3,Na,Mn,Ca,0.1,1.9,O,227,Na1Ca1.9Mn0.1O4
4,Na,Mn,Cd,0.1,1.9,O,227,Na1Mn0.1Cd1.9O4
...,...,...,...,...,...,...,...,...
3994,V,V,0,2.0,0.0,O,227,V3O4
3995,V,W,0,2.0,0.0,O,227,V(WO2)2
3996,V,Y,0,2.0,0.0,O,227,Y2VO4
3997,V,Yb,0,2.0,0.0,O,227,Yb2VO4


In [8]:
b_data = b_data.drop_duplicates(subset="formula",keep='first')
b_data

Unnamed: 0,A,B1,B2,b1,b2,C,spacegroup_number,formula
0,Na,Mn,Ag,0.1,1.9,O,227,Na1Mn0.1Ag1.9O4
1,Na,Mn,Al,0.1,1.9,O,227,Na1Mn0.1Al1.9O4
2,Na,Mn,Bi,0.1,1.9,O,227,Na1Mn0.1Bi1.9O4
3,Na,Mn,Ca,0.1,1.9,O,227,Na1Ca1.9Mn0.1O4
4,Na,Mn,Cd,0.1,1.9,O,227,Na1Mn0.1Cd1.9O4
...,...,...,...,...,...,...,...,...
3993,V,Tm,0,2.0,0.0,O,227,Tm2VO4
3994,V,V,0,2.0,0.0,O,227,V3O4
3995,V,W,0,2.0,0.0,O,227,V(WO2)2
3996,V,Y,0,2.0,0.0,O,227,Y2VO4


# Coarse screening of spinel

In [9]:
spinels = pd.read_csv('./data/spinel_oxides.csv')
spinels = spinels[['formula_pretty','band_gap','spacegroup_number','is_gap_direct']]
spinels = spinels.rename(columns={"formula_pretty":"formula"})
spinels = spinels[['formula']]
mp_data = pd.read_csv('./data/data.csv')
mp_data_non_spinel = mp_data[mp_data['spacegroup']!=227]
mp_data_non_spinel = mp_data_non_spinel[mp_data_non_spinel['formula'].str.contains('O')][['formula']]
mp_data_non_spinel = mp_data_non_spinel.sample(200,random_state=22).reset_index(drop=True)

In [10]:
spinels['is_spinel'] = 1
mp_data_non_spinel['is_spinel'] = 0
data = pd.concat([spinels,mp_data_non_spinel],axis=0).reset_index(drop=True)
data

Unnamed: 0,formula,is_spinel
0,NaMn2O4,1
1,Cd(RhO2)2,1
2,CaIn2O4,1
3,Si(NiO2)2,1
4,Mg2FeO4,1
...,...,...
365,Li4Ti3V3(TeO8)2,0
366,NaLa2Ti2P3(SO8)3,0
367,Na8La2Si5Sn2SO24,0
368,RbDy(WO4)2,0


In [11]:
from matminer.featurizers.base import MultipleFeaturizer
from matminer.featurizers import composition as cf
feature_calculators = MultipleFeaturizer([cf.ElementProperty.from_preset("magpie"),
                                          cf.ValenceOrbital(props=['avg']), cf.IonProperty(fast=True)])
feature_labels = feature_calculators.feature_labels()
data['composition'] = data['formula'].map(Composition)
data_features = feature_calculators.featurize_dataframe(data,col_id='composition')

MultipleFeaturizer:   0%|          | 0/370 [00:00<?, ?it/s]

In [12]:
def remove_collinear_features(x, threshold):
    '''
    Objective:
        Remove collinear features in a dataframe with a correlation coefficient
        greater than the threshold. Removing collinear features can help a model
        to generalize and improves the interpretability of the model.
        
    Inputs: 
        threshold: any features with correlations greater than this value are removed
    
    Output: 
        dataframe that contains only the non-highly-collinear features
    '''  
    # Calculate the correlation matrix
    corr_matrix = x.corr()
    iters = range(len(corr_matrix.columns) - 1)
    drop_cols = []

    # Iterate through the correlation matrix and compare correlations
    for i in iters:
        for j in range(i):
            item = corr_matrix.iloc[j:(j+1), (i+1):(i+2)]
            col = item.columns
            row = item.index
            val = abs(item.values)
            
            # If correlation exceeds the threshold
            if val >= threshold:
                # Print the correlated features and the correlation value
                # print(col.values[0], "|", row.values[0], "|", round(val[0][0], 2))
                drop_cols.append(col.values[0])

    # Drop one of each pair of correlated columns
    drops = set(drop_cols)
    x = x.drop(columns = drops)
    return x

In [13]:
X = data_features[feature_labels]
y = data['is_spinel']
# 剔除共线性特征
X = remove_collinear_features(X, 0.8);
feature_labels = X.columns

In [14]:
X_train,X_test,y_train,y_test = train_test_split(X,y)

In [15]:
# 首先训练分类模型
#模型评估定义，这里使用5fold StratifiedKFold 
from sklearn.model_selection import StratifiedKFold
from tpot import TPOTClassifier
cv = StratifiedKFold(n_splits=5) 
#定义TPOTClassifier 
model_clf = TPOTClassifier(generations=10,population_size=50,cv=cv, scoring='roc_auc', verbosity=2, 
                       random_state=42, n_jobs=-1) 
#搜索最佳拟合
model_clf.fit(X_train, y_train)

Optimization Progress:   0%|          | 0/550 [00:00<?, ?pipeline/s]

Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f85bbdc1430>
Traceback (most recent call last):
  File "/usr/local/anaconda3/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/usr/local/anaconda3/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/usr/local/anaconda3/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/usr/local/anaconda3/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_modul


Generation 2 - Current best internal CV score: 0.97894783377542

Generation 3 - Current best internal CV score: 0.97894783377542

Generation 4 - Current best internal CV score: 0.9822281167108754

Generation 5 - Current best internal CV score: 0.9822281167108754


Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_module_callback at 0x7f85be036f70>
Traceback (most recent call last):
  File "/usr/local/anaconda3/lib/python3.8/site-packages/threadpoolctl.py", line 400, in match_module_callback
    self._make_module_from_path(filepath)
  File "/usr/local/anaconda3/lib/python3.8/site-packages/threadpoolctl.py", line 515, in _make_module_from_path
    module = module_class(filepath, prefix, user_api, internal_api)
  File "/usr/local/anaconda3/lib/python3.8/site-packages/threadpoolctl.py", line 606, in __init__
    self.version = self.get_version()
  File "/usr/local/anaconda3/lib/python3.8/site-packages/threadpoolctl.py", line 646, in get_version
    config = get_config().split()
AttributeError: 'NoneType' object has no attribute 'split'
Exception ignored on calling ctypes callback function: <function _ThreadpoolInfo._find_modules_with_dl_iterate_phdr.<locals>.match_modul


Generation 6 - Current best internal CV score: 0.9839964633068081

Generation 7 - Current best internal CV score: 0.9839964633068081

Generation 8 - Current best internal CV score: 0.9839964633068081

Generation 9 - Current best internal CV score: 0.9839964633068081

Generation 10 - Current best internal CV score: 0.9839964633068081

Best pipeline: GradientBoostingClassifier(SelectPercentile(input_matrix, percentile=22), learning_rate=0.1, max_depth=3, max_features=0.05, min_samples_leaf=4, min_samples_split=18, n_estimators=100, subsample=0.7500000000000001)


In [16]:
# 模型泛化能力验证
from sklearn.metrics import roc_auc_score
y_test_pred = model_clf.predict(X_test)
roc_auc = roc_auc_score(y_test,y_test_pred)
roc_auc

0.9466981132075472

In [17]:
a_data['composition'] = a_data['formula'].map(Composition)
b_data['composition'] = b_data['formula'].map(Composition)

In [18]:
a_data_features = feature_calculators.featurize_dataframe(a_data, col_id='composition',ignore_errors=True)

MultipleFeaturizer:   0%|          | 0/34984 [00:00<?, ?it/s]

In [19]:
a_X = a_data_features[feature_labels]
a_data['is_spinel'] = model_clf.predict(a_X)
a_data['is_spinel'].value_counts()

1    28841
0     6143
Name: is_spinel, dtype: int64

In [20]:
b_data_features = feature_calculators.featurize_dataframe(b_data, col_id='composition',ignore_errors=True)

MultipleFeaturizer:   0%|          | 0/111871 [00:00<?, ?it/s]

In [21]:
b_X = b_data_features[feature_labels]
b_data['is_spinel'] = model_clf.predict(b_X)
b_data['is_spinel'].value_counts()

1    85212
0    26659
Name: is_spinel, dtype: int64

In [22]:
a_data_spinel = a_data[a_data['is_spinel']==1]
b_data_spinel = b_data[b_data['is_spinel']==1]
print(a_data_spinel.shape,b_data_spinel.shape)

(28841, 10) (85212, 10)


# feature project

In [23]:
ele_df1 = pd.read_csv('./data/elements.csv')
ele_df2 = pd.read_csv('./data/elements2.csv')
spinels = pd.read_csv('./data/spinel_oxides.csv')
mp_data = pd.read_csv('./data/data.csv')

In [24]:
spinels = spinels[['formula_pretty','spacegroup_number','band_gap','is_gap_direct']]
spinels.columns = ['formula','spacegroup','GGA','is_gap_direct']
spinels

Unnamed: 0,formula,spacegroup,GGA,is_gap_direct
0,NaMn2O4,227,0.0000,False
1,Cd(RhO2)2,227,0.8367,False
2,CaIn2O4,227,2.0208,True
3,Si(NiO2)2,227,3.5770,True
4,Mg2FeO4,227,0.0000,False
...,...,...,...,...
165,MgCr2O4,227,2.5264,True
166,Mg(RhO2)2,227,1.1521,False
167,LiV2O4,227,0.0000,False
168,MnAl2O4,227,2.6578,True


In [25]:
spinels = pd.merge(spinels,mp_data, on=['formula','spacegroup'], how='inner')
spinels

Unnamed: 0,formula,spacegroup,GGA_x,is_gap_direct,0,GGA_y,target
0,NaMn2O4,227,0.0000,False,131,0.0000,0.000000
1,Cd(RhO2)2,227,0.8367,False,125609,0.8367,2.470591
2,CaIn2O4,227,2.0208,True,81767,2.0208,3.393936
3,Si(NiO2)2,227,3.5770,True,843,3.5770,3.577000
4,Mg2FeO4,227,0.0000,False,363,0.0000,0.000000
...,...,...,...,...,...,...,...
165,MgCr2O4,227,2.5264,True,27914,2.5264,2.526400
166,Mg(RhO2)2,227,1.1521,False,129217,1.1521,3.042557
167,LiV2O4,227,0.0000,False,28026,0.0000,0.000000
168,MnAl2O4,227,2.6578,True,28051,2.6578,2.657800


In [26]:
spinels = spinels[['formula','spacegroup','GGA_x','is_gap_direct','target']]
spinels.columns = ['formula','spacegroup','GGA','is_gap_direct','target']
spinels

Unnamed: 0,formula,spacegroup,GGA,is_gap_direct,target
0,NaMn2O4,227,0.0000,False,0.000000
1,Cd(RhO2)2,227,0.8367,False,2.470591
2,CaIn2O4,227,2.0208,True,3.393936
3,Si(NiO2)2,227,3.5770,True,3.577000
4,Mg2FeO4,227,0.0000,False,0.000000
...,...,...,...,...,...
165,MgCr2O4,227,2.5264,True,2.526400
166,Mg(RhO2)2,227,1.1521,False,3.042557
167,LiV2O4,227,0.0000,False,0.000000
168,MnAl2O4,227,2.6578,True,2.657800


In [27]:
def get_a_symbol(composition: Composition):
    comp = composition.as_dict()
    for k, v in comp.items():
        if v == 1: return k
    return None

def get_b_symbol(composition: Composition):
    comp = composition.as_dict()
    for k, v in comp.items():
        if v == 2: return k
    return None

spinels['composition'] = spinels['formula'].map(Composition)
spinels['A'] = spinels['composition'].map(get_a_symbol)
spinels['B'] = spinels['composition'].map(get_b_symbol)

In [28]:
def get_base_features_AB2O4(data:pd.DataFrame):
    result = []
    for i,row in data.iterrows():
        tmp = {}
        a = row['A']
        b = row['B']
        o = 'O'
        tmp["A_Density"] = ele_df1[ele_df1['symbol']==a]['Density'].values[0]
        tmp["B_Density"] = ele_df1[ele_df1['symbol']==b]['Density'].values[0]
        tmp["O_Density"] = ele_df1[ele_df1['symbol']==o]['Density'].values[0]
        tmp["A_dipole Polarizability"] = ele_df1[ele_df1['symbol']==a]['dipole_polarizability'].values[0]
        tmp["B_dipole Polarizability"] = ele_df1[ele_df1['symbol']==b]['dipole_polarizability'].values[0]
        tmp["O_dipole Polarizability"] = ele_df1[ele_df1['symbol']==o]['dipole_polarizability'].values[0]
        tmp["A_covalent Radius"] = ele_df1[ele_df1['symbol']==a]['covalent_radius'].values[0]
        tmp["B_covalent Radius"] = ele_df1[ele_df1['symbol']==b]['covalent_radius'].values[0]
        tmp["O_covalent Radius"] = ele_df1[ele_df1['symbol']==o]['covalent_radius'].values[0]
        tmp["A_atomic Radius"] = ele_df2[ele_df1['symbol']==a]['Atomic radius (Å)'].values[0]
        tmp["B_atomic Radius"] = ele_df2[ele_df1['symbol']==b]['Atomic radius (Å)'].values[0]
        tmp["O_atomic Radius"] = ele_df2[ele_df1['symbol']==o]['Atomic radius (Å)'].values[0]
        tmp["A_FirstIonization"] = ele_df1[ele_df1['symbol']==a]['FirstIonization'].values[0]
        tmp["B_FirstIonization"] = ele_df1[ele_df1['symbol']==b]['FirstIonization'].values[0]
        tmp["O_FirstIonization"] = ele_df1[ele_df1['symbol']==o]['FirstIonization'].values[0]
        tmp["A_number of Valence Electrons"] = ele_df1[ele_df1['symbol']==a]['number_of_valence_electrons'].values[0]
        tmp["B_number of Valence Electrons"] = ele_df1[ele_df1['symbol']==b]['number_of_valence_electrons'].values[0]
        tmp["O_number of Valence Electrons"] = ele_df1[ele_df1['symbol']==o]['number_of_valence_electrons'].values[0]
        tmp["A_number"] = ele_df1[ele_df1['symbol']==a]['number'].values[0]
        tmp["B_number"] = ele_df1[ele_df1['symbol']==b]['number'].values[0]
        tmp["O_number"] = ele_df1[ele_df1['symbol']==o]['number'].values[0]
        tmp["A_Period"] = ele_df1[ele_df1['symbol']==a]['Period'].values[0]
        tmp["B_Period"] = ele_df1[ele_df1['symbol']==b]['Period'].values[0]
        tmp["O_Period"] = ele_df1[ele_df1['symbol']==o]['Period'].values[0]
        tmp["A_Electronegativity"] = ele_df1[ele_df1['symbol']==a]['Electronegativity'].values[0]
        tmp["B_Electronegativity"] = ele_df1[ele_df1['symbol']==b]['Electronegativity'].values[0]
        tmp["O_Electronegativity"] = ele_df1[ele_df1['symbol']==o]['Electronegativity'].values[0]
        tmp["A_number of s+p Electrons"] = ele_df2[ele_df2['symbol']==a]['Number of s electrons'].values[0] + ele_df2[ele_df2['symbol']==a]['Number of p electrons'].values[0]
        tmp["B_number of s+p Electrons"] = ele_df2[ele_df2['symbol']==b]['Number of s electrons'].values[0] + ele_df2[ele_df2['symbol']==b]['Number of p electrons'].values[0]
        tmp["O_number of s+p Electrons"] = ele_df2[ele_df2['symbol']==o]['Number of s electrons'].values[0] + ele_df2[ele_df2['symbol']==o]['Number of p electrons'].values[0]
        tmp["A_number of d Electrons"] = ele_df2[ele_df2['symbol']==a]['Number of d electrons'].values[0]
        tmp["B_number of d Electrons"] = ele_df2[ele_df2['symbol']==b]['Number of d electrons'].values[0]
        tmp["O_number of d Electrons"] = ele_df2[ele_df2['symbol']==o]['Number of d electrons'].values[0]
        tmp["A_Mulliken EN"] = ele_df2[ele_df2['symbol']==a]['Mulliken EN'].values[0]
        tmp["B_Mulliken EN"] = ele_df2[ele_df2['symbol']==b]['Mulliken EN'].values[0]
        tmp["O_Mulliken EN"] = ele_df2[ele_df2['symbol']==o]['Mulliken EN'].values[0]
        result.append(tmp)
    return pd.DataFrame(result)
base_features_AB2O4 = get_base_features_AB2O4(spinels)
base_features_AB2O4

Unnamed: 0,A_Density,B_Density,O_Density,A_dipole Polarizability,B_dipole Polarizability,O_dipole Polarizability,A_covalent Radius,B_covalent Radius,O_covalent Radius,A_atomic Radius,B_atomic Radius,O_atomic Radius,A_FirstIonization,B_FirstIonization,O_FirstIonization,A_number of Valence Electrons,B_number of Valence Electrons,O_number of Valence Electrons,A_number,B_number,O_number,A_Period,B_Period,O_Period,A_Electronegativity,B_Electronegativity,O_Electronegativity,A_number of s+p Electrons,B_number of s+p Electrons,O_number of s+p Electrons,A_number of d Electrons,B_number of d Electrons,O_number of d Electrons,A_Mulliken EN,B_Mulliken EN,O_Mulliken EN
0,0.971,7.44,0.00143,162.7000,68.0,5.3,155,119,63,1.80,1.40,0.6,495.8,717.3,1313.9,1,7,6,11,25,8,3,4,2,0.93,1.55,3.44,1,2,6,0,5,0,2.85,3.72,7.54
1,8.690,12.40,0.00143,46.0000,66.0,5.3,136,125,63,1.55,1.35,0.6,867.8,719.7,1313.9,12,9,6,48,45,8,5,5,2,1.69,2.28,3.44,2,1,6,10,8,0,4.33,4.30,7.54
2,1.540,7.31,0.00143,160.8000,65.0,5.3,171,142,63,1.80,1.55,0.6,589.8,558.3,1313.9,2,3,6,20,49,8,4,5,2,1.00,1.78,3.44,2,3,6,0,10,0,2.20,3.10,7.54
3,2.330,8.91,0.00143,37.3000,49.0,5.3,116,110,63,1.10,1.35,0.6,786.5,737.1,1313.9,4,10,6,14,28,8,3,4,2,1.90,1.91,3.44,4,2,6,0,8,0,4.77,4.40,7.54
4,7.870,1.74,0.00143,62.0000,71.2,5.3,116,139,63,1.40,1.50,0.6,762.5,737.7,1313.9,8,2,6,26,12,8,4,3,2,1.83,1.31,3.44,2,2,6,6,0,0,4.06,3.75,7.54
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
165,1.740,7.15,0.00143,71.2000,83.0,5.3,139,122,63,1.50,1.40,0.6,737.7,652.9,1313.9,2,6,6,12,24,8,3,4,2,1.31,1.66,3.44,2,1,6,0,5,0,3.75,3.72,7.54
166,1.740,12.40,0.00143,71.2000,66.0,5.3,139,125,63,1.50,1.35,0.6,737.7,719.7,1313.9,2,9,6,12,45,8,3,5,2,1.31,2.28,3.44,2,1,6,0,8,0,3.75,4.30,7.54
167,0.534,6.11,0.00143,164.1125,87.0,5.3,133,134,63,1.45,1.35,0.6,520.2,650.9,1313.9,1,5,6,3,23,8,2,4,2,0.98,1.63,3.44,1,2,6,0,3,0,3.01,3.60,7.54
168,7.440,2.70,0.00143,68.0000,57.8,5.3,119,126,63,1.40,1.25,0.6,717.3,577.5,1313.9,7,3,6,25,13,8,4,3,2,1.55,1.61,3.44,2,3,6,5,0,0,3.72,3.23,7.54


In [31]:
def get_base_features_AAB2O4(data:pd.DataFrame):
    result = []
    for i,row in data.iterrows():
        tmp = {}
        o = 'O'
        if row['A1'] != '0' and row['A2'] != '0':
            A1 = row['A1']
            A2 = row['A2']
            a1 = row['a1']
            a2 = row['a2']
            b = row['B']
            tmp["A_Density"] = a1 * ele_df1[ele_df1['symbol']==A1]['Density'].values[0] + \
            a2 * ele_df1[ele_df1['symbol']==A2]['Density'].values[0]
            tmp["B_Density"] = ele_df1[ele_df1['symbol']==b]['Density'].values[0]
            tmp["O_Density"] = ele_df1[ele_df1['symbol']==o]['Density'].values[0]
            tmp["A_dipole Polarizability"] = a1 * ele_df1[ele_df1['symbol']==A1]['dipole_polarizability'].values[0] + \
            a2 * ele_df1[ele_df1['symbol']==A2]['dipole_polarizability'].values[0]
            tmp["B_dipole Polarizability"] = ele_df1[ele_df1['symbol']==b]['dipole_polarizability'].values[0]
            tmp["O_dipole Polarizability"] = ele_df1[ele_df1['symbol']==o]['dipole_polarizability'].values[0]
            tmp["A_covalent Radius"] = a1 * ele_df1[ele_df1['symbol']==A1]['covalent_radius'].values[0] + \
            a2 * ele_df1[ele_df1['symbol']==A2]['covalent_radius'].values[0]
            tmp["B_covalent Radius"] = ele_df1[ele_df1['symbol']==b]['covalent_radius'].values[0]
            tmp["O_covalent Radius"] = ele_df1[ele_df1['symbol']==o]['covalent_radius'].values[0]
            tmp["A_atomic Radius"] = a1 * ele_df2[ele_df1['symbol']==A1]['Atomic radius (Å)'].values[0] + \
            a2 * ele_df2[ele_df1['symbol']==A2]['Atomic radius (Å)'].values[0]
            tmp["B_atomic Radius"] = ele_df2[ele_df1['symbol']==b]['Atomic radius (Å)'].values[0]
            tmp["O_atomic Radius"] = ele_df2[ele_df1['symbol']==o]['Atomic radius (Å)'].values[0]
            tmp["A_FirstIonization"] = a1 * ele_df1[ele_df1['symbol']==A1]['FirstIonization'].values[0] + \
            a2 * ele_df1[ele_df1['symbol']==A2]['FirstIonization'].values[0]
            tmp["B_FirstIonization"] = ele_df1[ele_df1['symbol']==b]['FirstIonization'].values[0]
            tmp["O_FirstIonization"] = ele_df1[ele_df1['symbol']==o]['FirstIonization'].values[0]
            tmp["A_number of Valence Electrons"] = a1 * ele_df1[ele_df1['symbol']==A1]['number_of_valence_electrons'].values[0] + \
            a2 * ele_df1[ele_df1['symbol']==A2]['number_of_valence_electrons'].values[0]
            tmp["B_number of Valence Electrons"] = ele_df1[ele_df1['symbol']==b]['number_of_valence_electrons'].values[0]
            tmp["O_number of Valence Electrons"] = ele_df1[ele_df1['symbol']==o]['number_of_valence_electrons'].values[0]
            tmp["A_number"] = a1 * ele_df1[ele_df1['symbol']==A1]['number'].values[0] + \
            a2 * ele_df1[ele_df1['symbol']==A2]['number'].values[0]
            tmp["B_number"] = ele_df1[ele_df1['symbol']==b]['number'].values[0]
            tmp["O_number"] = ele_df1[ele_df1['symbol']==o]['number'].values[0]
            tmp["A_Period"] = a1 * ele_df1[ele_df1['symbol']==A1]['Period'].values[0] + \
            a2 * ele_df1[ele_df1['symbol']==A2]['Period'].values[0]
            tmp["B_Period"] = ele_df1[ele_df1['symbol']==b]['Period'].values[0]
            tmp["O_Period"] = ele_df1[ele_df1['symbol']==o]['Period'].values[0]
            tmp["A_Electronegativity"] = a1 * ele_df1[ele_df1['symbol']==A1]['Electronegativity'].values[0] + \
            a2 * ele_df1[ele_df1['symbol']==A2]['Electronegativity'].values[0]
            tmp["B_Electronegativity"] = ele_df1[ele_df1['symbol']==b]['Electronegativity'].values[0]
            tmp["O_Electronegativity"] = ele_df1[ele_df1['symbol']==o]['Electronegativity'].values[0]
            tmp["A_number of s+p Electrons"] = a1*(ele_df2[ele_df2['symbol']==A1]['Number of s electrons'].values[0] \
                                                   + ele_df2[ele_df2['symbol']==A1]['Number of p electrons'].values[0])+ \
            a2*(ele_df2[ele_df2['symbol']==A2]['Number of s electrons'].values[0] + \
                ele_df2[ele_df2['symbol']==A2]['Number of p electrons'].values[0])
            tmp["B_number of s+p Electrons"] = ele_df2[ele_df2['symbol']==b]['Number of s electrons'].values[0] \
            + ele_df2[ele_df2['symbol']==b]['Number of p electrons'].values[0]
            tmp["O_number of s+p Electrons"] = ele_df2[ele_df2['symbol']==o]['Number of s electrons'].values[0] \
            + ele_df2[ele_df2['symbol']==o]['Number of p electrons'].values[0]
            tmp["A_number of d Electrons"] = a1*ele_df2[ele_df2['symbol']==A1]['Number of d electrons'].values[0] + \
            a2*ele_df2[ele_df2['symbol']==A2]['Number of d electrons'].values[0]
            tmp["B_number of d Electrons"] = ele_df2[ele_df2['symbol']==b]['Number of d electrons'].values[0]
            tmp["O_number of d Electrons"] = ele_df2[ele_df2['symbol']==o]['Number of d electrons'].values[0]
            tmp["A_Mulliken EN"] = a1*ele_df2[ele_df2['symbol']==A1]['Mulliken EN'].values[0] \
            + a2*ele_df2[ele_df2['symbol']==A2]['Mulliken EN'].values[0]
            tmp["B_Mulliken EN"] = ele_df2[ele_df2['symbol']==b]['Mulliken EN'].values[0]
            tmp["O_Mulliken EN"] = ele_df2[ele_df2['symbol']==o]['Mulliken EN'].values[0]
            result.append(tmp)
        elif row['A1'] == '0' and row['A2'] != '0':
                a = row['A2']
                b = row['B']
                tmp["A_Density"] = ele_df1[ele_df1['symbol']==a]['Density'].values[0]
                tmp["B_Density"] = ele_df1[ele_df1['symbol']==b]['Density'].values[0]
                tmp["O_Density"] = ele_df1[ele_df1['symbol']==o]['Density'].values[0]
                tmp["A_dipole Polarizability"] = ele_df1[ele_df1['symbol']==a]['dipole_polarizability'].values[0]
                tmp["B_dipole Polarizability"] = ele_df1[ele_df1['symbol']==b]['dipole_polarizability'].values[0]
                tmp["O_dipole Polarizability"] = ele_df1[ele_df1['symbol']==o]['dipole_polarizability'].values[0]
                tmp["A_covalent Radius"] = ele_df1[ele_df1['symbol']==a]['covalent_radius'].values[0]
                tmp["B_covalent Radius"] = ele_df1[ele_df1['symbol']==b]['covalent_radius'].values[0]
                tmp["O_covalent Radius"] = ele_df1[ele_df1['symbol']==o]['covalent_radius'].values[0]
                tmp["A_atomic Radius"] = ele_df2[ele_df1['symbol']==a]['Atomic radius (Å)'].values[0]
                tmp["B_atomic Radius"] = ele_df2[ele_df1['symbol']==b]['Atomic radius (Å)'].values[0]
                tmp["O_atomic Radius"] = ele_df2[ele_df1['symbol']==o]['Atomic radius (Å)'].values[0]
                tmp["A_FirstIonization"] = ele_df1[ele_df1['symbol']==a]['FirstIonization'].values[0]
                tmp["B_FirstIonization"] = ele_df1[ele_df1['symbol']==b]['FirstIonization'].values[0]
                tmp["O_FirstIonization"] = ele_df1[ele_df1['symbol']==o]['FirstIonization'].values[0]
                tmp["A_number of Valence Electrons"] = ele_df1[ele_df1['symbol']==a]['number_of_valence_electrons'].values[0]
                tmp["B_number of Valence Electrons"] = ele_df1[ele_df1['symbol']==b]['number_of_valence_electrons'].values[0]
                tmp["O_number of Valence Electrons"] = ele_df1[ele_df1['symbol']==o]['number_of_valence_electrons'].values[0]
                tmp["A_number"] = ele_df1[ele_df1['symbol']==a]['number'].values[0]
                tmp["B_number"] = ele_df1[ele_df1['symbol']==b]['number'].values[0]
                tmp["O_number"] = ele_df1[ele_df1['symbol']==o]['number'].values[0]
                tmp["A_Period"] = ele_df1[ele_df1['symbol']==a]['Period'].values[0]
                tmp["B_Period"] = ele_df1[ele_df1['symbol']==b]['Period'].values[0]
                tmp["O_Period"] = ele_df1[ele_df1['symbol']==o]['Period'].values[0]
                tmp["A_Electronegativity"] = ele_df1[ele_df1['symbol']==a]['Electronegativity'].values[0]
                tmp["B_Electronegativity"] = ele_df1[ele_df1['symbol']==b]['Electronegativity'].values[0]
                tmp["O_Electronegativity"] = ele_df1[ele_df1['symbol']==o]['Electronegativity'].values[0]
                tmp["A_number of s+p Electrons"] = ele_df2[ele_df2['symbol']==a]['Number of s electrons'].values[0] 
                + ele_df2[ele_df2['symbol']==a]['Number of p electrons'].values[0]
                tmp["B_number of s+p Electrons"] = ele_df2[ele_df2['symbol']==b]['Number of s electrons'].values[0] 
                + ele_df2[ele_df2['symbol']==b]['Number of p electrons'].values[0]
                tmp["O_number of s+p Electrons"] = ele_df2[ele_df2['symbol']==o]['Number of s electrons'].values[0] 
                + ele_df2[ele_df2['symbol']==o]['Number of p electrons'].values[0]
                tmp["A_number of d Electrons"] = ele_df2[ele_df2['symbol']==a]['Number of d electrons'].values[0]
                tmp["B_number of d Electrons"] = ele_df2[ele_df2['symbol']==b]['Number of d electrons'].values[0]
                tmp["O_number of d Electrons"] = ele_df2[ele_df2['symbol']==o]['Number of d electrons'].values[0]
                tmp["A_Mulliken EN"] = ele_df2[ele_df2['symbol']==a]['Mulliken EN'].values[0]
                tmp["B_Mulliken EN"] = ele_df2[ele_df2['symbol']==b]['Mulliken EN'].values[0]
                tmp["O_Mulliken EN"] = ele_df2[ele_df2['symbol']==o]['Mulliken EN'].values[0]
                result.append(tmp)
        elif row['A2'] == '0' and row['A1'] != '0':
                a = row['A1']
                b = row['B']
                tmp["A_Density"] = ele_df1[ele_df1['symbol']==a]['Density'].values[0]
                tmp["B_Density"] = ele_df1[ele_df1['symbol']==b]['Density'].values[0]
                tmp["O_Density"] = ele_df1[ele_df1['symbol']==o]['Density'].values[0]
                tmp["A_dipole Polarizability"] = ele_df1[ele_df1['symbol']==a]['dipole_polarizability'].values[0]
                tmp["B_dipole Polarizability"] = ele_df1[ele_df1['symbol']==b]['dipole_polarizability'].values[0]
                tmp["O_dipole Polarizability"] = ele_df1[ele_df1['symbol']==o]['dipole_polarizability'].values[0]
                tmp["A_covalent Radius"] = ele_df1[ele_df1['symbol']==a]['covalent_radius'].values[0]
                tmp["B_covalent Radius"] = ele_df1[ele_df1['symbol']==b]['covalent_radius'].values[0]
                tmp["O_covalent Radius"] = ele_df1[ele_df1['symbol']==o]['covalent_radius'].values[0]
                tmp["A_atomic Radius"] = ele_df2[ele_df1['symbol']==a]['Atomic radius (Å)'].values[0]
                tmp["B_atomic Radius"] = ele_df2[ele_df1['symbol']==b]['Atomic radius (Å)'].values[0]
                tmp["O_atomic Radius"] = ele_df2[ele_df1['symbol']==o]['Atomic radius (Å)'].values[0]
                tmp["A_FirstIonization"] = ele_df1[ele_df1['symbol']==a]['FirstIonization'].values[0]
                tmp["B_FirstIonization"] = ele_df1[ele_df1['symbol']==b]['FirstIonization'].values[0]
                tmp["O_FirstIonization"] = ele_df1[ele_df1['symbol']==o]['FirstIonization'].values[0]
                tmp["A_number of Valence Electrons"] = ele_df1[ele_df1['symbol']==a]['number_of_valence_electrons'].values[0]
                tmp["B_number of Valence Electrons"] = ele_df1[ele_df1['symbol']==b]['number_of_valence_electrons'].values[0]
                tmp["O_number of Valence Electrons"] = ele_df1[ele_df1['symbol']==o]['number_of_valence_electrons'].values[0]
                tmp["A_number"] = ele_df1[ele_df1['symbol']==a]['number'].values[0]
                tmp["B_number"] = ele_df1[ele_df1['symbol']==b]['number'].values[0]
                tmp["O_number"] = ele_df1[ele_df1['symbol']==o]['number'].values[0]
                tmp["A_Period"] = ele_df1[ele_df1['symbol']==a]['Period'].values[0]
                tmp["B_Period"] = ele_df1[ele_df1['symbol']==b]['Period'].values[0]
                tmp["O_Period"] = ele_df1[ele_df1['symbol']==o]['Period'].values[0]
                tmp["A_Electronegativity"] = ele_df1[ele_df1['symbol']==a]['Electronegativity'].values[0]
                tmp["B_Electronegativity"] = ele_df1[ele_df1['symbol']==b]['Electronegativity'].values[0]
                tmp["O_Electronegativity"] = ele_df1[ele_df1['symbol']==o]['Electronegativity'].values[0]
                tmp["A_number of s+p Electrons"] = ele_df2[ele_df2['symbol']==a]['Number of s electrons'].values[0] 
                + ele_df2[ele_df2['symbol']==a]['Number of p electrons'].values[0]
                tmp["B_number of s+p Electrons"] = ele_df2[ele_df2['symbol']==b]['Number of s electrons'].values[0] 
                + ele_df2[ele_df2['symbol']==b]['Number of p electrons'].values[0]
                tmp["O_number of s+p Electrons"] = ele_df2[ele_df2['symbol']==o]['Number of s electrons'].values[0] 
                + ele_df2[ele_df2['symbol']==o]['Number of p electrons'].values[0]
                tmp["A_number of d Electrons"] = ele_df2[ele_df2['symbol']==a]['Number of d electrons'].values[0]
                tmp["B_number of d Electrons"] = ele_df2[ele_df2['symbol']==b]['Number of d electrons'].values[0]
                tmp["O_number of d Electrons"] = ele_df2[ele_df2['symbol']==o]['Number of d electrons'].values[0]
                tmp["A_Mulliken EN"] = ele_df2[ele_df2['symbol']==a]['Mulliken EN'].values[0]
                tmp["B_Mulliken EN"] = ele_df2[ele_df2['symbol']==b]['Mulliken EN'].values[0]
                tmp["O_Mulliken EN"] = ele_df2[ele_df2['symbol']==o]['Mulliken EN'].values[0]
                result.append(tmp)
    return pd.DataFrame(result)
base_features_AAB2O4 = get_base_features_AAB2O4(a_data_spinel)
base_features_AAB2O4

Unnamed: 0,A_Density,B_Density,O_Density,A_dipole Polarizability,B_dipole Polarizability,O_dipole Polarizability,A_covalent Radius,B_covalent Radius,O_covalent Radius,A_atomic Radius,B_atomic Radius,O_atomic Radius,A_FirstIonization,B_FirstIonization,O_FirstIonization,A_number of Valence Electrons,B_number of Valence Electrons,O_number of Valence Electrons,A_number,B_number,O_number,A_Period,B_Period,O_Period,A_Electronegativity,B_Electronegativity,O_Electronegativity,A_number of s+p Electrons,B_number of s+p Electrons,O_number of s+p Electrons,A_number of d Electrons,B_number of d Electrons,O_number of d Electrons,A_Mulliken EN,B_Mulliken EN,O_Mulliken EN
0,9.5471,7.44,0.00143,65.7700,68.0,5.3,130.7,119,63,1.620,1.40,0.6,707.48,717.3,1313.9,10.0,7,6,43.4,25,8,4.8,4,2,1.830,1.55,3.44,1.0,2,6,9.0,5,0,4.281,3.72,7.54
1,1.7621,7.44,0.00143,50.2360,68.0,5.3,107.3,119,63,1.125,1.40,0.6,859.13,717.3,1313.9,1.9,7,6,4.7,25,8,2.1,4,2,1.506,1.55,3.44,1.9,2,6,0.0,5,0,4.695,3.72,7.54
2,1.4831,7.44,0.00143,160.9900,68.0,5.3,169.4,119,63,1.800,1.40,0.6,580.40,717.3,1313.9,1.9,7,6,19.1,25,8,3.9,4,2,0.993,1.55,3.44,1.9,2,6,0.0,5,0,2.265,3.72,7.54
3,8.0711,7.44,0.00143,65.7700,68.0,5.3,115.4,119,63,1.395,1.40,0.6,733.94,717.3,1313.9,8.2,7,6,25.4,25,8,3.9,4,2,1.785,1.55,3.44,1.9,2,6,6.3,5,0,4.155,3.72,7.54
4,8.1611,7.44,0.00143,58.1200,68.0,5.3,116.3,119,63,1.395,1.40,0.6,720.53,717.3,1313.9,10.0,7,6,27.2,25,8,3.9,4,2,1.803,1.55,3.44,1.0,2,6,9.0,5,0,4.317,3.72,7.54
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
28836,0.5340,10.20,0.00143,164.1125,87.0,5.3,133.0,138,63,1.450,1.45,0.6,520.20,684.3,1313.9,1.0,6,6,3.0,42,8,2.0,5,2,0.980,2.16,3.44,1.0,1,2,0.0,5,0,3.010,3.90,7.54
28837,5.2400,6.15,0.00143,184.0000,215.0,5.3,168.0,180,63,1.850,1.95,0.6,547.10,538.1,1313.9,3.0,3,6,63.0,57,8,6.0,6,2,1.200,1.10,3.44,2.0,2,2,0.0,1,0,3.100,3.10,7.54
28838,0.5340,8.91,0.00143,164.1125,49.0,5.3,133.0,110,63,1.450,1.35,0.6,520.20,737.1,1313.9,1.0,10,6,3.0,28,8,2.0,4,2,0.980,1.91,3.44,1.0,2,2,0.0,8,0,3.010,4.40,7.54
28839,0.9710,7.29,0.00143,162.7000,53.0,5.3,155.0,140,63,1.800,1.45,0.6,495.80,708.6,1313.9,1.0,4,6,11.0,50,8,3.0,5,2,0.930,1.96,3.44,1.0,2,2,0.0,10,0,2.850,4.30,7.54


In [37]:
def get_base_features_ABB2O4(data:pd.DataFrame):
    result = []
    for i,row in data.iterrows():
        tmp = {}
        o = 'O'
        if str(row['B1']) != '0' and str(row['B2']) != '0':
            a = row['A']
            b1 = row['b1']
            b2 = row['b2']
            B1 = row['B1']
            B2 = row['B2']
            tmp["B_Density"] = b1 * ele_df1[ele_df1['symbol']==B1]['Density'].values[0] + \
            b2 * ele_df1[ele_df1['symbol']==B2]['Density'].values[0]
            tmp["A_Density"] = ele_df1[ele_df1['symbol']==a]['Density'].values[0]
            tmp["O_Density"] = ele_df1[ele_df1['symbol']==o]['Density'].values[0]
            tmp["B_dipole Polarizability"] = b1 * ele_df1[ele_df1['symbol']==B1]['dipole_polarizability'].values[0] + \
            b2 * ele_df1[ele_df1['symbol']==B2]['dipole_polarizability'].values[0]
            tmp["A_dipole Polarizability"] = ele_df1[ele_df1['symbol']==a]['dipole_polarizability'].values[0]
            tmp["O_dipole Polarizability"] = ele_df1[ele_df1['symbol']==o]['dipole_polarizability'].values[0]
            tmp["B_covalent Radius"] = b1 * ele_df1[ele_df1['symbol']==B1]['covalent_radius'].values[0] + \
            b2 * ele_df1[ele_df1['symbol']==B2]['covalent_radius'].values[0]
            tmp["A_covalent Radius"] = ele_df1[ele_df1['symbol']==a]['covalent_radius'].values[0]
            tmp["O_covalent Radius"] = ele_df1[ele_df1['symbol']==o]['covalent_radius'].values[0]
            tmp["B_atomic Radius"] = b1 * ele_df2[ele_df1['symbol']==B1]['Atomic radius (Å)'].values[0] + \
            b2 * ele_df2[ele_df1['symbol']==B2]['Atomic radius (Å)'].values[0]
            tmp["A_atomic Radius"] = ele_df2[ele_df1['symbol']==a]['Atomic radius (Å)'].values[0]
            tmp["O_atomic Radius"] = ele_df2[ele_df1['symbol']==o]['Atomic radius (Å)'].values[0]
            tmp["B_FirstIonization"] = b1 * ele_df1[ele_df1['symbol']==B1]['FirstIonization'].values[0] + \
            b2 * ele_df1[ele_df1['symbol']==B2]['FirstIonization'].values[0]
            tmp["A_FirstIonization"] = ele_df1[ele_df1['symbol']==a]['FirstIonization'].values[0]
            tmp["O_FirstIonization"] = ele_df1[ele_df1['symbol']==o]['FirstIonization'].values[0]
            tmp["B_number of Valence Electrons"] = b1 * ele_df1[ele_df1['symbol']==B1]['number_of_valence_electrons'].values[0] + \
            b2 * ele_df1[ele_df1['symbol']==B2]['number_of_valence_electrons'].values[0]
            tmp["A_number of Valence Electrons"] = ele_df1[ele_df1['symbol']==a]['number_of_valence_electrons'].values[0]
            tmp["O_number of Valence Electrons"] = ele_df1[ele_df1['symbol']==o]['number_of_valence_electrons'].values[0]
            tmp["B_number"] = b1 * ele_df1[ele_df1['symbol']==B1]['number'].values[0] + \
            b2 * ele_df1[ele_df1['symbol']==B2]['number'].values[0]
            tmp["A_number"] = ele_df1[ele_df1['symbol']==a]['number'].values[0]
            tmp["O_number"] = ele_df1[ele_df1['symbol']==o]['number'].values[0]
            tmp["B_Period"] = b1 * ele_df1[ele_df1['symbol']==B1]['Period'].values[0] + \
            b2 * ele_df1[ele_df1['symbol']==B2]['Period'].values[0]
            tmp["A_Period"] = ele_df1[ele_df1['symbol']==a]['Period'].values[0]
            tmp["O_Period"] = ele_df1[ele_df1['symbol']==o]['Period'].values[0]
            tmp["B_Electronegativity"] = b1 * ele_df1[ele_df1['symbol']==B1]['Electronegativity'].values[0] + \
            b2 * ele_df1[ele_df1['symbol']==B2]['Electronegativity'].values[0]
            tmp["A_Electronegativity"] = ele_df1[ele_df1['symbol']==a]['Electronegativity'].values[0]
            tmp["O_Electronegativity"] = ele_df1[ele_df1['symbol']==o]['Electronegativity'].values[0]
            tmp["B_number of s+p Electrons"] = b1*(ele_df2[ele_df2['symbol']==B1]['Number of s electrons'].values[0] \
                                                   + ele_df2[ele_df2['symbol']==B1]['Number of p electrons'].values[0])+ \
            b2*(ele_df2[ele_df2['symbol']==B2]['Number of s electrons'].values[0] + \
                ele_df2[ele_df2['symbol']==B2]['Number of p electrons'].values[0])
            tmp["A_number of s+p Electrons"] = ele_df2[ele_df2['symbol']==a]['Number of s electrons'].values[0] \
            + ele_df2[ele_df2['symbol']==a]['Number of p electrons'].values[0]
            tmp["O_number of s+p Electrons"] = ele_df2[ele_df2['symbol']==o]['Number of s electrons'].values[0] \
            + ele_df2[ele_df2['symbol']==o]['Number of p electrons'].values[0]
            tmp["B_number of d Electrons"] = b1*ele_df2[ele_df2['symbol']==B1]['Number of d electrons'].values[0] + \
            b2*ele_df2[ele_df2['symbol']==B2]['Number of d electrons'].values[0]
            tmp["A_number of d Electrons"] = ele_df2[ele_df2['symbol']==a]['Number of d electrons'].values[0]
            tmp["O_number of d Electrons"] = ele_df2[ele_df2['symbol']==o]['Number of d electrons'].values[0]
            tmp["B_Mulliken EN"] = b1*ele_df2[ele_df2['symbol']==B1]['Mulliken EN'].values[0] \
            + b2*ele_df2[ele_df2['symbol']==B2]['Mulliken EN'].values[0]
            tmp["A_Mulliken EN"] = ele_df2[ele_df2['symbol']==a]['Mulliken EN'].values[0]
            tmp["O_Mulliken EN"] = ele_df2[ele_df2['symbol']==o]['Mulliken EN'].values[0]
            result.append(tmp)
        elif str(row['B1']) == '0' and str(row['B2']) != '0':
                a = row['A']
                b = row['B2']
                tmp["A_Density"] = ele_df1[ele_df1['symbol']==a]['Density'].values[0]
                tmp["B_Density"] = ele_df1[ele_df1['symbol']==b]['Density'].values[0]
                tmp["O_Density"] = ele_df1[ele_df1['symbol']==o]['Density'].values[0]
                tmp["A_dipole Polarizability"] = ele_df1[ele_df1['symbol']==a]['dipole_polarizability'].values[0]
                tmp["B_dipole Polarizability"] = ele_df1[ele_df1['symbol']==b]['dipole_polarizability'].values[0]
                tmp["O_dipole Polarizability"] = ele_df1[ele_df1['symbol']==o]['dipole_polarizability'].values[0]
                tmp["A_covalent Radius"] = ele_df1[ele_df1['symbol']==a]['covalent_radius'].values[0]
                tmp["B_covalent Radius"] = ele_df1[ele_df1['symbol']==b]['covalent_radius'].values[0]
                tmp["O_covalent Radius"] = ele_df1[ele_df1['symbol']==o]['covalent_radius'].values[0]
                tmp["A_atomic Radius"] = ele_df2[ele_df1['symbol']==a]['Atomic radius (Å)'].values[0]
                tmp["B_atomic Radius"] = ele_df2[ele_df1['symbol']==b]['Atomic radius (Å)'].values[0]
                tmp["O_atomic Radius"] = ele_df2[ele_df1['symbol']==o]['Atomic radius (Å)'].values[0]
                tmp["A_FirstIonization"] = ele_df1[ele_df1['symbol']==a]['FirstIonization'].values[0]
                tmp["B_FirstIonization"] = ele_df1[ele_df1['symbol']==b]['FirstIonization'].values[0]
                tmp["O_FirstIonization"] = ele_df1[ele_df1['symbol']==o]['FirstIonization'].values[0]
                tmp["A_number of Valence Electrons"] = ele_df1[ele_df1['symbol']==a]['number_of_valence_electrons'].values[0]
                tmp["B_number of Valence Electrons"] = ele_df1[ele_df1['symbol']==b]['number_of_valence_electrons'].values[0]
                tmp["O_number of Valence Electrons"] = ele_df1[ele_df1['symbol']==o]['number_of_valence_electrons'].values[0]
                tmp["A_number"] = ele_df1[ele_df1['symbol']==a]['number'].values[0]
                tmp["B_number"] = ele_df1[ele_df1['symbol']==b]['number'].values[0]
                tmp["O_number"] = ele_df1[ele_df1['symbol']==o]['number'].values[0]
                tmp["A_Period"] = ele_df1[ele_df1['symbol']==a]['Period'].values[0]
                tmp["B_Period"] = ele_df1[ele_df1['symbol']==b]['Period'].values[0]
                tmp["O_Period"] = ele_df1[ele_df1['symbol']==o]['Period'].values[0]
                tmp["A_Electronegativity"] = ele_df1[ele_df1['symbol']==a]['Electronegativity'].values[0]
                tmp["B_Electronegativity"] = ele_df1[ele_df1['symbol']==b]['Electronegativity'].values[0]
                tmp["O_Electronegativity"] = ele_df1[ele_df1['symbol']==o]['Electronegativity'].values[0]
                tmp["A_number of s+p Electrons"] = ele_df2[ele_df2['symbol']==a]['Number of s electrons'].values[0] 
                + ele_df2[ele_df2['symbol']==a]['Number of p electrons'].values[0]
                tmp["B_number of s+p Electrons"] = ele_df2[ele_df2['symbol']==b]['Number of s electrons'].values[0] 
                + ele_df2[ele_df2['symbol']==b]['Number of p electrons'].values[0]
                tmp["O_number of s+p Electrons"] = ele_df2[ele_df2['symbol']==o]['Number of s electrons'].values[0] 
                + ele_df2[ele_df2['symbol']==o]['Number of p electrons'].values[0]
                tmp["A_number of d Electrons"] = ele_df2[ele_df2['symbol']==a]['Number of d electrons'].values[0]
                tmp["B_number of d Electrons"] = ele_df2[ele_df2['symbol']==b]['Number of d electrons'].values[0]
                tmp["O_number of d Electrons"] = ele_df2[ele_df2['symbol']==o]['Number of d electrons'].values[0]
                tmp["A_Mulliken EN"] = ele_df2[ele_df2['symbol']==a]['Mulliken EN'].values[0]
                tmp["B_Mulliken EN"] = ele_df2[ele_df2['symbol']==b]['Mulliken EN'].values[0]
                tmp["O_Mulliken EN"] = ele_df2[ele_df2['symbol']==o]['Mulliken EN'].values[0]
                result.append(tmp)
        elif str(row['B2']) == '0' and str(row['B1']) != '0':
                a = row['A']
                b = row['B1']
                tmp["A_Density"] = ele_df1[ele_df1['symbol']==a]['Density'].values[0]
                tmp["B_Density"] = ele_df1[ele_df1['symbol']==b]['Density'].values[0]
                tmp["O_Density"] = ele_df1[ele_df1['symbol']==o]['Density'].values[0]
                tmp["A_dipole Polarizability"] = ele_df1[ele_df1['symbol']==a]['dipole_polarizability'].values[0]
                tmp["B_dipole Polarizability"] = ele_df1[ele_df1['symbol']==b]['dipole_polarizability'].values[0]
                tmp["O_dipole Polarizability"] = ele_df1[ele_df1['symbol']==o]['dipole_polarizability'].values[0]
                tmp["A_covalent Radius"] = ele_df1[ele_df1['symbol']==a]['covalent_radius'].values[0]
                tmp["B_covalent Radius"] = ele_df1[ele_df1['symbol']==b]['covalent_radius'].values[0]
                tmp["O_covalent Radius"] = ele_df1[ele_df1['symbol']==o]['covalent_radius'].values[0]
                tmp["A_atomic Radius"] = ele_df2[ele_df1['symbol']==a]['Atomic radius (Å)'].values[0]
                tmp["B_atomic Radius"] = ele_df2[ele_df1['symbol']==b]['Atomic radius (Å)'].values[0]
                tmp["O_atomic Radius"] = ele_df2[ele_df1['symbol']==o]['Atomic radius (Å)'].values[0]
                tmp["A_FirstIonization"] = ele_df1[ele_df1['symbol']==a]['FirstIonization'].values[0]
                tmp["B_FirstIonization"] = ele_df1[ele_df1['symbol']==b]['FirstIonization'].values[0]
                tmp["O_FirstIonization"] = ele_df1[ele_df1['symbol']==o]['FirstIonization'].values[0]
                tmp["A_number of Valence Electrons"] = ele_df1[ele_df1['symbol']==a]['number_of_valence_electrons'].values[0]
                tmp["B_number of Valence Electrons"] = ele_df1[ele_df1['symbol']==b]['number_of_valence_electrons'].values[0]
                tmp["O_number of Valence Electrons"] = ele_df1[ele_df1['symbol']==o]['number_of_valence_electrons'].values[0]
                tmp["A_number"] = ele_df1[ele_df1['symbol']==a]['number'].values[0]
                tmp["B_number"] = ele_df1[ele_df1['symbol']==b]['number'].values[0]
                tmp["O_number"] = ele_df1[ele_df1['symbol']==o]['number'].values[0]
                tmp["A_Period"] = ele_df1[ele_df1['symbol']==a]['Period'].values[0]
                tmp["B_Period"] = ele_df1[ele_df1['symbol']==b]['Period'].values[0]
                tmp["O_Period"] = ele_df1[ele_df1['symbol']==o]['Period'].values[0]
                tmp["A_Electronegativity"] = ele_df1[ele_df1['symbol']==a]['Electronegativity'].values[0]
                tmp["B_Electronegativity"] = ele_df1[ele_df1['symbol']==b]['Electronegativity'].values[0]
                tmp["O_Electronegativity"] = ele_df1[ele_df1['symbol']==o]['Electronegativity'].values[0]
                tmp["A_number of s+p Electrons"] = ele_df2[ele_df2['symbol']==a]['Number of s electrons'].values[0] 
                + ele_df2[ele_df2['symbol']==a]['Number of p electrons'].values[0]
                tmp["B_number of s+p Electrons"] = ele_df2[ele_df2['symbol']==b]['Number of s electrons'].values[0] 
                + ele_df2[ele_df2['symbol']==b]['Number of p electrons'].values[0]
                tmp["O_number of s+p Electrons"] = ele_df2[ele_df2['symbol']==o]['Number of s electrons'].values[0] 
                + ele_df2[ele_df2['symbol']==o]['Number of p electrons'].values[0]
                tmp["A_number of d Electrons"] = ele_df2[ele_df2['symbol']==a]['Number of d electrons'].values[0]
                tmp["B_number of d Electrons"] = ele_df2[ele_df2['symbol']==b]['Number of d electrons'].values[0]
                tmp["O_number of d Electrons"] = ele_df2[ele_df2['symbol']==o]['Number of d electrons'].values[0]
                tmp["A_Mulliken EN"] = ele_df2[ele_df2['symbol']==a]['Mulliken EN'].values[0]
                tmp["B_Mulliken EN"] = ele_df2[ele_df2['symbol']==b]['Mulliken EN'].values[0]
                tmp["O_Mulliken EN"] = ele_df2[ele_df2['symbol']==o]['Mulliken EN'].values[0]
                result.append(tmp)
    return pd.DataFrame(result)
base_features_ABB2O4 = get_base_features_ABB2O4(b_data_spinel)
base_features_ABB2O4

Unnamed: 0,B_Density,A_Density,O_Density,B_dipole Polarizability,A_dipole Polarizability,O_dipole Polarizability,B_covalent Radius,A_covalent Radius,O_covalent Radius,B_atomic Radius,A_atomic Radius,O_atomic Radius,B_FirstIonization,A_FirstIonization,O_FirstIonization,B_number of Valence Electrons,A_number of Valence Electrons,O_number of Valence Electrons,B_number,A_number,O_number,B_Period,A_Period,O_Period,B_Electronegativity,A_Electronegativity,O_Electronegativity,B_number of s+p Electrons,A_number of s+p Electrons,O_number of s+p Electrons,B_number of d Electrons,A_number of d Electrons,O_number of d Electrons,B_Mulliken EN,A_Mulliken EN,O_Mulliken EN
0,20.694,0.971,0.00143,111.30,162.7,5.3,255.1,155,63,3.180,1.80,0.6,1460.63,495.8,1313.9,21.6,1,6,91.8,11,8,9.9,3,2,3.822,0.93,3.44,2.1,1,6,19.5,0,0,8.808,2.85,7.54
1,3.670,0.971,0.00143,312.32,162.7,5.3,336.8,155,63,3.560,1.80,0.6,1192.35,495.8,1313.9,4.5,1,6,40.5,11,8,8.0,3,2,2.055,0.93,3.44,4.0,1,6,0.5,0,0,4.552,2.85,7.54
2,17.255,0.971,0.00143,94.20,162.7,5.3,270.3,155,63,3.085,1.80,0.6,1720.55,495.8,1313.9,23.5,1,6,93.7,11,8,9.9,3,2,3.366,0.93,3.44,4.0,1,6,19.5,0,0,8.599,2.85,7.54
3,17.578,0.971,0.00143,111.30,162.7,5.3,222.8,155,63,2.705,1.80,0.6,1516.49,495.8,1313.9,17.8,1,6,53.8,11,8,8.0,3,2,3.727,0.93,3.44,4.0,1,6,13.8,0,0,8.542,2.85,7.54
4,14.329,0.971,0.00143,164.50,162.7,5.3,243.7,155,63,2.800,1.80,0.6,1312.24,495.8,1313.9,12.1,1,6,48.1,11,8,8.0,3,2,3.309,0.93,3.44,2.1,1,6,10.0,0,0,7.440,2.85,7.54
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85207,9.320,6.110,0.00143,144.00,87.0,5.3,164.0,134,63,1.750,1.35,0.6,596.70,650.9,1313.9,3.0,5,6,69.0,23,8,6.0,4,2,1.250,1.63,3.44,2.0,2,2,0.0,3,0,3.100,3.60,7.54
85208,6.110,6.110,0.00143,87.00,87.0,5.3,134.0,134,63,1.350,1.35,0.6,650.90,650.9,1313.9,5.0,5,6,23.0,23,8,4.0,4,2,1.630,1.63,3.44,2.0,2,2,3.0,3,0,3.600,3.60,7.54
85209,19.300,6.110,0.00143,68.00,87.0,5.3,137.0,134,63,1.350,1.35,0.6,770.00,650.9,1313.9,6.0,5,6,74.0,23,8,6.0,4,2,2.360,1.63,3.44,2.0,2,2,4.0,3,0,4.400,3.60,7.54
85210,4.470,6.110,0.00143,162.00,87.0,5.3,163.0,134,63,1.800,1.35,0.6,600.00,650.9,1313.9,3.0,5,6,39.0,23,8,5.0,4,2,1.220,1.63,3.44,2.0,2,2,1.0,3,0,3.190,3.60,7.54


In [38]:
base_features_AB2O4['formula'] = spinels['formula']
base_features_AAB2O4['formula'] = a_data_spinel['formula'].values
base_features_ABB2O4['formula'] = b_data_spinel['formula'].values
base_features_AAB2O4 = base_features_AAB2O4[base_features_AB2O4.columns]
base_features_ABB2O4 = base_features_ABB2O4[base_features_AB2O4.columns]
result_features = pd.concat([base_features_AB2O4,base_features_AAB2O4,base_features_ABB2O4],axis=0).drop_duplicates(keep='first').reset_index(drop=True)
formula = result_features['formula'].values
result_features = result_features.drop(columns='formula')
result_features

Unnamed: 0,A_Density,B_Density,O_Density,A_dipole Polarizability,B_dipole Polarizability,O_dipole Polarizability,A_covalent Radius,B_covalent Radius,O_covalent Radius,A_atomic Radius,B_atomic Radius,O_atomic Radius,A_FirstIonization,B_FirstIonization,O_FirstIonization,A_number of Valence Electrons,B_number of Valence Electrons,O_number of Valence Electrons,A_number,B_number,O_number,A_Period,B_Period,O_Period,A_Electronegativity,B_Electronegativity,O_Electronegativity,A_number of s+p Electrons,B_number of s+p Electrons,O_number of s+p Electrons,A_number of d Electrons,B_number of d Electrons,O_number of d Electrons,A_Mulliken EN,B_Mulliken EN,O_Mulliken EN
0,0.971,7.44,0.00143,162.7,68.00,5.3,155.0,119.0,63,1.80,1.40,0.6,495.8,717.3,1313.9,1.0,7.0,6,11.0,25.0,8,3.0,4.0,2,0.93,1.55,3.44,1.0,2.0,6,0.0,5.0,0,2.85,3.72,7.54
1,8.690,12.40,0.00143,46.0,66.00,5.3,136.0,125.0,63,1.55,1.35,0.6,867.8,719.7,1313.9,12.0,9.0,6,48.0,45.0,8,5.0,5.0,2,1.69,2.28,3.44,2.0,1.0,6,10.0,8.0,0,4.33,4.30,7.54
2,1.540,7.31,0.00143,160.8,65.00,5.3,171.0,142.0,63,1.80,1.55,0.6,589.8,558.3,1313.9,2.0,3.0,6,20.0,49.0,8,4.0,5.0,2,1.00,1.78,3.44,2.0,3.0,6,0.0,10.0,0,2.20,3.10,7.54
3,2.330,8.91,0.00143,37.3,49.00,5.3,116.0,110.0,63,1.10,1.35,0.6,786.5,737.1,1313.9,4.0,10.0,6,14.0,28.0,8,3.0,4.0,2,1.90,1.91,3.44,4.0,2.0,6,0.0,8.0,0,4.77,4.40,7.54
4,7.870,1.74,0.00143,62.0,71.20,5.3,116.0,139.0,63,1.40,1.50,0.6,762.5,737.7,1313.9,8.0,2.0,6,26.0,12.0,8,4.0,3.0,2,1.83,1.31,3.44,2.0,2.0,6,6.0,0.0,0,4.06,3.75,7.54
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113718,6.110,5.24,0.00143,87.0,184.00,5.3,134.0,168.0,63,1.35,1.85,0.6,650.9,547.1,1313.9,5.0,3.0,6,23.0,63.0,8,4.0,6.0,2,1.63,1.20,3.44,2.0,2.0,2,3.0,0.0,0,3.60,3.10,7.54
113719,6.110,8.80,0.00143,87.0,156.00,5.3,134.0,166.0,63,1.35,1.75,0.6,650.9,581.0,1313.9,5.0,3.0,6,23.0,67.0,8,4.0,6.0,2,1.63,1.23,3.44,2.0,2.0,2,3.0,0.0,0,3.60,3.10,7.54
113720,6.110,12.00,0.00143,87.0,26.14,5.3,134.0,120.0,63,1.35,1.40,0.6,650.9,804.4,1313.9,5.0,10.0,6,23.0,46.0,8,4.0,5.0,2,1.63,2.20,3.44,2.0,0.0,2,3.0,10.0,0,3.60,4.45,7.54
113721,6.110,7.26,0.00143,87.0,200.00,5.3,134.0,173.0,63,1.35,1.85,0.6,650.9,540.0,1313.9,5.0,3.0,6,23.0,61.0,8,4.0,6.0,2,1.63,1.13,3.44,2.0,2.0,2,3.0,0.0,0,3.60,3.10,7.54


In [39]:
# 为了体现差异，对点位特征做减法并取绝对值
def get_combined_features(data: pd.DataFrame):
    columns = data.columns
    A_features = []
    B_features = []
    O_features = []
    for column in columns:
        if str(column).__contains__('A'):
            A_features.append(column)
        elif str(column).__contains__('B'):
            B_features.append(column)
        elif str(column).__contains__('O'):
            O_features.append(column)

    for a_feature, b_feature, o_feature in zip(A_features, B_features, O_features):
        feature_name = str(a_feature).split('_')[1]
        feature_name = feature_name
        data['A-B_'+feature_name] = np.abs(data[a_feature] - data[b_feature])
        data['(A+B)-O_'+feature_name] = np.abs((data[a_feature] + data[b_feature])- data[o_feature])


    # tf = (np.sqrt(3) * (data['B_ion_radius'] + data['O_ion_radius'])) / (2 * (data['A_ion_radius'] + data['O_ion_radius']))  # 容忍因子
    # data['tf'] = tf
    # 去除常数列
    data = data.drop(columns=O_features)
    return data

features = get_combined_features(result_features)
features

Unnamed: 0,A_Density,B_Density,A_dipole Polarizability,B_dipole Polarizability,A_covalent Radius,B_covalent Radius,A_atomic Radius,B_atomic Radius,A_FirstIonization,B_FirstIonization,A_number of Valence Electrons,B_number of Valence Electrons,A_number,B_number,A_Period,B_Period,A_Electronegativity,B_Electronegativity,A_number of s+p Electrons,B_number of s+p Electrons,A_number of d Electrons,B_number of d Electrons,A_Mulliken EN,B_Mulliken EN,A-O_Density,A-B_Density,B-O_Density,(A+B)-O_Density,A-O_dipole Polarizability,A-B_dipole Polarizability,...,B-O_FirstIonization,(A+B)-O_FirstIonization,A-O_number of Valence Electrons,A-B_number of Valence Electrons,B-O_number of Valence Electrons,(A+B)-O_number of Valence Electrons,A-O_number,A-B_number,B-O_number,(A+B)-O_number,A-O_Period,A-B_Period,B-O_Period,(A+B)-O_Period,A-O_Electronegativity,A-B_Electronegativity,B-O_Electronegativity,(A+B)-O_Electronegativity,A-O_number of s+p Electrons,A-B_number of s+p Electrons,B-O_number of s+p Electrons,(A+B)-O_number of s+p Electrons,A-O_number of d Electrons,A-B_number of d Electrons,B-O_number of d Electrons,(A+B)-O_number of d Electrons,A-O_Mulliken EN,A-B_Mulliken EN,B-O_Mulliken EN,(A+B)-O_Mulliken EN
0,0.971,7.44,162.7,68.00,155.0,119.0,1.80,1.40,495.8,717.3,1.0,7.0,11.0,25.0,3.0,4.0,0.93,1.55,1.0,2.0,0.0,5.0,2.85,3.72,0.96957,6.469,7.43857,8.40957,157.4,94.70,...,596.6,100.8,5.0,6.0,1.0,2.0,3.0,14.0,17.0,28.0,1.0,1.0,2.0,5.0,2.51,0.62,1.89,0.96,5.0,1.0,4.0,3.0,0.0,5.0,5.0,5.0,4.69,0.87,3.82,0.97
1,8.690,12.40,46.0,66.00,136.0,125.0,1.55,1.35,867.8,719.7,12.0,9.0,48.0,45.0,5.0,5.0,1.69,2.28,2.0,1.0,10.0,8.0,4.33,4.30,8.68857,3.710,12.39857,21.08857,40.7,20.00,...,594.2,273.6,6.0,3.0,3.0,15.0,40.0,3.0,37.0,85.0,3.0,0.0,3.0,8.0,1.75,0.59,1.16,0.53,4.0,1.0,5.0,3.0,10.0,2.0,8.0,18.0,3.21,0.03,3.24,1.09
2,1.540,7.31,160.8,65.00,171.0,142.0,1.80,1.55,589.8,558.3,2.0,3.0,20.0,49.0,4.0,5.0,1.00,1.78,2.0,3.0,0.0,10.0,2.20,3.10,1.53857,5.770,7.30857,8.84857,155.5,95.80,...,755.6,165.8,4.0,1.0,3.0,1.0,12.0,29.0,41.0,61.0,2.0,1.0,3.0,7.0,2.44,0.78,1.66,0.66,4.0,1.0,3.0,1.0,0.0,10.0,10.0,10.0,5.34,0.90,4.44,2.24
3,2.330,8.91,37.3,49.00,116.0,110.0,1.10,1.35,786.5,737.1,4.0,10.0,14.0,28.0,3.0,4.0,1.90,1.91,4.0,2.0,0.0,8.0,4.77,4.40,2.32857,6.580,8.90857,11.23857,32.0,11.70,...,576.8,209.7,2.0,6.0,4.0,8.0,6.0,14.0,20.0,34.0,1.0,1.0,2.0,5.0,1.54,0.01,1.53,0.37,2.0,2.0,4.0,0.0,0.0,8.0,8.0,8.0,2.77,0.37,3.14,1.63
4,7.870,1.74,62.0,71.20,116.0,139.0,1.40,1.50,762.5,737.7,8.0,2.0,26.0,12.0,4.0,3.0,1.83,1.31,2.0,2.0,6.0,0.0,4.06,3.75,7.86857,6.130,1.73857,9.60857,56.7,9.20,...,576.2,186.3,2.0,6.0,4.0,4.0,18.0,14.0,4.0,30.0,2.0,1.0,1.0,5.0,1.61,0.52,2.13,0.30,4.0,0.0,4.0,2.0,6.0,6.0,0.0,6.0,3.48,0.31,3.79,0.27
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
113718,6.110,5.24,87.0,184.00,134.0,168.0,1.35,1.85,650.9,547.1,5.0,3.0,23.0,63.0,4.0,6.0,1.63,1.20,2.0,2.0,3.0,0.0,3.60,3.10,6.10857,0.870,5.23857,11.34857,81.7,97.00,...,766.8,115.9,1.0,2.0,3.0,2.0,15.0,40.0,55.0,78.0,2.0,2.0,4.0,8.0,1.81,0.43,2.24,0.61,0.0,0.0,0.0,2.0,3.0,3.0,0.0,3.0,3.94,0.50,4.44,0.84
113719,6.110,8.80,87.0,156.00,134.0,166.0,1.35,1.75,650.9,581.0,5.0,3.0,23.0,67.0,4.0,6.0,1.63,1.23,2.0,2.0,3.0,0.0,3.60,3.10,6.10857,2.690,8.79857,14.90857,81.7,69.00,...,732.9,82.0,1.0,2.0,3.0,2.0,15.0,44.0,59.0,82.0,2.0,2.0,4.0,8.0,1.81,0.40,2.21,0.58,0.0,0.0,0.0,2.0,3.0,3.0,0.0,3.0,3.94,0.50,4.44,0.84
113720,6.110,12.00,87.0,26.14,134.0,120.0,1.35,1.40,650.9,804.4,5.0,10.0,23.0,46.0,4.0,5.0,1.63,2.20,2.0,0.0,3.0,10.0,3.60,4.45,6.10857,5.890,11.99857,18.10857,81.7,60.86,...,509.5,141.4,1.0,5.0,4.0,9.0,15.0,23.0,38.0,61.0,2.0,1.0,3.0,7.0,1.81,0.57,1.24,0.39,0.0,2.0,2.0,0.0,3.0,7.0,10.0,13.0,3.94,0.85,3.09,0.51
113721,6.110,7.26,87.0,200.00,134.0,173.0,1.35,1.85,650.9,540.0,5.0,3.0,23.0,61.0,4.0,6.0,1.63,1.13,2.0,2.0,3.0,0.0,3.60,3.10,6.10857,1.150,7.25857,13.36857,81.7,113.00,...,773.9,123.0,1.0,2.0,3.0,2.0,15.0,38.0,53.0,76.0,2.0,2.0,4.0,8.0,1.81,0.50,2.31,0.68,0.0,0.0,0.0,2.0,3.0,3.0,0.0,3.0,3.94,0.50,4.44,0.84


In [40]:
features['formula'] = formula
features.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 113723 entries, 0 to 113722
Data columns (total 73 columns):
 #   Column                               Non-Null Count   Dtype  
---  ------                               --------------   -----  
 0   A_Density                            113723 non-null  float64
 1   B_Density                            113723 non-null  float64
 2   A_dipole Polarizability              113723 non-null  float64
 3   B_dipole Polarizability              113723 non-null  float64
 4   A_covalent Radius                    113723 non-null  float64
 5   B_covalent Radius                    113723 non-null  float64
 6   A_atomic Radius                      113723 non-null  float64
 7   B_atomic Radius                      113723 non-null  float64
 8   A_FirstIonization                    113723 non-null  float64
 9   B_FirstIonization                    113723 non-null  float64
 10  A_number of Valence Electrons        113723 non-null  float64
 11  B_number of V

In [41]:
features.to_csv('./data/cation_replacement_features.csv')