# Magpie: https://hachmannlab.github.io/chemml/chemml.chem.magpie_python.html

In [1]:
# Environment: dqn
# Documentation: https://hachmannlab.github.io/chemml/chemml.chem.magpie_python.html
import chemml.chem.magpie_python as magpie
import chemml
import pandas as pd

# Raise the row cap pandas applies before it truncates a displayed table.
pd.options.display.max_rows = 1000

## One compound

In [2]:
# Build a Magpie composition entry from the formula string; the later
# feature-generation cells all take this entry as their input.
chemical = magpie.CompositionEntry(
    composition='BaTiO3',
)
# Bare name as the last expression so the notebook shows the entry's repr.
chemical

<chemml.chem.magpie_python.data.materials.CompositionEntry.CompositionEntry at 0x7f0ac02b3df0>

In [3]:
# Meredig attribute set for the single entry defined above.
# `magpie` is the alias for chemml.chem.magpie_python imported at the top.
meredig = magpie.MeredigAttributeGenerator()
# generate_features takes a list of entries; the result is shown as a table.
meredig.generate_features(entries=[chemical])

Unnamed: 0,mean_AtomicWeight,mean_Column,mean_Row,maxdiff_AtomicNumber,mean_AtomicNumber,maxdiff_CovalentRadius,mean_CovalentRadius,maxdiff_Electronegativity,mean_Electronegativity,mean_NsValence,mean_NpValence,mean_NdValence,mean_NfValence
0,46.63844,10.8,3.2,48.0,20.4,149.0,114.6,2.55,2.55,2.0,2.4,0.4,0.0


In [4]:
# Element fractions: one X_<symbol> column per element for the entry.
elem_frac = magpie.ElementFractionAttributeGenerator()
elem_frac.generate_features(entries=[chemical])

Unnamed: 0,X_H,X_He,X_Li,X_Be,X_B,X_C,X_N,X_O,X_F,X_Ne,...,X_Lr,X_Rf,X_Db,X_Sg,X_Bh,X_Hs,X_Mt,X_Ds,X_Rg,X_Cn
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.6,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
# Valence-shell attributes (frac_s/p/d/fValence columns) for the entry.
val_shell = magpie.ValenceShellAttributeGenerator()
val_shell.generate_features(entries=[chemical])

Unnamed: 0,frac_sValence,frac_pValence,frac_dValence,frac_fValence
0,0.416667,0.5,0.083333,0.0


In [6]:
# Charge-dependent attributes for the entry. The instance gets a snake_case
# name so it is not spelled exactly like the class it was built from.
charge_dep = magpie.ChargeDependentAttributeGenerator()
charge_dep.generate_features(entries=[chemical])

Unnamed: 0,min_Charge,max_Charge,maxdiff_Charge,mean_Charge,var_Charge,CumulativeIonizationEnergy,CumulativeElectronAffinity,AnionCationElectronegativityDiff
0,-2.0,4.0,6.0,2.4,0.64,53.189007,282.0,2.225


In [7]:
# Elemental-property statistics (mean/maxdiff/dev/max/min/most per property)
# for the entry.
elem_prop = magpie.ElementalPropertyAttributeGenerator()
elem_prop.generate_features(entries=[chemical])

Unnamed: 0,mean_Number,maxdiff_Number,dev_Number,max_Number,min_Number,most_Number,mean_MendeleevNumber,maxdiff_MendeleevNumber,dev_MendeleevNumber,max_MendeleevNumber,...,dev_GSmagmom,max_GSmagmom,min_GSmagmom,most_GSmagmom,mean_SpaceGroupNumber,maxdiff_SpaceGroupNumber,dev_SpaceGroupNumber,max_SpaceGroupNumber,min_SpaceGroupNumber,most_SpaceGroupNumber
0,20.4,48.0,14.88,56.0,8.0,8.0,62.6,78.0,29.28,87.0,...,7e-06,2.3e-05,0.0,0.0,91.8,217.0,95.76,229.0,12.0,12.0


In [8]:
# Ionicity attributes (CanFormIonic, MaxIonicChar, MeanIonicChar) for the entry.
ionicity = magpie.IonicityAttributeGenerator()
ionicity.generate_features(entries=[chemical])

Unnamed: 0,CanFormIonic,MaxIonicChar,MeanIonicChar
0,1,0.803211,0.343457


In [9]:
# Stoichiometric attributes (component count and composition Lp norms)
# for the entry.
stoichio = magpie.StoichiometricAttributeGenerator()
stoichio.generate_features(entries=[chemical])

Unnamed: 0,NComp,Comp_L2Norm,Comp_L3Norm,Comp_L5Norm,Comp_L7Norm,Comp_L10Norm
0,3,0.663325,0.614463,0.600984,0.600078,0.600002


In [10]:
# Yang attributes (Yang_Omega, Yang_delta) for the entry.
yang_omega = magpie.YangOmegaAttributeGenerator()
yang_omega.generate_features(entries=[chemical])

Unnamed: 0,Yang_Omega,Yang_delta
0,0.538034,0.569593


## One compound - combined features

In [11]:
# Define chemical with chem composition (repeated here so the cell runs on
# its own, independent of the single-generator cells above).
chemical = magpie.CompositionEntry(composition='BaTiO3')

# One attribute generator per feature family, keyed by the short name that
# later cells use to select groups. Dict insertion order fixes the order of
# `features`, so it matches the order the families are listed in here.
generators = {
    'meredig':    magpie.MeredigAttributeGenerator(),
    'elem_frac':  magpie.ElementFractionAttributeGenerator(),
    'val_shell':  magpie.ValenceShellAttributeGenerator(),
    'charge_dep': magpie.ChargeDependentAttributeGenerator(),
    'elem_prop':  magpie.ElementalPropertyAttributeGenerator(),
    'ionicity':   magpie.IonicityAttributeGenerator(),
    'stoichio':   magpie.StoichiometricAttributeGenerator(),
    'yang_omega': magpie.YangOmegaAttributeGenerator(),
}

# Every generator is called the same way, so run them in one pass instead of
# eight hand-copied assignments. Each value is a one-row table for `chemical`.
features = {
    name: generator.generate_features(entries=[chemical])
    for name, generator in generators.items()
}

# Total number of feature columns across all families. Not displayed; kept
# as a quick sanity figure that can be inspected after running the cell.
count = sum(len(frame.columns) for frame in features.values())

features

{'meredig':    mean_AtomicWeight  mean_Column  mean_Row  maxdiff_AtomicNumber  \
 0           46.63844         10.8       3.2                  48.0   
 
    mean_AtomicNumber  maxdiff_CovalentRadius  mean_CovalentRadius  \
 0               20.4                   149.0                114.6   
 
    maxdiff_Electronegativity  mean_Electronegativity  mean_NsValence  \
 0                       2.55                    2.55             2.0   
 
    mean_NpValence  mean_NdValence  mean_NfValence  
 0             2.4             0.4             0.0  ,
 'elem_frac':    X_H  X_He  X_Li  X_Be  X_B  X_C  X_N  X_O  X_F  X_Ne  ...  X_Lr  X_Rf  \
 0  0.0   0.0   0.0   0.0  0.0  0.0  0.0  0.6  0.0   0.0  ...   0.0   0.0   
 
    X_Db  X_Sg  X_Bh  X_Hs  X_Mt  X_Ds  X_Rg  X_Cn  
 0   0.0   0.0   0.0   0.0   0.0   0.0   0.0   0.0  
 
 [1 rows x 112 columns],
 'val_shell':    frac_sValence  frac_pValence  frac_dValence  frac_fValence
 0       0.416667            0.5       0.083333            0.0,
 'charge

In [12]:
# Feature families to keep, in the left-to-right order their columns should
# take in the combined table. Remove a name here to leave that family out.
feat_to_included = [
    'meredig', 'elem_frac', 'val_shell', 'charge_dep',
    'elem_prop', 'ionicity', 'stoichio', 'yang_omega',
]

# Pick the selected per-family tables out of `features`, then join them
# column-wise (axis=1) into a single wide table.
selected_frames = [features[name] for name in feat_to_included]
concat = pd.concat(selected_frames, axis=1)
concat


Unnamed: 0,mean_AtomicWeight,mean_Column,mean_Row,maxdiff_AtomicNumber,mean_AtomicNumber,maxdiff_CovalentRadius,mean_CovalentRadius,maxdiff_Electronegativity,mean_Electronegativity,mean_NsValence,...,MaxIonicChar,MeanIonicChar,NComp,Comp_L2Norm,Comp_L3Norm,Comp_L5Norm,Comp_L7Norm,Comp_L10Norm,Yang_Omega,Yang_delta
0,46.63844,10.8,3.2,48.0,20.4,149.0,114.6,2.55,2.55,2.0,...,0.803211,0.343457,3,0.663325,0.614463,0.600984,0.600078,0.600002,0.538034,0.569593


# Matminer: https://hackingmaterials.lbl.gov/matminer/matminer.featurizers.html

In [1]:
from matminer.featurizers.base import MultipleFeaturizer
import matminer.featurizers.composition as cf
from pymatgen.core.composition import Composition
import pandas as pd

In [2]:
# Composition featurizers to chain; their outputs are concatenated in this
# order, so the order here also fixes the order of the feature labels.
composition_featurizers = [
    cf.element.Stoichiometry(),
    cf.composite.ElementProperty.from_preset("magpie"),
    cf.orbital.ValenceOrbital(props=["avg"]),
    cf.ion.IonProperty(fast=True),
]

# Single object that runs all of the featurizers above in one call.
feature_calculators = MultipleFeaturizer(composition_featurizers)

In [3]:
# Featurize one composition and make the feature vector purely numeric.
chemical = Composition("Li1Li1")
features = feature_calculators.featurize(chemical)
features_labels = feature_calculators.feature_labels()

# Encode 'compound possible' with 0 = False and 1 = True.
# The position is looked up from the labels rather than hard-coded, so the
# right entry is rewritten even if featurizers are added, removed or
# reordered in `feature_calculators`.
flag_label = 'compound possible'
if flag_label in features_labels:
    flag_pos = features_labels.index(flag_label)
    features[flag_pos] = 1 if features[flag_pos] else 0

print('features:', features)
# print('feature labels:', features_labels)

features: [1, 1.0, 1.0, 1.0, 1.0, 1.0, 3.0, 3.0, 0.0, 3.0, 0.0, 3.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 6.941, 6.941, 0.0, 6.941, 0.0, 6.941, 453.69, 453.69, 0.0, 453.69, 0.0, 453.69, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 2.0, 2.0, 0.0, 2.0, 0.0, 2.0, 128.0, 128.0, 0.0, 128.0, 0.0, 128.0, 0.98, 0.98, 0.0, 0.98, 0.0, 0.98, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 0.0, 1.0, 0.0, 1.0, 16.5933333333, 16.5933333333, 0.0, 16.5933333333, 0.0, 16.5933333333, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 229.0, 229.0, 0.0, 229.0, 0.0, 229.0, 1.0, 0.0, 0.0, 0.0, 1, 0, 0]


In [14]:
# Display pandas dataframe
# Show every column instead of eliding the middle of the wide table.
pd.options.display.max_columns = 999

# Build the one-row table in a single constructor call: `features` is the
# row, the featurizer labels are the column names. Adding the columns one at
# a time in a loop makes pandas warn that the frame is highly fragmented.
df = pd.DataFrame([features], columns=feature_calculators.feature_labels())
df


  df[col] = pd.Series(val)


Unnamed: 0,0-norm,2-norm,3-norm,5-norm,7-norm,10-norm,MagpieData minimum Number,MagpieData maximum Number,MagpieData range Number,MagpieData mean Number,MagpieData avg_dev Number,MagpieData mode Number,MagpieData minimum MendeleevNumber,MagpieData maximum MendeleevNumber,MagpieData range MendeleevNumber,MagpieData mean MendeleevNumber,MagpieData avg_dev MendeleevNumber,MagpieData mode MendeleevNumber,MagpieData minimum AtomicWeight,MagpieData maximum AtomicWeight,MagpieData range AtomicWeight,MagpieData mean AtomicWeight,MagpieData avg_dev AtomicWeight,MagpieData mode AtomicWeight,MagpieData minimum MeltingT,MagpieData maximum MeltingT,MagpieData range MeltingT,MagpieData mean MeltingT,MagpieData avg_dev MeltingT,MagpieData mode MeltingT,MagpieData minimum Column,MagpieData maximum Column,MagpieData range Column,MagpieData mean Column,MagpieData avg_dev Column,MagpieData mode Column,MagpieData minimum Row,MagpieData maximum Row,MagpieData range Row,MagpieData mean Row,MagpieData avg_dev Row,MagpieData mode Row,MagpieData minimum CovalentRadius,MagpieData maximum CovalentRadius,MagpieData range CovalentRadius,MagpieData mean CovalentRadius,MagpieData avg_dev CovalentRadius,MagpieData mode CovalentRadius,MagpieData minimum Electronegativity,MagpieData maximum Electronegativity,MagpieData range Electronegativity,MagpieData mean Electronegativity,MagpieData avg_dev Electronegativity,MagpieData mode Electronegativity,MagpieData minimum NsValence,MagpieData maximum NsValence,MagpieData range NsValence,MagpieData mean NsValence,MagpieData avg_dev NsValence,MagpieData mode NsValence,MagpieData minimum NpValence,MagpieData maximum NpValence,MagpieData range NpValence,MagpieData mean NpValence,MagpieData avg_dev NpValence,MagpieData mode NpValence,MagpieData minimum NdValence,MagpieData maximum NdValence,MagpieData range NdValence,MagpieData mean NdValence,MagpieData avg_dev NdValence,MagpieData mode NdValence,MagpieData minimum NfValence,MagpieData maximum 
NfValence,MagpieData range NfValence,MagpieData mean NfValence,MagpieData avg_dev NfValence,MagpieData mode NfValence,MagpieData minimum NValence,MagpieData maximum NValence,MagpieData range NValence,MagpieData mean NValence,MagpieData avg_dev NValence,MagpieData mode NValence,MagpieData minimum NsUnfilled,MagpieData maximum NsUnfilled,MagpieData range NsUnfilled,MagpieData mean NsUnfilled,MagpieData avg_dev NsUnfilled,MagpieData mode NsUnfilled,MagpieData minimum NpUnfilled,MagpieData maximum NpUnfilled,MagpieData range NpUnfilled,MagpieData mean NpUnfilled,MagpieData avg_dev NpUnfilled,MagpieData mode NpUnfilled,MagpieData minimum NdUnfilled,MagpieData maximum NdUnfilled,MagpieData range NdUnfilled,MagpieData mean NdUnfilled,MagpieData avg_dev NdUnfilled,MagpieData mode NdUnfilled,MagpieData minimum NfUnfilled,MagpieData maximum NfUnfilled,MagpieData range NfUnfilled,MagpieData mean NfUnfilled,MagpieData avg_dev NfUnfilled,MagpieData mode NfUnfilled,MagpieData minimum NUnfilled,MagpieData maximum NUnfilled,MagpieData range NUnfilled,MagpieData mean NUnfilled,MagpieData avg_dev NUnfilled,MagpieData mode NUnfilled,MagpieData minimum GSvolume_pa,MagpieData maximum GSvolume_pa,MagpieData range GSvolume_pa,MagpieData mean GSvolume_pa,MagpieData avg_dev GSvolume_pa,MagpieData mode GSvolume_pa,MagpieData minimum GSbandgap,MagpieData maximum GSbandgap,MagpieData range GSbandgap,MagpieData mean GSbandgap,MagpieData avg_dev GSbandgap,MagpieData mode GSbandgap,MagpieData minimum GSmagmom,MagpieData maximum GSmagmom,MagpieData range GSmagmom,MagpieData mean GSmagmom,MagpieData avg_dev GSmagmom,MagpieData mode GSmagmom,MagpieData minimum SpaceGroupNumber,MagpieData maximum SpaceGroupNumber,MagpieData range SpaceGroupNumber,MagpieData mean SpaceGroupNumber,MagpieData avg_dev SpaceGroupNumber,MagpieData mode SpaceGroupNumber,avg s valence electrons,avg p valence electrons,avg d valence electrons,avg f valence electrons,compound possible,max ionic char,avg ionic char
0,1,1.0,1.0,1.0,1.0,1.0,3.0,3.0,0.0,3.0,0.0,3.0,1.0,1.0,0.0,1.0,0.0,1.0,6.941,6.941,0.0,6.941,0.0,6.941,453.69,453.69,0.0,453.69,0.0,453.69,1.0,1.0,0.0,1.0,0.0,1.0,2.0,2.0,0.0,2.0,0.0,2.0,128.0,128.0,0.0,128.0,0.0,128.0,0.98,0.98,0.0,0.98,0.0,0.98,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,1.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0,0.0,1.0,16.593333,16.593333,0.0,16.593333,0.0,16.593333,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,229.0,229.0,0.0,229.0,0.0,229.0,1.0,0.0,0.0,0.0,1,0,0
