In [1]:
import numpy as np
import pandas as pd
import pymatgen as pmg

from matminer.datasets.dataframe_loader import load_elastic_tensor
from matminer.utils.conversions import str_to_composition

from matminer.featurizers import composition as cf
from matminer.featurizers import structure as struc
from matminer.featurizers.base import MultipleFeaturizer
from pymatgen import MPRester

# Materials Project API key.
# Read from the environment so the secret is never stored in (or committed with)
# the notebook. The key that used to be hardcoded here should be treated as
# leaked and regenerated on the Materials Project dashboard.
# When PMG_MAPI_KEY is not set, `key` is None; pymatgen's MPRester documents
# that an api_key of None makes it look up its own PMG_MAPI_KEY setting.
import os

key = os.environ.get('PMG_MAPI_KEY')

In [2]:
# Load the elastic-tensor dataset shipped with matminer.
data = load_elastic_tensor()

# Compute composition objects from the formula strings.
data['composition'] = str_to_composition(data['formula'])

# Columns that are not used as features or targets in the rest of the notebook.
del_columns = ['formula', 'nsites', 'space_group',
               'G_Reuss', 'G_Voigt', 'K_Reuss', 'K_Voigt',
               'compliance_tensor', 'elastic_tensor', 'elastic_tensor_original'
               ]

# Use the explicit `columns=` keyword: passing the axis positionally
# (`drop(del_columns, 1)`) was deprecated and is rejected by pandas >= 2.0.
data = data.drop(columns=del_columns)

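#### Hölder means of the first 8 properties in Table 1 of the De Jong et al. paper
Properties: 'group_number', 'atomic_mass', 'atomic_radius', 'row_number', 'boiling_temp', 'melting_temp', 'electronegativity', 'atomic_number'

The Hölder (power) mean of order $p$ of the values $x_i$ with weights $w_i$ is
$$\mu_p(x) = \left[\frac{\sum_{i=1}^{n} w_i x_i^p}{\sum_{i=1}^{n} w_i}\right]^{\frac{1}{p}}$$
For $p = 0$ the expression is defined by its limit, the weighted geometric mean.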

In [6]:
# Elemental properties to aggregate over each composition.
element_features = [
    'group', 'atomic_mass', 'atomic_radius', 'row',
    'boiling_point', 'melting_point', 'X', 'Z',
]

# Hölder means of order -4 .. 4 (inclusive), followed by the two spread statistics.
holder_orders = range(-4, 4 + 1)
element_stats = ['holder_mean::%d' % order for order in holder_orders]
element_stats = element_stats + ['geom_std_dev', 'std_dev']

ef = cf.ElementProperty(
    data_source='pymatgen',
    features=element_features,
    stats=element_stats,
)
data = ef.featurize_dataframe(data, col_id='composition')


divide by zero encountered in double_scalars


invalid value encountered in double_scalars


divide by zero encountered in double_scalars


divide by zero encountered in double_scalars


invalid value encountered in double_scalars


invalid value encountered in double_scalars


divide by zero encountered in double_scalars


invalid value encountered in double_scalars



#### Cohesive Energy 

In [5]:
# Cohesive-energy featurizer; it is given the Materials Project API key.
ft = cf.CohesiveEnergy(mapi_key=key)

# ignore_errors=True is passed so that a failure on one composition does not
# abort featurization of the whole frame.
data = ft.featurize_dataframe(
    data,
    col_id='composition',
    ignore_errors=True,
)

#### Formation energy per atom, Energy above hull, band gap, density 

In [6]:
# Formation energy per atom, energy above hull, band gap, density
property_list = ['formation_energy_per_atom', 'e_above_hull', 'band_gap', 'density']
# Output column for each Materials Project property, in the same order as property_list.
column_names = ['formation energy per atom', 'energy above hull', 'band gap', 'density']

n = data.shape[0]

# One list of property values per material, in the row order of `data`.
rows = []
# The context manager closes the underlying HTTP session once the loop is done.
with MPRester(api_key=key) as mp:
    for material_id in data['material_id']:
        ls = mp.get_data(material_id)
        if not ls:
            # No entry returned for this id. The 0.0 placeholder is kept from the
            # original notebook so the saved dataset is unchanged.
            # NOTE(review): 0.0 is also a legitimate value for these properties;
            # consider NaN if downstream code can handle missing values.
            rows.append([0.0] * len(property_list))
        else:
            rows.append([ls[0][prop] for prop in property_list])

# Build the result on data's own index so the column assignment below aligns
# row-for-row even when `data` does not carry a default 0..n-1 index.
mp_values = pd.DataFrame(rows, index=data.index, columns=column_names, dtype=float)

# Per-property Series, kept under their original names.
E_f, E_hull, E_g, rho = (mp_values[name] for name in column_names)

for name in column_names:
    data[name] = mp_values[name]

#### log (V) per atom 

In [7]:
# Natural log of the cell volume divided by the number of atoms in the composition.
# Computed as one vectorized expression: the previous element-wise
# `data[col][i] = ...` was a chained assignment, which raises
# SettingWithCopyWarning and does not write back to `data` when pandas
# copy-on-write is enabled.
atoms_per_cell = data['composition'].map(lambda comp: comp.num_atoms)
data['log volume per atom'] = np.log(data['volume'] / atoms_per_cell)

#### Voronoi based site coordination, bond lengths and bond angles 

In [8]:
# Structure-based featurizers: site coordination-number statistics (Ward PRB 2017
# preset) and structural heterogeneity.
coordination_stats = struc.SiteStatsFingerprint.from_preset("CoordinationNumber_ward-prb-2017")
heterogeneity = struc.StructuralHeterogeneity()

# Bundle both so they are applied to the 'structure' column in one call.
ft = MultipleFeaturizer([coordination_stats, heterogeneity])

data = ft.featurize_dataframe(data, col_id='structure')

In [9]:
# Report the (rows, columns) of the fully featurized frame, then show its first row.
final_shape = data.shape
print("FINAL SHAPE OF DATA: ", final_shape)
data.head(1)

Unnamed: 0,material_id,volume,structure,elastic_anisotropy,G_VRH,K_VRH,poisson_ratio,composition,group_number_-4,atomic_mass_-4,...,avg_dev CN_VoronoiNN,mean absolute deviation in relative bond length,max relative bond length,min relative bond length,minimum neighbor distance variation,maximum neighbor distance variation,range neighbor distance variation,mean neighbor distance variation,avg_dev neighbor distance variation,mean absolute deviation in relative cell size
0,mp-10003,194.419802,"[[0.94814328 2.07280467 2.5112 ] Nb, [5.273...",0.030688,97.141604,194.268884,0.285701,"(Nb, Co, Si)",7.335929,54.688924,...,1.451453,0.048863,1.036647,0.911985,0.009816,0.080081,0.070264,0.058781,0.028399,0.105329


Save data as pickle file

In [12]:
# Persist the featurized frame next to the notebook as a pickle file.
output_path = './dejong_featurized_data.pkl'
data.to_pickle(output_path)