In [1]:
import qmpy_rester as qr
import requests
import pandas as pd
import numpy as np

We use **qmpy_rester** to query data from **OQMD**. Here we query Heusler alloys with a formation energy below 0 eV, selecting them by the **prototype** field. In this example, we first query only 10 Heusler alloys.

In [2]:
# Demonstration query: 10 records, starting at offset 2500, restricted to
# negative formation energy and the full-Heusler prototype.
kwargs = dict(
    delta_e='<0',
    prototype='L2_1_FullHeusler_Cu2MnAl',
    limit='10',
    offset='2500',
)
with qr.QMPYRester() as q:
    list_of_data = q.get_oqmd_phases(**kwargs)

Your filters are:
    limit=10
    offset=2500
    filter=delta_e<0 AND prototype=L2_1_FullHeusler_Cu2MnAl


The following is one Heusler alloy record returned by this query.

In [3]:
# Show the first record of the response's 'data' list.
list_of_data['data'][0]

{'name': 'PrAsPt2',
 'entry_id': 365077,
 'calculation_id': 436937,
 'icsd_id': None,
 'formationenergy_id': 4193049,
 'duplicate_entry_id': 365077,
 'composition': 'As1 Pr1 Pt2',
 'composition_generic': 'ABC2',
 'prototype': 'L2_1_FullHeusler_Cu2MnAl',
 'spacegroup': 'Fm-3m',
 'volume': 79.4988,
 'ntypes': 3,
 'natoms': 4,
 'unit_cell': [[0.0, 3.412795, 3.412795],
  [3.412795, 0.0, 3.412795],
  [3.412795, 3.412795, 0.0]],
 'sites': ['As @ 0.25 0.25 0.25',
  'Pr @ 0.75 0.75 0.75',
  'Pt @ 0 0 0',
  'Pt @ 0.5 0.5 0.5'],
 'band_gap': 0.0,
 'delta_e': -0.529910223124999,
 'stability': 0.4825373198400049,
 'fit': 'standard',
 'calculation_label': 'standard'}

In the actual run, because the network connection was unstable, we performed the query in batches (2500 records per request).

In [4]:
# Disabled full download: pages through the query 2500 records at a time
# (offset = i*2500) and stops at the first empty page.
# NOTE(review): Heusler_data is not defined in the visible cells; it would have
# to exist as a dict of lists keyed by record field before re-enabling this loop.
# i=0
# while True:
#     with qr.QMPYRester() as q:
#         kwargs = {'delta_e':'<0','prototype':'L2_1_FullHeusler_Cu2MnAl','limit':'2500','offset':str(i*2500)}
#         list_of_data = q.get_oqmd_phases(**kwargs)
#     print('data draw compeleted',end='')
#     if len(list_of_data['data'])==0:
#         break
#     for j in range(len(list_of_data['data'])):
#         for k in list_of_data['data'][0].keys():
#             Heusler_data[k].append(list_of_data['data'][j][k])
#             print('\r data read into dict finish',j,'/2500',end='')
#     print('\r Execution times',i,'Heusler data len',len(Heusler_data['name']))
#     i+=1

After the query is complete, we write the results into a **DataFrame** for later filtering.

In [5]:
# Pivot the list of records into a column-oriented dict: one list per field,
# with the field names taken from the first record.
dq = {key: [] for key in list_of_data['data'][0]}
for data in list_of_data['data']:
    for key, column in dq.items():
        column.append(data[key])

In [6]:
# Turn the column-oriented dict into a DataFrame (one row per queried record).
dfH = pd.DataFrame.from_dict(dq)

In [7]:
# Preview the first row only.
dfH[:1]

Unnamed: 0,name,entry_id,calculation_id,icsd_id,formationenergy_id,duplicate_entry_id,composition,composition_generic,prototype,spacegroup,volume,ntypes,natoms,unit_cell,sites,band_gap,delta_e,stability,fit,calculation_label
0,PrAsPt2,365077,436937,,4193049,365077,As1 Pr1 Pt2,ABC2,L2_1_FullHeusler_Cu2MnAl,Fm-3m,79.4988,3,4,"[[0.0, 3.412795, 3.412795], [3.412795, 0.0, 3....","[As @ 0.25 0.25 0.25, Pr @ 0.75 0.75 0.75, Pt ...",0.0,-0.52991,0.482537,standard,standard


The alloys are then filtered down to the specified element ranges for the X, Y and Z sites, and the remaining data are processed further.

In [8]:
# Allowed elements for the X site: the filter below accepts an element here
# only when its count in the composition string is 2.
X=['Li','Mg','Ti','V','Mn','Fe','Co','Ni','Cu','Ru',
   'Rh','Pd','Ag','Cd','Ir','Pt','Au','Os','Sc','Cr']
# Allowed elements for the Y site (checked after Z, so an element listed in
# both Y and Z is assigned to Z by the filter below).
Y=['Sc','Ti','V' ,'Cr','Mn','Fe','Co',
   'Ni','Cu','Zn','Y' ,'Zr','Nb','Mo','Ru','Ag','Hf',
   'Ta','W' ,'Pt','La','Ce','Pr','Nd','Sm','Gd','Tb',
   'Dy','Ho','Er','Tm','Yb','Lu']
# Allowed elements for the Z site.
Z=['Mg','Zn','B' ,'Al','Si','Ga','Ge','As','In','Sn',
   'Sb','Pb','Bi','Tl','P']

In [9]:
# Output columns, filled only for rows whose composition maps onto X2YZ with
# every element taken from the allowed X / Y / Z lists.
x, y, z = [], [], []
E_formation, E_above_hull = [], []
entry_id, calculation_id, lattice = [], [], []
Heusler = []
for row in range(len(dfH.index)):
    on_x, on_y, on_z = [], [], []
    # Composition tokens look like 'Pt2' or 'As1': symbol + one-digit count.
    for token in dfH.at[row, 'composition'].split(' '):
        symbol, count = token[:-1], token[-1]
        if count == '2' and symbol in X:
            on_x.append(symbol)
        elif symbol in Z:
            on_z.append(symbol)
        elif symbol in Y:
            on_y.append(symbol)
    # Keep the row only when exactly one element landed on each site.
    if not (len(on_x) == 1 and len(on_y) == 1 and len(on_z) == 1):
        continue
    x.append(on_x[0])
    y.append(on_y[0])
    z.append(on_z[0])
    Heusler.append(on_x[0] + '2' + on_y[0] + on_z[0])
    E_formation.append(dfH.at[row, 'delta_e'])
    E_above_hull.append(dfH.at[row, 'stability'])
    entry_id.append(dfH.at[row, 'entry_id'])
    calculation_id.append(dfH.at[row, 'calculation_id'])
    # Cube root of 4x the reported volume; presumably the cubic conventional
    # cell edge, assuming 'volume' refers to the 4-atom primitive cell.
    lattice.append(np.cbrt(float(dfH.at[row, 'volume']) * 4))

In [10]:
# Assemble the filtered records, order them by site occupants (X, then Y,
# then Z) and renumber the rows from 0.
dfH = (
    pd.DataFrame.from_dict({
        'entry_id': entry_id,
        'calculation_id': calculation_id,
        'composition': Heusler,
        'lattice_constant': lattice,
        'X': x,
        'Y': y,
        'Z': z,
        'E_formation': E_formation,
        'E_above_hull': E_above_hull,
    })
    .sort_values(by=['X', 'Y', 'Z'])
    .reset_index(drop=True)
)
dfH

Unnamed: 0,entry_id,calculation_id,composition,lattice_constant,X,Y,Z,E_formation,E_above_hull
0,365077,436937,Pt2PrAs,6.82559,Pt,Pr,As,-0.52991,0.482537
1,365080,436949,Pt2YTl,6.817099,Pt,Y,Tl,-0.682228,0.135337


In order to obtain information such as charge transfer and magnetic moments, we scrape the data directly from the OQMD calculation web page identified by each calculation id. This is admittedly not the best solution; you may be able to find an official channel for accessing these data.

In [11]:
# define the grab function
def draw_information(htp, timeout=30):
    """Scrape magnetic, charge and input-setting data from one calculation page.

    The page is parsed line by line: the content of every ``<td>...</td>``
    cell is collected in order, and for the ``<h2>`` sections titled
    `` INCAR settings `` and `` Potentials `` the line two below the heading
    is kept. Values are then picked from fixed cell positions, so the
    function depends on the exact page layout.

    Parameters
    ----------
    htp : str
        URL of the calculation page.
    timeout : float, optional
        Seconds passed to ``requests.get`` so a stalled connection cannot
        block the notebook forever.

    Returns
    -------
    dict
        ``net_magnetic_moment`` (0 if the cell is blank, else float),
        ``elements``, ``chargs``, ``atom_mags`` (four raw cell strings each),
        ``magmoms`` (MAGMOM values expanded per atom as strings, or
        ``[0, 0, 0, 0]`` when the INCAR line has no MAGMOM entry) and
        ``potentials`` (potential names with the ``PBE`` suffix removed).

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    IndexError
        If the page does not contain the expected cells or sections.
    ValueError
        If a MAGMOM token is neither ``count*value`` nor a plain number.
    """
    response = requests.get(htp, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page.
    response.raise_for_status()

    tabel=[];input_setting=[]
    grep_set_line=None
    for i,line in enumerate(response.text.split("\n")):
        if '<td>' in line and '</td>' in line:
            tabel.append(line.split('<td>')[1].split('</td>')[0])
        elif '<h2>' in line and '</h2>' in line:
            char=line.split('<h2>')[1].split('</h2>')[0]
            if char == ' INCAR settings ' or char == ' Potentials ':
                # The payload sits two lines below the section heading.
                grep_set_line=i+2
        elif i == grep_set_line:
            input_setting.append(line)

    # Highest cell index read below is 70; both setting lines are required.
    if len(tabel) <= 70 or len(input_setting) < 2:
        raise IndexError(
            f"unexpected page layout at {htp}: found {len(tabel)} table cells "
            f"and {len(input_setting)} setting lines"
        )

    # A blank cell means no net moment was reported.
    if tabel[5].strip()=='':
        net_magnetic_moment=0
    else:
        net_magnetic_moment=float(tabel[5])
    elements=[tabel[35],tabel[44],tabel[53],tabel[62]]
    chargs=[tabel[42],tabel[51],tabel[60],tabel[69]]
    atom_mags=[tabel[43],tabel[52],tabel[61],tabel[70]]

    magmoms=[]
    if 'MAGMOM = ' not in input_setting[0]:
        magmoms=[0,0,0,0]
    else:
        spec=input_setting[0].split('MAGMOM = ')[1].split('</p>')[0]
        # split() tolerates repeated or trailing blanks between tokens.
        for token in spec.split():
            count,star,value=token.partition('*')
            if star:
                # 'n*v' stands for n atoms with initial moment v.
                magmoms.extend([value]*int(count))
            else:
                # Plain value for a single atom; reject non-numeric tokens.
                float(token)
                magmoms.append(token)

    potentials=[p.split('PBE')[0].split()[0] for p in input_setting[1].split(',')]

    result={'net_magnetic_moment':net_magnetic_moment,'elements':elements,'chargs':chargs,
            'atom_mags':atom_mags,'magmoms':magmoms,'potentials':potentials}
    return result

In [12]:
# grab data from website
# One output list per column; each iteration appends exactly one value to
# every list so they all stay aligned with the rows of dfH.
net_magnetic_moment=[];X_charge=[];Y_charge=[];Z_charge=[];X_mag=[];Y_mag=[];Z_mag=[]
X_potential=[];Y_potential=[];Z_potential=[];X_magi=[];Y_magi=[];Z_magi=[]

for i in range(len(dfH.index)):
    cal_id=dfH.at[i,'calculation_id']
    # Row-local names: X/Y/Z must keep holding the candidate element lists
    # used by the composition filter, so they are not rebound here.
    x_el=dfH.at[i,'X'];y_el=dfH.at[i,'Y'];z_el=dfH.at[i,'Z']
    htp='http://oqmd.org/analysis/calculation/'+str(cal_id)
    add=draw_information(htp)
    # Append (not assign): assigning would replace the list with a scalar
    # that pandas later broadcasts to every row.
    net_magnetic_moment.append(add['net_magnetic_moment'])

    # Reset per-row values so nothing leaks over from the previous alloy
    # when a site or potential is not found on the page.
    Xc=Xm=Xmi=Xp=None
    Yc=Ym=Ymi=Yp=None
    Zc=Zm=Zmi=Zp=None

    for j,site in enumerate(add['elements']):
        symbol=site.split()[0]
        if symbol==x_el:
            Xc=add['chargs'][j];Xm=add['atom_mags'][j];Xmi=add['magmoms'][j]
        elif symbol==y_el:
            Yc=add['chargs'][j];Ym=add['atom_mags'][j];Ymi=add['magmoms'][j]
        elif symbol==z_el:
            Zc=add['chargs'][j];Zm=add['atom_mags'][j];Zmi=add['magmoms'][j]
    for pot in add['potentials']:
        # Potential names look like 'Pr_3' or 'Pt'; the part before '_' is
        # the element symbol.
        family=pot.split('_')[0]
        if family==x_el:
            Xp=pot
        elif family==y_el:
            Yp=pot
        elif family==z_el:
            Zp=pot

    X_charge.append(Xc);Y_charge.append(Yc);Z_charge.append(Zc)
    X_mag.append(Xm);Y_mag.append(Ym);Z_mag.append(Zm)
    X_magi.append(Xmi);Y_magi.append(Ymi);Z_magi.append(Zmi)
    X_potential.append(Xp);Y_potential.append(Yp);Z_potential.append(Zp)

Combine the two pieces of information to get the final data.

In [13]:
# Collect the scraped per-alloy columns and turn them into a DataFrame.
dd = dict(
    net_magnetic_moment=net_magnetic_moment,
    X_charge=X_charge, Y_charge=Y_charge, Z_charge=Z_charge,
    X_mag=X_mag, Y_mag=Y_mag, Z_mag=Z_mag,
    X_magi=X_magi, Y_magi=Y_magi, Z_magi=Z_magi,
    X_potential=X_potential, Y_potential=Y_potential, Z_potential=Z_potential,
)
dfi = pd.DataFrame.from_dict(dd)

In [14]:
# Display the scraped columns.
dfi

Unnamed: 0,net_magnetic_moment,X_charge,Y_charge,Z_charge,X_mag,Y_mag,Z_mag,X_magi,Y_magi,Z_magi,X_potential,Y_potential,Z_potential
0,0,8.757,8.936,2.583,0,0,0,5.0,7.0,0.0,Pt,Pr_3,As
1,0,8.766,9.187,11.281,0,0,0,5.0,5.0,0.0,Pt,Y_sv,Tl_d


In [15]:
# Place the dfH columns and the dfi columns side by side; axis=1 aligns rows
# on the index.
df = pd.concat([dfH, dfi], axis=1)

In [16]:
# Display the combined table.
df

Unnamed: 0,entry_id,calculation_id,composition,lattice_constant,X,Y,Z,E_formation,E_above_hull,net_magnetic_moment,...,Z_charge,X_mag,Y_mag,Z_mag,X_magi,Y_magi,Z_magi,X_potential,Y_potential,Z_potential
0,365077,436937,Pt2PrAs,6.82559,Pt,Pr,As,-0.52991,0.482537,0,...,2.583,0,0,0,5.0,7.0,0.0,Pt,Pr_3,As
1,365080,436949,Pt2YTl,6.817099,Pt,Y,Tl,-0.682228,0.135337,0,...,11.281,0,0,0,5.0,5.0,0.0,Pt,Y_sv,Tl_d


Finally, **pymatgen** is used to build the bulk structure, with each atom placed at a fixed position so that the structures can be cut efficiently in batches.

In [17]:
from pymatgen.core import Lattice,Structure
from pymatgen.symmetry.analyzer import SpacegroupAnalyzer

def make_heusler(elements,lattice_constant):
    """Build a 16-site cubic cell for an X2YZ alloy with fixed site positions.

    Parameters
    ----------
    elements : sequence
        The three species in the order ``(X, Y, Z)``; only the first three
        items are read.
    lattice_constant : float
        Edge length used for ``a``, ``b`` and ``c``; all angles are 90 degrees.

    Returns
    -------
    pymatgen.core.Structure
        Structure with 8 X sites, then 4 Y sites, then 4 Z sites, in the
        fixed order of the fractional coordinates listed below.
    """
    X = elements[0] ; Y = elements[1] ; Z = elements[2]

    # Fractional coordinates: rows 1-2 are the eight X sites, row 3 the four
    # Y sites and row 4 the four Z sites (matching the species list below).
    coordinate = [
        [0.75]*2+[0.25],[0.25]*2+[0.75],[0.25]*3,[0.75]*3,
        [0.25]+[0.75]*2,[0.75]+[0.25]*2,[0.75,0.25,0.75],[0.25,0.75,0.25],
        [0]*3,[0]+[0.5]*2,[0.5,0,0.5],[0.5]*2+[0],
        [0]*2+[0.5],[0.5]+[0]*2,[0,0.5,0],[0.5]*3
    ]
    lattice=Lattice.from_parameters(a=lattice_constant,b=lattice_constant,c=lattice_constant,
                                   alpha=90,beta=90,gamma=90)
    element=[X]*8+[Y]*4+[Z]*4

    # The 16-site cell itself is returned; no primitive-cell reduction is
    # performed, so the site order stays exactly as listed above.
    return Structure(lattice,element,coordinate)

In [18]:
# Example: build the structure for the first row of the combined table.
make_heusler([df.at[0, 'X'], df.at[0, 'Y'], df.at[0, 'Z']], df.at[0, 'lattice_constant'])

Structure Summary
Lattice
    abc : 6.825589853621066 6.825589853621066 6.825589853621066
 angles : 90.0 90.0 90.0
 volume : 317.9951999999999
      A : 6.825589853621066 0.0 4.179468383264845e-16
      B : 1.0976385641011075e-15 6.825589853621066 4.179468383264845e-16
      C : 0.0 0.0 6.825589853621066
    pbc : True True True
PeriodicSite: Pt (5.1192, 5.1192, 1.7064) [0.7500, 0.7500, 0.2500]
PeriodicSite: Pt (1.7064, 1.7064, 5.1192) [0.2500, 0.2500, 0.7500]
PeriodicSite: Pt (1.7064, 1.7064, 1.7064) [0.2500, 0.2500, 0.2500]
PeriodicSite: Pt (5.1192, 5.1192, 5.1192) [0.7500, 0.7500, 0.7500]
PeriodicSite: Pt (1.7064, 5.1192, 5.1192) [0.2500, 0.7500, 0.7500]
PeriodicSite: Pt (5.1192, 1.7064, 1.7064) [0.7500, 0.2500, 0.2500]
PeriodicSite: Pt (5.1192, 1.7064, 5.1192) [0.7500, 0.2500, 0.7500]
PeriodicSite: Pt (1.7064, 5.1192, 1.7064) [0.2500, 0.7500, 0.2500]
PeriodicSite: Pr (0.0000, 0.0000, 0.0000) [0.0000, 0.0000, 0.0000]
PeriodicSite: Pr (0.0000, 3.4128, 3.4128) [0.0000, 0.5000, 0.5000]