In [1]:
import numpy as np
import pandas as pd
import json
import re
import os
import ast
import matplotlib.pyplot as plt
from pymatgen.core.composition import Composition
from pymatgen.core.periodic_table import Species
from disorder.composition import merged_comp
from disorder.composition import composition_from_formula

In [2]:
df1=pd.read_json('data/disorder_results_no_H_new.json',orient='column')
df2=pd.read_json('data/disorder_results_H_files.json',orient='index')

In [3]:
df2=df2.loc[~df2['label'].isna()]

In [4]:
df=pd.concat([df1,df2])

In [5]:
structure=pd.read_csv('data/ICSD-info.csv')

In [6]:
disorder=df.merge(structure,how='inner',left_on='ICSD_ID',right_on='Col_code')
disorder=disorder.loc[disorder['Temperature']<310]
disorder=disorder.loc[disorder['Temperature']>270]
disorder=disorder.loc[disorder['Pressure']<0.11]
disorder.reset_index(drop=True,inplace=True)

In [7]:
len(disorder)

154025

In [8]:
disorder_sets=[]
for i in range(len(disorder)):
    disorder_sets.append(set(disorder.iloc[i]['orbit_disorder'].values()))
disorder['disorder_set']=disorder_sets
diff_comp=[]
for i in range(len(disorder)):
    comp_el=[]
    for el in Composition(disorder.iloc[i]['formula']).elements:
        comp_el.append(str(el))
    comp_el.sort()
    struct_el=[]
    for spec in disorder.iloc[i]['species'].values():
        for el in spec.keys():
            el=str(Species(el).element)
            struct_el.append(el)
    struct_el=list(set(struct_el))
    struct_el.sort()
    if(comp_el!=struct_el):
        diff_comp.append(i)
disorder.drop(index=diff_comp,inplace=True)
disorder.reset_index(inplace=True,drop=True)
num_el=[]
num_orb=[]
for i in range(len(disorder)):
    num_el.append(len(Composition(disorder.iloc[i]['formula'])))
    num_orb.append(len(disorder.iloc[i]['intersect_orbit_connected']))
disorder['num_el']=num_el
disorder['num_orb']=num_orb
periodic_table=pd.read_csv('data/periodic_table.csv')
elem_list=list(periodic_table['element'].values)

compositions=[]
exc=[]
for i in range(len(disorder)):
    form=disorder.iloc[i]['formula']
    try:
        comp=composition_from_formula(form)
        compositions.append(comp)
    except:
        print('composition error:',i)
        
normalised_compositions=[]
for i,comp in enumerate(compositions):
    try:
        index=len(elem_list)-1
        switch=0
        for el in comp.keys():
            if el not in elem_list:
                switch=1   
        if(switch==0):
            for el in comp.keys():    
                if(elem_list.index(el)<index+1):
                    index=elem_list.index(el)
            devider=float(comp[elem_list[index]])
            for el in comp.keys(): 
                comp[el]=round(float(comp[el])/devider*10000)
            normalised_compositions.append(comp) 
        else:
            print(comp)
            normalised_compositions.append({})
            
    except:
        print(comp)
        normalised_compositions.append({})
        exc.append(i)
merged_compositions=merged_comp(disorder)
disorder['ordered_formula']=merged_compositions
disorder['normalised_composition']=normalised_compositions
disorder=disorder.loc[disorder['normalised_composition']!={}]
disorder.reset_index(drop=True,inplace=True)
com_index=[]
for i in range(len(disorder)):
    orb=disorder.iloc[i]['orbit_disorder'].values()
    if('COM' in orb):
        com_index.append(i)

{'Y': 1.0, 'D': 3.7, 'Fe': 2.0}
{'Mo': 1.0, 'D': 4.0, 'O': 5.0}
{'D': 5.0, 'O': 4.0, 'P': 1.0, 'C': 1.0, 'Mn': 1.0}
{'Si': 4.0, 'D': 1.14, 'Ba': 3.0}
{'La': 1.0, 'D': 5.9, 'Ni': 5.0}
{'Cl': 2.0, 'D': 2.0, 'Ba': 1.0, 'O': 1.0}
{'D': 3.88, 'Cr': 2.0, 'Zr': 1.0}
{'D': 6.0, 'C': 2.0, 'O': 4.0, 'N': 2.0}
{'D': 1.0, 'Sc': 1.0, 'Ba': 1.0, 'O': 2.0}
{'Pr': 1.0, 'D': 12.0, 'B': 3.0}
{'Ce': 2.0, 'D': 4.43, 'Ni': 7.0}
{'D': 1.0, 'Cr': 1.0, 'O': 2.0}
{'Y': 2.0, 'D': 2.41, 'Ni': 7.0}
{'La': 2.0, 'D': 3.0, 'Se': 1.0}
{'D': 4.0, 'Rb': 1.0, 'Li': 1.0, 'N': 2.0}
{'V': 2.0, 'D': 4.5, 'Zr': 1.0}
{'D': 1.0, 'Nb': 0.2, 'Zr': 0.8}
{'D': 1.0, 'Ge': 1.0, 'Ba': 1.0, 'Ga': 1.0}
{'D': 1.5, 'Ni': 1.0, 'Zr': 0.36, 'Ti': 0.64}
{'D': 1.95, 'Mg': 1.0}
{'D': 10.0, 'Cs': 3.0, 'Re': 1.0}
{'D': 1.27, 'Fe': 1.0, 'Zr': 3.0}
{'Si': 1.0, 'D': 3.0, 'Rb': 1.0}
{'D': 3.78, 'Ni': 1.0, 'Zr': 2.0}
{'D': 4.0, 'Cs': 2.0, 'Ca': 1.0}
{'D': 4.0, 'Pd': 1.0, 'Rb': 2.0}
{'D': 4.0, 'Sr': 1.0, 'Mg': 1.0}
{'D': 1.0, 'N': 1.0, 'Ca': 2.0}
{'In

In [11]:
disorder=disorder.drop(index=com_index)
disorder.reset_index(drop=True,inplace=True)

In [21]:
list_of_compounds=list(set(disorder['ordered_formula']))
keep_max=[]
keep_min=[]

for comp in list_of_compounds:
    dx=disorder.loc[disorder['ordered_formula']==comp].copy()
    if(len(dx)>0):
        ind=dx.index.values
        imax=np.argmax(dx['conf_entropy'].values)
        imin=np.argmin(dx['conf_entropy'].values)
        keep_max.append(ind[imax])
        keep_min.append(ind[imin])

            
dbmin=disorder.iloc[keep_min]
dbmax=disorder.iloc[keep_max]

dbmax.reset_index(drop=True,inplace=True)
dbmin.reset_index(drop=True,inplace=True)

dbmax.to_json('data/dbmax.json')
dbmin.to_json('data/dbmin.json')

In [23]:
disorder=dbmax[['formula','disorder_set']]

In [36]:
general_disorder=(disorder['disorder_set'].values!={'O'}).astype(int)

In [38]:
g_disorder=pd.DataFrame()
g_disorder['formula']=disorder['formula']
g_disorder['disorder']=general_disorder

In [42]:
g_disorder.to_csv('data/general_disorder.csv')

In [40]:
len(g_disorder)

104294

In [41]:
g_disorder

Unnamed: 0,formula,disorder
0,Ge2.5 Pr6 S14,1
1,Cu1 Hf2 Si4,0
2,H4 Ca2 Cs2 O20 P6,0
3,Ho1 Ni3,0
4,Ca1 Fe1 Ni2 O12 P3,0
...,...,...
104289,Br8 Cs3 Mo2,1
104290,Mo5 O8 Pb1,0
104291,Ba0.4 Hf1 O3 Sr0.6,1
104292,H4 F1 Mn1 O5 P1,0
