# Selection of gamma-ray compounds from aflowlib

In [1]:
import re
import os
import csv
import glob
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

# Notebook-wide pandas display settings: a display width of 1000, up to
# 100 columns shown, and HTML rendering of DataFrames in the notebook.
for _option, _value in (
    ('display.width', 1000),
    ('display.max_columns', 100),
    ('display.notebook_repr_html', True),
):
    pd.set_option(_option, _value)

## Load the AFLOWLIB ICSD data set into a pandas DataFrame

In [2]:
# Bravais-lattice labels; one file "structure_info_bravais_<label>.csv" is
# read for each label, in this order.
BRAVAIS = ['CUB', 'FCC', 'BCC',
           'TET', 'BCT', 'ORC',
           'ORCF', 'ORCI', 'ORCC',
           'HEX', 'RHL', 'MCL',
           'MCLC', 'TRI']

# Collect the per-lattice frames and concatenate once at the end: calling
# pd.concat inside the loop re-copies every previously loaded row each pass.
frames = [pd.read_csv("structure_info_bravais_" + BRAVAIS[0] + ".csv")]
for lattice in BRAVAIS[1:]:
    frames.append(pd.read_csv("structure_info_bravais_" + lattice + ".csv"))
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print("Done " + lattice)

# ignore_index=True renumbers the rows 0..N-1 across all files.
df = pd.concat(frames, ignore_index=True)
    

Done FCC
Done BCC
Done TET
Done BCT
Done ORC
Done ORCF
Done ORCI
Done ORCC
Done HEX
Done RHL
Done MCL
Done MCLC
Done TRI


### Some data exploration

In [3]:
# List the column names of the combined table, then preview its first 5 rows.
# Single-argument print(...) gives the same output on Python 2 and 3.
print(df.columns.values)

df.head(5)

['prototype' 'compound' 'natoms' 'nspecies' 'spacegroup_relax' 'geometry'
 'positions_cartesian' 'positions_fractional' 'ldau_TLUJ' 'species_pp'
 'species_pp_version' 'density' 'valence_cell_std' 'dft_type' 'Egap'
 'Egap_fit' 'Egap_type' 'spin_cell' 'scintillation_attenuation_length'
 'aurl']


Unnamed: 0,prototype,compound,natoms,nspecies,spacegroup_relax,geometry,positions_cartesian,positions_fractional,ldau_TLUJ,species_pp,species_pp_version,density,valence_cell_std,dft_type,Egap,Egap_fit,Egap_type,spin_cell,scintillation_attenuation_length,aurl
0,Si1V3_ICSD_87329,Si2V6,8,2,223,"4.747577,4.747577,4.747577,90,90,90","0,0,0;2.37379,2.37379,2.37379;3.56068,0,2.3737...",,"2;0,2;0,2.7;0,0","Si,V_sv","Si:PAW_PBE:05Jan2001,V_sv:PAW_PBE:07Sep2000",5.61469,38,PAW_PBE,0.0,0.0,metal,1.92304,2.23188,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Si1V3...
1,F1Rb1_ICSD_61562,F1Rb1,2,2,221,"3.449722,3.449722,3.449722,90,90,90","1.72486,1.72486,1.72486;0,0,0",,,"F,Rb_sv","F:PAW_PBE:08Apr2002,Rb_sv:PAW_PBE:06Sep2000",4.22545,8,PAW_PBE,6.0021,9.00383,insulator_direct,0.0,2.95372,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/F1Rb1...
2,Cl3Mn1Tl1_ICSD_23167,Cl3Mn1Tl1,5,3,221,"5.095861,5.095861,5.095861,90,90,90","0,2.54793,2.54793;2.54793,0,2.54793;2.54793,2....",,"2;0,2,0;0,4,0;0,0,0","Cl,Mn_pv,Tl_d","Cl:PAW_PBE:17Jan2003,Mn_pv:PAW_PBE:07Sep2000,T...",4.58862,31,PAW_PBE,2.652,4.4879,insulator_indirect,5.00065,1.88379,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Cl3Mn...
3,Hf1Ir3_ICSD_638580,Hf1Ir3,4,2,221,"3.936467,3.936467,3.936467,90,90,90","0,0,0;0,1.96823,1.96823;1.96823,0,1.96823;1.96...",,"2;0,2;0,2.8;0,0","Hf_pv,Ir","Hf_pv:PAW_PBE:06Sep2000,Ir:PAW_PBE:06Sep2000",20.5571,31,PAW_PBE,0.0,0.0,metal,0.0,0.377047,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Hf1Ir...
4,Sn1V3_ICSD_652830,Sn2V6,8,2,223,"5.025984,5.025984,5.025984,90,90,90","0,0,0;2.51299,2.51299,2.51299;3.76949,0,2.5129...",,"2;2,2;3.5,2.7;0,0","Sn,V_sv","Sn:PAW_PBE:08Apr2002,V_sv:PAW_PBE:07Sep2000",7.10246,38,PAW_PBE,0.0,0.0,metal,0.0,1.70717,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Sn1V3...


In [4]:
# Report the number of rows in the combined table. A formatted single-argument
# print(...) produces the same line on Python 2 and 3 (a multi-argument
# print(...) would print a tuple on Python 2).
print('Total number of entries in the data set: %s' % df.shape[0])

Total number of entries in the data set: 52675


In [5]:
# Build Z, the lookup table element symbol -> atomic number, from
# elementlist.csv. Each row is read as: column 0 = atomic number (integer),
# column 1 = element symbol.
csv_file = 'elementlist.csv'
Z = {}
# The with-block closes the file even if a row fails to parse.
with open(csv_file, 'r') as f:
    for row in csv.reader(f):
        if not row:
            # csv.reader yields an empty list for a blank line; skip it.
            continue
        Z[row[1]] = int(row[0])

In [6]:
# Add two columns derived from the prototype string:
#   species        - element symbols, e.g. "Si1V3_ICSD_87329" -> ['Si', 'V']
#   atomic_numbers - the matching atomic numbers looked up in Z, e.g. [14, 23]
# The text before the first "_" is taken as the formula, and every run of
# letters in it as one element symbol.
df['species'] = df.prototype.str.split('_').str.get(0).str.findall("[a-zA-Z]+")
# A symbol that is missing from Z raises KeyError here.
df['atomic_numbers'] = df['species'].apply(lambda symbols: [Z[s] for s in symbols])

# Single-argument print(...) gives the same output on Python 2 and 3.
print(df.columns.values)

df.head(5)

['prototype' 'compound' 'natoms' 'nspecies' 'spacegroup_relax' 'geometry'
 'positions_cartesian' 'positions_fractional' 'ldau_TLUJ' 'species_pp'
 'species_pp_version' 'density' 'valence_cell_std' 'dft_type' 'Egap'
 'Egap_fit' 'Egap_type' 'spin_cell' 'scintillation_attenuation_length'
 'aurl' 'species' 'atomic_numbers']


Unnamed: 0,prototype,compound,natoms,nspecies,spacegroup_relax,geometry,positions_cartesian,positions_fractional,ldau_TLUJ,species_pp,species_pp_version,density,valence_cell_std,dft_type,Egap,Egap_fit,Egap_type,spin_cell,scintillation_attenuation_length,aurl,species,atomic_numbers
0,Si1V3_ICSD_87329,Si2V6,8,2,223,"4.747577,4.747577,4.747577,90,90,90","0,0,0;2.37379,2.37379,2.37379;3.56068,0,2.3737...",,"2;0,2;0,2.7;0,0","Si,V_sv","Si:PAW_PBE:05Jan2001,V_sv:PAW_PBE:07Sep2000",5.61469,38,PAW_PBE,0.0,0.0,metal,1.92304,2.23188,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Si1V3...,"[Si, V]","[14, 23]"
1,F1Rb1_ICSD_61562,F1Rb1,2,2,221,"3.449722,3.449722,3.449722,90,90,90","1.72486,1.72486,1.72486;0,0,0",,,"F,Rb_sv","F:PAW_PBE:08Apr2002,Rb_sv:PAW_PBE:06Sep2000",4.22545,8,PAW_PBE,6.0021,9.00383,insulator_direct,0.0,2.95372,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/F1Rb1...,"[F, Rb]","[9, 37]"
2,Cl3Mn1Tl1_ICSD_23167,Cl3Mn1Tl1,5,3,221,"5.095861,5.095861,5.095861,90,90,90","0,2.54793,2.54793;2.54793,0,2.54793;2.54793,2....",,"2;0,2,0;0,4,0;0,0,0","Cl,Mn_pv,Tl_d","Cl:PAW_PBE:17Jan2003,Mn_pv:PAW_PBE:07Sep2000,T...",4.58862,31,PAW_PBE,2.652,4.4879,insulator_indirect,5.00065,1.88379,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Cl3Mn...,"[Cl, Mn, Tl]","[17, 25, 81]"
3,Hf1Ir3_ICSD_638580,Hf1Ir3,4,2,221,"3.936467,3.936467,3.936467,90,90,90","0,0,0;0,1.96823,1.96823;1.96823,0,1.96823;1.96...",,"2;0,2;0,2.8;0,0","Hf_pv,Ir","Hf_pv:PAW_PBE:06Sep2000,Ir:PAW_PBE:06Sep2000",20.5571,31,PAW_PBE,0.0,0.0,metal,0.0,0.377047,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Hf1Ir...,"[Hf, Ir]","[72, 77]"
4,Sn1V3_ICSD_652830,Sn2V6,8,2,223,"5.025984,5.025984,5.025984,90,90,90","0,0,0;2.51299,2.51299,2.51299;3.76949,0,2.5129...",,"2;2,2;3.5,2.7;0,0","Sn,V_sv","Sn:PAW_PBE:08Apr2002,V_sv:PAW_PBE:07Sep2000",7.10246,38,PAW_PBE,0.0,0.0,metal,0.0,1.70717,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Sn1V3...,"[Sn, V]","[50, 23]"


## Apply filters to select candidate materials

### Exclude entries containing selected species

In [10]:
# Atomic numbers that disqualify a compound: 58-71 (Ce..Lu), then O (8),
# N (7), C (6), F (9), and 72-78 (Hf..Pt).
# list(...) is required on Python 3, where range() is not a list and cannot
# be concatenated with one; on Python 2 it is a harmless copy.
excluded_species = list(range(58, 72)) + [8, 7, 6, 9, 72, 73, 74, 75, 76, 77, 78]

In [11]:
# ##################################################
# Filter applied on the atomic numbers:
# keep only the compounds in which no element appears in excluded_species
# (f elements Z = 58-71, C, N, O, F, and Hf through Pt).
#
# A set gives constant-time membership tests; isdisjoint() is True exactly
# when none of the compound's atomic numbers is in the excluded set.
excluded = set(excluded_species)
index = df['atomic_numbers'].apply(lambda numbers: excluded.isdisjoint(numbers))

df1 = df[index]

### Filter materials with the band gap in a given window

In [15]:
# ##################################################
# Filter applied on the DFT band gap:
# keep entries whose Egap lies strictly between gap_min and gap_max.

gap_min = 0.8
gap_max = 3.5

# Column-wise comparison on the whole Egap column; this yields the same
# boolean mask as evaluating the two inequalities row by row.
index2 = (df1.Egap > gap_min) & (df1.Egap < gap_max)

df2 = df1[index2]
df2

Unnamed: 0,prototype,compound,natoms,nspecies,spacegroup_relax,geometry,positions_cartesian,positions_fractional,ldau_TLUJ,species_pp,species_pp_version,density,valence_cell_std,dft_type,Egap,Egap_fit,Egap_type,spin_cell,scintillation_attenuation_length,aurl,species,atomic_numbers
2,Cl3Mn1Tl1_ICSD_23167,Cl3Mn1Tl1,5,3,221,"5.095861,5.095861,5.095861,90,90,90","0,2.54793,2.54793;2.54793,0,2.54793;2.54793,2....",,"2;0,2,0;0,4,0;0,0,0","Cl,Mn_pv,Tl_d","Cl:PAW_PBE:17Jan2003,Mn_pv:PAW_PBE:07Sep2000,T...",4.58862,31,PAW_PBE,2.6520,4.48790,insulator_indirect,5.00065,1.88379,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Cl3Mn...,"[Cl, Mn, Tl]","[17, 25, 81]"
5,Br6Hg7P4Sn1_ICSD_411860,Br24Hg28P16Sn4,72,4,198,"12.80362,12.80362,12.80362,90,90,90","10.42504,6.18291,3.37381;8.78039,6.62071,9.775...",,"2;0,0,0,2;0,0,0,3.5;0,0,0,0","Br,Hg,P,Sn","Br:PAW_PBE:06Sep2000,Hg:PAW_PBE:06Sep2000,P:PA...",6.72825,600,PAW_PBE,1.6801,3.17777,insulator_direct,0.00000,1.24850,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Br6Hg...,"[Br, Hg, P, Sn]","[35, 80, 15, 50]"
21,Fe1S2_ICSD_633254,Fe4S8,12,2,205,"5.522029,5.522029,5.522029,90,90,90","0,0,0;2.76101,0,2.76101;0,2.76101,2.76101;2.76...",,"2;2,0;4.6,0;0,0","Fe_pv,S","Fe_pv:PAW_PBE:06Sep2000,S:PAW_PBE:17Jan2003",4.73233,80,PAW_PBE,2.0751,3.71023,insulator_indirect,0.00000,2.51425,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Fe1S2...,"[Fe, S]","[26, 16]"
26,Br4Ge1_ICSD_409856,Br32Ge8,40,2,205,"12.10163,12.10163,12.10163,90,90,90","9.16969,9.16969,2.93194;8.98276,2.93194,8.9827...",,,"Br,Ge_h","Br:PAW_PBE:06Sep2000,Ge_h:PAW_PBE:09Apr2002",2.93983,256,PAW_PBE,2.9471,4.88569,insulator_direct,0.00000,4.27508,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Br4Ge...,"[Br, Ge]","[35, 32]"
32,Bi1Cs3Se3_ICSD_85410,Bi4Cs12Se12,28,3,198,"10.5833,10.5833,10.5833,90,90,90","10.26464,10.26464,0.31866;5.61031,0.31866,5.61...",,,"Bi,Cs,Se","Bi:GGA:01Apr2000,Cs:GGA:01Apr2000,Se:GGA:01Apr...",4.73244,104,GGA,2.0976,3.74056,insulator_direct,0.00000,2.07779,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Bi1Cs...,"[Bi, Cs, Se]","[83, 55, 34]"
49,Br3Cs1Sn1_ICSD_4071,Br3Cs1Sn1,5,3,221,"5.922506,5.922506,5.922506,90,90,90","0,2.96125,2.96125;2.96125,0,2.96125;2.96125,2....",,"2;0,0,2;0,0,3.5;0,0,0","Br,Cs_sv,Sn","Br:PAW_PBE:06Sep2000,Cs_sv:PAW_PBE:08Apr2002,S...",3.92723,26,PAW_PBE,0.9881,2.24496,insulator_direct,0.00000,3.00321,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Br3Cs...,"[Br, Cs, Sn]","[35, 55, 50]"
69,As1Br3Ca3_ICSD_426,As1Br3Ca3,7,3,221,"5.962227,5.962227,5.962227,90,90,90","0,0,0;0,2.98111,2.98111;2.98111,0,2.98111;2.98...",,,"As,Br,Ca_sv","As:PAW_PBE:06Sep2000,Br:PAW_PBE:06Sep2000,Ca_s...",3.40712,32,PAW_PBE,1.7512,3.27362,insulator_direct,0.00000,3.60102,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/As1Br...,"[As, Br, Ca]","[33, 35, 20]"
78,Fe1Si1_ICSD_633524,Fe4Si4,8,2,198,"4.422519,4.422519,4.422519,90,90,90","3.82169,3.82169,0.60083;2.81209,0.60083,2.8120...",,"2;2,0;4.6,0;0,0","Fe_pv,Si","Fe_pv:PAW_PBE:06Sep2000,Si:PAW_PBE:05Jan2001",6.44511,48,PAW_PBE,0.8835,2.10396,insulator_indirect,0.00000,1.86782,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Fe1Si...,"[Fe, Si]","[26, 14]"
80,H3K1Mg1_ICSD_159175,H3K1Mg1,5,3,221,"4.006441,4.006441,4.006441,90,90,90","0,2.00322,2.00322;2.00322,0,2.00322;2.00322,2....",,,"H,K_sv,Mg_pv","H:PAW_PBE:15Jun2001,K_sv:PAW_PBE:06Sep2000,Mg_...",1.71521,6,PAW_PBE,2.6174,4.44126,insulator_indirect,0.00000,6.58184,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/H3K1M...,"[H, K, Mg]","[1, 19, 12]"
87,Ru1S2_ICSD_657507,Ru4S8,12,2,205,"5.677962,5.677962,5.677962,90,90,90","0,0,0;2.83898,0,2.83898;0,2.83898,2.83898;2.83...",,"2;2,0;3,0;0,0","Ru_pv,S","Ru_pv:PAW_PBE:06Sep2000,S:PAW_PBE:17Jan2003",5.99399,80,PAW_PBE,1.3567,2.74183,insulator_indirect,0.00000,1.95622,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Ru1S2...,"[Ru, S]","[44, 16]"


### Filter materials with the maximum value of Z in a given window

In [16]:
# ##################################################
# Filter on the heaviest element of each compound:
# keep entries whose largest atomic number lies strictly between the bounds.

Zmax_lw = 32 # Ge
Zmax_hi = 84 # Po

def _zmax_in_window(numbers):
    """Return True when max(numbers) is strictly between Zmax_lw and Zmax_hi."""
    return Zmax_lw < max(numbers) < Zmax_hi

index3 = df2.atomic_numbers.apply(_zmax_in_window)

df3 = df2[index3]

### Select materials with density larger than a given lower bound

In [17]:
# ##################################################
# Filter applied on the density:
# 1) keep entries with density strictly above density_min

density_min = 5.0

index4 = df3.density > density_min

df4 = df3[index4]

# Single-argument print(...) works on Python 2 and 3. The two spaces after the
# colon reproduce the output of the former statement form, which printed the
# string (ending in a space), a separator space, and then the count.
print('Number of entries after appliation of filter:  %s' % df4.shape[0])

# Raise the column-width limit so long cell values (e.g. aurl) are shown in full.
pd.set_option('display.max_colwidth', 2000)

df4[['prototype','density','Egap','aurl']].head(200)

Number of entries after appliation of filter:  1113


Unnamed: 0,prototype,density,Egap,aurl
5,Br6Hg7P4Sn1_ICSD_411860,6.72825,1.6801,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Br6Hg7P4Sn1_ICSD_411860
87,Ru1S2_ICSD_657507,5.99399,1.3567,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Ru1S2_ICSD_657507
124,Ru1S2_ICSD_52374,5.99478,1.3597,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Ru1S2_ICSD_52374
230,Ba1Te1_ICSD_616164,5.67631,0.9123,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Ba1Te1_ICSD_616164
374,Cl1Tl1_ICSD_53852,6.72718,2.3098,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Cl1Tl1_ICSD_53852
442,Cd4I3Sb2_ICSD_80589,5.42233,1.2175,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Cd4I3Sb2_ICSD_80589
472,Ag7As1S6_ICSD_604743,5.57954,0.9185,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Ag7As1S6_ICSD_604743
491,Br1Tl1_ICSD_61532,7.18499,2.0831,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Br1Tl1_ICSD_61532
552,Ru1S2_ICSD_41996,5.99561,1.3581,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Ru1S2_ICSD_41996
664,Br7Hg6Sb5_ICSD_411219,6.64131,0.9831,aflowlib.duke.edu:AFLOWDATA/ICSD_WEB/CUB/Br7Hg6Sb5_ICSD_411219


In [22]:
# Number of compounds left after the filters that produced df4.
nentries = df4.shape[0]

# Single-argument print(...) gives the same output on Python 2 and 3.
print("No. entries found applying the criteria set above: " + str(nentries))

No. entries found applying the criteria set above: 1113


In [25]:
# Write the selected compounds to a CSV file: the aurl and prototype columns
# first, then Egap and density. The DataFrame index is not written.
file_data_set = 'screening_result.csv'
base_labels = ['aurl', 'prototype']
extra_labels = ['Egap', 'density']
labels = list(base_labels)
labels.extend(extra_labels)
df4.loc[:, labels].to_csv(file_data_set, index=False)