In [1]:
from selenium import webdriver
from selenium.webdriver.firefox.firefox_binary import FirefoxBinary
import re
import numpy as np
import scipy as sp
import scipy.io
import os
import pickle
import pandas as pd
from itertools import permutations
import urllib.request
import pymatgen as mg

# Download the international tables

In [101]:
# Start the Firefox session that the scraping cell below drives through `driver`.
#
# Setup notes:
#   1. Download geckodriver from https://github.com/mozilla/geckodriver/releases
#   2. Move it to a known location, and put that location in PATH.
#   3. That alone was not enough, so Selenium is pointed explicitly at the
#      Firefox executable. With that, it works.
# NOTE(review): the executable path below is a machine-specific Windows location;
# adjust it when running on another machine.
binary = FirefoxBinary(r'C:\Program Files\Mozilla Firefox\firefox.exe')
driver = webdriver.Firefox(firefox_binary=binary)

In [203]:
ngroups = 230
# One dict per space group. Index 0 is an unused placeholder so that the list can
# be indexed directly by the 1-based space-group number.
international_tables = [{} for _ in range(ngroups+1)]

# Loop-invariant patterns and labels are built once, outside the loop.
# Raw string: '\(' is a regex escape, not a valid Python string escape.
heading_pattern = re.compile(r'Group (.+?) \(')
# Rows of interest begin with a number (the Wyckoff multiplicity).
row_pattern = re.compile("^[0-9]+")
row_names = ["multiplicity", "letter", "symmetry"]

for igroup in range(1, ngroups+1):

    # Pull up the page corresponding to the group number
    driver.get('http://www.cryst.ehu.es/cgi-bin/cryst/programs/nph-wp-list') # Waits for page to load

    text_box = driver.find_element_by_name('gnum') # Found empirically
    text_box.clear() # Clear in case there's any text still there
    text_box.send_keys('%i' %igroup)
    standard_button = driver.find_element_by_name('standard')
    standard_button.click()

    # Extract the group name: the text between 'Group ' and ' (' in the first <h2>
    head = driver.find_element_by_tag_name('h2').text # Found empirically
    heading_match = heading_pattern.search(head)
    if heading_match is None:
        # Fail with a message that says which page broke instead of an opaque
        # AttributeError on None.
        raise ValueError("Could not parse a group name from heading %r (space group %i)"
                         % (head, igroup))
    hm_name = heading_match.group(1)

    # Second table on the page, found empirically. Deliberately not called `table`:
    # that global name holds the loaded tables that sym_pos() reads later on.
    wyckoff_element = driver.find_elements_by_tag_name('table')[1]
    table_lines = wyckoff_element.text.split('\n')

    # Keep the rows that start with a multiplicity and split each of them into
    # at most 3 strings: multiplicity, letter, and the rest (site symmetry).
    wyckoff_rows = [line for line in table_lines if row_pattern.match(line)]
    split_list = np.array([row.split(" ", 2) for row in wyckoff_rows])

    international_tables[igroup]['name'] = hm_name
    international_tables[igroup]['wyckoff table'] = {name: split_list[:,i] for i, name in enumerate(row_names)}

# Persist the scraped tables in the current working directory so that the
# analysis cells can be run without repeating the scrape.
path = os.getcwd()
file_name = "international_tables.pickle"
with open(os.path.join(path, file_name), 'wb') as f:
    pickle.dump(international_tables, f, pickle.HIGHEST_PROTOCOL)

In [204]:
# Spot-check the scraped Wyckoff table of one entry (space group number 136).
international_tables[136]['wyckoff table']

{'letter': array(['k', 'j', 'i', 'h', 'g', 'f', 'e', 'd', 'c', 'b', 'a'], dtype='<U5'),
 'multiplicity': array(['16', '8', '8', '8', '4', '4', '4', '4', '4', '2', '2'],
       dtype='<U5'),
 'symmetry': array(['1', '..m', 'm..', '2..', 'm.2 m', 'm.2 m', '2.m m', '-4..',
        '2/m..', 'm.m m', 'm.m m'], dtype='<U5')}

In [238]:
# Same entries as `international_tables`, keyed by the group number as a string.
# The placeholder at index 0 is left out.
international_dict = {
    str(group_number): international_tables[group_number]
    for group_number in range(1, len(international_tables))
}

{'name': 'P1',
 'wyckoff table': {'letter': array(['a'], dtype='<U1'),
  'multiplicity': array(['1'], dtype='<U1'),
  'symmetry': array(['1'], dtype='<U1')}}

# Search the international tables for good symmetries

In [2]:
# Point groups of interest, written as Schoenflies symbols and split into two
# tiers ("best" and "good"). They are translated to Hermann-Mauguin symbols
# further down before being matched against the Wyckoff site symmetries.
best_sym = "C3h D2 D2d D3 D3h D4 D6 S4 T".split()
good_sym = "Ci C2h C4h C6h D2h D3d D4h D6h S6 Th".split()

In [3]:
# Reload the scraped tables from disk into `table`, the global that sym_pos() reads.
file_name = "international_tables.pickle"
path = r'.'
pickle_path = os.path.join(path, file_name)
# NOTE: unpickling can execute arbitrary code; only load a pickle file that this
# notebook (or another trusted source) produced.
with open(pickle_path, 'rb') as pickle_file:
    table = pickle.load(pickle_file)

In [4]:
# Load the symbol conversion table. The CSV has no header row, so the columns
# are labelled 0, 1, ...
file_name = "hm_schoenflies.csv"
path = r'.'
csv_path = os.path.join(path, file_name)
conversion_df = pd.read_csv(csv_path, header=None)

In [5]:
# Lookup from Schoenflies symbol (column 0) to Hermann-Mauguin symbol (column 1).
sh_to_hm = {
    schoenflies: hermann_mauguin
    for schoenflies, hermann_mauguin in zip(conversion_df[0], conversion_df[1])
}

In [6]:
# Translate both tiers of Schoenflies symbols to Hermann-Mauguin symbols.
# A symbol missing from the conversion table raises KeyError.
good_sym_hm, best_sym_hm = (
    [sh_to_hm[schoenflies] for schoenflies in tier]
    for tier in (good_sym, best_sym)
)
best_sym_hm

['-6', '222', '-42m', '32', '-62m', '422', '622', '-4', '23']

In [7]:
# Define some helper functions to query the international tables for a list of point groups

def str_remove(string, remove_list):
    """Return `string` with every character listed in `remove_list` deleted.

    Parameters
    ----------
    string : str
        Text to clean.
    remove_list : iterable of str
        Single characters to strip out (each entry is passed to `ord`).
    """

    return string.translate({ord(x): '' for x in remove_list})

def sym_pos(sym_list, group_num, tables=None):
    """Return the Wyckoff positions of a space group whose site symmetry contains
    one of the given point-group symbols.

    Before matching, spaces and dots are removed from each site-symmetry string.
    A position matches when the cleaned string contains, as a substring, any
    permutation of the characters of any symbol in `sym_list`.

    Parameters
    ----------
    sym_list : iterable of str
        Symbols to look for (e.g. '2mm'). May be a list or a numpy array.
    group_num : int
        Key into the tables; the notebook indexes them by space-group number.
    tables : sequence or mapping, optional
        Tables to search, in the same layout as the notebook-level `table`
        (each entry has a 'wyckoff table' dict with 'multiplicity', 'letter'
        and 'symmetry' sequences). Defaults to the global `table`; passing it
        explicitly avoids depending on that global, which other cells rebind.

    Returns
    -------
    list of (str, str)
        One `(multiplicity + letter, original symmetry string)` pair per matching
        position, in table order. Empty when `sym_list` is empty.
    """

    source = table if tables is None else tables
    curr_table = source[group_num]['wyckoff table']

    # All distinct permutations of every symbol. Duplicates (e.g. from 'mmm') and
    # empty strings are dropped: an empty alternative would match everything.
    sym_list_perm = list(dict.fromkeys(
        "".join(arr) for sym in sym_list for arr in permutations(sym) if arr))
    if not sym_list_perm:
        # Nothing to search for. Without this guard the pattern would be empty
        # and every position would be reported as a match.
        return []

    # Escape the symbols so that they are always matched literally.
    regex = re.compile('|'.join(re.escape(perm) for perm in sym_list_perm))

    pts = []
    for mult, letter, symmetry in zip(curr_table['multiplicity'],
                                      curr_table['letter'],
                                      curr_table['symmetry']):
        if regex.search(str_remove(symmetry, [' ', '.'])):
            pts.append((mult + letter, symmetry))

    return pts

In [8]:
ngroups = 230
group_numbers = range(1, ngroups+1)

# Space groups with at least one Wyckoff position matching a "best" symbol.
best_groups = [number for number in group_numbers if sym_pos(best_sym_hm, number)]

# Same search, accepting a match against either the "good" or the "best" symbols.
good_and_best_hm = np.concatenate([good_sym_hm, best_sym_hm])
good_groups = [number for number in group_numbers if sym_pos(good_and_best_hm, number)]

In [9]:
# Display the space-group numbers collected in `best_groups`.
best_groups

[16,
 21,
 22,
 23,
 48,
 49,
 50,
 66,
 67,
 68,
 69,
 70,
 72,
 81,
 82,
 84,
 85,
 86,
 87,
 88,
 89,
 90,
 93,
 94,
 97,
 98,
 111,
 112,
 113,
 114,
 115,
 116,
 117,
 118,
 119,
 120,
 121,
 122,
 124,
 125,
 126,
 128,
 129,
 130,
 131,
 132,
 133,
 134,
 135,
 136,
 137,
 138,
 139,
 140,
 141,
 142,
 149,
 150,
 155,
 162,
 163,
 165,
 167,
 174,
 175,
 176,
 177,
 180,
 181,
 182,
 187,
 188,
 189,
 190,
 191,
 192,
 193,
 194,
 195,
 196,
 197,
 201,
 202,
 203,
 207,
 208,
 209,
 210,
 211,
 212,
 213,
 214,
 215,
 216,
 217,
 218,
 219,
 220,
 222,
 223,
 224,
 225,
 226,
 227,
 228,
 229,
 230]

In [10]:
# Every space-group number from 1 to ngroups that did not make it into best_groups.
best_group_set = set(best_groups)
bad_groups = [number for number in range(1, ngroups+1) if number not in best_group_set]
len(bad_groups)

123

In [11]:
# Spot check: positions of group 137 whose site symmetry matches '2mm' (or a permutation of it).
sym_pos(['2mm'], 137)

[('4d', '2mm .'), ('4c', '2mm .')]

In [12]:
# Number of space groups matching a "good" or "best" symbol.
len(good_groups)

145

In [13]:
# Number of space groups matching a "best" symbol.
len(best_groups)

107

# Make a list of the bad elements

In [14]:
# Download the file to scrape from, and save it for later in case I/it goes offline
url = "http://easyspin.org/easyspin/documentation/isotopetable.html"
# Use the response as a context manager so the connection is closed once the
# body has been read (it was previously left open).
with urllib.request.urlopen(url) as f:
    content = f.read()
# Bytes that are not valid UTF-8 are dropped rather than raising.
content_str = content.decode('utf8', errors='ignore')
file_name = 'isotopetable.html'
# NOTE(review): the save step below is commented out, yet the next cell reads
# 'isotopetable.html' from disk. On a fresh checkout, enable it once to create
# the local copy.
# with open('isotopetable.html', 'w') as f:
#     f.write(content_str)

In [15]:
# Read the locally saved copy of the isotope table, one list entry per line.
file_name = 'isotopetable.html'
with open(file_name, 'r') as html_file:
    content = list(html_file)

In [16]:
# Extract data
# Table rows are the lines that begin with any number of spaces followed by a number.
data_row_pattern = re.compile("^ *[0-9]+")
table_lines = [line for line in content if data_row_pattern.match(line)]
# Whitespace-separated fields of each row.
table_lines_split = [row.split() for row in table_lines]

In [17]:
# Extract header
# Column descriptions are taken from the lines that contain the word "Column".
header_pattern = re.compile(".*Column.*")
header_lines = [line for line in content if header_pattern.match(line)]
# Keep the text after the first ": " on each of those lines, minus trailing whitespace.
header = [description_line.split(": ")[1].rstrip() for description_line in header_lines]

In [18]:
# One row per isotope, with the scraped column descriptions as column labels.
isotope_table = pd.DataFrame(table_lines_split, columns=header)
isotope_table

Unnamed: 0,#protons,#nucleons,"radioactive *, stable -",symbol,name,spin quantum number,nuclear g factor gn,"natural abundance, in percent","electric quadrupole moment, in barn (10^-28 m^2)"
0,1,1,-,H,hydrogen,0.5,+5.58569468,99.9885,0
1,1,2,-,H,hydrogen,1.0,+0.8574382,0.0115,+0.00286
2,1,3,*,H,hydrogen,0.5,+5.95799369,0.0,0
3,2,3,-,He,helium,0.5,-4.25499544,0.000137,0
4,2,4,-,He,helium,0.0,0.0,99.999863,0
5,3,6,-,Li,lithium,1.0,+0.8220473,7.59,-0.000806
6,3,7,-,Li,lithium,1.5,+2.170951,92.41,-0.0400
7,4,9,-,Be,beryllium,1.5,-0.78495,100.0,+0.0529
8,5,10,-,B,boron,3.0,+0.600215,19.9,+0.0845
9,5,11,-,B,boron,1.5,+1.7924326,80.1,+0.04059


In [19]:
# We will use the fact that the list is already sorted by the symbol and just cut it up from there
# In future, might be able to do more robustly if we can somehow use this method on all the columns at once:
# isotope_table.groupby("symbol")['name'].apply(list)

In [20]:
# Split the isotope table into one DataFrame per run of consecutive rows that
# share the same symbol (the table lists each element's isotopes contiguously).
#
# This replaces np.split(isotope_table, ...): splitting a DataFrame through numpy
# goes through DataFrame.swapaxes, which pandas has deprecated. A groupby over
# run ids gives the same pieces, in the same order, with the original row labels.
symbols = isotope_table["symbol"]
is_run_start = symbols != symbols.shift()  # True on the first row of every run

# Positions at which a new run starts (excluding row 0). Kept for compatibility
# with the earlier version of this cell.
split_idx = list(np.flatnonzero(is_run_start.values)[1:])

run_ids = is_run_start.cumsum().values  # 1, 1, 1, 2, 2, 3, ...
split_table = [rows for _, rows in isotope_table.groupby(run_ids, sort=False)]

In [21]:
# Build {symbol: {column label: array of that column's values}} with one entry
# per element. Columns whose values all parse as numbers become float arrays;
# the others are kept as they are.
element_table = {}
# The loop variable is deliberately not called `table`: that global name holds
# the loaded tables that sym_pos() reads, and rebinding it here would break any
# later call to sym_pos().
for element_rows in split_table:
    symbol = element_rows['symbol'].iloc[0]
    columns = element_rows.values.T  # one row per column; computed once per element
    element_table[symbol] = {}
    for j, col_label in enumerate(header):
        try:
            element_table[symbol][col_label] = columns[j].astype('float')
        except ValueError:
            # Non-numeric column (e.g. symbol, name): keep the raw values
            element_table[symbol][col_label] = columns[j]

In [22]:
# Add two per-element summary values to every entry of element_table:
#   'percent spin' - summed natural abundance (in percent) of the isotopes with
#                    nonzero spin quantum number
#   'mean g'       - abundance-weighted sum of |nuclear g factor|
# Takes advantage of the fact that spin is always positive: heaviside(|spin|, 0)
# is 1 for a nonzero spin and 0 for spin 0.
#
# The loop variable is deliberately not called `table`: that global name holds
# the loaded tables that sym_pos() reads.
for symbol, isotopes in element_table.items():
    abundance = isotopes['natural abundance, in percent']
    has_spin = np.heaviside(np.abs(isotopes['spin quantum number']), 0)

    isotopes['percent spin'] = np.sum(has_spin*abundance)
    isotopes['mean g'] = np.sum(np.abs(isotopes['nuclear g factor gn'])*abundance/100)


In [23]:
# Elements for which more than 20 percent of the natural abundance has nonzero spin.
bad_el = [
    symbol
    for symbol, properties in element_table.items()
    if properties['percent spin'] > 20
]

# Search Materials Project for materials with good space groups/elements

In [24]:
# The Materials Project API key is a credential and must not live in the notebook.
# Set it in the PMG_MAPI_KEY environment variable (keys are issued at
# https://materialsproject.org/open in the 'API keys' section). When the variable
# is unset, None is passed and MPRester falls back to pymatgen's own settings.
# NOTE: a key was previously hard-coded in this cell; it should be regenerated.
api_key = os.environ.get('PMG_MAPI_KEY')

with mg.MPRester(api_key) as m:
    mat_data = m.query(
        criteria={
            "spacegroup.number": {"$in": best_groups},  # only the "best" space groups
            "elements": {"$nin": bad_el},               # exclude the high-spin elements
            "band_gap": {"$gt": 1},
            "nelements": {"$lte": 2},
            "e_above_hull": {"$lte": 0.2} # Apparently lower is more stable. Diamond is 0.135
        },
        properties=["pretty_formula", "spacegroup.number", "e_above_hull", "exp", "doi", "spacegroup.symbol"]
    )

print(len(mat_data))

234


In [25]:
# Show every hit, sorted by formula, with pandas' row/column truncation lifted
# for this one display only.
materials_by_formula = pd.DataFrame(mat_data).sort_values("pretty_formula")
with pd.option_context('display.max_rows', None, 'display.max_columns', None):
    display(materials_by_formula)

Unnamed: 0,doi,e_above_hull,exp,pretty_formula,spacegroup.number,spacegroup.symbol
0,10.17188/1315564,0.036174,{'tags': []},Ac2S3,167,R-3c
1,10.17188/1206386,0.0,{'tags': []},Ac2S3,122,I-42d
2,10.17188/1274272,0.000431,"{'tags': ['Argon - HP', 'High pressure experim...",Ar,194,P6_3/mmc
3,10.17188/1199275,0.0,{'tags': ['Argon']},Ar,225,Fm-3m
11,10.17188/1192460,0.159781,"{'tags': ['Barium carbide (1/2)', 'High pressu...",BaC2,139,I4/mmm
8,10.17188/1288490,0.13165,{'tags': ['High pressure experimental phase']},BaO,129,P4/nmm
9,10.17188/1325047,0.025751,{'tags': ['High pressure experimental phase']},BaO,194,P6_3/mmc
10,10.17188/1189576,0.0,"{'tags': ['Calcium oxide', 'Barium oxide (1/1)...",BaO,225,Fm-3m
4,10.17188/1187544,0.0,"{'tags': ['Barium peroxide', 'High pressure ex...",BaO2,139,I4/mmm
6,10.17188/1190987,0.0,"{'tags': ['Barium sulfide', 'Barium sulfide (1...",BaS,225,Fm-3m
