In [1]:
import requests
from bs4 import BeautifulSoup
import json
import re

In [2]:
# Download the Quantum ESPRESSO pseudopotential search-results page.
# Takes about 20 seconds. The (connect, read) timeout keeps a stalled server
# from hanging the kernel forever, and raise_for_status() stops the notebook
# here on an HTTP error instead of letting later cells parse an error page.
response = requests.get(
    'http://www.quantum-espresso.org/pseudo-search-results/',
    timeout=(10, 120),
)
response.raise_for_status()

In [3]:
# Parse the downloaded page body into a navigable tree with the html5lib parser.
soup = BeautifulSoup(markup=response.text, features="html5lib")

In [8]:
# First-column cells of the results table; each one is read below for an <a>
# link and a <pre> description. The first match is skipped
# (presumably a header row -- TODO confirm against the live page).
pseudopotentials = soup.select('table.tablewithstyle tbody tr > td:nth-of-type(1)')[1:]

# Maps the "Functional type:" text found in a description to a short label.
# NOTE(review): 'LDA exch-coor' looks like a typo for 'exch-corr'; it is left
# unchanged because the label is written to the JSON output and consumers may
# match on it.
functionals = {
    'BP' : 'BP',
    'Becke-Lee-Yang-Parr (BLYP) exch-corr ': 'BLYP',
    'PBESOL': 'PBESOL',
    'Perdew-Burke-Ernzerhof (PBE) exch-corr': 'PBE',
    'Perdew-Wang 91 gradient-corrected functional ': 'PW91',
    'Perdew-Wang LDA': 'LDA',
    'Perdew-Zunger (LDA) exch-corr': 'LDA exch-coor',
    'Tao-Perdew-Staroverov-Scuseria (TPSS) meta-GGA': 'meta-GGA'
}

# Same mapping keyed on whitespace-stripped names, so the lookup does not
# depend on the exact trailing spaces present in the scraped text.
functional_labels = {name.strip(): label for name, label in functionals.items()}

# Raw strings: '\w' in a plain string literal is an invalid escape sequence.
# Defined once here instead of on every loop iteration.
pp_type_regex = r'Pseudopotential type: (\w+)'
functional_regex = r'Functional type: (.*)'

# One dict per pseudopotential, in page order.
pps = []
for i, pseudopotential in enumerate(pseudopotentials):
    title = pseudopotential.a.text
    # Element symbol is the part of the file name before the first dot.
    # split() keeps the whole title when there is no dot, whereas slicing with
    # find() == -1 would silently drop the last character.
    symbol = title.split('.', 1)[0]
    description = pseudopotential.pre.text

    pp_type_match = re.search(pp_type_regex, description)
    functional_match = re.search(functional_regex, description)
    if pp_type_match is None or functional_match is None:
        # Fail with the offending entry named, rather than with an
        # AttributeError on None.
        raise ValueError(
            'Entry {} ({!r}): description lacks a pseudopotential type '
            'or functional type'.format(i, title)
        )

    pp_type = pp_type_match.group(1)
    functional_name = functional_match.group(1).strip()
    if functional_name not in functional_labels:
        raise KeyError(
            'Entry {} ({!r}): unknown functional {!r}'.format(i, title, functional_name)
        )
    functional = functional_labels[functional_name]

    pps.append({
        'index': i,
        'title': title,
        'symbol': symbol,
        'pp_type': pp_type,
        'link': pseudopotential.a.attrs['href'],
        # 'name' duplicates 'title'; both keys are kept in the output.
        'name': title,
        'functional': functional,
        'description': description,
        # Verified unless the description carries the "unverified" marker.
        'verified': 'Classification unverified' not in description
    })

In [9]:
# Write pseudo potentials in json format.
# The `with` block closes the file when the dump finishes, so the data is
# flushed to disk and no file handle is left open in the kernel.
filename = '../scripts/cli/espresso/data/pseudopotentials.json'
with open(filename, 'w') as output_file:
    json.dump(pps, output_file)