In [35]:
import os
from tabulate import tabulate
from bs4 import BeautifulSoup 

In [69]:
def parse_and_extract(html_file, words_per_line=15):
    """Parse an HTML table file and extract AMBER parameters.

    Every ``<tr>`` in the document that holds at least two ``<td>`` cells
    contributes one ``[parameter, description]`` pair: the text of the first
    cell is the parameter name and the text of the second is its description.
    Rows with fewer than two ``<td>`` cells (for example a header row built
    from ``<th>`` cells) are skipped instead of raising ``IndexError``.

    Parameters
    ----------
    html_file : str
        Path to the HTML file containing the parameter table.
    words_per_line : int, optional
        Maximum number of words placed on each line of the wrapped
        description (default 15).

    Returns
    -------
    list
        List of ``[parameter, description]`` lists. The first entry is the
        header row ``["parameter", "description"]``; descriptions are
        re-wrapped with ``"\\n"`` between chunks of ``words_per_line`` words.

    Raises
    ------
    ValueError
        If ``words_per_line`` is smaller than 1.

    """
    if words_per_line < 1:
        raise ValueError("words_per_line must be a positive integer")

    # Context manager so the file handle is released even if reading fails.
    with open(html_file) as html_handle:
        contents = html_handle.read()

    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and warns), so results can differ
    # between machines.
    soup = BeautifulSoup(contents, "html.parser")

    amber_params = [["parameter", "description"]]
    for row in soup.find_all("tr"):
        table_data = row.find_all("td")
        if len(table_data) < 2:
            # Not a parameter/description row (e.g. <th>-only header row).
            continue

        param = table_data[0].text.replace("\n", "")
        # split() already treats newlines as separators; deleting them first
        # would glue together words that are separated only by a line break.
        words = table_data[1].text.split()
        # Re-wrap the description so no line holds more than words_per_line
        # words, keeping table output readable.
        description = "\n".join(
            " ".join(words[start:start + words_per_line])
            for start in range(0, len(words), words_per_line)
        )
        amber_params.append([param, description])
    return amber_params
        
    

In [74]:
# Parse the saved HTML parameter table into [parameter, description] rows.
html_path = "./amber18_params.html"
results = parse_and_extract(html_path)

# Skip the header row and key each description by its parameter name.
dit = {param: description for param, description in results[1:]}
dit

{'barostat': 'Specifies the barostat for pressure control. = 1: Berendsen [Default] = 2: Monte Carlo',
 'cut': 'Specifies the nonbonded cutoff (in Å). Any value can be specified. = 9999.0: Effectively an\ninfinite cutoff. [Default when igb > 0] = 8.0: A “good value.” [Default when igb\n== 0]',
 'dt': 'Specifies the time step in picoseconds. = 0.001: Recommended maximum for runs without SHAKE (equivalent\nto 1 fs). [Default] = 0.002: Recommended maximum for runs with SHAKE (equivalent to 2\nfs).',
 'gamma_ln': 'Specifies the collision frequency, γ, in ps-1 when using ntt = 3. Also specifies constants\nwhen ntt = 9 and ntt = 10 = 0: [Default]',
 'ibelly': 'Specifies belly type dynamics. = 1: Some atoms in the system are allowed to move,\nand the rest will be frozen. The moving atoms are specified through bellymask. Note: ibelly\n≠ 0 is not supported by GPUs.',
 'ig': 'The random seed. MD starting velocities are dependent on the random number. Setting a specific\nrandom seed can be done t

In [71]:
# Render the rows as a boxed table, using the first row as column headers.
table_text = tabulate(results, headers="firstrow", tablefmt="fancy_grid")
print(table_text)

╒═══════════════╤════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╕
│ parameter     │ description                                                                                                                        │
╞═══════════════╪════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════════╡
│ barostat      │ Specifies the barostat for pressure control. = 1: Berendsen [Default] = 2: Monte Carlo                                             │
├───────────────┼────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┤
│ cut           │ Specifies the nonbonded cutoff (in Å). Any value can be specified. = 9999.0: Effectively an                                        │
│               │ infinite cutoff. [Default when igb > 0] = 8.0: A “good value.” [Default when

In [76]:
# Scratch check: looping over a one-element list yields the element itself,
# so this prints "the" exactly once.
ls = ["the"]
for i in ls:
    print(i)

the


In [77]:
from collections import defaultdict

In [81]:
# First column of every row in `results`; the header row is included.
ls = [x for x, _ in results]

In [82]:
# Display the collected first-column values (the header label comes first).
ls

['parameter',
 'barostat',
 'cut',
 'dt',
 'gamma_ln',
 'ibelly',
 'ig',
 'imin',
 'ioutfm',
 'irest',
 'iwrap',
 'maxcyc',
 'ncyc',
 'nmropt',
 'nsnb',
 'nstlim',
 'ntb',
 'ntc',
 'ntf',
 'ntp',
 'ntpr',
 'ntr',
 'ntt',
 'ntwv',
 'ntwx',
 'ntx',
 'pres0',
 'restraint_wt',
 'restraintmask',
 't',
 'taup',
 'tautp',
 'temp0',
 'tempi',
 'vlimit']