# Setup

## Setup specific to Jupyter notebooks

In [1]:
from pathlib import Path
import sys

# Put the parent of the current working directory on the import search path
# (only if it is not already listed) so modules located there can be imported.
notebook_directory_parent = Path.cwd().resolve().parent
parent_entry = str(notebook_directory_parent)
if sys.path.count(parent_entry) == 0:
    sys.path.append(parent_entry)

## Setup to use Python libraries/modules

In [2]:
from T1000.utilities.configure_paths import (DataPaths, NISTChemistryWebbookPaths)
from T1000.NistChemBook import species_list

from bs4 import BeautifulSoup

import csv
import requests

# Species List

Download the species list from the NIST Chemistry WebBook:

https://webbook.nist.gov/chemistry/download/

In [3]:
# Load the species list through the project's ReadAndClean helper.
# NOTE(review): presumably this reads the downloaded species file; confirm
# against T1000.NistChemBook.species_list.
csv_as_list = species_list.ReadAndClean.to_list()

In [6]:
# Preview the first five rows.
# NOTE(review): the saved output shows many more than five rows, so it looks
# stale; re-run this cell to refresh it.
csv_as_list[:5]

[['Electron', 'e-', 'N/A'],
 ['Iron(2) oxide anion', 'FeO-', 'N/A'],
 ['AsF3..Cl anion', 'AsClF3-', 'N/A'],
 ['AgH2-', 'H2Ag-', 'N/A'],
 ['HAg(H2)', 'H3Ag', 'N/A'],
 ['AgNO+', 'AgNO+', 'N/A'],
 ['AgNO', 'AgNO', 'N/A'],
 ['AgNO-', 'AgNO-', 'N/A'],
 ['AgOO-', 'AgO2-', 'N/A'],
 ['AgAgH', 'HAg2', 'N/A'],
 ['AgAgH-', 'HAg2-', 'N/A'],
 ['AgOAg', 'Ag2O', 'N/A'],
 ['Ag4', 'Ag4', 'N/A'],
 ['Ag5', 'Ag5', 'N/A'],
 ['Ag8', 'Ag8', 'N/A'],
 ['HAlClBr', 'HAlBrCl', 'N/A'],
 ['AlCl2Br', 'AlBrCl2', 'N/A'],
 ['HAlFBr', 'HAlBrF', 'N/A'],
 ['AlF2Br', 'AlBrF2', 'N/A'],
 ['Br(cyc-AlO2)', 'AlBrO2', 'N/A'],
 ['BrAlO2', 'AlBrO2', 'N/A'],
 ['BrAl(O2)2', 'AlBrO4', 'N/A'],
 ['AlClBr2', 'AlBr2Cl', 'N/A'],
 ['AlFBr2', 'AlBr2F', 'N/A'],
 ['HAlBr2', 'HAlBr2', 'N/A'],
 ['HAlFCl', 'HAlClF', 'N/A'],
 ['AlF2Cl', 'AlClF2', 'N/A'],
 ['AlH2Cl', 'H2AlCl', 'N/A'],
 ['Cl(cyc-AlO2)', 'AlClO2', 'N/A'],
 ['ClAlO2', 'AlClO2', 'N/A'],
 ['ClAl(O2)2', 'AlClO4', 'N/A'],
 ['AlFCl2', 'AlCl2F', 'N/A'],
 ['F(cyc-AlO2)', 'AlFO2', 'N/A'],
 [

### Scratch work

In [3]:
# Show the raw-data directory reported by DataPaths.
DataPaths().raw()

PosixPath('/home/topolo/PropD/Propulsion/T1000/data/raw')

In [4]:
# List every entry found recursively ('**/*') under the raw-data directory.
list(DataPaths().raw().glob('**/*'))

[PosixPath('/home/topolo/PropD/Propulsion/T1000/data/raw/species.zip'),
 PosixPath('/home/topolo/PropD/Propulsion/T1000/data/raw/species.txt')]

In [9]:
# Locate the species text file under the raw-data directory.
# The match is made on the file name and suffix rather than on substrings of
# the whole path, so a directory name containing "species" or ".txt", or a
# file such as "species.txt.bak", cannot be picked up by accident.
species_txt_path = next(
    (
        path
        for path in DataPaths().raw().glob('**/*')
        if path.is_file() and path.suffix == ".txt" and "species" in path.name
    ),
    None,
)
if species_txt_path is None:
    # Fail with a clear message instead of a bare StopIteration.
    raise FileNotFoundError(
        "No species .txt file found under {}".format(DataPaths().raw()))

In [16]:
# Read the tab-delimited species file into a list of rows.
# Opening and reading happen in a single cell inside a `with` block so the
# file handle is always closed, even if reading fails or a later cell is
# never run. newline='' is what the csv module expects for files it reads.
with open(species_txt_path, 'r', newline='') as f:
    csv_reader = csv.reader(f, delimiter="\t")
    csv_as_list = list(csv_reader)

In [22]:
# Print the field count of every row that does not have exactly three fields.
unexpected_field_counts = (len(row) for row in csv_as_list if len(row) != 3)
for field_count in unexpected_field_counts:
    print(field_count)

## Scrape Species List (Download) page

cf. https://www.digitalocean.com/community/tutorials/how-to-scrape-web-pages-with-beautiful-soup-and-python-3

In [3]:
# Get the species-list column names from the project's ScrapeWebpage helper.
species_list_columns = species_list.ScrapeWebpage.species_list_columns()

In [4]:
# Display the column names.
species_list_columns

['Species name', 'Species formula', 'CAS registry number (if known)']

In [5]:
# Build the records through the project's ReadAndClean helper, passing the
# column names. Per the saved output of the preview cell, each record is an
# OrderedDict keyed by column name with None for the CAS number in the rows
# shown.
# NOTE(review): presumably 'N/A' entries are mapped to None; confirm in
# species_list.ReadAndClean.
ready_data = species_list.ReadAndClean.to_database_ready_data(species_list_columns)

In [7]:
# Preview the first five records.
ready_data[:5]

[OrderedDict([('Species name', 'Electron'),
              ('Species formula', 'e-'),
              ('CAS registry number (if known)', None)]),
 OrderedDict([('Species name', 'Iron(2) oxide anion'),
              ('Species formula', 'FeO-'),
              ('CAS registry number (if known)', None)]),
 OrderedDict([('Species name', 'AsF3..Cl anion'),
              ('Species formula', 'AsClF3-'),
              ('CAS registry number (if known)', None)]),
 OrderedDict([('Species name', 'AgH2-'),
              ('Species formula', 'H2Ag-'),
              ('CAS registry number (if known)', None)]),
 OrderedDict([('Species name', 'HAg(H2)'),
              ('Species formula', 'H3Ag'),
              ('CAS registry number (if known)', None)])]

### Scratch work

In [3]:
# Show the species-list URL provided by NISTChemistryWebbookPaths.
NISTChemistryWebbookPaths.species_list()

'https://webbook.nist.gov/chemistry/download/'

In [4]:
# Fetch the species-list page.
# A timeout is set because requests otherwise waits indefinitely for a
# response. raise_for_status() surfaces an HTTP error here instead of letting
# an error page be handed to the parser in a later cell.
page = requests.get(NISTChemistryWebbookPaths.species_list(), timeout=30)
page.raise_for_status()

In [5]:
# Parse the fetched HTML using Python's built-in html.parser backend.
page_html = page.text
soup = BeautifulSoup(page_html, 'html.parser')

In [13]:
# Walk down to the first <li> of the first <ul> inside the element with
# id="main" and show its text.
main_element = soup.find(id="main")
first_unordered_list = main_element.find('ul')
first_list_item = first_unordered_list.find_all('li')[0]
first_list_item.text

'Species name'