In [40]:
import numpy as np
import pandas as pd
from bs4 import BeautifulSoup
import requests
import sys

In [None]:
# Collect form data from CLI
reference = "Allen08"
abundances = "Allen2008_Solar"
density = "1"
magnetic = "1"

# Form data
form_data = {
    "reference": reference,
    "abundances": abundances,
    "density": density,
    "magnetic": magnetic,
    "submitbutton": "print table"
}

# Send a POST request to url with form_data
url = "http://3mdb.astro.unam.mx:3686/emisstable"
response = requests.post(url, data=form_data)

# Create list to store rows
emission_rows = []

# Check if the request was successful
if response.status_code == 200:
    # Parse the HTML content with BeautifulSoup
    soup = BeautifulSoup(response.text, "html.parser")
    
    # Find the table with the specified class
    table = soup.find("table", class_="table table-hover")
    
    if table:
        # Extract all rows from the table
        rows = table.find_all("tr")
        for i, row in enumerate(rows):

            # Get all cells in the current row
            cells = row.find_all(["td", "th"])
            # Extract text from each cell and strip extra whitespace
            cell_data = [cell.get_text(strip=True) for cell in cells]

            # Extract first row as header
            if i == 0:
                header = cell_data
            # Otherwise store row data as lists
            else:
                emission_label = [cell_data[0]]
                emission_data = list(np.asarray(cell_data[1:], dtype=float))  # Convert str to float
                emission_row = emission_label + emission_data
                emission_rows.append(emission_row)  # Add to master list
    else:
        print("Table with class 'table table-hover' not found on the page.")
else:
    print(f"Failed to retrieve the page. Status code: {response.status_code}")

In [72]:
df = pd.DataFrame(emission_rows, columns=header)
df.head()

Unnamed: 0,Emission lines,100,125,150,175,200,225,250,275,300,...,775,800,825,850,875,900,925,950,975,1000
0,C III λ977,20.172,10.78,4.766,4.647,4.137,3.494,3.022,2.666,2.379,...,0.665,0.638,0.612,0.587,0.563,0.539,0.517,0.495,0.474,0.454
1,N III λ990,1.103,1.425,0.749,0.504,0.462,0.392,0.341,0.302,0.269,...,0.068,0.065,0.062,0.059,0.056,0.054,0.051,0.049,0.046,0.044
2,"O VI λλ1032, 1037",0.0,0.162,6.063,33.328,36.73,33.176,30.63,28.256,26.267,...,9.54,9.184,8.839,8.505,8.18,7.864,7.558,7.261,6.979,6.703
3,Lyα λ1215,50.712,31.92,27.234,26.874,26.623,26.343,26.206,26.23,26.464,...,32.463,32.726,33.027,33.367,33.744,34.155,34.601,35.064,35.537,36.001
4,N V λ1239,0.045,0.959,2.483,1.423,1.441,1.271,1.158,1.064,0.985,...,0.314,0.302,0.291,0.28,0.27,0.259,0.249,0.24,0.23,0.221


In [73]:
filename = 'test.csv'

df.to_csv(filename, index=False)

In [74]:
df = pd.read_csv(filename)
df.head()

Unnamed: 0,Emission lines,100,125,150,175,200,225,250,275,300,...,775,800,825,850,875,900,925,950,975,1000
0,C III λ977,20.172,10.78,4.766,4.647,4.137,3.494,3.022,2.666,2.379,...,0.665,0.638,0.612,0.587,0.563,0.539,0.517,0.495,0.474,0.454
1,N III λ990,1.103,1.425,0.749,0.504,0.462,0.392,0.341,0.302,0.269,...,0.068,0.065,0.062,0.059,0.056,0.054,0.051,0.049,0.046,0.044
2,"O VI λλ1032, 1037",0.0,0.162,6.063,33.328,36.73,33.176,30.63,28.256,26.267,...,9.54,9.184,8.839,8.505,8.18,7.864,7.558,7.261,6.979,6.703
3,Lyα λ1215,50.712,31.92,27.234,26.874,26.623,26.343,26.206,26.23,26.464,...,32.463,32.726,33.027,33.367,33.744,34.155,34.601,35.064,35.537,36.001
4,N V λ1239,0.045,0.959,2.483,1.423,1.441,1.271,1.158,1.064,0.985,...,0.314,0.302,0.291,0.28,0.27,0.259,0.249,0.24,0.23,0.221


In [90]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

# URL for the form page
url = "http://3mdb.astro.unam.mx:3686/emisstable"

# Function to get options from a select element
def get_select_options(soup, select_id):
    options = {}
    select_element = soup.find("select", id=select_id)
    if select_element:
        for option in select_element.find_all("option"):
            value = option.get("value")
            text = option.get_text(strip=True)
            if value:  # Skip empty values
                options[value] = text
    return options

# Function to submit form and return the response soup
def submit_form(form_data):
    response = requests.post(url, data=form_data)
    if response.status_code == 200:
        return BeautifulSoup(response.text, "html.parser")
    else:
        print(f"Form submission failed. Status code: {response.status_code}")
        return None

# Function to extract table data and convert to DataFrame
def extract_table_data(soup):
    table = soup.find("table", class_="table table-hover")

    if not table:
        return None
    
    else:
        # Extract all rows from the table
        emission_rows = []
        rows = table.find_all("tr")
        for i, row in enumerate(rows):

            # Get all cells in the current row
            cells = row.find_all(["td", "th"])
            # Extract text from each cell and strip extra whitespace
            cell_data = [cell.get_text(strip=True) for cell in cells]

            # Extract first row as header
            if i == 0:
                header = cell_data
            # Otherwise store row data as lists
            else:
                emission_label = [cell_data[0]]
                emission_data = list(np.asarray(cell_data[1:], dtype=float))  # Convert str to float
                emission_row = emission_label + emission_data
                emission_rows.append(emission_row)  # Add to master list
        
        # Create DataFrame
        return pd.DataFrame(emission_rows, columns=header)

# Start by getting the initial page
initial_response = requests.get(url)
if initial_response.status_code == 200:
    current_soup = BeautifulSoup(initial_response.text, "html.parser")

    # Submit form with selected reference
    ref_value = "Allen08"
    form_data = {
        "reference": ref_value,
        "submitbutton": "submit"
    }
    ref_soup = submit_form(form_data)
        
    # Get abundances options based on selected reference
    abundances_options = get_select_options(ref_soup, "abundances")
    print(f"  Abundances options for {ref_value}:", abundances_options)
        
    # Iterate through each abundances option
    for abund_value, abund_text in abundances_options.items():
        print(f"  Testing abundances: {abund_value} ({abund_text})")
        
        # Submit form with selected reference and abundances
        form_data = {
            "reference": ref_value,
            "abundances": abund_value,
            "submitbutton": "submit"
        }
        
        abund_soup = submit_form(form_data)
        if not abund_soup:
            continue
        
        # Get density options based on selected reference and abundances
        density_options = get_select_options(abund_soup, "density")
        print(f"    Density options for {ref_value}, {abund_value}:", density_options)
        
        # Iterate through each density option
        for dens_value, dens_text in density_options.items():
            print(f"    Testing density: {dens_value}")
            
            # Submit form with selected reference, abundances, and density
            form_data = {
                "reference": ref_value,
                "abundances": abund_value,
                "density": dens_value,
                "submitbutton": "submit"
            }
            
            dens_soup = submit_form(form_data)
            if not dens_soup:
                continue
            
            # Get magnetic options based on previous selections
            magnetic_options = get_select_options(dens_soup, "magnetic")
            print(f"      Magnetic options for {ref_value}, {abund_value}, {dens_value}:", magnetic_options)
            
            # Iterate through each magnetic option
            for mag_value, mag_text in magnetic_options.items():
                print(f"      Testing magnetic: {mag_value}")
                
                # Submit form with all selected values to get the table
                form_data = {
                    "reference": ref_value,
                    "abundances": abund_value,
                    "density": dens_value,
                    "magnetic": mag_value,
                    "submitbutton": "print table"  # Now we want the table
                }
                
                final_soup = submit_form(form_data)
                if not final_soup:
                    continue
                
                # Extract table data into DataFrame
                df = extract_table_data(final_soup)
                
                # Save DataFrame in specified directory
                if df is not None:
                    # Generate filename and filepath for this combination
                    filename = f"{abund_value}_{dens_value}_{mag_value}.csv"
                    filepath = f"3mdbs_data/{ref_value}/{abund_value}/" + filename
                    df.to_csv(filepath, index=False)
                else:
                    print(f"      No table found for this combination")
                
                # Add a short delay to avoid overloading the server
                time.sleep(1)
else:
    print(f"Failed to retrieve the initial page. Status code: {initial_response.status_code}")

  Abundances options for Allen08: {'Allen2008_Dopita2005': 'Allen2008_Dopita2005', 'Allen2008_LMC': 'Allen2008_LMC', 'Allen2008_SMC': 'Allen2008_SMC', 'Allen2008_Solar': 'Allen2008_Solar', 'Allen2008_TwiceSolar': 'Allen2008_TwiceSolar'}
  Testing abundances: Allen2008_Dopita2005 (Allen2008_Dopita2005)
    Density options for Allen08, Allen2008_Dopita2005: {'1': '1'}
    Testing density: 1
      Magnetic options for Allen08, Allen2008_Dopita2005, 1: {'0.0001': '0.0001', '0.5': '0.5', '1': '1', '2': '2', '3.23': '3.23', '4': '4', '5': '5', '10': '10'}
      Testing magnetic: 0.0001
      Testing magnetic: 0.5
      Testing magnetic: 1
      Testing magnetic: 2
      Testing magnetic: 3.23
      Testing magnetic: 4


KeyboardInterrupt: 

In [95]:
df = pd.read_csv("./3mdbs_data/Allen08/Allen2008_Dopita2005/Allen2008_Dopita2005_1_1.csv")
df.head()

Unnamed: 0,Emission lines,100,125,150,175,200,225,250,275,300,...,775,800,825,850,875,900,925,950,975,1000
0,C III λ977,8.616,5.398,4.002,3.358,2.67,2.121,1.775,1.541,1.363,...,0.32,0.302,0.286,0.271,0.258,0.245,0.233,0.222,0.212,0.203
1,N III λ990,0.77,1.175,0.649,0.545,0.432,0.342,0.285,0.247,0.218,...,0.047,0.044,0.042,0.039,0.037,0.035,0.033,0.031,0.03,0.028
2,"O VI λλ1032, 1037",0.0,0.136,5.636,20.174,18.118,16.18,15.466,15.02,14.498,...,5.077,4.824,4.592,4.374,4.173,3.984,3.81,3.646,3.493,3.35
3,Lyα λ1215,53.919,36.508,32.066,32.355,32.986,32.904,33.289,34.256,35.461,...,53.055,53.739,54.369,55.062,55.737,56.323,56.902,57.54,58.146,58.544
4,N V λ1239,0.02,0.738,1.298,0.996,0.827,0.697,0.633,0.601,0.579,...,0.169,0.16,0.152,0.145,0.138,0.132,0.126,0.121,0.116,0.111


In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import pandas as pd
import time

# Configure the webdriver (adjust the path to your chromedriver)
service = Service('/path/to/chromedriver')
options = webdriver.ChromeOptions()
options.add_argument("--start-maximized")
driver = driver = webdriver.Safari()

# Define the URL
url = "http://3mdb.astro.unam.mx:3686/emisstable"
driver.get(url)

# Wait until the page loads (you may need to adjust the condition)
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.TAG_NAME, "select")))

# Select the reference from the first form (assuming it's a dropdown)
select_reference = Select(driver.find_element(By.ID, "reference"))
select_reference.select_by_value("Allen08")
time.sleep(5)  # Allow any dynamic changes to take place

# Now get the abundances options (again assuming it's a select element)
select_abundances = Select(driver.find_element(By.ID, "abundances"))

# For example, select one of the options:
select_abundances.select_by_value("Allen2008_Solar")
time.sleep(2)

# Next, select the density option
select_density = Select(driver.find_element(By.ID, "density"))
select_density.select_by_value("1")
time.sleep(2)

# Then select the magnetic option (example value used here)
select_magnetic = Select(driver.find_element(By.ID, "magnetic"))
select_magnetic.select_by_value("0.0001")
time.sleep(2)

# Click the submit button to get the table (adjust the element locator as needed)
submit_button = driver.find_element(By.XPATH, "//input[@type='submit' or @value='print table']")
submit_button.click()

# Wait for the table to load
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CSS_SELECTOR, "table.table")))
time.sleep(2)

# Get the page source and parse with BeautifulSoup
html = driver.page_source
soup = BeautifulSoup(html, "html.parser")

# Function to extract table data into a DataFrame
def extract_table_data(soup):
    table = soup.find("table", class_="table table-hover")
    if not table:
        return None
    rows = table.find_all("tr")
    header = [th.get_text(strip=True) for th in rows[0].find_all(["th", "td"])]
    data = []
    for row in rows[1:]:
        cells = [cell.get_text(strip=True) for cell in row.find_all("td")]
        if cells:
            data.append(cells)
    return pd.DataFrame(data, columns=header)

df = extract_table_data(soup)
if df is not None:
    print(df)
else:
    print("No table found.")

# Always close the browser when done
driver.quit()

NoSuchElementException: Message: Cannot locate option with value: Allen2008_Solar; For documentation on this error, please visit: https://www.selenium.dev/documentation/webdriver/troubleshooting/errors#no-such-element-exception
