In [2]:
from selenium import webdriver
from bs4 import BeautifulSoup
import altair as alt
import vl_convert as vlc
import time
import json

In [7]:
# Setup Selenium WebDriver (example with Chrome)
driver = webdriver.Chrome()

# Navigate to the target website
website_url = "https://vega.github.io/vega-lite/examples/"
driver.get(website_url)
time.sleep(5)  # Wait for the page to fully load

# Use BeautifulSoup to parse the page source
soup = BeautifulSoup(driver.page_source, 'html.parser')
# Find all 'a' elements with the class 'imagegroup'
elements = soup.find_all('a', class_='imagegroup')

# Dictionary to store titles and hrefs
vis_examples = {}

vis = [
    'Simple Bar Chart',
    'Stacked Bar Chart',
    'Diverging Stacked Bar Chart (with Neutral Parts)',
    'Histogram (from Binned Data)',
    '2D Histogram Heatmap',
    'Colored Scatterplot',
    'Bubble Plot (Natural Disasters)',
    'Multi Series Line Chart',
    'Slope Graph',
    'A comet chart showing changes between between two states',
    'Area Chart',
    'Stacked Area Chart',
    'Annual Weather Heatmap',
    'Lasagna Plot (Dense Time-Series Heatmap)',
    'Mosaic Chart with Labels',
    'Radial Plot',
    'Waterfall Chart of Monthly Profit and Loss',
    'Parallel Coordinate Plot',
    'Scatterplot with Mean and Standard Deviation Overlay',
    'Box Plot with Pre-Calculated Summaries',
    'Ranged Dot Plot',
    'Horizon Graph',
    'Horizontally Repeated Charts',
    'Choropleth of Unemployment Rate per County',
    'One Dot per Zipcode in the U.S.'
]

for element in elements:
    title_span = element.find('span', class_='image-title')
    if title_span:
        title = title_span.text.strip()
        if title not in vis:
            continue
        href = element['href']
        vis_examples[title] = {'url': href}


# Iterate through the titles and hrefs, navigating to each page
for title, values in vis_examples.items():
    full_url = "https://vega.github.io" + values['url']  # Corrected to use the absolute path
    driver.get(full_url)
    time.sleep(5)  # Wait for the page to load
    
    # Re-parse the page with BeautifulSoup
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    code_element = soup.find('code', {'class': 'language-json', 'data-highlighted': 'yes'})
    
    # If a code element is found, append the JSON data
    if code_element:
        json_text = ''.join(code_element.stripped_strings)
        vis_examples[title]['spec'] = json.loads(json_text)

# Close the WebDriver
driver.quit()



In [14]:
for title, values in vis_examples.items():
    # Create the chart from the Vega-Lite specification
    
    png_filename = f"output/{title.lower().replace(' ', '_')}.png"
    try:
        png_data = vlc.vegalite_to_png(vl_spec=values['spec']) # , scale=2)
        with open(png_filename, "wb") as f:
            f.write(png_data)
    except:
        print(f'{png_filename} was not created')
        

    svg_filename = f"output/{title.lower().replace(' ', '_')}.svg"
    try:
        svg_str = vlc.vegalite_to_svg(vl_spec=values['spec']) # , scale=2)
        with open(svg_filename, "wt") as f:
            f.write(svg_str)
    except:
        print(f'{svg_filename} was not created')



output/diverging_stacked_bar_chart_(with_neutral_parts).svg was not created
output/a_comet_chart_showing_changes_between_between_two_states.svg was not created
output/horizon_graph.svg was not created
output/one_dot_per_zipcode_in_the_u.s..svg was not created
