In [7]:
from splinter import Browser
from bs4 import BeautifulSoup as soup
import pandas as pd
import datetime as dt
from webdriver_manager.chrome import ChromeDriverManager

def scrape_all():
    """Run every Mars scraper and collect the results in one dictionary.

    A headless Chrome session is started for the scrapers that need a
    browser and is always shut down again, even when a scraper raises.

    Returns:
        dict with the keys ``news_title``, ``news_paragraph``,
        ``featured_image``, ``facts`` and ``last_modified`` (the local
        time at which the dictionary was assembled).
    """
    # set up executable path (webdriver_manager resolves the chromedriver binary)
    executable_path = {"executable_path": ChromeDriverManager().install()}
    browser = Browser("chrome", **executable_path, headless=True)

    try:
        # set news title and paragraph variables using our mars_news() function
        news_title, news_paragraph = mars_news(browser)

        # run all scraping functions and store results in dictionary
        data = {
            "news_title": news_title,
            "news_paragraph": news_paragraph,
            "featured_image": featured_image(browser),
            "facts": mars_facts(),
            "last_modified": dt.datetime.now(),
        }
    finally:
        # end the automated browsing session; in a finally block so a failing
        # scraper cannot leave the browser process running
        browser.quit()

    return data


def mars_news(browser):
    """Scrape a headline and its teaser text from redplanetscience.com.

    Args:
        browser: browser object used to load the page; it must provide
            ``visit()``, ``is_element_present_by_css()`` and ``html``.

    Returns:
        ``(news_title, news_p)`` taken from the first ``div.list_text``
        element on the page, or ``(None, None)`` when that element or one
        of its expected children is missing.
    """
    # assign url and visit it with the browser
    url = "https://redplanetscience.com/"
    browser.visit(url)

    # optional delay for loading the page (result is intentionally ignored;
    # a missing element is handled by the AttributeError branch below)
    browser.is_element_present_by_css("div.list_text", wait_time=1)

    # set up HTML parser
    news_soup = soup(browser.html, "html.parser")
    slide_elem = news_soup.select_one("div.list_text")

    # select_one()/find() return None when nothing matches, so a missing
    # element surfaces as AttributeError on the following attribute access
    try:
        # title and teaser live in sibling divs inside the list_text element
        news_title = slide_elem.find("div", class_="content_title").get_text()
        news_p = slide_elem.find("div", class_="article_teaser_body").get_text()
    except AttributeError:
        return None, None

    return news_title, news_p


# ### Featured Images
def featured_image(browser):
    """Return the absolute URL of the featured image on spaceimages-mars.com.

    Args:
        browser: browser object used to load the page; it must provide
            ``visit()``, ``find_by_tag()`` and ``html``.

    Returns:
        The absolute image URL as a string, or ``None`` when the full-image
        button, the ``img.fancybox-image`` element, or its ``src``
        attribute cannot be found.
    """
    # Local import: the file already depends on splinter, and this keeps the
    # block self-contained without touching the module-level imports.
    from splinter.exceptions import ElementDoesNotExist

    # set up the URL and visit it
    url = "https://spaceimages-mars.com/"
    browser.visit(url)

    # Find the full image button (the second <button> on the page).
    # Indexing past the end of the result list raises ElementDoesNotExist;
    # treat that like any other "not found" case instead of crashing.
    try:
        full_image_elem = browser.find_by_tag("button")[1]
    except ElementDoesNotExist:
        return None
    full_image_elem.click()

    # Parse the resulting html with soup
    img_soup = soup(browser.html, "html.parser")

    # find() yields None when the image is absent, and get() yields None
    # when the tag has no src; either way there is no URL to build.
    img_tag = img_soup.find("img", class_="fancybox-image")
    img_url_rel = img_tag.get("src") if img_tag is not None else None
    if not img_url_rel:
        return None

    # use the base URL to create an absolute URL
    return f"{url}{img_url_rel}"

def mars_facts():
    """Fetch the first table from galaxyfacts-mars.com and return it as HTML.

    Returns:
        The table rendered by ``DataFrame.to_html()`` with the columns
        renamed to ``description``/``Mars``/``Earth`` and ``description``
        used as the index, or ``None`` when the table cannot be fetched or
        does not have exactly three columns.
    """
    try:
        # read_html() searches and returns all tables found in the HTML;
        # index 0 means only the first table found is used
        df = pd.read_html("https://galaxyfacts-mars.com/")[0]
    except Exception:
        # Best-effort scrape: any fetch/parse failure yields None. Exception
        # (not BaseException) so KeyboardInterrupt/SystemExit still propagate.
        return None

    column_names = ["description", "Mars", "Earth"]
    if df.shape[1] != len(column_names):
        # Assigning mismatched column names would raise; report "no data".
        return None

    df.columns = column_names
    df.set_index("description", inplace=True)

    # convert pandas dataframe back into html
    return df.to_html()

if __name__ == "__main__":
    # Script entry point: perform one full scrape and print the result.
    scraped = scrape_all()
    print(scraped)



Current google-chrome version is 99.0.4844
Get LATEST chromedriver version for 99.0.4844 google-chrome
Trying to download new driver from https://chromedriver.storage.googleapis.com/99.0.4844.51/chromedriver_win32.zip
Driver has been saved in cache [C:\Users\Emily\.wdm\drivers\chromedriver\win32\99.0.4844.51]


{'news_title': "Robotic Toolkit Added to NASA's Mars 2020 Rover", 'news_paragraph': "The bit carousel, which lies at the heart of the rover's Sample Caching System, is now aboard NASA's newest rover. ", 'featured_image': 'https://spaceimages-mars.com/image/featured/mars3.jpg', 'facts': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n    <tr>\n      <th>description</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Mars - Earth Comparison</th>\n      <td>Mars</td>\n      <td>Earth</td>\n    </tr>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>Distance from Sun:</th>\n      <td>227,9