In [1]:
import os
import re
import time

import pandas as pd
from bs4 import BeautifulSoup as bs
from splinter import Browser

In [2]:
def init_browser():
    """Start a visible (non-headless) Chrome browser driven through splinter.

    The chromedriver binary is taken from the ``CHROMEDRIVER_PATH``
    environment variable when it is set and non-empty; otherwise the
    original hard-coded location is used, so existing setups keep working.

    Returns:
        The object returned by ``Browser("chrome", ...)``.
    """
    # Fallback kept for backward compatibility with the original notebook.
    default_driver_path = "/Users/user/Downloads/chromedriver"
    driver_path = os.environ.get("CHROMEDRIVER_PATH") or default_driver_path
    return Browser("chrome", executable_path=driver_path, headless=False)

In [3]:
def _visit_and_parse(browser, url, parser="html.parser"):
    """Load ``url`` in ``browser`` and parse the resulting HTML with BeautifulSoup."""
    browser.visit(url)
    return bs(browser.html, parser)


def _scrape_news(browser):
    """Return ``(title, teaser)`` of the first item on the NASA Mars news page."""
    soup = _visit_and_parse(browser, "https://mars.nasa.gov/news/")
    news = soup.find("div", class_="list_text")
    title = news.find("div", class_="content_title").get_text()
    teaser = news.find("div", class_="article_teaser_body").get_text()
    return title, teaser


def _scrape_featured_image(browser):
    """Return the absolute URL of the featured image on the JPL space images page."""
    base_url = "https://www.jpl.nasa.gov"
    soup = _visit_and_parse(browser, base_url + "/spaceimages/?search=&category=Mars")
    carousel = soup.find("div", class_="carousel_items")
    style = carousel.find("article")["style"]

    # The image path sits inside the inline style as url('<path>');
    # extract it by pattern rather than by fixed character offsets.
    match = re.search(r"url\(\s*['\"]?(.*?)['\"]?\s*\)", style)
    if match is None:
        raise ValueError("No url(...) found in featured image style: %r" % style)
    return base_url + match.group(1)


def _scrape_weather(browser):
    """Return the text of the top MarsWxReport tweet, without the picture link."""
    soup = _visit_and_parse(browser, "https://twitter.com/marswxreport?lang=en")
    tweet = soup.find("div", class_="tweet", attrs={"data-screen-name": "MarsWxReport"})
    text = tweet.find("p", class_="tweet-text").get_text()
    # Flatten line breaks and drop everything from the trailing pic.twitter.com link on.
    return text.replace("\n", " ").split("pic.twitter.com")[0]


def _scrape_facts():
    """Return the first table on space-facts.com/mars as a compact HTML string."""
    facts_df = pd.read_html("https://space-facts.com/mars/")[0]
    facts_df.columns = ["Description", "Value"]
    table_html = facts_df.to_html(index=False, justify="left", table_id="mars_facts_tbl")
    # Remove the newlines and the indentation spaces that follow each tag.
    return re.sub(r"> +", ">", table_html.replace("\n", ""))


def _scrape_hemispheres(browser):
    """Return a list of ``{'title', 'img_url'}`` dicts, one per hemisphere result."""
    base_url = "https://astrogeology.usgs.gov"
    soup = _visit_and_parse(
        browser,
        base_url + "/search/results?q=hemisphere+enhanced&k1=target&v1=Mars",
        "lxml",
    )

    # Collect every title and detail-page link before navigating away.
    pages = [
        (item.find("h3").get_text(), item.find("a", class_="itemLink").get("href"))
        for item in soup.find_all("div", class_="item")
    ]

    image_urls = []
    for title, href in pages:
        detail = _visit_and_parse(browser, base_url + href, "lxml")
        # The "Sample" link on the detail page points at the image file.
        image_urls.append(
            {"title": title, "img_url": detail.find("a", string="Sample").get("href")}
        )
    return image_urls


def scrape():
    """Scrape the Mars data sources and return the results as one dictionary.

    Opens a browser via ``init_browser()``, visits each source in turn and
    always closes the browser afterwards, including when a step raises.

    Returns:
        dict with keys ``news_title``, ``news_teaser``, ``featured_image_url``,
        ``mars_weather``, ``mars_facts_htm`` and ``hemisphere_image_urls``
        (a list of ``{'title', 'img_url'}`` dicts).

    Raises:
        AttributeError / TypeError: when an expected element is missing from a page.
        ValueError: when the featured image style contains no ``url(...)``.
    """
    browser = init_browser()
    try:
        news_title, news_teaser = _scrape_news(browser)
        featured_image_url = _scrape_featured_image(browser)
        mars_weather = _scrape_weather(browser)
        mars_facts_htm = _scrape_facts()
        hemisphere_image_urls = _scrape_hemispheres(browser)

        return {
            "news_title": news_title,
            "news_teaser": news_teaser,
            "featured_image_url": featured_image_url,
            "mars_weather": mars_weather,
            "mars_facts_htm": mars_facts_htm,
            "hemisphere_image_urls": hemisphere_image_urls,
        }
    finally:
        # Close the browser whether scraping succeeded or failed.
        browser.quit()

In [4]:
# Run the scraper; the returned dictionary is shown as this cell's output.
scrape()

{'news_title': 'InSight Captures Sunrise and Sunset on Mars',
 'news_teaser': "InSight joins the rest of NASA's Red Planet surface missions, all of which have photographed either the start or end of a Martian day.",
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA22574-1920x1200.jpg',
 'mars_weather': 'InSight sol 154 (2019-05-03) low -97.6ºC (-143.7ºF) high -17.2ºC (1.0ºF) winds from the SW at 4.5 m/s (10.0 mph) gusting to 13.1 m/s (29.2 mph) pressure at 7.40 hPa',
 'mars_facts_htm': '<table border="1" class="dataframe" id="mars_facts_tbl"><thead><tr style="text-align: left;"><th>Description</th><th>Value</th></tr></thead><tbody><tr><td>Equatorial Diameter:</td><td>6,792 km</td></tr><tr><td>Polar Diameter:</td><td>6,752 km</td></tr><tr><td>Mass:</td><td>6.42 x 10^23 kg (10.7% Earth)</td></tr><tr><td>Moons:</td><td>2 (Phobos &amp; Deimos)</td></tr><tr><td>Orbit Distance:</td><td>227,943,824 km (1.52 AU)</td></tr><tr><td>Orbit Period:</td><td>687 days (1