In [1]:

from splinter import Browser
from bs4 import BeautifulSoup
import pandas as pd

In [2]:
url_list = ['https://mars.nasa.gov/news/',
            'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars',
            'https://twitter.com/marswxreport?lang=en',
            'https://space-facts.com/mars/',
            'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars']



In [3]:
def getText(parent):
    return ''.join(parent.find_all(text=True, recursive=False)).strip()

def init_browser():
    # @NOTE: Replace the path with your actual path to the chromedriver
    executable_path = {"executable_path": "chromedriver.exe"}
    return Browser("chrome", **executable_path, headless=False)

In [4]:
browser = init_browser()

# get latest mars news
try:
    browser.visit(url_list[0])
    soup = BeautifulSoup(browser.html, "lxml")
    latest_news = soup.find("div", class_="list_text")
    news_title = latest_news.find("div", class_='content_title').text
    news_p = latest_news.find("div", class_='article_teaser_body').text
except Exception as e:
    print(f"An error occured while scraping {url_list[0]}: {e}")
    news_title = ''
    news_p = ''


In [5]:
# get featured mars image
try:
    browser.visit(url_list[1])
    soup = BeautifulSoup(browser.html, "lxml")
    image = soup.find("div", class_="carousel_items")
    relative_img_url = image.article['style'].split(":")[1].split("'")[1]
    featured_img_url = url_list[1].split(
        'spaceimages')[0] + relative_img_url
except Exception as e:
    print(f"An error occured while scraping {url_list[1]}: {e}")
    featured_img_url = ''


In [6]:
# get latest mars weather report tweet
try:
    browser.visit(url_list[2])
    soup = BeautifulSoup(browser.html, "lxml")
    tweets = soup.find_all("div", class_='js-tweet-text-container')
    for tweet in tweets:
        tweet_text = getText(tweet.p)
        if 'InSight sol' in tweet_text:
            mars_weather = tweet_text
except Exception as e:
    print(f"An error occured while scraping {url_list[2]}: {e}")
    mars_weather = ''

In [7]:
# get mars facts
try:
    browser.visit(url_list[3])
    tables = pd.read_html(url_list[3])
    table = tables[0]
    table.columns = ['Parameter', 'Value']
    table.set_index('Parameter')
except Exception as e:
    print(f"An error occured while scraping {url_list[3]}: {e}")
    table = pd.DataFrame()

In [8]:

# get images of mars hemishperes, follow image link to obtain full size image
try:
    browser.visit(url_list[4])
    soup = BeautifulSoup(browser.html, 'lxml')
    items = soup.find_all("div", class_="item")
    base_url = url_list[4].split('search')[0]
    hemisphere_link_urls = []
    for item in items:
        title = item.div.h3.text
        browser.visit(url_list[4])
        browser.click_link_by_partial_text(title)
        soup = BeautifulSoup(browser.html, 'lxml')
        relative_img_url = soup.find("img", class_="wide-image")['src']
        full_img_url = base_url + relative_img_url
        hemisphere_link_urls.append(
            {"title": title, "img_url": full_img_url})
except Exception as e:
    print(f"An error occured while scraping {url_list[3]}: {e}")
    hemisphere_link_urls = ''

In [9]:
 marsDict = {
        'news_title': news_title,
        'news_p': news_p,
        'featured_img_url': featured_img_url,
        'mars_weather': mars_weather,
        'table': table.to_html(index=False),
        'hemisphere_imgs': hemisphere_link_urls
    }

browser.quit()

In [10]:
marsDict

{'news_title': "Curiosity Tastes First Sample in 'Clay-Bearing Unit'",
 'news_p': 'This new region on Mars might reveal more about the role of water on Mount Sharp.',
 'featured_img_url': 'https://www.jpl.nasa.gov//spaceimages/images/wallpaper/PIA16225-1920x1200.jpg',
 'mars_weather': 'InSight sol 117 (2019-03-26) low -107.3ºC (-161.1ºF) high -17.1ºC (1.2ºF)\npressure at 7.30 hPa',
 'table': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th>Parameter</th>\n      <th>Value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <td>Mass:</td>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>