In [1]:
# Basic Setup
from splinter import Browser
from bs4 import BeautifulSoup as bs
import pandas as pd


In [2]:
# 1. Scrape NASA Mars News
# Opens a Chrome session, snapshots the news page, and extracts the headline
# and teaser text of the first `li.slide` element found in the page.

executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path)

url = 'https://mars.nasa.gov/news/'

try:
    browser.visit(url)
    # Wait up to one second for a news slide to show up before taking the snapshot.
    browser.is_element_present_by_css("ul.item_list li.slide", wait_time=1)
    html = browser.html
finally:
    # The next cell rebinds `browser` to a brand-new session, so this one has to
    # be closed here (even if the visit failed) or its Chrome process is orphaned.
    browser.quit()

soup = bs(html, 'html.parser')

slide = soup.find('li', class_='slide')
if slide is None:
    # Fail with an explicit message instead of an AttributeError on None below.
    raise RuntimeError("No 'li.slide' element found on " + url)

L_headline = slide.find("div", class_='content_title').text
L_article = slide.find("div", class_='article_teaser_body').text

print(L_headline)
print(L_article)


How NASA's Perseverance Mars Team Adjusted to Work in the Time of Coronavirus 
Like much of the rest of the world, the Mars rover team is pushing forward with its mission-critical work while putting the health and safety of their colleagues and community first.


In [3]:
# 2. Scrape Featured Image
# Snapshot the JPL space-images page, then pull the image path out of the
# inline `style` attribute of the carousel <article>.
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path)
browser.visit(url)
html = browser.html
soup = bs(html, 'html.parser')
browser.quit()

carousel_style = soup.find('article', class_='carousel_item')['style']

# The relative image path is whatever sits between "('" and "')" in the style text.
open_marker = "('"
close_marker = "')"
path_start = carousel_style.find(open_marker) + len(open_marker)
path_end = carousel_style.find(close_marker)

featured_img_url = 'https://www.jpl.nasa.gov' + carousel_style[path_start:path_end]
print(featured_img_url)

https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA20263-1920x1200.jpg


In [4]:
# 3.Mars Weather
# Snapshot the Mars weather Twitter feed and print the text of the first
# <span> inside <main> whose text mentions 'InSight sol'.

def contains_word(t):
    """Return a truthy value when `t` is non-empty and contains 'InSight sol'."""
    if not t:
        return t
    return 'InSight sol' in t


url = 'https://twitter.com/marswxreport?lang=en'

executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path)
browser.visit(url)
# NOTE(review): "div.aria-label" is a CSS *class* selector; if no div carries that
# class this call simply waits out the 2 seconds -- confirm that is the intent.
browser.is_element_present_by_css("div.aria-label", wait_time=2)

html = browser.html
soup = bs(html, 'html.parser')
main = soup.find('main')
browser.quit()

weather_span = main.find('span', text=contains_word)
mars_weather = weather_span.text
print(mars_weather)

InSight sol 499 (2020-04-22) low -94.4ºC (-137.9ºF) high -3.6ºC (25.5ºF)
winds from the SW at 5.1 m/s (11.3 mph) gusting to 16.2 m/s (36.1 mph)
pressure at 6.70 hPa


In [6]:
# 4.Mars Hemisphere
# Collect the hemisphere product links from the USGS search results, then visit
# each product page and record its title and the href of its 'Sample' link.
# The browser is intentionally left open here; the final cell calls browser.quit().
executable_path = {'executable_path': 'chromedriver'}
browser = Browser('chrome', **executable_path)

url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)
browser.is_element_present_by_css("div.description", wait_time=3)

html = browser.html
soup = bs(html, 'html.parser')

links = soup.find_all('a', class_='itemLink product-item')

# De-duplicate while keeping first-seen page order. list(set(...)) ordered the
# hrefs arbitrarily, so the result list could change between kernel restarts.
link_list = list(dict.fromkeys(anchor.get('href') for anchor in links))


def contains_word(t):
    """Return a truthy value when `t` is non-empty and contains 'Sample'."""
    return t and 'Sample' in t


hemisphere_image_urls = []

for href in link_list:

    browser.visit('https://astrogeology.usgs.gov/' + href)
    # NOTE(review): "div.ul.li" selects a div with classes `ul` and `li`; if none
    # exists this is effectively a 1 second pause -- confirm the intended selector.
    browser.is_element_present_by_css("div.ul.li", wait_time=1)

    # Separate names for the per-page snapshot so the loop variable and the
    # search-results `soup` above are not silently overwritten.
    page_soup = bs(browser.html, 'html.parser')
    sample_link = page_soup.find('a', text=contains_word)

    # Keep the text before the first " |"; partition leaves the title intact when
    # the separator is missing (slicing with find() == -1 chopped the last char).
    title = page_soup.head.title.text.partition(" |")[0]

    hemisphere_image_urls.append({"title": title, "img_url": sample_link.get('href')})

print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]


In [5]:
# 5. Mars Facts

def mars_facts(url="http://space-facts.com/mars/"):
    """Read the first HTML table at `url` and return it as a Bootstrap-styled table.

    Parameters
    ----------
    url : str, optional
        Location passed to ``pd.read_html``. Defaults to the space-facts Mars page.

    Returns
    -------
    str or None
        HTML markup of the table, indexed by ``description`` with one ``value``
        column, or ``None`` when reading the table failed.
    """
    try:
        df = pd.read_html(url)[0]
    except Exception:
        # Best-effort scrape: ordinary failures yield None. Catching Exception
        # rather than BaseException lets KeyboardInterrupt / SystemExit through,
        # so the cell can still be interrupted while the request is in flight.
        return None

    df.columns = ["description", "value"]
    facts = df.set_index("description")

    # Add some bootstrap styling to <table>
    return facts.to_html(classes="table table-striped")

# Run the scrape once and keep the result (HTML string, or None on failure).
test = mars_facts()


In [6]:
# Display the value returned by mars_facts() as this cell's output.
test

'<table border="1" class="dataframe table table-striped">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>value</th>\n    </tr>\n    <tr>\n      <th>description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronom

In [7]:
# Shut down whichever browser session `browser` currently refers to.
browser.quit()