In [1]:
from io import StringIO

import pandas as pd
import requests
from bs4 import BeautifulSoup
from splinter import Browser

In [2]:
# Start a visible (non-headless) Chrome window driven by splinter.
# The driver is referenced by the bare name 'chromedriver' (original note:
# macOS setup) -- presumably resolved from PATH or the working directory; confirm.
executable_path = dict(executable_path='chromedriver')
browser = Browser('chrome', headless=False, **executable_path)

In [3]:
# Open the NASA Mars News page in the splinter-driven Chrome browser.
url = 'https://mars.nasa.gov/news/'
browser.visit(url)

# browser.html is a snapshot taken at the moment it is read. If the page
# fills in its article list with JavaScript (NOTE(review): believed to be the
# case for this site -- confirm), parsing immediately after visit() can miss
# the headline element. Wait up to 5 seconds for the first headline container
# before snapshotting; if it never appears we proceed exactly as before.
browser.is_element_present_by_css('div.content_title', wait_time=5)

# Parse the rendered page with Beautiful Soup.
soup = BeautifulSoup(browser.html, 'html.parser')

In [4]:
# Take the first headline and the first teaser paragraph found in the parsed
# news page (the first match is treated as the latest article) and show both.
news_title = soup.find('div', {'class': 'content_title'}).get_text()
news_p = soup.find('div', {'class': 'article_teaser_body'}).get_text()
print(news_title, news_p, sep='\n')

NASA's Mars Reconnaissance Orbiter Undergoes Memory Update
Other orbiters will continue relaying data from Mars surface missions for a two-week period.


In [5]:
# Load the JPL Mars Space Images listing in the Chrome session, then hand the
# rendered markup to Beautiful Soup.
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)
soup = BeautifulSoup(markup=browser.html, features='html.parser')

In [6]:
# Read the full-size image path from the link inside the first 'slide' list
# item and prefix it with the site root to get an absolute URL.
base_url = 'https://www.jpl.nasa.gov'
image_url = soup.find('li', {'class': 'slide'}).find('a')['data-fancybox-href']
featured_image_url = ''.join((base_url, image_url))
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA23707_hires.jpg


In [7]:
# Fetch the Mars Weather Twitter page with a plain HTTP request (requests,
# not the Chrome driver) and parse the returned HTML with Beautiful Soup.
url = 'https://twitter.com/marswxreport?lang=en'

# Bound the wait so a stalled connection cannot hang the notebook, and fail
# here with the HTTP status rather than parsing an error page further down.
# Note: `request` holds the HTTP response object returned by requests.get.
request = requests.get(url, timeout=30)
request.raise_for_status()

soup = BeautifulSoup(request.content, 'html.parser')

In [8]:
# Scrape and collect the first tweet whose text starts with 'InSight sol'
# (taken to be the latest weather report -- assumes the page lists newest first).
# Reset to None on every run so that a value left over from an earlier
# execution is never reported when no matching tweet is found.
mars_weather = None
for tweet in soup.find_all('div', class_='js-tweet-text-container'):
    paragraph = tweet.p
    # A container without a <p> child has no tweet text to inspect.
    if paragraph is None:
        continue
    if paragraph.text.startswith('InSight sol'):
        mars_weather = paragraph.text
        break

# Fail with an explicit message instead of a NameError (fresh kernel) or a
# silently stale value (re-run) when the page contains no weather tweet.
if mars_weather is None:
    raise ValueError("No tweet starting with 'InSight sol' was found in the parsed page.")
print(mars_weather)

InSight sol 437 (2020-02-18) low -94.7ºC (-138.5ºF) high -9.9ºC (14.1ºF)
winds from the SSE at 6.2 m/s (13.8 mph) gusting to 21.6 m/s (48.3 mph)
pressure at 6.30 hPapic.twitter.com/SLsDLVefeh


In [9]:
# Point the Chrome session at the Space Facts page for Mars.
url = "https://space-facts.com/mars/"
browser.visit(url)

In [19]:
# Use pandas to read the HTML tables from the page currently loaded in the browser.
# pd.read_html returns a list of DataFrames, so `df` is a list, not a single
# frame. Recent pandas releases deprecate passing a literal HTML string, so
# the markup is wrapped in StringIO (file-like input works on older versions too).
df = pd.read_html(StringIO(browser.html))

# Keep the first table's HTML rendering in html_table, then preview that table.
html_table = df[0].to_html()
df[0].head()

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"


In [16]:
# Open the USGS Astrogeology search results for the enhanced Mars hemisphere
# products and parse the rendered page with Beautiful Soup.
url = (
    'https://astrogeology.usgs.gov/search/results'
    '?q=hemisphere+enhanced&k1=target&v1=Mars'
)
browser.visit(url)
soup = BeautifulSoup(browser.html, features='html.parser')

In [17]:
# Scrape and collect the full-size image URL for each of Mars' hemispheres
# listed on the search-results page held in `soup`.
base_url = 'https://astrogeology.usgs.gov'
hemisphere_image_urls = []
for item in soup.find_all('div', class_='item'):
    title = item.h3.text
    detail_path = item.find('a', class_='itemLink product-item')['href']

    # Visit the individual hemisphere page to find the full-size image.
    # The detail page is parsed into its own variable so that `soup` keeps
    # pointing at the search results; rebinding `soup` here would make a
    # re-run of this cell iterate over the last detail page instead.
    browser.visit(base_url + detail_path)
    detail_soup = BeautifulSoup(browser.html, 'html.parser')
    image_path = detail_soup.find('img', class_='wide-image')['src']

    # Append dictionary to list
    hemisphere_image_urls.append({'title': title, 'image_url': base_url + image_path})
print(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere Enhanced', 'image_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'image_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'image_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'image_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]


In [18]:
# Scraping is finished: quit the browser session created by Browser(...) earlier.
browser.quit()