In [1]:
# Dependencies & Setup
# splinter : https://pypi.org/project/splinter/
# beautiful soup : https://www.crummy.com/software/BeautifulSoup/
# Chromedriver : https://sites.google.com/a/chromium.org/chromedriver/downloads

from splinter import Browser
from bs4 import BeautifulSoup
import pandas as pd
import time

In [2]:
# Set Executable Path & Initialize Chrome Browser (Mac)
executable_path = {"executable_path": "/usr/local/bin/chromedriver"}
browser = Browser('chrome', **executable_path, headless = False)

In [3]:
# HTML Scrape
# Visit
url = "https://mars.nasa.gov/news/"
browser.visit(url)

# Wait
time.sleep(5)

# write to HTML
html = browser.html
bs = BeautifulSoup(html, 'html.parser')

In [4]:
# Scrape title and paragraph
news_title = bs.find("div", class_ = "list_text").find('div', class_ = "content_title").text
news_p = bs.find("div", class_ = "article_teaser_body").text
print(f"Title: {news_title}")
print(f"Paragraph: {news_p}")

Title: Celebrate Mars Reconnaissance Orbiter's Views From Above
Paragraph: Marking its 15th anniversary since launch, one of the oldest spacecraft at the Red Planet has provided glimpses of dust devils, avalanches, and more.


In [5]:
# HTML Scrape
# Visit
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url)

# Wait
time.sleep(5)

# write to HTML
html = browser.html
bs = BeautifulSoup(html, 'html.parser')

In [6]:
# Image Scrape
# Scrape image url
image_url = bs.find("img", class_ = "thumb")["src"]
featured_image_url = "https://www.jpl.nasa.gov" + image_url
print(f"Featured Image URL: {featured_image_url}")

Featured Image URL: https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA24036-640x350.jpg


In [7]:
# HTML Scrape
# Visit the Mars Weather Twitter Account
url = "https://twitter.com/marswxreport?lang=en"
browser.visit(url)

# Wait
time.sleep(5)

# Use BeautifulSoup to write into HTML
html = browser.html
bs = BeautifulSoup(html, 'html.parser')

In [8]:
# tweet Scrape
# Scrape first tweet
first_tweet = bs.find("div", class_ = 'css-1dbjc4n').text
first_tweet

"Don’t miss what’s happeningPeople on Twitter are the first to know.Log inSign upMars Weather2,304 TweetsSee new TweetsFollowMars Weather@MarsWxReportUpdates as avail from the REMS weather instrument aboard @MarsCuriosity.  Data credit: Centro deAstrobiologia, FMI, JPL/NASA, Not an official acct.Gale Crater, Marsmars.nasa.gov/news/8415/insi…Joined August 201253 Following57.5K FollowersTweetsTweets & repliesMediaLikesTweetsTweets & repliesMediaLikesMars Weather’s TweetsMars Weather@MarsWxReport·16hInSight sol 607 (2020-08-11) low -93.1ºC (-135.6ºF) high -18.9ºC (-2.1ºF)\nwinds from the WNW at 8.2 m/s (18.4 mph) gusting to 21.4 m/s (47.8 mph)\npressure at 7.90 hPa517Mars Weather@MarsWxReport·Aug 11InSight sol 606 (2020-08-09) low -94.1ºC (-137.4ºF) high -18.7ºC (-1.6ºF)\nwinds from the WNW at 8.0 m/s (17.8 mph) gusting to 23.9 m/s (53.4 mph)\npressure at 7.90 hPa919Mars Weather@MarsWxReport·Aug 9InSight sol 605 (2020-08-09) low -92.7ºC (-134.8ºF) high -18.4ºC (-1.1ºF)\nwinds from the WNW

In [9]:
# Visit the Mars Facts Webpage and use Pandas to convert the data to an HTML table string
mars_df = pd.read_html("https://space-facts.com/mars")[0]
mars_df.columns = ["description", "value"]
mars_df.set_index(["description"], inplace = True)
mars_df

Unnamed: 0_level_0,value
description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [12]:
# Visit the USGS Astrogeology Site
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url)

# Wait
time.sleep(5)

# write into HTML
html = browser.html
bs = BeautifulSoup(html, 'html.parser')

In [13]:
# Create an empty list to store hemisphere URLs
hemisphere_urls = []

# Retrieve a list of all the hemispheres
links = browser.find_by_css('a.product-item h3')

# Loop through list of hemispheres
for item in range(len(links)):
    
    # Create an empty hemisphere dictionary
    hemisphere = {}
    
    # Find hemisphere on each loop and click
    browser.find_by_css('a.product-item h3')[item].click()
    
    # Extract sample image anchor tag <href> and hemisphere title
    element = browser.find_by_text("Sample").first
    hemisphere["img_url"] = element["href"]
    hemisphere["title"] = browser.find_by_css('h2.title').text
    
    # Append urls
    hemisphere_urls.append(hemisphere)
    
    browser.back()

hemisphere_urls

[{'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

In [111]:
browser.quit()