In [1]:
# Dependencies
import os
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup as bs
from splinter import Browser

In [2]:
# Path to driver
# Set the CHROMEDRIVER_PATH environment variable to point at a local chromedriver
# binary; when it is unset, the original hard-coded location is used as a fallback
# so existing setups keep working unchanged.
executable_path = {
    'executable_path': os.environ.get(
        'CHROMEDRIVER_PATH',
        'C:/Users/ama29/Desktop/Work/chromedriver_win32/chromedriver',
    )
}
# headless=False runs Chrome with a visible window; every later cell drives this
# single shared `browser` instance.
browser = Browser('chrome', **executable_path, headless=False)

**NASA Mars News Site - Scrape latest news title and paragraph text**

In [3]:
# Visit URL of page to be scraped (NASA Mars news listing)
url = 'https://mars.nasa.gov/news/'
# Navigate the shared browser to the page so its HTML can be captured afterwards
browser.visit(url)

In [4]:
# Capture the HTML of the page currently loaded in the browser
site_html = browser.html
# Create BeautifulSoup object; parse with 'lxml'
soup = bs(site_html, 'lxml')

In [5]:
# Collect the latest News Title
# NOTE(review): index [1] selects the SECOND 'content_title' div, whereas the teaser
# text is taken from the FIRST 'article_teaser_body' div. Presumably the first
# 'content_title' on the page is not a news item — confirm against the live markup.
# Raises IndexError if fewer than two matching divs are present.
news_title = soup.find_all('div', class_='content_title')[1].text
print(news_title)

NASA Engineers Checking InSight's Weather Sensors


In [6]:
# Collect the latest Paragraph Text: the text of the first 'article_teaser_body' div
# (raises AttributeError if no such div exists, because find() returns None)
news_p = soup.find('div', class_='article_teaser_body').text
# Bare expression so the notebook displays the value
news_p

'An electronics issue is suspected to be preventing the sensors from sharing their data about Mars weather with the spacecraft.'

**JPL Mars Space Images - Featured Image**

In [7]:
# Visit URL of page to be scraped (JPL space images, filtered to the Mars category)
# Note: this rebinds the notebook-level `url` used for the news page earlier
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [8]:
# Get full image
# Click the first link whose text contains "FULL IMAGE"
browser.links.find_by_partial_text('FULL IMAGE').first.click()
# Alternative selector, kept for reference only (not executed):
#   browser.find_by_id('full_image').first.click()

In [9]:
# "More info" button
# Click the first link whose text contains "more info" to reach the image detail page
# NOTE(review): under Run All this executes right after the "FULL IMAGE" click;
# presumably the link is already present by then — confirm no explicit wait is needed.
browser.links.find_by_partial_text('more info').first.click()

In [10]:
# Capture the HTML of the page the browser is on after the two clicks
image_html = browser.html
# Create BeautifulSoup object; parse with 'html.parser'
# Note: this rebinds `soup`, replacing the news-page parse tree
soup = bs(image_html, 'html.parser')

In [11]:
# Locate the main image tag and read the site-relative path of the full size '.jpg'
main_image_tag = soup.find('img', class_='main_image')
image_url_part = main_image_tag['src']
# Prefix the JPL origin to obtain the complete url string for this image
featured_image_url = ''.join(('https://www.jpl.nasa.gov', image_url_part))
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17200_hires.jpg'

**Mars Facts webpage - Planet facts**

In [12]:
# URL of page to be scraped: https://space-facts.com/mars/

# read_html returns one DataFrame per <table> found on the page
mars_tables = pd.read_html('https://space-facts.com/mars/')
# The first table is kept as the "Mars Planet Profile" data
mars_facts_df = mars_tables[0]
# Replace the column headings with descriptive labels
mars_facts_df.columns = ["Description", "Values"]
mars_facts_df

Unnamed: 0,Description,Values
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [13]:
# Use Pandas to convert the data to a HTML table string
# index=False leaves the DataFrame's row index out of the generated markup.
# NOTE(review): the saved output below still contains index cells (<th>0</th>, ...),
# so it appears to predate index=False — re-run the cell to refresh it.
facts_html = mars_facts_df.to_html(index=False)
facts_html

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Description</th>\n      <th>Values</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>7</th>\n      <td>First Record:</td>\n      <td>2nd millennium

**USGS Astrogeology site - Mars Hemispheres**

In [14]:
# Visit URL of page to be scraped (USGS Astrogeology search results for
# "hemisphere enhanced" with target Mars)
# Note: this rebinds the notebook-level `url` once more
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

In [15]:
# Capture the HTML of the search-results page currently loaded in the browser
hemi_html = browser.html
# Create BeautifulSoup object; parse with 'html.parser'
# Note: this rebinds `soup`, replacing the image-page parse tree
soup = bs(hemi_html, 'html.parser')

In [17]:
# Create a list to hold dictionary with image url string and title for each hemisphere
hemisphere_image_urls = []

# Base against which links found on the results page are resolved
usgs_base_url = 'https://astrogeology.usgs.gov/'

# Parse info for each hemisphere: every 'item' div inside the 'result-list' container
results = soup.find('div', class_='result-list')
hemis = results.find_all('div', class_='item')

# For loop to form and save links to each full resolution image
for hemi in hemis:
    hemi_title = hemi.find('h3').text
    hemi_title = hemi_title.replace(' Enhanced', "")  # Remove "Enhanced" from name
    link_part = hemi.find('a')['href']
    # urljoin resolves the href against the base, so a root-relative href no longer
    # produces a doubled slash and an absolute href is used as-is.
    small_hemi_url = urljoin(usgs_base_url, link_part)
    browser.visit(small_hemi_url)  # Visit newly formed url
    full_reso_html = browser.html
    hemi_soup = bs(full_reso_html, 'html.parser')
    # The first link inside the 'downloads' div is taken as the image url
    hemi_url_div = hemi_soup.find('div', class_='downloads')
    # An absolute href is returned unchanged; a relative one is resolved against
    # the detail page it was found on.
    hemi_url = urljoin(small_hemi_url, hemi_url_div.find('a')['href'])
    hemisphere_image_urls.append({'title': hemi_title, 'img_url': hemi_url})  # Add dictionary to list

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]