In [1]:
# Import Dependencies
from bs4 import BeautifulSoup as bs
import requests
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd
from pprint import pprint

In [2]:
# Create emply mars_data dictionary to append our results to
mars_data = {}

In [3]:
# Executable path + initialize browser
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)




[WDM] - Current google-chrome version is 102.0.5005
[WDM] - Get LATEST chromedriver version for 102.0.5005 google-chrome
[WDM] - Driver [/Users/graceolson/.wdm/drivers/chromedriver/mac64/102.0.5005.61/chromedriver] found in cache


## Scraping: NASA Mars News

In [4]:
# NASA Mars News URL to scrape
news_url = 'https://redplanetscience.com/'

# Visit URL
browser.visit(news_url)

In [5]:
# Create HTML and Beautiful Soup Object
news_html = browser.html
news_soup = bs(news_html, 'html.parser')

In [6]:
# Collect the latest news title
news_title = news_soup.find('div', class_ = 'content_title').text
print(news_title)

NASA's Mars 2020 Rover Goes Coast-to-Coast to Prep for Launch


In [7]:
# Collect the latest paragraph text for the news title
news_p = news_soup.find('div', class_ = 'article_teaser_body').text
print(news_p)

The agency's first step in returning rocks from Mars just arrived at Kennedy Space Center. The Mars 2020 team now begins readying for a launch to the Red Planet this July.


In [8]:
# Append results to mars_data dictionary
mars_data['news_title'] = news_title
mars_data['news_p'] = news_p
mars_data

{'news_title': "NASA's Mars 2020 Rover Goes Coast-to-Coast to Prep for Launch",
 'news_p': "The agency's first step in returning rocks from Mars just arrived at Kennedy Space Center. The Mars 2020 team now begins readying for a launch to the Red Planet this July."}

## Scraping: JPL Mars Space Images- Featured Image

In [9]:
# JPL Mars Space Images URL to scrape
image_url = 'https://spaceimages-mars.com/'

# Visit URL
browser.visit(image_url)

In [10]:
# Create HTML and Beautiful Soup Object
image_html = browser.html
image_soup = bs(image_html, 'html.parser')

In [11]:
# Find all images with the 'img' selector
all_images = image_soup.find_all('img')

In [12]:
# Display the image source for each image found in all_images
images = []
for image in all_images:
    images.append(image['src'])

print(images)

['image/nasa.png', 'image/featured/mars3.jpg', 'image/mars/Icaria Fossae7.jpg', 'image/mars/Proctor Crater Dunes 7.jpg', 'image/mars/Icaria Fossae7.jpg', 'image/mars/Proctor Crater Dunes 7.jpg', 'image/mars/Proctor Crater Dunes 7.jpg', 'image/mars/Icaria Fossae7.jpg', 'image/mars/Icaria Fossae.jpg', 'image/mars/Ariadnes Colles4.jpg', 'image/mars/Niger Vallis.jpg', 'image/mars/Proctor Crater Dunes.jpg', 'image/mars/Niger Vallis.jpg', 'image/mars/Daedalia Planum.jpg', 'image/mars/Sirenum Fossae.jpg', 'image/mars/Ariadnes Colles4.jpg', 'image/mars/South Polar Cap.jpg', 'image/mars/Daedalia Planum.jpg', 'image/mars/Ariadnes Colles3.jpg', 'image/mars/Atlantis Chaos.jpg', 'image/mars/Daedalia Planum.jpg', 'image/mars/Icaria Fossae.jpg', 'image/mars/Niger Vallis.jpg', 'image/mars/Proctor Crater Dunes.jpg', 'image/mars/Reull Vallis.jpg', 'image/mars/Ariadnes Colles3.jpg', 'image/mars/Sirenum Fossae.jpg', 'image/mars/South Polar Cap.jpg', 'image/mars/Niger Vallis.jpg', 'image/mars/Daedalia Plan

In [13]:
# Merge the base URL and the image source to display the image URL for the current Featured Mars Image
base_url = 'https://spaceimages-mars.com/'
featured_image_url = base_url + images[1]
print(featured_image_url)

https://spaceimages-mars.com/image/featured/mars3.jpg


In [14]:
# Append results to mars_data dictionary
mars_data['featured_image_url'] = featured_image_url
mars_data

{'news_title': "NASA's Mars 2020 Rover Goes Coast-to-Coast to Prep for Launch",
 'news_p': "The agency's first step in returning rocks from Mars just arrived at Kennedy Space Center. The Mars 2020 team now begins readying for a launch to the Red Planet this July.",
 'featured_image_url': 'https://spaceimages-mars.com/image/featured/mars3.jpg'}

## Scraping: Mars Facts

In [15]:
# Mars Facts URL to scrape
facts_url = 'https://galaxyfacts-mars.com/'

# Visit URL
browser.visit(facts_url)

In [16]:
# Use Pandas read_html to parse the url
tables = pd.read_html(facts_url)
tables

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [17]:
# Create dataframe for facts table
facts_df = tables[0]
facts_df

Unnamed: 0,0,1,2
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


In [18]:
# Change column names and set index
facts_df.columns = ['Description', 'Mars', 'Earth']
facts_df.set_index('Description', inplace = True)
facts_df

Unnamed: 0_level_0,Mars,Earth
Description,Unnamed: 1_level_1,Unnamed: 2_level_1
Mars - Earth Comparison,Mars,Earth
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days
Temperature:,-87 to -5 °C,-88 to 58°C


In [19]:
# Convert the data to a HTML table string
facts_html_table = facts_df.to_html()
facts_html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Mars - Earth Comparison</th>\n      <td>Mars</td>\n      <td>Earth</td>\n    </tr>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n      <td>1</td>\n    </tr>\n    <tr>\n      <th>Distance from Sun:</th>\n      <td>227,943,824 km</td>\n      <td>149,598,262 km</td>\n    </tr>\n    <tr>\n      <th>Length of Year:</th>\n      <td>687 Earth days</td>\n      <td>365.24 days</td>\n    </tr>\n    <tr>\n      <th>Temperature:</th>\n      <td>-87 to -5 °C</td>\n      <td>-88 to 58°C</td>\n    </tr>\n  </tbody>

In [20]:
# Strip unwanted newslines to clean up the table
facts_html_table.replace('\n','')

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Mars</th>      <th>Earth</th>    </tr>    <tr>      <th>Description</th>      <th></th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Mars - Earth Comparison</th>      <td>Mars</td>      <td>Earth</td>    </tr>    <tr>      <th>Diameter:</th>      <td>6,779 km</td>      <td>12,742 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.39 × 10^23 kg</td>      <td>5.97 × 10^24 kg</td>    </tr>    <tr>      <th>Moons:</th>      <td>2</td>      <td>1</td>    </tr>    <tr>      <th>Distance from Sun:</th>      <td>227,943,824 km</td>      <td>149,598,262 km</td>    </tr>    <tr>      <th>Length of Year:</th>      <td>687 Earth days</td>      <td>365.24 days</td>    </tr>    <tr>      <th>Temperature:</th>      <td>-87 to -5 °C</td>      <td>-88 to 58°C</td>    </tr>  </tbody></table>'

In [21]:
# Save the table
facts_df.to_html('mars_facts.html')

In [22]:
# Append results to mars_data dictionary
mars_data['facts_html_table'] = facts_html_table
mars_data

{'news_title': "NASA's Mars 2020 Rover Goes Coast-to-Coast to Prep for Launch",
 'news_p': "The agency's first step in returning rocks from Mars just arrived at Kennedy Space Center. The Mars 2020 team now begins readying for a launch to the Red Planet this July.",
 'featured_image_url': 'https://spaceimages-mars.com/image/featured/mars3.jpg',
 'facts_html_table': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Mars - Earth Comparison</th>\n      <td>Mars</td>\n      <td>Earth</td>\n    </tr>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n      <td

## Scraping: Mars Hemispheres

In [23]:
# Mars Hemispheres URL to scrape
hemisphere_url = 'https://marshemispheres.com/'

# Visit URL
browser.visit(hemisphere_url)

In [24]:
# Create HTML and Beautiful Soup Object
hemisphere_html = browser.html
hemisphere_soup = bs(hemisphere_html, 'html.parser')

In [25]:
# Find all 'div' selectors that contain mars hemisphere information
results = hemisphere_soup.find_all('div', class_ = 'item')

In [26]:
# Create empty hemispheres data lists to append to 
hemisphere_data = []

# Base URL
base_url = 'https://marshemispheres.com/'

# Loop through the hemisphere data in 'results' and append the title and URL to the hemisphere_data list
for item in results:
    hemisphere_title = item.find('h3').text
    img_url = item.find('a', class_ = 'itemLink product-item')['href']
    
    # Go to the hemisphere page to find the full-resolution image
    browser.visit(base_url + img_url)
    
    # Create HTML and Beautiful Soup Object
    img_html = browser.html
    img_soup = bs(img_html, 'html.parser')
    
    # Full image URL
    final_img_url = base_url + img_soup.find('img', class_ = 'wide-image')['src']
    
    # Append the title and full image URL to the hemisphere_data list in a dictionary format
    hemisphere_data.append({'title': hemisphere_title, 'img_url': final_img_url })
    

In [27]:
# Print the list of hemisphere data dictionaries
hemisphere_data

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]

In [28]:
# Append results to mars_data dictionary
mars_data['hemisphere_data'] = hemisphere_data
mars_data

{'news_title': "NASA's Mars 2020 Rover Goes Coast-to-Coast to Prep for Launch",
 'news_p': "The agency's first step in returning rocks from Mars just arrived at Kennedy Space Center. The Mars 2020 team now begins readying for a launch to the Red Planet this July.",
 'featured_image_url': 'https://spaceimages-mars.com/image/featured/mars3.jpg',
 'facts_html_table': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Mars - Earth Comparison</th>\n      <td>Mars</td>\n      <td>Earth</td>\n    </tr>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2</td>\n      <td

In [29]:
# Close the browser
browser.quit()