In [1]:
# Import dependencies
import os
import pandas as pd
from bs4 import BeautifulSoup as bs
from splinter import Browser
import requests

In [2]:
# URL of page to be scraped
url = 'http://mars.nasa.gov/news'

In [3]:
# Retrieve page with the requests module
response = requests.get(url)

In [4]:
# Create BeautifulSoup object
soup = bs(response.text, 'lxml')
type(soup)

bs4.BeautifulSoup

In [5]:
# Examine the results
# print(soup.prettify())

# Collect Latest News Title and Paragraph Text

In [6]:
# Scrape NASA Mars News and collect the latest News Title and Paragraph Text.
# Assign the text to variables that you can reference later.

news_titles = soup.find_all('div', class_='content_title')

nasa_titles = []

for title in news_titles:
    nasa_titles.append(title.text.strip())
print(nasa_titles)

['NASA to Broadcast Mars 2020 Perseverance Launch, Prelaunch Activities', "The Launch Is Approaching for NASA's Next Mars Rover, Perseverance", 'NASA to Hold Mars 2020 Perseverance Rover Launch Briefing', "Alabama High School Student Names NASA's Mars Helicopter", "Mars Helicopter Attached to NASA's Perseverance Rover", "NASA's Perseverance Mars Rover Gets Its Wheels and Air Brakes"]


In [7]:
# View title text from list above
# Comment this out - not needed

#for title in news_titles:
#    print(title.text)

In [8]:
# Paragraph Text

paragraphs = soup.find_all('div', class_='rollover_description_inner')
nasa_paragraphs = []
for paragraph in paragraphs:
    nasa_paragraphs.append(paragraph.text.strip())
print(nasa_paragraphs)

['Starting July 27, news activities will cover everything from mission engineering and science to returning samples from Mars to, of course, the launch itself.', "The Red Planet's surface has been visited by eight NASA spacecraft. The ninth will be the first that includes a roundtrip ticket in its flight plan.", "Learn more about the agency's next Red Planet mission during a live event on June 17.", "Vaneeza Rupani's essay was chosen as the name for the small spacecraft, which will mark NASA's first attempt at powered flight on another planet.", "The team also fueled the rover's sky crane to get ready for this summer's history-making launch.", 'After the rover was shipped from JPL to Kennedy Space Center, the team is getting closer to finalizing the spacecraft for launch later this summer.']


In [9]:
# View paragraph text from list above
# Comment this out - not needed

#for paragraph in paragraphs:
#   print(paragraph.text)

# JPL Mars Space Images - Featured Image

In [10]:
# Splinter
# JPL Mars Space Images - Featured Image

executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

image_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(image_url)


# HTML Object
html = browser.html
soup = bs(html, 'lxml')

images = soup.find('footer')
link = images.find('a')['data-fancybox-href']
featured_image_url = image_url + link

featured_image_url

'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars/spaceimages/images/mediumsize/PIA16469_ip.jpg'

# Mars Facts

In [11]:
# Use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

mars_facts_url = 'https://space-facts.com/mars/'

mars_facts = pd.read_html(mars_facts_url)
mars_facts

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [12]:
mars_df = mars_facts[0]
mars_df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [13]:
# Mars Facts - Rename the Columns
mars_df.rename(columns={0: "Mars", 1: "Mars Facts"})

Unnamed: 0,Mars,Mars Facts
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [14]:
# Save the Mars Facts Table as a DataFrame
mars_facts_html = mars_df.to_html

In [15]:
# Save Facts output as an HTML for later use
mars_df.to_html("resources/marsfacts.html")

# Mars Hemispheres: Titles and URL Links

In [16]:
# OPEN BROWSER FOR MARS HEMISPHERE TITLES

executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

titles = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(titles)

titles_html = browser.html
titles_soup = bs(titles_html, 'lxml')

In [17]:
# MARS HEMISPHERE TITLES ONLY - list type
# Comment this section out

# mars_hemi_titles = titles_soup.find_all('h3')
# hemisphere_mars_title =[]

# for x in mars_hemi_titles:
#    hemisphere_mars_title.append(x)
# print(hemisphere_mars_title)  

In [18]:
# MARS HEMISPHERE TITLES ONLY
mars_hemi_titles = titles_soup.find_all('h3')
for x in mars_hemi_titles:
    print(x.text)

Cerberus Hemisphere Enhanced
Schiaparelli Hemisphere Enhanced
Syrtis Major Hemisphere Enhanced
Valles Marineris Hemisphere Enhanced


In [19]:
# MARS HEMISPHERE LINKS
# MARS CERBERUS Hemispheres
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

cerberus_link = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/cerberus_enhanced'
browser.visit(cerberus_link)

cerberus_html = browser.html
cerberus_soup = bs(cerberus_html, 'lxml')
cerberus = cerberus_soup.find('li').a['href']
print(cerberus)

https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg


In [20]:
# MARS SCHIAPARELLI Hemispheres
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

schiaparelli_link = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/schiaparelli_enhanced'
browser.visit(schiaparelli_link)

schiaparelli_html = browser.html
schiaparelli_soup = bs(schiaparelli_html, 'lxml')
schiaparelli = schiaparelli_soup.find('li').a['href']
print(schiaparelli)

https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg


In [21]:
# MARS SYRTIS MAJOR Hemispheres
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

syrtis_major_link = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/syrtis_major_enhanced'
browser.visit(syrtis_major_link)

syrtis_major_html = browser.html
syrtis_major_soup = bs(syrtis_major_html, 'lxml')
syrtis_major = syrtis_major_soup.find('li').a['href']
print(syrtis_major)

https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg


In [22]:
# MARS valles_marineris Hemispheres
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

valles_marineris_link = 'https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced'
browser.visit(valles_marineris_link)

valles_marineris_html = browser.html
valles_marineris_soup = bs(valles_marineris_html, 'lxml')
valles_marineris = valles_marineris_soup.find('li').a['href']
print(valles_marineris)

https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg


In [23]:
# MARS HEMISPHERE LIST of DICTIONARY - TITLES AND URL LINKS
# Create a list of dictionaries based on the scrapes above
mars_dict = [
             {"title": "Cerberus Hemisphere Enhanced", "img_url": cerberus},
             {"title": "Schiaparelli Hemisphere Enhanced", "img_url": schiaparelli},
             {"title": "Syrtis Major Hemisphere Enhanced", "img_url": syrtis_major},
             {"title": "Valles Marineris Hemisphere Enhanced", "img_url": valles_marineris}
            ]
mars_dict

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]