In [2]:
# Dependencies
from splinter import Browser
from bs4 import BeautifulSoup
import pandas as pd
import time
from selenium.webdriver.chrome.options import Options  


In [3]:
# Chromedriver
# Build the Chrome options that are handed to the driver at launch.
options = Options()
options.add_argument('--no-sandbox') # Bypass OS security model

# Location of the chromedriver binary, relative to the working directory.
executable_path = {'executable_path': '../chromedriver.exe'}
# Hand the Options object to splinter through its own `options` keyword.
# The former `chrome_options=` keyword is not a splinter parameter; it was only
# forwarded to Selenium, where it is deprecated in favour of `options`.
browser = Browser('chrome', **executable_path, headless=False, options=options)

In [4]:
# Navigate to the NASA Mars News Site
url = 'https://mars.nasa.gov/news/'
browser.visit(url)
# Give the news list up to 5 seconds to appear before the page source is read;
# the headline lookup later in the notebook expects an <li class="slide"> element.
# The result is assigned so the cell does not echo a bare True/False.
news_list_loaded = browser.is_element_present_by_css('li.slide', wait_time=5)

In [5]:
# Capture the page source as currently rendered in the browser
html = browser.html
# Build a searchable tree from that markup using the 'html.parser' backend
soup = BeautifulSoup(markup=html, features='html.parser')

In [6]:
# Locate the first news slide, then pull the headline text out of it
nasa_header = soup.find(name='li', attrs={'class': 'slide'})
headline_div = nasa_header.find(name='div', attrs={'class': 'content_title'})
news_title = headline_div.get_text()
news_title

"NASA's Perseverance Rover Is Midway to Mars "

In [7]:
# Pull the teaser paragraph that accompanies the first headline
teaser_div = nasa_header.find(name='div', attrs={'class': 'article_teaser_body'})
news_p = teaser_div.get_text()
news_p

"Sometimes half measures can be a good thing – especially on a journey this long. The agency's latest rover only has about 146 million miles left to reach its destination."

In [8]:
# Navigate to the JPL Images Site and click through to the full-size image page
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
base_url = 'https://www.jpl.nasa.gov'
browser.visit(url)
browser.find_by_id('full_image').click()
browser.links.find_by_partial_text('more info').click()
# Wait (up to 5 seconds) for the main image to be present, rather than sleeping
# for a fixed interval and hoping the detail page has finished rendering.
main_image_loaded = browser.is_element_present_by_css('img.main_image', wait_time=5)

# HTML object
html = browser.html
# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')

# Site-relative path of the main image, taken from its src attribute
nasa_photo = soup.find('img', class_='main_image')['src']
# Keep the absolute URL in a variable so it can be reused after this cell,
# not just displayed once.
featured_image_url = base_url + nasa_photo
featured_image_url


'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA18904_hires.jpg'

In [9]:
# Use Pandas to scrape Mars facts data
tables = pd.read_html('https://space-facts.com/mars/')

# Work on a copy of the 1st table so the parsed list in `tables` stays untouched
mars_df = tables[0].copy()

# Name the columns, then use the description column as the index.
# set_index returns a new frame here instead of mutating in place.
mars_df.columns = ['Description', 'Value']
mars_df = mars_df.set_index('Description')
mars_df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [10]:
# Render mars_df as an HTML table and write it to a file
facts_table_file = 'mars_facts_table.html'
mars_df.to_html(buf=facts_table_file)

In [11]:
# Navigate to the astrogeology site to get hemisphere images
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

# HTML object
html = browser.html

# Parse HTML with Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')

# Retrieve all items that contain Mars hemisphere information
items = soup.find_all('div', class_='item')

# Create empty list for hemisphere urls
hemisphere_image_urls = []

# Site root; the hrefs and image srcs scraped below are appended to it
hemispheres_url = 'https://astrogeology.usgs.gov'

# Visit each hemisphere's detail page and record its title and full image URL
for item in items:
    # Store the hemisphere title
    title = item.find('h3').text

    # Path of the hemisphere's detail page, taken from the item's link
    detail_path = item.find('a', class_='itemLink product-item')['href']

    # Navigate to the detail page by combining the site root and the path
    browser.visit(hemispheres_url + detail_path)

    # Parse the detail page into its own soup so that `soup` keeps referring
    # to the search results page instead of being overwritten on every pass
    detail_soup = BeautifulSoup(browser.html, 'html.parser')

    # Build the absolute URL of the full image from its src attribute
    full_img_url = hemispheres_url + detail_soup.find('img', class_='wide-image')['src']

    # Append the retrieved information to the list of dictionaries
    hemisphere_image_urls.append({"title": title, "img_url": full_img_url})

# Display hemisphere_image_urls
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]