In [1]:
import pandas as pd
import requests
from bs4 import BeautifulSoup
import os
from selenium import webdriver

# Retrieve latest headline and abstract

In [2]:
# Launch a visible (non-headless) Chrome session through Splinter,
# pointing it at the local chromedriver binary
from splinter import Browser
executable_path = dict(executable_path="C:/chromedriver")
browser = Browser("chrome", headless=False, **executable_path)

In [3]:
# Open the NASA Mars news page in the browser session started above
url = "https://mars.nasa.gov/news/"
browser.visit(url)

In [4]:
# The article list may not be in the DOM yet right after visit() returns.
# Wait (up to 5 s) for the first news slide so the snapshot below actually
# contains the elements the next cell selects.
browser.is_element_present_by_css("ul.item_list li.slide", wait_time=5)

# Snapshot the rendered page and parse it so the latest news title and
# paragraph text can be located
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [5]:
# select_one returns the first slide of the news list, which is treated here
# as the latest article; read its title and teaser text
element = soup.select_one("ul.item_list li.slide")
headline = element.find("div", attrs={"class": "content_title"}).text
article = element.find("div", attrs={"class": "article_teaser_body"}).text
print(f'Latest headline: {headline}\nArticle: {article}')

Latest headline: Testing Proves Its Worth With Successful Mars Parachute Deployment
Article: The giant canopy that helped land Perseverance on Mars was tested here on Earth at NASA’s Wallops Flight Facility in Virginia.


In [6]:
# Close the browser session that was used for the news page
browser.quit()

# Retrieve Mars featured image

In [7]:
# Start a fresh Chrome session and open the JPL featured-image page
executable_path = dict(executable_path="C:/chromedriver")
browser = Browser("chrome", headless=False, **executable_path)
url = "https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html"
browser.visit(url)

In [8]:
# Snapshot the JPL page's HTML and parse it for the featured-image lookup
html = browser.html
soup = BeautifulSoup(html, features='html.parser')

In [9]:
# Find the featured image tag in the parsed HTML and show its src attribute
# exactly as it is written in the page source
soup.find("img", attrs={"class": "headerimage fade-in"}).get("src")

'image/featured/mars3.jpg'

In [10]:
# Read the image's src from the live page: the browser reports the absolute
# URL, whereas the attribute parsed from the static HTML is a relative path
img_url = browser.find_by_xpath('//img[@class="headerimage fade-in"]')['src']

# This is the last use of the JPL session; close it so the Chrome instance is
# not left running when a later cell rebinds `browser` to a new session
browser.quit()

img_url

'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/image/featured/mars3.jpg'

In [11]:
# Use the requests library to download and save the image from the `img_url` above
import shutil
from IPython.display import Image

# The context manager releases the connection once the copy has finished
with requests.get(img_url, stream=True) as response:
    # Fail loudly on an HTTP error instead of saving an error page as a .jpg
    response.raise_for_status()
    # Ask urllib3 to undo any gzip/deflate content encoding so the bytes
    # copied from the raw stream are the actual image data
    response.raw.decode_content = True
    with open('feature_image.jpg', 'wb') as out_file:
        shutil.copyfileobj(response.raw, out_file)

# Display the saved image with IPython.display
Image(filename='feature_image.jpg')

# Retrieve Mars Facts

In [12]:
# Page holding the Mars facts table; pandas reads its tables in the next cell
url = "https://space-facts.com/mars/"

In [13]:
# read_html returns a list with one DataFrame per <table> found on the page
tables = pd.read_html(url)
# The facts table is picked by position.
# NOTE(review): index 2 depends on the page layout - re-check if the site changes
table = tables[2]
# Replace the parsed column labels with descriptive names (modifies the frame in place)
table.columns = ['description', 'fact']
table

Unnamed: 0,description,fact
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [14]:
# Render the facts table as an HTML string (the row index is included as a
# header column) and print the markup
html_table = table.to_html()
print(html_table)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>description</th>
      <th>fact</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>Equatorial Diameter:</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>1</th>
      <td>Polar Diameter:</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>2</th>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>3</th>
      <td>Moons:</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>4</th>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>5</th>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>6</th>
      <td>Surface Temperature:</td>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>7</th>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>8</th>
     

# Retrieve Mars Hemisphere Images

In [15]:
# Get high resolution images for each of Mars' hemispheres from the USGS
# Astrogeology search results; `base_url` is the site root that gets
# prefixed to the paths scraped further down
executable_path = dict(executable_path="C:/chromedriver")
browser = Browser("chrome", headless=False, **executable_path)
base_url = "https://astrogeology.usgs.gov"
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

In [16]:
# Snapshot the search-results page and parse it so the hemisphere entries can be listed
html = browser.html
soup = BeautifulSoup(html, features='html.parser')

In [17]:
# Visit each hemisphere's detail page and collect its title and image URL
hemisphere_image_urls = []

# Every search result on the listing page is a div with class "item"
links = soup.find_all("div", class_="item")

for link in links:
    title = link.find("h3").text
    # The description block links to the hemisphere's detail page; the href is
    # a path that gets prefixed with base_url
    detail_path = link.find("div", class_="description").a["href"]

    browser.visit(base_url + detail_path)
    pic_soup = BeautifulSoup(browser.html, 'html.parser')

    # Keep the image path in its own name so the notebook-level `url`
    # (the search page address) is not overwritten on every iteration
    img_path = pic_soup.find("img", class_="wide-image")["src"]

    img_dict = {"title": title, "img_url": base_url + img_path}
    print(img_dict["img_url"])

    hemisphere_image_urls.append(img_dict)

https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg
https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg


In [18]:
# Show the collected list of dictionaries, each with a 'title' and an 'img_url' key
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]

In [20]:
# Close the browser session that was used for the hemisphere pages
browser.quit()