In [1]:
# Dependencies
import re
import shutil

import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser

# Scrape mars.nasa.gov for most recent article info

In [213]:
# Fetch the NASA Mars news page. requests has no default timeout, so set one
# explicitly to keep a stalled connection from hanging the notebook.
url = 'https://mars.nasa.gov/news/'
response = requests.get(url, timeout=30)

# Stop here with a clear HTTP error instead of parsing an error page below
response.raise_for_status()

# Parse the webpage's html
soup = bs(response.text, 'html.parser')

# Used print(soup.prettify()) to observe html

# Extract Title and Description of first article from html.
# soup.find() returns only the first element matching each class.
news_title = soup.find(class_='content_title').text.strip()
news_p = soup.find(class_='rollover_description_inner').text.strip()

# Print Results
print(f'Title: "{news_title}"')
print(f'Description: "{news_p}"')

Title: "Mars Helicopter Attached to NASA's Perseverance Rover"
Description: "The team also fueled the rover's sky crane to get ready for this summer's history-making launch."


# Scrape jpl.nasa.gov images for featured Mars image

In [138]:
# Check location of chromedriver on the PATH. The path printed below is the
# one the Splinter cell passes to the browser as `executable_path`.
!which chromedriver

/usr/local/bin/chromedriver


In [139]:
# Initiate Splinter and open browser at desired url.
# Look chromedriver up on the PATH so the notebook is not tied to one machine's
# layout; fall back to the original location if the lookup finds nothing.
executable_path = {
    'executable_path': shutil.which('chromedriver') or '/usr/local/bin/chromedriver'
}
# headless=False is passed through to Splinter unchanged
browser = Browser('chrome', **executable_path, headless=False)
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [140]:
# *Requires the browser opened by the previous cell - run that cell first*
# Take the html currently loaded in the browser and parse it
html = browser.html
soup = bs(html, 'html.parser')

# The first link inside the page footer holds the image path
# in its 'data-fancybox-href' attribute
footer_link = soup.find('footer').find('a')
image_url = footer_link.get('data-fancybox-href')

# Prepend the JPL host to the scraped path to form the full url
featured_image_url = 'https://www.jpl.nasa.gov' + image_url
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA18432_ip.jpg


# Scrape twitter.com for latest Mars weather tweet

In [4]:
# Scrape the url provided (explicit timeout: requests has none by default)
url = 'https://twitter.com/marswxreport?lang=en'
response = requests.get(url, timeout=30)

# Fail with a clear HTTP error rather than parsing an error page
response.raise_for_status()

# Parse the webpage's html
soup = bs(response.text, 'html.parser')

# Used print(soup.prettify()) to observe html

# Text of the first tweet container found on the page
tweet_text = soup.find(class_='js-tweet-text-container').p.text.strip()

# Remove the unnecessary ending text: ' hPa' with the image link glued directly
# onto it. The link id differs from tweet to tweet, so match any id instead of
# the single hard-coded one (which only cleaned up one specific tweet).
mars_weather = re.sub(r' hPapic\.twitter\.com/\S+', '', tweet_text)

print(mars_weather)

InSight sol 485 (2020-04-07) low -93.4ºC (-136.1ºF) high -7.0ºC (19.4ºF)
winds from the WNW at 5.2 m/s (11.5 mph) gusting to 17.4 m/s (38.9 mph)
pressure at 6.50


# Scrape space-facts.com for Mars facts table

In [6]:
# Scrape the url provided
url = 'https://space-facts.com/mars/'

# pandas' `read_html` downloads the page itself and returns a list with one
# DataFrame per table it finds, so the separate requests/BeautifulSoup fetch
# (whose results were never used) is not needed here
tables = pd.read_html(url)

# After observing all tables resulting from above call,
# we found that the third table was the one we wanted (tables[2]).
# Use its first column (the fact labels) as the index; set_index moves the
# column into the index, so there is no leftover column to delete afterwards.
table = tables[2].set_index(0)

# Blank out the index name, rename the remaining column, and display
table.index.name = ''
table.columns = ['Value']
display(table)

# Convert table to HTML
html_table = table.to_html()
print(html_table)

Unnamed: 0,Value
,
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Value</th>
    </tr>
    <tr>
      <th></th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Equatorial Diameter:</th>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>


# Scrape astrogeology.usgs.gov for Mars photos

In [8]:
# Fetch the search results page for the enhanced Mars hemisphere products
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
response = requests.get(url)

# Parse the results page
soup = bs(response.text, 'html.parser')

# Each result is an element whose class attribute is 'itemLink product-item'
links = soup.find_all(class_='itemLink product-item')

# Build the url of every result's detail page from its href
links_list = ['https://astrogeology.usgs.gov/' + link.get('href') for link in links]

# Visit each detail page and collect its title and the link to the final image
hrefs = []
titles = []

for page_url in links_list:
    page = bs(requests.get(page_url).text, 'html.parser')

    # The final image is linked from the anchor whose text is exactly 'Original'
    full_res_href = page.find('a', href=True, text='Original')['href']

    # Titles are stored as, for example, 'Cerberus Hemisphere Enhanced';
    # .replace() strips the ' Enhanced' part
    cleaned_title = page.find(class_='title').text.strip().replace(' Enhanced', '')

    titles.append(cleaned_title)
    hrefs.append(full_res_href)

In [10]:
# Create list of dictionaries containing titles and links to each related full res image.
# Pair the two lists with zip() rather than indexing over a hard-coded range(4),
# so the result follows however many hemispheres were actually scraped instead
# of raising IndexError (fewer than four) or dropping entries (more than four).
hemisphere_image_urls = [
    {'title': title, 'img_url': href}
    for title, href in zip(titles, hrefs)
]

display(hemisphere_image_urls)

[{'title': 'Cerberus Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif'},
 {'title': 'Schiaparelli Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif'},
 {'title': 'Syrtis Major Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif'},
 {'title': 'Valles Marineris Hemisphere',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif'}]