In [1]:
# Dependencies
import requests
import pandas as pd
from bs4 import BeautifulSoup as bs
from splinter import Browser
from splinter.exceptions import ElementDoesNotExist
from pprint import pprint

In [2]:
# NASA Mars News
# URL of page to be scraped
url_news = "https://mars.nasa.gov/news"

# Retrieve page with the requests module
response_news = requests.get(url_news)

# Create BeautifulSoup object; parse with 'html5lib'
soup_news = bs(response_news.text, "html5lib")


In [3]:
# Scrape the NASA Mars News Site and collect the latest News Title
news_title = soup_news.find("div", class_="content_title").text.strip()
news_title

"The Launch Is Approaching for NASA's Next Mars Rover, Perseverance"

In [4]:
# Scrape the NASA Mars News Site and collect Paragraph Text
news_p = soup_news.find("div", class_="rollover_description_inner").text.strip()
news_p

"The Red Planet's surface has been visited by eight NASA spacecraft. The ninth will be the first that includes a roundtrip ticket in its flight plan."

In [5]:
# JPL Mars Space Images
# URL of page to be scraped
url_img = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

# Visit the url for JPL Featured Space Image
browser_img = Browser("firefox", headless=False)
browser_img.visit(url_img)

In [6]:
# HTML object
html_img = browser_img.html

# Parse HTML with Beautiful Soup
soup_image = bs(html_img, "lxml")

In [7]:
# Find the image url for the current Featured Mars Image
image_url = soup_image.find("li", class_="slide").find('a', class_="fancybox")['data-fancybox-href']

# Assign the url string to a variable called featured_image_url
featured_image_url = "https://www.jpl.nasa.gov/"+image_url
featured_image_url

'https://www.jpl.nasa.gov//spaceimages/images/largesize/PIA23960_hires.jpg'

In [8]:
# Mars Weather
# URL of page to be scraped
url_weather = "https://www.twitter.com/marswxreport"

# Visit the Mars Weather twitter account
browser_weather = Browser("firefox", headless=False)
browser_weather.visit(url_weather)

In [9]:
# HTML object
html_weather = browser_weather.html

# Parse HTML with Beautiful Soup
soup_weather = bs(html_weather, "html5lib")

In [10]:
# Scrape the latest Mars weather tweet from the page. Save the tweet text for the weather report as a variable called mars_weather
article_weather = soup_weather.find_all("article", role="article")
mars_weather = ""

# Loop through all posts to find "span", then look through all "span" to find text that start with "InSight", since not all posts are related to the weather on Mars
for article in article_weather:
    text_weather = article.find_all("span")
    for wea in text_weather:
        if wea.text.startswith("InSight"):
            mars_weather=wea.text
            print(mars_weather)
            break
    if mars_weather:
        break

InSight sol 566 (2020-06-29) low -88.4ºC (-127.1ºF) high -4.2ºC (24.4ºF)
winds from the WNW at 5.1 m/s (11.5 mph) gusting to 16.8 m/s (37.6 mph)
pressure at 7.70 hPa


In [32]:
# Mars Facts
# URL of page to be scraped
url_facts = "https://space-facts.com/mars/"

# Use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
tables = pd.read_html(url_facts)
mars_table = tables[0]
mars_table = mars_table.rename(columns={0:"", 1:"value"}).set_index("")

# Use Pandas to convert the data to a HTML table string
html_mars_table = mars_table.to_html(classes="mars_table").replace("\n", "").replace("text-align: right", "text-align: left")
html_mars_table

'<table border="1" class="dataframe mars_table">  <thead>    <tr style="text-align: left;">      <th></th>      <th>value</th>    </tr>    <tr>      <th></th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-87 to -5 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [14]:
# Mars Hemispheres
# Visit the USGS Astrogeology site 
url_main = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

# Retrieve page with the requests module
response_main = requests.get(url_main)

# Parse HTML with Beautiful Soup
soup_main = bs(response_main.text, "html5lib")

# Find all products in results
url_all = soup_main.find_all("div", class_="item")


# Create an empty list
url_list=[]

# List all urls for each links to the hemispheres
for sub_url in url_all:
    new_url = sub_url.find("a", class_="itemLink")["href"]
    new_url = "https://astrogeology.usgs.gov/"+new_url
    url_list.append(new_url)
url_list

['https://astrogeology.usgs.gov//search/map/Mars/Viking/cerberus_enhanced',
 'https://astrogeology.usgs.gov//search/map/Mars/Viking/schiaparelli_enhanced',
 'https://astrogeology.usgs.gov//search/map/Mars/Viking/syrtis_major_enhanced',
 'https://astrogeology.usgs.gov//search/map/Mars/Viking/valles_marineris_enhanced']

In [15]:
# Create an empty list and dictionary
hemisphere_image_urls = []
hem_dict = {}

# Loop through all urls to save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name
for hem_url in url_list:
    hem_response = requests.get(hem_url)
    hem_soup = bs(hem_response.text, "html5lib")
    hem_img = hem_soup.find("img", class_="wide-image")["src"]
    hem_img = "https://astrogeology.usgs.gov/"+hem_img
    hem_title = hem_soup.find("h2", class_="title").text
    hem_dict = {"title": hem_title, "img_url": hem_img}
    hemisphere_image_urls.append(hem_dict)
hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov//cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]

In [33]:
# Create a dictionary that includes all the information we scraped
mars_dic = {
    "news_title": news_title,
    "news_p": news_p,
    "featured_image_url": featured_image_url,
    "mars_weather": mars_weather,
    "html_mars_table": html_mars_table,
    "hemisphere_image_urls": hemisphere_image_urls
}
mars_dic

{'news_title': "The Launch Is Approaching for NASA's Next Mars Rover, Perseverance",
 'news_p': "The Red Planet's surface has been visited by eight NASA spacecraft. The ninth will be the first that includes a roundtrip ticket in its flight plan.",
 'featured_image_url': 'https://www.jpl.nasa.gov//spaceimages/images/largesize/PIA23960_hires.jpg',
 'mars_weather': 'InSight sol 566 (2020-06-29) low -88.4ºC (-127.1ºF) high -4.2ºC (24.4ºF)\nwinds from the WNW at 5.1 m/s (11.5 mph) gusting to 16.8 m/s (37.6 mph)\npressure at 7.70 hPa',
 'html_mars_table': '<table border="1" class="dataframe mars_table">  <thead>    <tr style="text-align: left;">      <th></th>      <th>value</th>    </tr>    <tr>      <th></th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <th