In [4]:
# Dependencies
import os
from bs4 import BeautifulSoup as bs
import requests
from splinter import Browser
import pandas as pd
import re


In [6]:
# Location of the chromedriver binary. Taken from the CHROMEDRIVER_PATH
# environment variable when it is set, so the notebook is not tied to one
# machine's layout; otherwise falls back to the original /usr/local/bin path.
executable_path = {
    'executable_path': os.environ.get('CHROMEDRIVER_PATH', '/usr/local/bin/chromedriver')
}
# headless=False: drive a visible Chrome window rather than a headless one.
browser = Browser('chrome', **executable_path, headless=False)


In [25]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/'
browser.visit(url)
# NOTE(review): the article list on this page appears to be filled in by
# JavaScript after the initial load (TODO confirm). Give the first headline
# container up to 10 seconds to appear before snapshotting the markup, so the
# later soup.find('div', class_='content_title') does not come back as None.
# The call only waits; it returns False on timeout instead of raising.
browser.is_element_present_by_css('div.content_title', wait_time=10)
html = browser.html

In [26]:
# Parse the captured news-page markup into a BeautifulSoup tree using the
# 'html.parser' backend. The next scraping cell queries this `soup` object.
# NOTE: the name `soup` is rebound by the later parse cells, so the cells
# must be run in order.
soup = bs(html, 'html.parser')

In [27]:
# Examine the results, then determine element that contains sought info
#print(soup.prettify())

In [28]:
# Collect the latest News Title and Paragraph Text from the NASA Mars News page:
# take the first headline container and the first teaser container in the tree.
title_div = soup.find('div', attrs={'class': 'content_title'})
news_title = title_div.get_text()
teaser_div = soup.find('div', attrs={'class': 'article_teaser_body'})
news_p = teaser_div.get_text()

# Show both values, one per line
print("News Title:"+ news_title, "Paragraph:"+ news_p, sep="\n")


News Title:For InSight, Dust Cleanings Will Yield New Science
Paragraph:Wind can be crucial to clearing dust from spacecraft solar panels on Mars. With InSight's meteorological sensors, scientists get their first measurements of wind and dust interacting "live" on the Martian surface.  


In [29]:
# URL of page to be scraped: the JPL space-images search, Mars category.
# Stored under its own name because a later cell assigns the final image
# address to `featured_image_url`; sharing that name meant re-running this
# cell afterwards would visit the image file instead of the search page.
jpl_images_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(jpl_images_url)
image_html = browser.html

In [30]:
# Parse the captured JPL image-page markup with the 'html.parser' backend.
# This rebinds `soup`, replacing the tree built from the previously parsed page.
soup = bs(image_html, 'html.parser')

In [31]:
#print(soup.prettify())

In [32]:
# Find the image url for the current Featured Mars Image.
# The first anchor carrying the 'button fancybox' classes exposes a
# site-relative image path in its 'data-fancybox-href' attribute.
home_url = 'https://www.jpl.nasa.gov'
image_article = soup.find_all('a', class_='button fancybox')
first_button = image_article[0]
image_url = first_button['data-fancybox-href']
# Prefix the site root to turn the relative path into an absolute URL
featured_image_url = ''.join([home_url, image_url])

print('Featured Image URL: ', featured_image_url, sep='')

Featured Image URL: https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA16092_ip.jpg


In [33]:
# URL of page to be scraped: the 'marswxreport' Twitter feed (lang=en)
twitter_url = 'https://twitter.com/marswxreport?lang=en'
# Load the feed in the controlled browser and keep the page markup for parsing
browser.visit(twitter_url)
twitter_html = browser.html

In [34]:
# Parse the captured Twitter markup with the 'html.parser' backend.
# This rebinds `soup`, replacing the tree built from the previously parsed page.
soup = bs(twitter_html, 'html.parser')

In [35]:
#print(soup.prettify())

In [36]:
# Scrape the latest Mars weather tweet from the page: the text of the first
# tweet-text container in the parsed markup.
tweet_text = soup.find('div', class_="js-tweet-text-container").text
# Remove the attached-media link. The dots are escaped so that only a literal
# "pic.twitter.com..." token is removed; unescaped, each '.' matched any character.
mars_weather = re.sub(r"pic\.twitter\.com\S+", "", tweet_text)
print("Current weather on Mars is: "+mars_weather)

Current weather on Mars is: 
InSight sol 156 (2019-05-05) low -99.2ºC (-146.6ºF) high -18.1ºC (-0.6ºF)
winds from the SW at 4.7 m/s (10.5 mph) gusting to 13.8 m/s (30.8 mph)
pressure at 7.40 hPa



In [45]:
# URL of page to be scraped; the next cell passes it to pandas.read_html
space_facts_url = 'https://space-facts.com/mars/'

In [46]:
# Use Pandas to scrape the table containing facts about the planet.
# read_html returns a list of DataFrames, one per table it finds at the URL.
mars_facts = pd.read_html(space_facts_url)
# Display the first table, which holds the Mars facts (see the output below)
mars_facts[0]

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [65]:
# Work on a copy of the scraped table. Without it, `mars_df` is just another
# name for mars_facts[0], and assigning new column labels below would also
# relabel the frame that the previous cell displayed.
mars_df = mars_facts[0].copy()
#Rename dataframe columns
mars_df.columns = ['Mars Facts', 'Details']

mars_df

Unnamed: 0,Mars Facts,Details
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [48]:
# Visit the USGS Astrogeology site: search results for "hemisphere enhanced"
# restricted to target Mars (see the query string)
astrogeology_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
# Load the results page and keep its markup for parsing
browser.visit(astrogeology_url)
astrogeology_html = browser.html

In [41]:
# Parse the captured USGS search-results markup with the 'html.parser' backend.
# This rebinds `soup`; the hemisphere loop below reads its result entries from it.
soup = bs(astrogeology_html, 'html.parser')

In [42]:
#print(soup.prettify())

In [43]:
# Obtain high resolution images for each of Mars's hemispheres.
# Every 'description' block on the results page links to a detail page;
# that page is visited in turn to read the address of its 'wide-image'.
usgs_root = 'https://astrogeology.usgs.gov'
description = soup.find_all('div', class_='description')

hem_title = []

for result in description:
    # hrefs and image sources are site-relative, so prefix the site root
    detail_url = usgs_root + result.find('a')['href']
    browser.visit(detail_url)
    detail_soup = bs(browser.html, 'html.parser')
    wide_images = detail_soup.find_all('img', class_='wide-image')
    full_image_url = usgs_root + wide_images[0]['src']
    # one record per hemisphere: its heading text plus the full-size image URL
    hem_title.append({
        'title': result.find('h3').text.strip(),
        'img_url': full_image_url,
    })

hem_title

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]