In [1]:
#import dependencies
from splinter import Browser
from bs4 import BeautifulSoup
import pandas as pd
import time

In [2]:
# Point splinter at the local chromedriver binary and open a visible
# (non-headless) Chrome window.
# The Windows path is written as a raw string so its backslashes are taken
# literally: in a normal string "\P", "\C" and "\c" are invalid escape
# sequences, which newer Python versions warn about.
executable_path = {'executable_path': r'C:\Program Files (x86)\Chromedriver\chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

In [3]:
# Declare the URL of the NASA Mars news listing (the query string asks for the
# newest items first) and open it in the browser.
# Pause for 2 seconds after visit() to give the page time to finish loading
# before it is scraped.
url = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'
browser.visit(url)
time.sleep(2)

In [4]:
# Scrape the page currently shown in the browser into BeautifulSoup
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Find the title and teaser text for the most recent article.
# find() returns only the first element carrying each class; the listing is
# requested newest-first, so the first match is the latest article.
# NOTE(review): find() returns None when nothing matches, in which case the
# .text lookups below raise AttributeError.
title = soup.find(class_='content_title')
text = soup.find(class_='rollover_description_inner')
news_title = title.text
news_text = text.text

In [5]:
# Visit the JPL space-images page, then scrape the loaded page into
# BeautifulSoup to be dissected.
# visit() must come first: reading browser.html before navigating captures
# whatever page the browser was showing previously instead of this one.
url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
browser.visit(url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Declare a partial URL (scheme + host) to be used later when building the
# absolute address of the featured image.
beginning_url = 'https://www.jpl.nasa.gov'

In [6]:
# Use an XPath selector to locate the anchor whose class attribute is exactly
# "button fancybox", then click the first match in the live browser.
# NOTE(review): results[0] fails if the selector matches nothing.
xpath = '//a[@class="button fancybox"]'
results = browser.find_by_xpath(xpath)
img = results[0]
img.click()

In [7]:
# Wait 2 seconds after the click, then scrape the browser into soup and read
# the `src` attribute of the <img class="fancybox-image"> element.
# Save that value to a variable called `img_url`; it is a site-relative path,
# which a later cell joins to the JPL base URL.
time.sleep(2)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
img_url = soup.find("img", class_="fancybox-image")["src"]
img_url

'/spaceimages/images/mediumsize/PIA16227_ip.jpg'

In [8]:
# Build the absolute image address by prefixing the relative `img_url` path
# with the site root stored in `beginning_url`, then display the result.
featured_image_url = beginning_url + img_url
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA16227_ip.jpg'

In [9]:
# Point `url` at the Mars weather Twitter account, visit the page, and scrape
# the resulting HTML into BeautifulSoup.
# NOTE(review): unlike the other page visits in this notebook, there is no
# pause between visit() and reading browser.html here.
url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [10]:
# Take the first <p> whose class attribute matches the tweet-text class string
# and store its text in `mars_weather`.
# NOTE(review): presumably the first match is the most recent tweet - confirm
# against the page layout. find() returns None when nothing matches, which
# would make the .text lookup raise AttributeError.
tweet = soup.find("p", class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text")
mars_weather = tweet.text
mars_weather

'Well, maybe not tomorrow, but the Mars rover Opportunity is sleeping through a massive dust storm until the sun comes out on Mars again, read more at #wral\nhttps://www.wral.com/mars-rover-sleeping-through-massive-dust-storm/17627296/\xa0…pic.twitter.com/Ga2uY1KsGQ'

In [11]:
# Declare another new URL for Mars facts and let pandas parse the tables on
# that page. read_html returns a list of DataFrames, which is displayed below.
url = 'https://space-facts.com/mars/'
tables = pd.read_html(url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [12]:
# Build the Mars facts table from the first table pandas parsed from the page
# and keep it in `facts_df` for later.
# Work on a copy and avoid inplace=True so this cell can be re-run safely:
# mutating tables[0] in place moved its first column into the index, so a
# second run failed when assigning two column names to a one-column frame.
facts_df = tables[0].copy()
facts_df.columns = ["Classification", "Fact"]
facts_df = facts_df.set_index('Classification')
# Also write the table as HTML to table.html in the working directory.
facts_df.to_html('table.html')
facts_df

Unnamed: 0_level_0,Fact
Classification,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [13]:
# Declare another URL: the USGS Astrogeology search results for enhanced Mars
# hemisphere images. Visit it and scrape the page into BeautifulSoup so the
# links to the individual hemisphere pages can be extracted.
url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
browser.visit(url)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
# Site root, prefixed to the hemisphere links when they are visited later.
base_url = "https://astrogeology.usgs.gov"

In [14]:
# Gather the link to each hemisphere's page: for every <div class="item"> in
# the search results, take the href of its first anchor. The hrefs are stored
# in the img_url list, which is iterated over in a later step.
hemispheres = soup.find_all("div", class_="item")
img_url = [item.a["href"] for item in hemispheres]

In [15]:
# Loop through the img_url list, visit each hemisphere page, scrape it into
# soup, and locate the title and the image hyperlink for each enhanced image.
# Each result is stored as a {"title": ..., "img_url": ...} dictionary.

hemisphere_image_urls = []

for hemi in img_url:
    url = base_url + hemi
    browser.visit(url)
    time.sleep(2)  # give the page time to load before scraping it
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')

    header = soup.find(class_="title")
    hemi_title = header.text
    print(hemi_title)

    # Take the link from the first <li> of the first "downloads" list.
    # Look the tag up directly instead of indexing the <ul>'s raw children:
    # those children include whitespace text nodes, so a fixed position such
    # as [1] only reaches the intended <li> while the page's formatting stays
    # exactly the same.
    links = soup.find_all(class_="downloads")
    link = links[0].ul
    full_link = link.find("li").a["href"]
    print(full_link)
    print("-"*40)
    hemisphere_image_urls.append({"title": hemi_title, "img_url": full_link})

<div class="content_title"><a href="/news/8350/nasa-encounters-the-perfect-storm-for-science/" target="_self">NASA Encounters the Perfect Storm for Science</a></div>
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
----------------------------------------
Cerberus Hemisphere Enhanced
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg
----------------------------------------
Schiaparelli Hemisphere Enhanced
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg
----------------------------------------
Syrtis Major Hemisphere Enhanced
http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg
----------------------------------------


In [16]:
# Gather every scraped result into a single dictionary keyed by result name.
combined_dictionary = dict(
    news_title=news_title,
    news_text=news_text,
    featured_image_url=featured_image_url,
    mars_weather=mars_weather,
    hemisphere_image_urls=hemisphere_image_urls,
    facts_df=facts_df,
)

In [17]:
# Display the combined results as the cell output.
combined_dictionary

{'facts_df':                                                Fact
 Classification                                     
 Equatorial Diameter:                       6,792 km
 Polar Diameter:                            6,752 km
 Mass:                 6.42 x 10^23 kg (10.7% Earth)
 Moons:                          2 (Phobos & Deimos)
 Orbit Distance:            227,943,824 km (1.52 AU)
 Orbit Period:                  687 days (1.9 years)
 Surface Temperature:                  -153 to 20 °C
 First Record:                     2nd millennium BC
 Recorded By:                   Egyptian astronomers,
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA16227_ip.jpg',
 'hemisphere_image_urls': [{'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
   'title': 'Cerberus Hemisphere Enhanced'},
  {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
   'title': 'Sc