In [157]:
# Import Dependencies
! pip install splinter

import pandas as pd

from splinter import Browser
from bs4 import BeautifulSoup
import requests

import pprint



In [46]:
## Step 1A:  Scrape latest news title and paragraph text from NASA Mars News Site (Beautiful Soup)

# Set URL and retrieve page; fail fast on a hung connection or an HTTP error
url = "http://mars.nasa.gov/news/"
response = requests.get(url, timeout=30)
response.raise_for_status()

# Create Soup object
soup = BeautifulSoup(response.text, "html.parser")

# Search inside <body> when present, otherwise fall back to the whole document
page = soup.body if soup.body is not None else soup

# find() returns None when the expected markup is absent, so verify each piece
# before dereferencing it instead of failing with an opaque AttributeError
title_div = page.find("div", class_="content_title")
para_div = page.find("div", class_="rollover_description_inner")
if title_div is None or title_div.a is None or para_div is None:
    raise ValueError(f"Could not locate the latest news item in the HTML returned by {url}")

# Scrape content
news_title = title_div.a.text.strip()
news_para = para_div.text.strip()

print(news_title)
print(news_para)

NASA Readies Perseverance Mars Rover's Earthly Twin
Did you know NASA's next Mars rover has a nearly identical sibling on Earth for testing? Even better, it's about to roll for the first time through a replica Martian landscape.


In [113]:
## Step 1B:  Find featured image (Splinter)

# set path for chromedriver
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

# Set URL and retrieve page
url1 = "https://www.jpl.nasa.gov/spaceimages/"

try:
    browser.visit(url1)

    # Navigate to large image
    browser.links.find_by_partial_text("FULL IMAGE").click()
    browser.links.find_by_partial_text("more info").click()
    # "figure.lede" is a CSS selector (tag + class), so query it with find_by_css
    browser.find_by_css("figure.lede").first.click()

    # After the final click the browser is on the full-size image itself
    image_url = browser.url
finally:
    # Always shut down the Chrome window/driver, even if a navigation step fails
    browser.quit()

print(image_url)



https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17200_hires.jpg


In [48]:
## Step 1C:  Scrape facts about Mars (Pandas)

# Set URL; pd.read_html downloads and parses the page itself, so no separate
# requests.get() call is needed here
url2 = "http://space-facts.com/mars/"

# One DataFrame per <table> element found on the page
mars_tables = pd.read_html(url2)
mars_tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [49]:
##Step 1C (cont): Separate tables and clean -- mars_facts
# Work on a copy so relabelling the columns does not alter mars_tables in place
mars_facts = mars_tables[0].copy()
mars_facts.columns = ["Planet Profile", "Planet Data"]

# Display without the row index. Styler.hide_index() was removed in pandas 2.0,
# so use Styler.hide(axis="index") when available and fall back otherwise.
facts_styler = mars_facts.style
facts_styler.hide(axis="index") if hasattr(facts_styler, "hide") else facts_styler.hide_index()

Planet Profile,Planet Data
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [50]:
##Step 1C (cont): Separate tables and clean -- mars_earth
# Work on a copy so relabelling the columns does not alter mars_tables in place
mars_earth = mars_tables[1].copy()
mars_earth.columns = ["Mars-Earth Comparison", "Mars", "Earth"]

# Display without the row index. Styler.hide_index() was removed in pandas 2.0,
# so use Styler.hide(axis="index") when available and fall back otherwise.
compare_styler = mars_earth.style
compare_styler.hide(axis="index") if hasattr(compare_styler, "hide") else compare_styler.hide_index()

Mars-Earth Comparison,Mars,Earth
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days
Temperature:,-87 to -5 °C,-88 to 58°C


In [51]:
## Step 1C (cont): Convert data to HTML strings
# index=False leaves the numeric row index out of the exported tables, matching
# the index-free display used when the tables were cleaned
facts_html = mars_facts.to_html(index=False)
compare_html = mars_earth.to_html(index=False)

In [155]:
## Step 1D:  Obtain high resolution images for each of Mars' hemispheres

# Set URL and retrieve the search-results page. The HTML is parsed from this
# requests response, so no browser session is opened in this cell.
url3 = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
response = requests.get(url3, timeout=30)
response.raise_for_status()

# Create Soup object, grab each hemisphere division
soup = BeautifulSoup(response.text, "html.parser")
images = soup.find_all("div", class_="item")

# Print and review the first result (skipped when the search returned nothing)
print(len(images))
if images:
    print(images[0])


4 <div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/><div class="description"><h3>Cerberus Hemisphere Enhanced</h3></div></a><span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/><p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p></div>


In [162]:
# Loop through each soup object ("images"); request the hemisphere's detail page and extract the full res image URL; store URL and name
# NOTE:  each HREF is a partial (site-relative) href, so it is appended to the base USGS url

hemi_title = []
hemi_url = []

# Loop-invariant, so it is defined once instead of on every iteration
base_url = "https://astrogeology.usgs.gov"

# NOTE: the loop variable keeps the name `image` because a later cell reads it after the loop ends
for image in images:

    # Extract title
    title = image.h3.text

    # Extract href from image and append to base URL to get to the detail page
    full_url = base_url + image.a["href"]

    # Fetch the detail page. Dedicated names (detail_soup, hemi_image_url) avoid
    # overwriting the earlier `soup` and the featured-image `image_url`.
    response1 = requests.get(full_url, timeout=30)
    response1.raise_for_status()
    detail_soup = BeautifulSoup(response1.text, "html.parser")

    # find() returns None when the block is missing; report which page failed
    downloads = detail_soup.find("div", class_="downloads")
    if downloads is None or downloads.a is None:
        raise ValueError(f"No download link found on {full_url}")
    hemi_image_url = downloads.a["href"]

    # Append both values together so hemi_title and hemi_url always stay aligned
    hemi_title.append(title)
    hemi_url.append(hemi_image_url)

    print(title)
    print(hemi_image_url)
    print("--------------------------------------------------------------------------------------")

Cerberus Hemisphere Enhanced
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
--------------------------------------------------------------------------------------
Schiaparelli Hemisphere Enhanced
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg
--------------------------------------------------------------------------------------
Syrtis Major Hemisphere Enhanced
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg
--------------------------------------------------------------------------------------
Valles Marineris Hemisphere Enhanced
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg
--------------------------------------------------------------------------------------


In [163]:
# Display the list of image URLs accumulated in hemi_url
hemi_url


['https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg']

In [146]:
# Scratch check for a single hemisphere: build its detail-page URL and extract the full res image URL
# NOTE:  because HREF stored a partial href, it will be appended the base USGS url

# set path for chromedriver
# NOTE: this session is not used in this cell; it is kept because a later cell calls browser.visit()
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path, headless=False)

# hemi_title / hemi_url are deliberately NOT reset here, so the values already
# collected by the hemisphere loop are preserved

base_url = "https://astrogeology.usgs.gov"
# `image` is the last item left over from the preceding `for image in images` loop
full_url = base_url + image.a["href"]

# Fetch the detail page; fail fast on a hung connection or an HTTP error
response = requests.get(full_url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.text, "html.parser")

# `target` is the href string of the first link inside the "downloads" block
target = soup.find("div", class_="downloads").a["href"]

print(base_url)
print("----------")
print(full_url)
print("----------")
print(target)

# Splinter alternatives that were tried and set aside:
#   - clicking the "div.downloads" element downloads the file
#   - following the "Sample" link clicks through to the page, but the URL will not download

https://astrogeology.usgs.gov
----------
https://astrogeology.usgs.gov/search/map/Mars/Viking/valles_marineris_enhanced
----------
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg


In [145]:
# `target` already holds the href string extracted above (not a Tag), so display it directly
target

'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'

In [107]:
# Repeat of the featured-image navigation; relies on `browser` and `url1` defined in earlier cells
browser.visit(url1)

# Navigate to large image
browser.links.find_by_partial_text("FULL IMAGE").click()
browser.links.find_by_partial_text("more info").click()
# "figure.lede" is a CSS selector (tag + class), so query it with find_by_css
browser.find_by_css("figure.lede").first.click()

# After the final click the browser is on the full-size image itself
image_url = browser.url
print(image_url)


https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA07137_hires.jpg
