## Step 1.  Scraping

Initiating the task by using BeautifulSoup, Pandas, and Requests/Splinter to scrap NASA Mars news site

In [2]:
# import Dependencies
from bs4 import BeautifulSoup as bs
import pandas as pd
import requests
from splinter import Browser
import time

#### Part I. NASA Mars News

First, scrape the lastest News (Title and Text content) from NASA Mars News Site

In [3]:
# URL of NASA Mars News to be scraped
url_1 = 'https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest'

# Retrieve web content thru requests/response ==> Create a Beautiful Soup object
response = requests.get(url_1)

soup_1 = bs(response.text, "html.parser")
type(soup_1)

bs4.BeautifulSoup

In [15]:
 # Assign the title to variables
news_title = soup_1.find_all('div', class_='content_title')[0].find('a').text.strip()

print(news_title)

 # Assign the text content to variables
news_p = soup_1.find_all('div', class_='rollover_description_inner')[0].text.strip()

print(news_p)

NASA's Curiosity Mars Rover Finds a Clay Cache
The rover recently drilled two samples, and both showed the highest levels of clay ever found during the mission.


#### Part II. PL Mars Space Images - Featured Image

Use splinter to scrape the current Mars Images from JPL

In [29]:
# Set the executable path and deploy the chrome browser in splinter
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path)

In [30]:
# URL of JPL Mars pictures to be scraped
url_2 = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'

#Visit the JPL website
browser.visit(url_2)

In [31]:
# Find and execute the full image button
full_image_elem = browser.find_by_id('full_image')
full_image_elem.click()

In [32]:
# Find more picture objects by clicking on "more info" button
browser.is_element_present_by_text('more info', wait_time=10)
more_info_elem = browser.find_link_by_partial_text('more info')
more_info_elem.click()

In [37]:
# assign html content
html = browser.html
# Create a new Beautiful Soup object
soup_2 = bs(html, 'html.parser')

In [38]:
# retrieve image's url address
img_url_partial = soup_2.select_one('figure.lede a img').get("src")
print(img_url_partial)

/spaceimages/images/largesize/PIA19101_hires.jpg


In [39]:
# combine image url and JPL url
img_url = f'https://www.jpl.nasa.gov{img_url_partial}'
print(img_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA19101_hires.jpg


#### Part III. Mars Weather

Use splinter to scrape the latest Mars weather information from the Mars Weather twitter account

In [40]:
# Execute Chromedriver (add in again in case you close the browser)
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path)

In [43]:
# Load URL of Mars Weather twitter account
url_3 = 'https://twitter.com/marswxreport?lang=en'

#Visit the Mars Weather twitter account
browser.visit(url_3)

In [44]:
# assign html content
html = browser.html
# Create a Beautiful Soup object
soup_3 = bs(html, 'html.parser')

In [46]:
#scrap latest Mars weather tweet
mars_weather = soup_3.find_all('p', class_='TweetTextSize TweetTextSize--normal js-tweet-text tweet-text')[0].text

#print to check tweet
print(mars_weather)

InSight sol 195 (2019-06-14) low -105.6ºC (-158.1ºF) high -23.8ºC (-10.8ºF)
winds from the SSE at 4.3 m/s (9.7 mph) gusting to 14.5 m/s (32.5 mph)
pressure at 7.60 hPapic.twitter.com/3LNNz5uXCd


#### Part IV. Mars Facts

Use Pandas to scrape the table containing facts about the planet from Mars Facts webpage

In [None]:
# URL of Mars Facts webpage to be scraped
url_4 = 'https://space-facts.com/mars/'

In [52]:
# Retrieve the url table
profile_table = pd.read_html(url_4)

# convert table info into dataframe
df = profile_table[0]

# rename the columns
df.columns=['description','value']
df

Unnamed: 0,description,value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [19]:
#Set the index to the description column
df.set_index('description', inplace=True)
df

Unnamed: 0_level_0,value
description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [54]:
# Deploy the DataFrame to HTML
df.to_html('MarsFactsTable.html')

#### Part V. Mars Hemisperes

Obtain high resolution images for each of Mar's hemispheres from USGS Astrogeology site

In [14]:
# Execute Chromedriver (add in again in case you close the browser)
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', **executable_path)

In [15]:
# URL of USGS Astrogeology site
url_5 = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

#Visit USGS Astrogeology site
browser.visit(url_5)

In [16]:
# assign html content
html = browser.html
# Create a new Beautiful Soup object
# soup_5 = bs(html, 'html.parser')

In [17]:
# assign image objects to a new list
hemisphere_images = []

# Get a list of all of the hemisphere images
links = browser.find_by_css("a.product-item h3")

# Loop through all the links, find the anchor and return the "href"
for i in range(len(links)):
    hemisphere = {}
    
    # Find the elements on each loop
    browser.find_by_css("a.product-item h3")[i].click()
    # locate image anchor tag and extract the href
    sample_elem = browser.find_link_by_text('Sample').first
    hemisphere['img_url'] = sample_elem['href']

    # Get Hemisphere title
    hemisphere['title'] = browser.find_by_css("h2.title").text
    # Append hemisphere image objects to the list
    hemisphere_images.append(hemisphere)
    
    # navigate back
    browser.back()

In [18]:
# review saved images List
hemisphere_images

[{'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]