## Scrape the NASA Mars News Site

In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
from selenium import webdriver
import os
import time

In [2]:
# Address of the NASA Mars news listing that will be scraped
url = "https://mars.nasa.gov/news"

In [3]:
# Open the news page in a Selenium-driven Chrome window and capture the
# page source as seen by the browser.
chromedriver = "/usr/local/bin/chromedriver"  # machine-specific absolute path
# NOTE(review): exporting this variable may not be needed by the Python
# Selenium bindings, since the path is also passed to Chrome() below -- confirm.
os.environ["webdriver.chrome.driver"] = chromedriver
driver = webdriver.Chrome(chromedriver)
driver.get(url)
# Fixed 5-second pause before reading the DOM -- presumably to let
# client-side rendering finish; confirm this is long enough.
time.sleep(5)
html = driver.page_source

In [4]:
# Parse the captured page source into a BeautifulSoup tree using the
# 'lxml' parser
soup = BeautifulSoup(html, 'lxml')

In [5]:
# Examine the results, then determine element that contains sought info
# print(soup.prettify())

In [6]:
# Collect every <li> element carrying the class "slide"; the result can be
# indexed and iterated like a list
results = soup.find_all('li', class_="slide")

In [7]:
# Gather all <a> tags inside the first slide (presumably the most recent
# news item -- confirm the page lists newest first)
text = results[0].find_all('a')

In [8]:
# The headline is taken from the text of the slide's second <a> tag
news_title = text[1].text

In [9]:
# The teaser paragraph is the text of the "rollover_description_inner" <div>
# found inside the slide's first <a> tag
news_p = text[0].find('div', class_="rollover_description_inner").text

In [10]:
# Show the scraped headline and teaser, separated by a rule, for a visual check
print(
    f"Title: {news_title}",
    "---------",
    f"Paragraph: {news_p}",
    sep="\n",
)

Title: NASA Sets Sights on May 5 Launch of InSight to Mars
---------
Paragraph: NASA’s next mission to Mars, InSight, is scheduled to launch Saturday, May 5, on a first-ever mission to study the heart of the Red Planet.


In [11]:
# Shut down the Selenium session. quit() closes every window and ends the
# WebDriver session (including the chromedriver process), whereas close()
# only closes the current window and can leave the driver process running.
driver.quit()

## Scrape the JPL Featured Space Image

In [14]:
# import splinter
from splinter import Browser

In [15]:
# Point splinter at the local chromedriver binary and open a visible
# (non-headless) Chrome browser. The path is kept in a dict so it can be
# unpacked as a keyword argument.
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

In [16]:
# Load the JPL space-images page (URL filtered to the Mars category) in the
# splinter browser
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [17]:
# Click the link whose text contains 'FULL IMAGE', then pause 5 seconds
# (presumably so the resulting view can finish loading -- confirm)
browser.click_link_by_partial_text('FULL IMAGE')
time.sleep(5)

In [18]:
# Click the link whose text contains 'more info'
browser.click_link_by_partial_text('more info')

In [19]:
# Look up the links on the current page whose href contains 'images/largesize'
links_found = browser.find_link_by_partial_href('images/largesize')

In [20]:
# Read the href of the matched link; this is kept as the featured image URL
featured_image_url = links_found['href']

In [21]:
# Display the featured image URL to verify the scrape result
featured_image_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA18906_hires.jpg'

In [22]:
# Quit the splinter browser session now that the image URL has been captured
browser.quit()

## Scrape Mars Weather

In [23]:
# Address of the Mars weather report Twitter feed that will be scraped
url = "https://twitter.com/marswxreport?lang=en"

In [24]:
# Retrieve the page with the requests module.
# The timeout keeps this cell from hanging indefinitely on a stalled
# connection, and raise_for_status() surfaces an HTTP error response here
# rather than letting an error page be parsed further down.
response = requests.get(url, timeout=30)
response.raise_for_status()

In [25]:
# Parse the response body into a BeautifulSoup tree using 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

In [26]:
# Examine the results, then determine element that contains sought info
# print(soup.prettify())

In [27]:
# Collect every <div> carrying the class "js-tweet-text-container"; the
# result can be indexed and iterated like a list
results = soup.find_all('div', class_="js-tweet-text-container")

In [28]:
# Take the text of the <p> inside the first matching container as the
# weather report (presumably the most recent tweet -- confirm ordering)
mars_weather = results[0].find('p').text

In [29]:
# Display the scraped weather text to verify it
mars_weather

'Sol 2033 (April 25, 2018), Sunny, high -10C/14F, low -71C/-95F, pressure at 7.23 hPa, daylight 05:24-17:20'

## Scrape Mars Facts Using Pandas

In [30]:
# dependency
import pandas as pd

In [31]:
# Address of the Mars facts page whose tables will be read
url = "https://space-facts.com/mars/"

In [32]:
# Let pandas fetch the page and parse the HTML tables it contains;
# the result is indexed by position in the next step
tables = pd.read_html(url)

In [33]:
# Keep the first table parsed from the page as the Mars facts data frame
df = tables[0]

In [34]:
# Display the data frame to verify that the expected table was selected
df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.42 x 10^23 kg (10.7% Earth)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.52 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-153 to 20 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [35]:
# Render the data frame as an HTML table string without the column-label
# header row and without the index column. `header` is a boolean flag, so
# pass False explicitly instead of relying on None being falsy.
html_table = df.to_html(header=False, index=False)

In [36]:
# Remove newline characters so the table markup becomes a single-line string
html_table = html_table.replace('\n', '')

In [37]:
# Display the cleaned HTML string to verify it
html_table

'<table border="1" class="dataframe">  <tbody>    <tr>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <td>Mass:</td>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <td>Surface Temperature:</td>      <td>-153 to 20 °C</td>    </tr>    <tr>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <td>Recorded By:</td>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

## Scrape the USGS Astrogeology Site and Find Mars Hemispheres

In [38]:
# Accumulator for the scraped hemisphere records; starts out empty
hemisphere_image_urls = []

In [39]:
# define a function to scrape the full resolution image link using splinter
def find_hemisperes(name):
    """Scrape one hemisphere's image link and record it.

    Opens a new Chrome browser through splinter, visits the USGS
    Astrogeology search-results page, clicks the link whose text contains
    ``name``, and then looks up a link whose href contains the lower-cased
    first word of ``name``. The href of that link is stored together with a
    title in the notebook-level ``hemisphere_image_urls`` list.

    Parameters
    ----------
    name : str
        Hemisphere name as it appears in the link text on the results page,
        e.g. ``'Syrtis Major'``.

    Returns
    -------
    None
        The result is appended to ``hemisphere_image_urls`` as
        ``{"title": "<name> Hemisphere", "img_url": <href>}``.

    Notes
    -----
    Depends on the notebook globals ``Browser``, ``executable_path`` and
    ``hemisphere_image_urls``. Any error raised by splinter while clicking
    or looking up the link propagates to the caller; the browser is closed
    either way.
    """
    browser = Browser('chrome', **executable_path, headless=False)
    try:
        search_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
        browser.visit(search_url)
        browser.click_link_by_partial_text(name)
        # The download link is matched on the first word of the name in lower
        # case (e.g. 'syrtis' for 'Syrtis Major').
        links_found = browser.find_link_by_partial_href(name.split()[0].lower())
        # NOTE(review): indexing the result by 'href' presumably reads the
        # first matching link -- confirm against splinter's ElementList.
        img_url = links_found['href']
        dic = {"title": f"{name} Hemisphere", "img_url": img_url}
        hemisphere_image_urls.append(dic)
    finally:
        # Always release the browser, even when a lookup above fails, so a
        # failed scrape does not leave a Chrome window behind.
        browser.quit()

In [40]:
# Names of the Mars hemispheres to scrape, one entry per hemisphere
hemisperes_list = [
    'Cerberus',
    'Schiaparelli',
    'Syrtis Major',
    'Valles Marineris',
]

In [41]:
# Scrape every hemisphere named in the list above.
# The accumulator is emptied first so that re-running this cell does not
# append duplicate entries to hemisphere_image_urls.
hemisphere_image_urls.clear()
for hemispere in hemisperes_list:
    find_hemisperes(hemispere)

In [42]:
# Display the collected hemisphere records to verify them
hemisphere_image_urls

[{'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere'},
 {'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere'}]