In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
from splinter import Browser
import pandas as pd

## Scraping NASA for Mars headlines

In [20]:
# URL of page to be scraped
url = 'https://mars.nasa.gov/news/'

# Retrieve page with the requests module.
# The timeout keeps a stalled connection from hanging the notebook, and
# raise_for_status() stops here on an HTTP error instead of letting the
# following cells parse an error page as if it were the news listing.
response = requests.get(url, timeout=30)
response.raise_for_status()
# Create BeautifulSoup object; parse with 'lxml'
soup = BeautifulSoup(response.text, 'lxml')

In [21]:
#print(soup.prettify())

In [22]:
#list(soup.children)

In [23]:
# Scrape NASA Mars News (https://mars.nasa.gov/news/) for the latest News Title and Paragraph Text.
# soup.find() returns the first matching <div>; .strip() removes the newlines that
# surround the text inside those elements so the saved strings hold only the content.

news_title = soup.find('div', class_='content_title').text.strip()
news_p = soup.find('div', class_='rollover_description_inner').text.strip()

In [24]:
# Show the scraped headline followed by its teaser paragraph, one per line
print(news_title, news_p, sep='\n')



Dust Storms Linked to Gas Escape from Mars Atmosphere



If Mars has a global dust storm in 2018, observations could aid understanding of its effects.



## Using Splinter for JPL Mars Featured Image

In [6]:
# Open a visible (non-headless) Chrome window through Splinter, driven by the
# chromedriver binary at the path below, and load JPL's Mars space-images page

url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
executable_path = dict(executable_path='/usr/local/bin/chromedriver')

browser = Browser('chrome', headless=False, **executable_path)
browser.visit(url)

In [7]:
# Click the link whose text contains 'FULL IMAGE' to get to the featured image.
# Acts on whatever page the shared `browser` session currently has loaded.
# NOTE(review): newer Splinter releases replace click_link_by_partial_text with
# browser.links.find_by_partial_text(...).click() -- confirm against the installed version.
browser.click_link_by_partial_text('FULL IMAGE')

In [8]:
# Click the 'more info' button to get to the featured image's article.
# Under "Run All" this cell executes right after the previous click, so first poll
# (for up to 5 seconds) until a link containing 'more info' is present on the page.
# The return value is ignored on purpose: if the link never appears, the click below
# still raises exactly as it did before.
browser.is_element_present_by_xpath("//a[contains(., 'more info')]", wait_time=5)
browser.click_link_by_partial_text('more info')

In [9]:
# Design an XPATH selector to grab the featured image:
# matches every <a> element nested at any depth inside a <figure> element
xpath = '//figure//a'

In [10]:
# Look up every element matching the XPath selector, take the first match,
# and click it to bring up the full resolution image
results = browser.find_by_xpath(xpath)
img = results.first
img.click()

In [11]:
# Parse the page currently loaded in the browser and keep the `src` attribute
# of the first <img> tag as the full resolution image url
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
featured_img_url = soup.img["src"]
featured_img_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17011_hires.jpg'

In [12]:
# David's
# Visit the JPL Mars URL
# url2 = "https://jpl.nasa.gov/spaceimages/?search=&category=Mars"
# browser.visit(url2)
# # Scrape the browser into soup and use soup to find the image of mars
# # Save the image url to a variable called `img_url`
# html = browser.html
# soup = BeautifulSoup(html, 'html.parser')
# image = soup.find("img", class_="thumb")["src"]
# img_url = "https://jpl.nasa.gov"+image
# # Use the requests library to download and save the image from the `img_url` above
# import requests
# import shutil
# response = requests.get(img_url, stream=True)
# with open('img.jpg', 'wb') as out_file:
#     shutil.copyfileobj(response.raw, out_file)
    
# # Display the image with IPython.display
# from IPython.display import Image
# Image(url='img.jpg')  

## Mars Weather Tweets

In [13]:
# Use Splinter to navigate to Mars Weather Twitter account (@MarsWxReport)
# Rebinds the notebook-wide `url` name and drives the existing `browser` session.

url = "https://twitter.com/marswxreport?lang=en"
browser.visit(url)

In [14]:
# Scrape the browser into soup and collect every tweet text container on the page
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

mars_weather_tweets = soup.find_all("div", class_="js-tweet-text-container")

# Save the text of the first tweet (in page order) that starts with 'Sol';
# tweets that do not start that way are skipped. Defaulting to None means a page
# without such a tweet can no longer leave an unrelated tweet, or a value from an
# earlier run of this cell, in `latest_weather`.
tweet_texts = (tweet.text.strip() for tweet in mars_weather_tweets)
latest_weather = next((text for text in tweet_texts if text.startswith('Sol')), None)

if latest_weather is None:
    print("No tweet starting with 'Sol' was found")
else:
    print(latest_weather)

Sol 1946 (Jan 26, 2018), Sunny, high -20C/-4F, low -79C/-110F, pressure at 7.53 hPa, daylight 05:43-17:28


## Mars Facts: Scrape and Convert to HTML Table with Pandas

In [15]:
# Use Pandas read_html function to automatically scrape tabular data.
# read_html returns a list of DataFrames, one per table it parses from the page;
# the following cells pick the table they need by index.

url = 'https://space-facts.com/mars/'
tables = pd.read_html(url)

# Display 'tables' to explore and review in preparation for conversion
# tables
# tables[0]

In [16]:
# Set tabular data to variable and cleanup dataframe.
# Work on a copy and use the non-inplace set_index so this cell can be re-run:
# mutating tables[0] in place would leave it with a single column after the first
# run, and the two-name column assignment below would then fail.

mars_facts_df = tables[0].copy()
mars_facts_df.columns = ['Parameter', 'Measurement']
mars_facts_df = mars_facts_df.set_index('Parameter')
#mars_facts_df.head()

In [17]:
# Use Pandas to_html method to generate HTML table from DataFrame and save as string;
# the bare `html_table` on the last line displays that string as the cell output

html_table = mars_facts_df.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Measurement</th>\n    </tr>\n    <tr>\n      <th>Parameter</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    <

In [18]:
# Strip newlines.
# str.replace returns a new string and leaves the original untouched, so the result
# is assigned back to `html_table`; the trailing expression still displays it.

html_table = html_table.replace('\n', '')
html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Measurement</th>    </tr>    <tr>      <th>Parameter</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

## Convert to Python script

In [19]:
# Shell escape: export the notebook file mission_to_mars.ipynb as a plain Python script using nbconvert
!jupyter nbconvert --to script mission_to_mars.ipynb

[NbConvertApp] Converting notebook mission_to_mars.ipynb to script
[NbConvertApp] Writing 4046 bytes to mission_to_mars.py
