In [None]:
# Unit 12 Assignment - Mission to Mars
# Step 1 - Scraping
# by Christopher Reutz

In [2]:
%autosave 0
# Dependencies
from splinter import Browser
from bs4 import BeautifulSoup as bs
import re
import pandas as pd

Autosave disabled


In [3]:
# Tell Splinter where the chromedriver binary lives, then launch Chrome
# with a visible window (headless=False) so the scrape can be watched.
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

In [4]:
# --- Scrape "NASA Mars News" website for news info ---
mars_news_url = 'https://mars.nasa.gov/news/'

# Load the news page in the Splinter-driven browser
browser.visit(mars_news_url)

# Take the HTML currently held by the browser and parse it with
# BeautifulSoup's built-in 'html.parser'; `soup` is searched for
# articles in the cells that follow.
html = browser.html
soup = bs(html, "html.parser")

In [5]:
# Start with two separate empty lists: one collects article titles,
# the other collects the matching teaser paragraphs.
news_title_list, news_p_list = [], []

In [6]:
# Walk every <li class="slide"> item and collect its title and teaser text
results = soup.find_all('li', class_='slide')
for result in results:
    try:
        # Title is the text of the anchor inside the "content_title" div
        nasa_news_title = result.find('div', class_="content_title").a.text
        # Teaser paragraph is the text of the "article_teaser_body" div
        nasa_news_p = result.find('div', class_="article_teaser_body").text
    except AttributeError as e:
        # A missing div/anchor yields None, whose attribute access fails;
        # report it and move on to the next item
        print(e)
        continue

    # Skip the item unless both the title and the paragraph are non-empty
    if not (nasa_news_title and nasa_news_p):
        continue

    news_title_list.append(nasa_news_title)
    news_p_list.append(nasa_news_p)

In [7]:
# Latest "NASA Mars News" news title -- the first article (index=0).
# Indexing raises IndexError if no article was collected into news_title_list.
news_title = news_title_list[0]
print(news_title)

Things Are Stacking up for NASA's Mars 2020 Spacecraft


In [8]:
# Latest "NASA Mars News" news paragraph text -- the first article (index=0).
# Indexing raises IndexError if no article was collected into news_p_list.
news_p = news_p_list[0]
print(news_p)

As the July 2020 launch date inches closer, the next spacecraft headed to the Red Planet is assembled for more testing.


In [9]:
# --- Scrape "JPL Mars Space Images" to get a URL for the featured image ---

# Use Splinter to open up web browser to main page.
# The domain is kept in its own variable because it is reused later as the
# prefix for the featured image's path.
jpl_domain = 'https://www.jpl.nasa.gov'
jpl_path = '/spaceimages/?search=&category=Mars'
jpl_url = jpl_domain + jpl_path
browser.visit(jpl_url)

In [10]:
# Open the full featured image by clicking the link whose text contains 'FULL IMAGE'
browser.click_link_by_partial_text('FULL IMAGE')

In [11]:
# Open and parse the page with 'more info':
# click the link whose text contains 'more info', then re-read the browser's
# HTML and parse it. `html` and `soup` are overwritten here, replacing the
# values produced by the NASA news scrape.
# NOTE(review): no explicit wait is visible between the previous click and
# this one -- confirm the 'more info' link is reliably present by this point.
browser.click_link_by_partial_text('more info')
html = browser.html
soup = bs(html, 'html.parser')

In [13]:
# Build the absolute URL of the full-size featured image:
# locate the <img class="main_image"> tag, read its (site-relative) src,
# and prefix it with the JPL domain.
main_img_tag = soup.find('img', class_="main_image")
main_img_path = main_img_tag['src']
featured_image_url = '{}{}'.format(jpl_domain, main_img_path)
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA11591_hires.jpg


In [14]:
# --- Scrape "Mars Weather" Twitter account ---

# Open and parse the Twitter account.
# `html` and `soup` are overwritten again, so from here on `soup` refers to
# the Twitter page rather than the JPL page.
mars_weather_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(mars_weather_url)
html = browser.html
soup = bs(html, 'html.parser')

In [15]:
# Find the latest weather report: the first tweet on the page whose text
# starts with "InSight sol" (case-insensitive).
#
# Outputs (consumed by the next cell):
#   tweet_txt  -- full text of the matching tweet, or '' when none matched
#   anchor_txt -- text of the first <a> in that tweet's container, or ''
results = soup.find_all('div', class_='js-tweet-text-container')

# Start from empty values so a page with no matching tweet can never leave
# stale or unrelated tweet text behind for the following cell.
tweet_txt = ''
anchor_txt = ''

for result in results:
    paragraph = result.find('p')
    if paragraph is None:
        # Container without a text paragraph -- nothing to inspect
        continue

    candidate_txt = paragraph.text
    if not re.search(r'^InSight sol', candidate_txt, re.IGNORECASE):
        # Not a weather report (e.g. a retweet or announcement)
        continue

    tweet_txt = candidate_txt
    # The anchor is optional; a tweet without one simply has nothing to strip
    anchor = result.find('a')
    anchor_txt = anchor.text if anchor is not None else ''
    break
else:
    # Loop finished without a break: no weather tweet was present
    print('No "InSight sol" weather tweet found on the page')

In [16]:
# Remove anchor url from the weather tweet.
# The anchor text is scraped content, not a pattern: escape it so characters
# such as '.', '?', '(' or '+' are matched literally instead of being
# interpreted as regex syntax (which could mis-match or raise re.error).
# An empty anchor_txt leaves the tweet text unchanged.
regex = re.compile(re.escape(anchor_txt))
mars_weather = regex.sub('', tweet_txt)
print(mars_weather)

InSight sol 138 (2019-04-17) low -97.7ºC (-143.9ºF) high -17.3ºC (0.9ºF)
winds from the W at 4.3 m/s (9.5 mph) gusting to 12.6 m/s (28.1 mph)
pressure at 7.30 hPa


In [17]:
# --- Scrape "Mars Facts" website table ---

# Use Pandas to pull html table and put into a list.
# read_html fetches the page itself (no Splinter involved) and returns one
# DataFrame per table it finds.
marsfacts_url = 'https://space-facts.com/mars/'
marsfacts_list = pd.read_html(marsfacts_url)

In [18]:
# Put the first scraped table into a dataframe and format it.
# Work on a copy and avoid inplace operations so marsfacts_list[0] is never
# mutated: the cell can then be re-run safely (mutating the list element
# would leave it with a single column and make the column rename fail on
# the second run).
marsfacts_df = marsfacts_list[0].copy()
marsfacts_df.columns = ['Description', 'Value']
marsfacts_df = marsfacts_df.set_index('Description')
marsfacts_df

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [19]:
# Convert dataframe into an html table string.
# The string is printed (rather than displayed) so the raw markup is visible.
marsfacts_table = marsfacts_df.to_html()
print(marsfacts_table)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Value</th>
    </tr>
    <tr>
      <th>Description</th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Equatorial Diameter:</th>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>Polar Diameter:</th>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.42 x 10^23 kg (10.7% Earth)</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>Orbit Distance:</th>
      <td>227,943,824 km (1.52 AU)</td>
    </tr>
    <tr>
      <th>Orbit Period:</th>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>Surface Temperature:</th>
      <td>-153 to 20 °C</td>
    </tr>
    <tr>
      <th>First Record:</th>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>Recorded By:</th>
      <td>Egyptian astronomers</td>
    </tr>
  </tbody>
</table>
