In [1]:
# Dependencies
from pathlib import Path

import pandas as pd
import requests
from bs4 import BeautifulSoup
from splinter import Browser

### Windows Chrome Driver

In [2]:
# Function to choose the executable path to driver
def init_browser(executable_path="C:/chromedriver/chromedriver", headless=False):
    """Start a splinter-driven Chrome browser.

    Parameters
    ----------
    executable_path : str, optional
        Path to the chromedriver binary. The default is the Windows
        location this notebook was originally written against.
    headless : bool, optional
        Forwarded to ``Browser``; the default ``False`` keeps the
        original behaviour.

    Returns
    -------
    The ``Browser`` instance. The caller is responsible for calling
    ``quit()`` on it when done.
    """
    return Browser("chrome", executable_path=executable_path, headless=headless)

### NASA Mars News

In [3]:
# Run init_browser/driver.
browser = init_browser()

# Always close the browser, even when the visit fails; otherwise a failed
# run leaves a stray Chrome/chromedriver process behind.
try:
    # Visit Nasa news url.
    news_url = "https://mars.nasa.gov/news/"
    browser.visit(news_url)

    # HTML Object (a plain string, so it stays usable after the browser closes).
    html = browser.html
finally:
    # Exit Browser.
    browser.quit()

# Parse HTML with Beautiful Soup
news_soup = BeautifulSoup(html, "html.parser")

# Retrieve the most recent article's title and paragraph.
# `find` returns None when nothing matches, so check before dereferencing
# to get a clear error instead of an AttributeError on None.
title_div = news_soup.find("div", class_="content_title")
title_link = title_div.find('a') if title_div is not None else None
teaser_div = news_soup.find("div", class_="article_teaser_body")
if title_link is None or teaser_div is None:
    raise ValueError(f"Could not locate the latest article on {news_url}")

# Store in news variables.
news_title = title_link.text
news_paragraph = teaser_div.get_text()

In [4]:
# Show the scraped headline and teaser, one per line.
print(f'Title: {news_title}', f'Text: {news_paragraph}', sep='\n')

Title: Robotic Toolkit Added to NASA's Mars 2020 Rover
Text: The bit carousel, which lies at the heart of the rover's Sample Caching System, is now aboard NASA's newest rover. 


### JPL Mars Space Images - Featured Image

In [5]:
# Run init_browser/driver.
browser = init_browser()

# Always close the browser, even when navigation fails; otherwise a failed
# run leaves a stray Chrome/chromedriver process behind.
try:
    # Visit the url for JPL Featured Space Image.
    jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
    browser.visit(jpl_url)

    # Select "FULL IMAGE".
    browser.click_link_by_partial_text("FULL IMAGE")

    # Find "more info" for first image, set to variable, and command click.
    # The presence check is used only as a short wait; its result is not
    # acted on, the partial-text lookup below decides whether the link exists.
    browser.is_element_present_by_text("more info", wait_time=1)
    more_info_element = browser.find_link_by_partial_text("more info")
    more_info_element.click()

    # HTML Object (a plain string, so it stays usable after the browser closes).
    html = browser.html
finally:
    # Exit Browser.
    browser.quit()

# Parse HTML with Beautiful Soup
image_soup = BeautifulSoup(html, "html.parser")

# Scrape image URL.
# `find` returns None when nothing matches, so check before dereferencing
# to get a clear error instead of an AttributeError/TypeError on None.
lede_figure = image_soup.find("figure", class_="lede")
if lede_figure is None or lede_figure.a is None:
    raise ValueError("Could not locate the featured image link on the JPL detail page")
image_url = lede_figure.a["href"]

# Concatenate https://www.jpl.nasa.gov with image_url.
featured_image_url = f'https://www.jpl.nasa.gov{image_url}'

In [6]:
# Show the assembled featured-image URL.
print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17044_hires.jpg


### Mars Weather

In [None]:
# Run init_browser/driver.
browser = init_browser()

# Always close the browser, even when the visit fails; otherwise a failed
# run leaves a stray Chrome/chromedriver process behind.
try:
    # Visit the url for Mars Weather twitter account.
    weather_url = "https://twitter.com/marswxreport?lang=en"
    browser.visit(weather_url)

    # HTML Object (a plain string, so it stays usable after the browser closes).
    html = browser.html
finally:
    # Exit Browser.
    browser.quit()

# Parse HTML with Beautiful Soup
weather_soup = BeautifulSoup(html, "html.parser")

# Retrieve ALL 'ol' tags and save to variable 'tweets'.
tweets = weather_soup.find_all('ol', class_='stream-items')

# Collect the text of every individual tweet, in page order. Each 'ol' is a
# container for many tweets, so the 'p' elements inside it are what must be
# iterated, not the 'ol' elements themselves.
tweet_texts = [
    paragraph.get_text()
    for stream in tweets
    for paragraph in stream.find_all(
        'p', class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text"
    )
]

# Take the first tweet whose TEXT mentions 'InSight'. The keyword has to be
# tested against the text: `'InSight' in <Tag>` checks the tag's child nodes,
# not its text, so it never matches a weather report.
mars_weather = next((text for text in tweet_texts if 'InSight' in text), None)
if mars_weather is None:
    raise ValueError("No tweet mentioning 'InSight' was found on the Mars Weather timeline")

# Remove 'anchor' tag text from "mars_weather".
# Split on the full 'pic.twitter.com' marker rather than the bare 'pic' so
# ordinary words containing "pic" cannot truncate the report.
# NOTE(review): assumes the attached-image link text starts with
# 'pic.twitter.com' -- confirm against the live markup.
mars_weather = mars_weather.split('pic.twitter.com')[0]

In [30]:
# Show the scraped Mars weather text.
print(mars_weather)

InSight sol 258 (2019-08-18) low -100.0ºC (-148.1ºF) high -26.2ºC (-15.2ºF)
winds from the SSE at 5.3 m/s (11.9 mph) gusting to 16.8 m/s (37.6 mph)
pressure at 7.60 hPa


In [9]:
# Debug inspection: display the raw `tweets` result set (the matched <ol> elements).
tweets

[<ol class="stream-items js-navigable-stream" id="stream-items-id">
 <li class="js-stream-item stream-item stream-item" data-item-id="1163403052969336832" data-item-type="tweet" data-suggestion-json='{"suggestion_details":{},"tweet_ids":"1163403052969336832","scribe_component":"tweet"}' id="stream-item-tweet-1163403052969336832">
 <div class="tweet js-stream-tweet js-actionable-tweet js-profile-popup-actionable dismissible-content original-tweet js-original-tweet has-cards has-content" data-conversation-id="1163403052969336832" data-disclosure-type="" data-follows-you="false" data-has-cards="true" data-item-id="1163403052969336832" data-name="Mars Weather" data-permalink-path="/MarsWxReport/status/1163403052969336832" data-reply-to-users-json='[{"id_str":"786939553","screen_name":"MarsWxReport","name":"Mars Weather","emojified_name":{"text":"Mars Weather","emojified_text_as_html":"Mars Weather"}}]' data-screen-name="MarsWxReport" data-tweet-id="1163403052969336832" data-tweet-nonce="11

### Mars Facts

In [10]:
# URL for Mars Facts.
facts_url = "https://space-facts.com/mars/"

# Use pandas' `read_html` to parse the URL.
facts_tables = pd.read_html(facts_url)

# Required table stored in index "1".
# Work on a copy so the parsed tables in `facts_tables` stay exactly as
# `read_html` returned them.
df_mars_facts = facts_tables[1].copy()

# Rename columns.
df_mars_facts.columns = ['Description', 'Value']

# Set index to 'Description' (non-inplace: returns a new frame).
df_mars_facts = df_mars_facts.set_index('Description')

df_mars_facts

Unnamed: 0_level_0,Value
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [31]:
# Convert DF to html and save in Resources Folder.
# Create the folder first so a fresh checkout without it does not fail
# with FileNotFoundError.
facts_html_path = Path('Resources') / 'mars_facts.html'
facts_html_path.parent.mkdir(parents=True, exist_ok=True)

# Pass a plain string path: older pandas releases only accept str or a
# file-like object for `buf`.
df_mars_facts.to_html(str(facts_html_path))