In [46]:
# import dependencies
from splinter import Browser
from bs4 import BeautifulSoup as bs
import time
import requests
import pandas as pd

# Tell splinter where the chromedriver binary lives, then launch a Chrome
# session with a visible window (headless mode is switched off).
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

## Mars News

In [20]:
# Load the NASA Mars news page in the browser and parse its HTML with BeautifulSoup.

news_url = 'https://mars.nasa.gov/news'
browser.visit(news_url)
# Pause BEFORE reading browser.html so the snapshot is taken after the wait.
# Sleeping after the read (the previous order) could not change the markup
# that had already been captured.
time.sleep(3)
html = browser.html
soup_news = bs(html, 'html.parser')

In [21]:
# Take the first headline on the page: its link text (trimmed) is the title,
# and the first teaser div supplies the accompanying paragraph.

titles = soup_news.find("div", class_="content_title")
news_title = titles.a.get_text().strip()
teaser = soup_news.find("div", class_="article_teaser_body")
news_p = teaser.get_text()
print(news_title, news_p, sep="\n")

InSight Is the Newest Mars Weather Service
By collecting data around the clock, NASA's lander will provide unique science about the Martian surface.


## Featured Image

In [27]:
# Open the JPL space-images page (Mars category) and use splinter to click the
# link whose text contains 'FULL IMAGE'.

image_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(image_url)
browser.click_link_by_partial_text('FULL IMAGE')
# Fixed 3-second pause after the click; presumably this gives the full-size
# image overlay time to load before a later cell reads browser.html — TODO confirm.
time.sleep(3)

In [30]:
# Parse the current browser page, locate the enlarged featured image, and
# prefix its (site-relative) src with the JPL host to get an absolute URL.

html = browser.html
soup_featured = bs(html, 'lxml')
featured_img = soup_featured.find('img', class_='fancybox-image')
featured_url = featured_img['src']
target_url = f'https://www.jpl.nasa.gov{featured_url}'
target_url

'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA19643_ip.jpg'

## Weather

In [32]:
# Load the Mars Weather Twitter feed in the browser and parse its HTML with BeautifulSoup.

tweet_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(tweet_url)
# Pause BEFORE reading browser.html so the snapshot is taken after the wait.
# Sleeping after the read (the previous order) could not change the markup
# that had already been captured.
time.sleep(3)
html = browser.html
soup_tweet = bs(html, 'html.parser')

In [43]:
# Pull the text of the first tweet paragraph on the page; the tweet body is
# identified by its full class attribute string.

tweet_text_classes = 'TweetTextSize TweetTextSize--normal js-tweet-text tweet-text'
weather_info = soup_tweet.find('p', class_=tweet_text_classes).text
print(weather_info)

InSight sol 81 (2019-02-17), high -17/2F, low -95/-138F, pressure at 7.23hPa, winds from the WNW at 12 mph gusting to 37.8 mph

Welcome to the Mars Weather team @NASAInSight!
https://mars.nasa.gov/insight/weather/ …pic.twitter.com/SH12FvcMfv


## Facts

In [44]:
# Load the Mars facts page in the browser and parse its HTML with BeautifulSoup.

facts_url = 'https://space-facts.com/mars/'
browser.visit(facts_url)
# Pause BEFORE reading browser.html so the snapshot is taken after the wait.
# Sleeping after the read (the previous order) could not change the markup
# that had already been captured.
time.sleep(3)
html = browser.html
soup_facts = bs(html, 'html.parser')

In [48]:
# Scrape the facts table(s) using pandas.
# read_html is handed the URL itself (not the browser's HTML captured above)
# and returns a list of DataFrames; the bare name on the last line displays it.

tables = pd.read_html(facts_url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [65]:
# Build the facts DataFrame: label the two columns and index the rows by field name.

# Work on a copy so the column renaming below does not silently alter
# tables[0], which an earlier cell already displayed with its original labels.
df = tables[0].copy()
df.columns = ['Field', 'Value']
df = df.set_index('Field')
df

Unnamed: 0_level_0,Value
Field,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [72]:
# Render the facts DataFrame as an HTML <table> string using pandas' default
# markup; the bare name on the last line displays the resulting string.

html_table = df.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Field</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </

In [73]:
# Strip every newline character from the generated table markup so it becomes
# one continuous line. Re-running this cell is harmless: once the newlines are
# gone, replace() has nothing left to change.

html_table = html_table.replace('\n', '')
html_table

## Hemisphere