In [1]:
# import dependencies
from splinter import Browser
from bs4 import BeautifulSoup as bs
import time
import requests
import pandas as pd

# Start a visible (headless=False) Chrome session through splinter, passing
# 'chromedriver.exe' as the driver's executable path.
executable_path = dict(executable_path='chromedriver.exe')
browser = Browser('chrome', headless=False, **executable_path)

## Mars News

In [2]:
# Load the NASA Mars news page in the browser and parse its markup.

news_url = 'https://mars.nasa.gov/news'
browser.visit(news_url)
# Wait BEFORE reading browser.html: the pause only helps if the markup
# snapshot is taken after it, not immediately after visit() returns.
time.sleep(3)
html = browser.html
soup_news = bs(html, 'html.parser')

In [4]:
# Get the article title and teaser paragraph from the first headline.
slide = soup_news.select_one('ul.item_list li.slide')
if slide is None:
    # select_one returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on None further down.
    raise RuntimeError(
        "No 'ul.item_list li.slide' element found in the parsed news page"
    )

titles = slide.find("div", class_="content_title").get_text()
news_p = slide.find("div", class_="article_teaser_body").get_text()

# Only the last expression of a cell is displayed, so a bare `titles` in the
# middle of the cell showed nothing and has been dropped.
news_p

"By collecting data around the clock, NASA's lander will provide unique science about the Martian surface."

## Featured Image

In [12]:
# Open the JPL space-images listing (Mars category) and click the link whose
# text contains 'FULL IMAGE'.

image_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(image_url)
browser.click_link_by_partial_text('FULL IMAGE')
# Short pause after the click before the next cell interacts with the page.
time.sleep(1)

In [13]:
# Click the 'more info' link, then read the large-size image's relative URL
# from the resulting page and join it with the site's base URL.

more_info_elem = browser.find_link_by_partial_text('more info')
more_info_elem.click()
# Pause after the click (as is done after the 'FULL IMAGE' click) so that
# browser.html is read after the wait rather than right after click() returns.
time.sleep(1)
html = browser.html
img_soup = bs(html, 'html.parser')
# find the relative image url
img_url_rel = img_soup.select_one('figure.lede a img').get("src")
feature_url = 'https://www.jpl.nasa.gov' + img_url_rel
feature_url

'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA17841_hires.jpg'

## Weather

In [18]:
# Load the Mars weather Twitter feed in the browser and parse its markup.

tweet_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(tweet_url)
# Wait BEFORE reading browser.html: the pause only helps if the markup
# snapshot is taken after it, not immediately after visit() returns.
time.sleep(1)
html = browser.html
soup_tweet = bs(html, 'html.parser')

In [19]:
# Take the text of the first <p> in the parsed Twitter page whose class
# attribute matches the tweet-text class string, and print it.

tweet_text_classes = 'TweetTextSize TweetTextSize--normal js-tweet-text tweet-text'
first_tweet_p = soup_tweet.find('p', attrs={'class': tweet_text_classes})
weather_info = first_tweet_p.get_text()
print(weather_info)

InSight sol 84 (2019-02-20) low -95.1ºC (-139.2ºF) high -13.2ºC (8.3ºF)
winds from the SW at 4.1 m/s (9.3 mph) gusting to 10.8 m/s (24.2 mph)pic.twitter.com/WlR4gr8gpC


## Facts

In [21]:
# Load the Mars facts page in the browser and parse its markup.

facts_url = 'https://space-facts.com/mars/'
browser.visit(facts_url)
# Wait BEFORE reading browser.html: the pause only helps if the markup
# snapshot is taken after it, not immediately after visit() returns.
time.sleep(1)
html = browser.html
soup_facts = bs(html, 'html.parser')

In [22]:
# Scrape the facts table(s) using pandas. read_html is given the URL itself
# (not the browser's markup), and the result is a list of DataFrames.

tables = pd.read_html(facts_url)
tables

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.42 x 10^23 kg (10.7% Earth)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.52 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                  -153 to 20 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers]

In [23]:
# Build the facts DataFrame from the first scraped table, label its two
# columns, and use 'Field' as the index.

# Work on a copy: assigning .columns directly on tables[0] would rename the
# columns of the scraped table in place, leaving the earlier display of
# `tables` out of date.
facts_labeled = tables[0].copy()
facts_labeled.columns = ['Field', 'Value']
df = facts_labeled.set_index('Field')
df

Unnamed: 0_level_0,Value
Field,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.42 x 10^23 kg (10.7% Earth)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.52 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-153 to 20 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [24]:
# Render the facts DataFrame as an HTML <table> string.

html_table = df.to_html()
html_table

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Field</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\n  </

In [25]:
# Strip every newline character from the generated table markup.

html_table = ''.join(html_table.split('\n'))
html_table

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>Value</th>    </tr>    <tr>      <th>Field</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

## Hemisphere

In [26]:
# Open the USGS Astrogeology search results for the enhanced Mars hemisphere
# images and capture the page markup.

hemi_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(hemi_url)
# Wait BEFORE reading browser.html: the pause only helps if the markup
# snapshot is taken after it, not immediately after visit() returns.
time.sleep(1)
html = browser.html

In [27]:
# Parse the captured search-results markup and collect every <div> carrying
# the 'item' class; each one holds a single hemisphere result.

soup_hemi = bs(html, 'html.parser')

hemis = soup_hemi.find_all(name='div', attrs={'class': 'item'})
hemis

[<div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/dfaf3849e74bf973b59eb50dab52b583_cerberus_enhanced.tif_thumb.png"/></a><div class="description"><a class="itemLink product-item" href="/search/map/Mars/Viking/cerberus_enhanced"><h3>Cerberus Hemisphere Enhanced</h3></a><span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/><p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p></div> <!-- end description --></div>,
 <div class="item"><a class="itemLink product-item" href="/search/map/Mars/Viking/schiaparelli_enhanced"><img alt="Schiaparelli Hemisphere Enhanced thumbnail" class="thumb" src="/cache/images/7677c0a006b83871b5a2f66985ab5857_schiapa

In [28]:
# Iterate through each result item, visit its detail page, and record the
# hemisphere title together with the URL of its enlarged image.

# Site root that the relative hrefs / image paths are appended to.
HEMI_BASE_URL = 'https://astrogeology.usgs.gov'

# Despite the name, this is a list of {'title': ..., 'img_url': ...} dicts.
hemi_dict = []

for hemi in hemis:

    # the title is the text of the item's <h3> tag
    title = hemi.find('h3').text

    # relative link to the detail page that shows the enlarged image
    target_url = hemi.find("a", "itemLink product-item")['href']

    # absolute URL of the detail page
    click_url = HEMI_BASE_URL + target_url

    # Visit the detail page. Wait BEFORE reading browser.html so the markup
    # snapshot is taken after the pause, not immediately after visit().
    browser.visit(click_url)
    time.sleep(1)
    html = browser.html
    soup_click = bs(html, 'html.parser')

    # relative path of the enlarged image, made absolute with the site root
    img = soup_click.find('img', class_='wide-image')['src']
    img_url = HEMI_BASE_URL + img

    hemi_dict.append({'title': title, 'img_url': img_url})

print(hemi_dict)

[{'title': 'Cerberus Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'}, {'title': 'Schiaparelli Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'}, {'title': 'Syrtis Major Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'}, {'title': 'Valles Marineris Hemisphere Enhanced', 'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]
