In [1]:
#import dependencies
from splinter import Browser
from bs4 import BeautifulSoup as bs
import time
import pandas as pd
import requests

In [2]:
# def init_browser():
#     executable_path = {"executable_path": "chromedriver.exe"}
#     browser = Browser('chrome', **executable_path, headless=False)

#collect the latest News Title and Paragraph Text
def scrape_info():
    browser = Browser("chrome")
    
    url = "https://mars.nasa.gov/news/"
    browser.visit(url)
    time.sleep(1)
    html = browser.html
    soup = bs(html, "html.parser")
    
    news_title = soup.find("div", class_="content_title").get_text()
    news_p = soup.find("div", class_="article_teaser_body").get_text()
    
    #store data in dictionary
    nasa_data = {
        "news_title": news_title, 
        "news_p": news_p
    }
    
    #close the browser
    browser.quit()
    
    return nasa_data

scrape_info()

{'news_title': 'Mars InSight Lander Seen in First Images from Space ',
 'news_p': "Look closely, and you can make out the lander's solar panels."}

In [5]:
#Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string
def scrape_images():
    browser = Browser("chrome")
    
    url_2 = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
    browser.visit(url_2)
    time.sleep(1)
    html = browser.html
    soup = bs(html, "html.parser")
    
    query_url = 'https://www.jpl.nasa.gov'
    relative_image_path = soup.find('article')['style'].\
        replace('background-image: url(','').replace(');', '')[1:-1]
    
    featured_img_url = query_url + relative_image_path
    
    browser.quit()
    return featured_img_url

scrape_images()

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA16883-1920x1200.jpg'

In [7]:
#scrape the latest Mars weather tweet from the page
def scrape_twitter():
    browser = Browser("chrome")
    
    url_3 = "https://twitter.com/marswxreport?lang=en"
    browser.visit(url_3)
    time.sleep(1)
    html = browser.html
    soup = bs(html, "html.parser")
    
    mars_weather = soup.find("p", class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text
    
    
    browser.quit()
    return mars_weather

scrape_twitter()

'Sol 2258 (2018-12-13), high -6C/21F, low -70C/-93F, pressure at 8.41 hPa, daylight 06:37-18:51'

In [9]:
#use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc
#Use Pandas to convert the data to a HTML table string
def scrape_facts():
    browser = Browser("chrome")
    
    url_4 = "https://space-facts.com/mars/"
    tables = pd.read_html(url_4)
    df = tables[0]
    df.columns = ["description", "value"]
    df.set_index('description', inplace=True)
    
    fact_table = df.to_html()
    fact_table = fact_table.replace("\n", "")
    
    return fact_table

scrape_facts()

'<table border="1" class="dataframe">  <thead>    <tr style="text-align: right;">      <th></th>      <th>value</th>    </tr>    <tr>      <th>description</th>      <th></th>    </tr>  </thead>  <tbody>    <tr>      <th>Equatorial Diameter:</th>      <td>6,792 km</td>    </tr>    <tr>      <th>Polar Diameter:</th>      <td>6,752 km</td>    </tr>    <tr>      <th>Mass:</th>      <td>6.42 x 10^23 kg (10.7% Earth)</td>    </tr>    <tr>      <th>Moons:</th>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <th>Orbit Distance:</th>      <td>227,943,824 km (1.52 AU)</td>    </tr>    <tr>      <th>Orbit Period:</th>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <th>Surface Temperature:</th>      <td>-153 to 20 °C</td>    </tr>    <tr>      <th>First Record:</th>      <td>2nd millennium BC</td>    </tr>    <tr>      <th>Recorded By:</th>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

In [12]:
#obtain high resolution images for each of Mar's hemispheres.
def scrape_hemisphere():
    browser = Browser("chrome")
    
    url_5 = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    browser.visit(url_5)
    time.sleep(1)
    html = browser.html
    soup = bs(html, "html.parser")
    
    hemisphere_image_urls = []
    #find titles and links
    hemispheres = soup.find_all("div", class_="item")
    for hemis in hemispheres:
        title = hemis.find('h3').text
        class_url = hemis.find("a", class_="itemLink product-item")["href"]
        
        combined_url = "https://astrogeology.usgs.gov/" + class_url
        browser.visit(combined_url)
        html = browser.html
        soup = bs(html, "html.parser")
        
        img_url = url_5 + soup.find("img", class_="wide-image")['src']
        hemisphere_image_urls.append({"title" : title, "img_url" : img_url})
        
        
    
    browser.quit()
    return hemisphere_image_urls

scrape_hemisphere()

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]