In [1]:
from splinter import Browser
from bs4 import BeautifulSoup as bs
import requests
import time
import pandas as pd
from pprint import pprint

In [2]:
# Define function to initialize browser
def init_browser():
    """Start a visible (non-headless) Chrome session driven by splinter.

    Returns:
        The splinter ``Browser`` created with the chromedriver path below.
    """
    # Raw string: "\P" and "\c" are not valid escape sequences, so a plain
    # literal raises an invalid-escape warning (and is slated to become an
    # error). The resulting path text is unchanged.
    executable_path = {"executable_path": r"C:\Program Files\chromedriver.exe"}
    return Browser("chrome", **executable_path, headless=False)

# NASA Mars News

In [5]:
# Define function to scrape Mars news
def mars_news():
    """Scrape the first headline and teaser from the NASA Mars news page.

    Returns:
        A one-element list holding a dict with the keys ``'Title'`` and
        ``'Description'``.

    Raises:
        AttributeError: if either expected ``div`` is not found in the page
            (``soup.find`` then yields ``None``).
    """
    browser = init_browser()

    # try/finally guarantees the Chrome session is closed even when the
    # visit or the parsing below raises.
    try:
        # Visit URL
        url = "https://mars.nasa.gov/news/"
        browser.visit(url)

        # Scrape page into soup
        soup = bs(browser.html, "html.parser")

        # Find news title and paragraph
        mars_title = soup.find("div", class_="content_title").text
        mars_paragraph = soup.find("div", class_="article_teaser_body").text
    finally:
        browser.quit()

    # Wrap the single news dict in a list
    return [{'Title': mars_title,
             'Description': mars_paragraph}]
    

In [6]:
mars_news()

[{'Title': 'Mars 2020 Unwrapped and Ready for More Testing',
  'Description': "In time-lapse video, bunny-suited engineers remove the inner layer of protective foil on NASA's Mars 2020 rover after it was relocated for testing."}]

# JPL Mars Space Images - Featured Image

In [7]:
# Define function to scrape Mars featured image
def mars_image():
    """Scrape the URL of the featured image from the JPL space-images page.

    Returns:
        The JPL base URL joined with the path taken from the carousel
        article's inline ``style`` attribute.

    Raises:
        AttributeError, TypeError, KeyError or IndexError if the carousel
        markup is missing or its ``style`` attribute has no quoted URL.
    """
    browser = init_browser()

    # try/finally guarantees the Chrome session is closed even when the
    # visit or the parsing below raises.
    try:
        # Visit URL
        jpl_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
        browser.visit(jpl_url)

        # Scrape page into soup
        soup = bs(browser.html, 'html.parser')

        # The image path is the text between the first pair of single quotes
        # in the article's style attribute.
        carousel_style = soup.find("div", class_="carousel_items").find("article")["style"]
        featured_img_url_raw = carousel_style.split("'")[1]
    finally:
        browser.quit()

    base_url = "https://www.jpl.nasa.gov"
    return base_url + featured_img_url_raw

In [8]:
mars_image()

'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA00063-1920x1200.jpg'

# Mars Weather

In [23]:
# Define function to scrape Mars weather
def mars_weather():
    """Scrape the text of the first tweet on the Mars weather Twitter page.

    Returns:
        The tweet text with newlines turned into spaces, truncated at the
        first comma or at the first occurrence of ``'pic'``, whichever
        comes first.

    Raises:
        AttributeError: if no element with class ``tweet-text`` is found.
    """
    browser = init_browser()

    # try/finally guarantees the Chrome session is closed even when the
    # visit or the parsing below raises.
    try:
        # Visit URL
        twitter_url = "https://twitter.com/marswxreport?lang=en"
        browser.visit(twitter_url)

        # Scrape page into soup
        soup = bs(browser.html, 'html.parser')

        # Get the Mars weather tweet text
        tweet_text = soup.find(class_="tweet-text").get_text()
    finally:
        browser.quit()

    # Replacing 'pic' with ',' lets a single split on ',' drop the trailing
    # picture link along with anything after the first comma.
    mars_weather_data = tweet_text.replace('\n', ' ').replace('pic', ',').split(",")[0]

    return mars_weather_data

In [24]:
mars_weather()

'InSight sol 317 (2019-10-18) low -103.2ºC (-153.8ºF) high -26.2ºC (-15.2ºF) winds from the SSE at 5.6 m/s (12.5 mph) gusting to 22.2 m/s (49.7 mph) pressure at 7.10 hPa'

# Mars Facts

In [81]:
# Define a function to scrape Mars facts
def mars_facts():
    """Fetch the Mars facts table and return it as a single-line HTML string.

    Returns:
        The second table found on the facts page, rendered with
        ``DataFrame.to_html`` (no header, no index) and with all newline
        characters removed.

    Raises:
        IndexError: if the page yields fewer than two tables.
    """
    facts_url = "https://space-facts.com/mars/"

    # pandas downloads and parses the page on its own, so no browser
    # session is opened here.
    table = pd.read_html(facts_url)
    stats_table = table[1]

    # Convert table info into HTML
    stats_html = stats_table.to_html(header=False, index=False).replace('\n', '')

    return stats_html

In [82]:
mars_facts()

'<table border="1" class="dataframe">  <tbody>    <tr>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <td>Mass:</td>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.38 AU)</td>    </tr>    <tr>      <td>Orbit Period:</td>      <td>687 days (1.9 years)</td>    </tr>    <tr>      <td>Surface Temperature:</td>      <td>-87 to -5 °C</td>    </tr>    <tr>      <td>First Record:</td>      <td>2nd millennium BC</td>    </tr>    <tr>      <td>Recorded By:</td>      <td>Egyptian astronomers</td>    </tr>  </tbody></table>'

# Mars Hemispheres

In [83]:
# Define function to find Mars hemispheres info
def mars_hemispheres():
    """Collect the title and sample-image URL of each Mars hemisphere.

    Every ``h3`` text on the search-results page is used as a hemisphere
    name; each one is clicked through to read the ``href`` of the link
    whose text is ``'Sample'``.

    Returns:
        A list of dicts with the keys ``'title'`` and ``'img_url'``, one per
        ``h3`` heading, in page order.
    """
    browser = init_browser()

    # try/finally guarantees the Chrome session is closed even when a click
    # or lookup in the loop below raises.
    try:
        # Visit URL
        hemisphere_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
        browser.visit(hemisphere_url)

        # Scrape page into soup
        soup = bs(browser.html, 'html.parser')

        # Hemisphere names are the text of every h3 heading on the page
        hemi_names = [heading.text for heading in soup.find_all('h3')]

        # List of {'title', 'img_url'} dicts, one per hemisphere
        hemi_urls = []

        # Visit each hemisphere page, record its sample image link, then
        # return to the results page for the next one.
        for hemi in hemi_names:
            browser.click_link_by_partial_text(hemi)
            hemi_urls.append({
                'title': hemi,
                'img_url': browser.find_by_text('Sample')['href'],
            })
            browser.back()
    finally:
        browser.quit()

    return hemi_urls

In [84]:
mars_hemispheres()

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'http://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg'}]

In [87]:
# Define a function that scrapes all Mars info
def scrape():
    """Run every Mars scraper and bundle the results into one dict.

    Returns:
        A dict with the keys ``'Mars_News'``, ``'Featured_Image'``,
        ``'Mars_Weather'``, ``'Mars_Facts'`` and ``'Mars_Hemispheres'``,
        each holding the return value of the matching scraper function.
    """
    # Dict values are evaluated top to bottom, so the scrapers run in the
    # order the keys are listed.
    return {
        'Mars_News': mars_news(),
        'Featured_Image': mars_image(),
        'Mars_Weather': mars_weather(),
        'Mars_Facts': mars_facts(),
        'Mars_Hemispheres': mars_hemispheres(),
    }

In [88]:
scrape()

{'Mars_News': [{'Title': 'Mars 2020 Unwrapped and Ready for More Testing',
   'Description': "In time-lapse video, bunny-suited engineers remove the inner layer of protective foil on NASA's Mars 2020 rover after it was relocated for testing."}],
 'Featured_Image': 'https://www.jpl.nasa.gov/spaceimages/images/wallpaper/PIA19092-1920x1200.jpg',
 'Mars_Weather': 'InSight sol 317 (2019-10-18) low -103.2ºC (-153.8ºF) high -26.2ºC (-15.2ºF)\nwinds from the SSE at 5.6 m/s (12.5 mph) gusting to 22.2 m/s (49.7 mph)\npressure at 7.10 hPapic.twitter.com/LNDEvGwVDw',
 'Mars_Facts': '<table border="1" class="dataframe">  <tbody>    <tr>      <td>Equatorial Diameter:</td>      <td>6,792 km</td>    </tr>    <tr>      <td>Polar Diameter:</td>      <td>6,752 km</td>    </tr>    <tr>      <td>Mass:</td>      <td>6.39 × 10^23 kg (0.11 Earths)</td>    </tr>    <tr>      <td>Moons:</td>      <td>2 (Phobos &amp; Deimos)</td>    </tr>    <tr>      <td>Orbit Distance:</td>      <td>227,943,824 km (1.38 AU)</t