In [1]:
# Import libraries and dependancies
from splinter import Browser
from bs4 import BeautifulSoup as bs
import pandas as pd
import requests
import time

In [2]:
def init_browser():
    executable_path = {"executable_path": "/usr/local/bin/chromedriver"}
    return Browser("chrome", **executable_path, headless=False)

## NASA Mars News

In [3]:
def news_scrape():
    # Connect function init_browser
    browser = init_browser()
    
    # Set URL to scrape
    news_url = "https://mars.nasa.gov/news/"
    browser.visit(news_url)
    
    # Add time delay
    time.sleep(3)

    # Scrape page into Soup
    news_html = browser.html
    news_soup = bs(news_html, "html5lib")

    # Find latest news title and paragraph
    news_article = news_soup.find_all("div", class_="list_text")[0]

    news_title = news_article.find("div", class_="content_title").text

    news_text = news_article.find("div", class_ ="article_teaser_body").text

    # Close the browser after scraping
    browser.quit()
    
    # Return dictionary of data for database
    return {"news_title": news_title, "news_text": news_text}

In [4]:
news_scrape()

{'news_title': "How NASA's Mars Helicopter Will Reach the Red Planet's Surface",
 'news_text': 'The small craft will seek to prove that powered, controlled flight is possible on another planet. But just getting it onto the surface of Mars will take a whole lot of ingenuity.'}

## JPL Mars Space Image

In [5]:
def image_scrape():
    # Connect function init_browser
    browser = init_browser()
    
    # Set URL to scrape
    image_search = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"
    browser.visit(image_search)
    
    # Add time delay
    time.sleep(3)

    # Click on "FULL IMAGE"
    browser.click_link_by_partial_text("FULL IMAGE")

    # Click on "more info"
    browser.click_link_by_partial_text("more info")

    # Scrape page into Soup
    image_html = browser.html
    image_soup = bs(image_html, "html5lib")

    # Find featured image url
    image_url = image_soup.find("figure", class_="lede").find("a")["href"]
    image_full_url = "https://www.jpl.nasa.gov" + image_url

    # Close the browser after scraping
    browser.quit()
    
    # Return dictionary of data for database
    return {"image_url": image_full_url}

In [6]:
image_scrape()



{'image_url': 'https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA16884_hires.jpg'}

## Mars Weather

In [7]:
def weather_scrape():
    # Connect function init_browser
    browser = init_browser()
    
    # Set URL to scrape
    weather_url = "https://twitter.com/marswxreport?lang=en"
    browser.visit(weather_url)

    # Add time delay
    time.sleep(3)

    # Scrape page into Soup
    weather_html = browser.html
    weather_soup = bs(weather_html, "html5lib")

    # Find latest news title and paragraph
    weather_art = weather_soup.find_all("article", role="article")[0]
    weather_text = weather_art.find_all("span")[4].text

    # Close the browser after scraping
    browser.quit()
    
    # Return dictionary of data for database
    return {"weather_text": weather_text}

In [8]:
weather_scrape()

{'weather_text': 'InSight sol 561 (2020-06-25) low -89.7ºC (-129.5ºF) high -2.9ºC (26.8ºF)\nwinds from the W at 5.7 m/s (12.8 mph) gusting to 17.8 m/s (39.8 mph)\npressure at 7.60 hPa'}

## Mars Facts

In [27]:
def mars_facts():
    # Set URL variable
    facts_url = "https://space-facts.com/mars/"
    
    # Use Pandas to read html
    table = pd.read_html(facts_url)
    
    # Set variable for table
    fact_table = table[0]
    
    # Set column names
    fact_table.columns = ['Description', 'Value']
    
    # Set index
    fact_table = fact_table.set_index('Description')
    
    # Convert table back to html
    fact_table = fact_table.to_html
    
    return fact_table

# df.rename(columns=df.iloc[0]).drop(df.index[0])


In [28]:
mars_facts()

<bound method DataFrame.to_html of                                            6,792 km
Description                                        
Equatorial Diameter:                       6,792 km
Polar Diameter:                            6,752 km
Mass:                 6.39 × 10^23 kg (0.11 Earths)
Moons:                          2 (Phobos & Deimos)
Orbit Distance:            227,943,824 km (1.38 AU)
Orbit Period:                  687 days (1.9 years)
Surface Temperature:                   -87 to -5 °C
First Record:                     2nd millennium BC
Recorded By:                   Egyptian astronomers>

## Mars Hemispheres

In [7]:
def hemi_scrape():
    # Connect function init_browser
    browser = init_browser()
    
    # Set URL to scrape
    hemi_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    browser.visit(hemi_url)

    # Scrape page into Soup
    hemi_html = browser.html
    hemi_soup = bs(hemi_html, "html5lib")
    
    # Find all items with image 
    items = hemi_soup.find_all("div", class_='item')
    
    # Create empty list to append with results from loop
    hemi_list = []

    # Set variable for main url
    main_url = 'https://astrogeology.usgs.gov'

    # Start loop through items
    for item in items:
        
        # Find hemi title
        hemi_title = item.find("h3").text
        
        # Find url to hemi page
        hi_url = item.find("a")["href"]
        
        # Visit url to scrape
        browser.visit(main_url + hi_url)
        
        # Scrape page into Soup
        hi_html = browser.html
        hi_soup = bs(hi_html, "html5lib")
        
         # Find high res image url
        hemi_image = hi_soup.find("div", class_="wide-image-wrapper").find("img", class_='wide-image')["src"]
        
        # Append list with results as dictionary
        hemi_list.append({"title": hemi_title, "hemi_image": main_url + hemi_image})
    
    # Close the browser after scraping
    browser.quit()
    
    # Return dictionary of data for database
    return hemi_list

In [8]:
hemi_scrape()

[{'title': 'Cerberus Hemisphere Enhanced',
  'hemi_image': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'hemi_image': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'hemi_image': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'hemi_image': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]