In [16]:
import pickle
import requests
from bs4 import BeautifulSoup
from splinter import Browser
import time
import pandas as pd
executable_path = {
    'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path)
from pymongo import MongoClient
client = MongoClient()

In [6]:
# scrape the NASA Mars News SIte, collect news title, paragraph text, assign
# to variables for later reference
# Visit the NASA news URL


def mars_headline():
    url = "https://mars.nasa.gov/api/v1/news_items/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest"
    resp = requests.get(url).json()
    first_item = resp.get('items')[0]
    return {"item_title": first_item.get('title'), 
            "item_desc": first_item.get('description')
           }

headlines_dict = mars_headline()

headlines_dict

{'item_title': 'After a Reset, Curiosity Is Operating Normally',
 'item_desc': 'Curiosity has returned to science operations and is once again exploring the clay unit. '}

### NASA Mars News

In [3]:
# * Visit the url for JPL Featured Space Image [here](https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars).

# * Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called `featured_image_url`.
def mars_news():
    url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
    browser.visit(url)
    time.sleep(2)
    browser.click_link_by_partial_text("FULL IMAGE")
    time.sleep(2)
    browser.click_link_by_partial_text("more info")
    #get html code once at page
    image_html = browser.html

    #parse
    soup = BeautifulSoup(image_html, "html.parser")

    #find path and make full path
    image_path = soup.find('figure', class_='lede').a['href']
    featured_image_url = "https://www.jpl.nasa.gov/" + image_path
    return{"featured_image_url": featured_image_url}

news_dict = mars_news()
news_dict

{'featured_image_url': 'https://www.jpl.nasa.gov//spaceimages/images/largesize/PIA02570_hires.jpg'}

### Mars Weather

In [4]:
# * Visit the Mars Weather twitter account [here](https://twitter.com/marswxreport?lang=en)
# and scrape the latest Mars weather tweet from the page. Save the tweet text for the weather report
# as a variable called `mars_weather`.
def mars_weather():
    browser.visit('https://twitter.com/marswxreport?lang=en')

    soup = BeautifulSoup(browser.html, "html.parser")
    get_mars_weather = soup.find("p", class_="TweetTextSize TweetTextSize--normal js-tweet-text tweet-text").text
    return {"mars_weather": get_mars_weather}

weather_dict = mars_weather()
weather_dict

{'mars_weather': 'InSight sol 92 (2019-03-01) low -94.4ºC (-137.9ºF) high -12.9ºC (8.8ºF)\nwinds from the SW at 4.6 m/s (10.2 mph) gusting to 10.4 m/s (23.2 mph)\npressure at 7.20 hPapic.twitter.com/zxXhRFOwTo'}

### Mars Facts

In [12]:
def mars_facts():

    # * Visit the Mars Facts webpage [here](http://space-facts.com/mars/) and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
    url = 'http://space-facts.com/mars/'


    # * Use Pandas to convert the data to a HTML table string.
    tables = pd.read_html(url)
    new_table = tables[0]
    new_table.columns = ["Description", "Value"]
    formatted =  new_table.to_html(classes=["table-bordered", "table-striped", "table-hover"])
    return {"html_table_facts": formatted}

facts_dict = mars_facts()
facts_dict

{'html_table_facts': '<table border="1" class="dataframe table-bordered table-striped table-hover">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Description</th>\n      <th>Value</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>Equatorial Diameter:</td>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>Polar Diameter:</td>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>2</th>\n      <td>Mass:</td>\n      <td>6.42 x 10^23 kg (10.7% Earth)</td>\n    </tr>\n    <tr>\n      <th>3</th>\n      <td>Moons:</td>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>4</th>\n      <td>Orbit Distance:</td>\n      <td>227,943,824 km (1.52 AU)</td>\n    </tr>\n    <tr>\n      <th>5</th>\n      <td>Orbit Period:</td>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>6</th>\n      <td>Surface Temperature:</td>\n      <td>-153 to 20 °C</td>\n    </tr>\n    <tr>\n      <th>

### Mars Hemispheres

In [57]:
def mars_hemispheres():
    hemispheres_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"
    browser.visit(hemispheres_url)
    html = browser.html
    soup = BeautifulSoup(html, "html.parser")
    mars_hemisphere = []

    products = soup.find("div", class_ = "result-list" )
    hemispheres = products.find_all("div", class_="item")

    for hemis in hemispheres:
        title = hemis.find("h3").text
        end_link = hemis.find("a")["href"]
        image_link = "https://astrogeology.usgs.gov/" + end_link    
        browser.visit(image_link)
        html = browser.html
        soup=BeautifulSoup(html, "html.parser")
        downloads = soup.find("div", class_="downloads")
        image_url = downloads.find("a")["href"]
        mars_hemisphere.append({"title": title, "img_url": image_url})

    return {"hemisphere_image_urls": mars_hemisphere}

hemispheres_dict = mars_hemispheres()
hemispheres_dict

WebDriverException: Message: chrome not reachable
  (Session info: chrome=72.0.3626.119)
  (Driver info: chromedriver=73.0.3683.20 (8e2b610813e167eee3619ac4ce6e42e3ec622017),platform=Mac OS X 10.13.6 x86_64)


In [55]:
merged_dict = {**headlines_dict, **news_dict, **weather_dict, **facts_dict, **hemispheres_dict}

In [56]:
db = client.test_mars
collection = db.test_collection
collection.insert_one(merged_dict)

<pymongo.results.InsertOneResult at 0x1225b7bc8>

## Step 2 - MongoDB and Flask Application

In [None]:
# Use MongoDB with Flask templating to create a new HTML page that displays all of the information that was scraped from the URLs above.

# * Start by converting your Jupyter notebook into a Python script called `scrape_mars.py` with a function called `scrape` that will execute all of your scraping code from above and return one Python dictionary containing all of the scraped data.

# * Next, create a route called `/scrape` that will import your `scrape_mars.py` script and call your `scrape` function.

# * Store the return value in Mongo as a Python dictionary.

# * Create a root route `/` that will query your Mongo database and pass the mars data into an HTML template to display the data.

# * Create a template HTML file called `index.html` that will take the mars data dictionary and display all of the data in the appropriate HTML elements. Use the following as a guide for what the final product should look like, but feel free to create your own design.