In [1]:
import pandas as pd

import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser

import pymongo

In [2]:
# Start a visible (non-headless) Chrome session through splinter, driven by
# the chromedriver binary named below.
executable_path = {'executable_path': 'chromedriver.exe'}
browser = Browser('chrome', headless=False, **executable_path)

## NASA Mars News
* Scrape the [NASA Mars News Site](https://mars.nasa.gov/news/) and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.

In [3]:
# Point the shared browser session at the NASA Mars news listing page.
nasa_url = 'https://mars.nasa.gov/news/'
browser.visit(nasa_url)

In [4]:
# The first news item on the page is the latest one.
# Wait (up to 10 s) for the first article container to be present before
# snapshotting the DOM; otherwise browser.html can be captured before the
# article list exists and the later lookup of 'div.list_text' returns None.
# NOTE(review): assumes the list is populated after the initial page load — confirm.
browser.is_element_present_by_css('div.list_text', wait_time=10)
nasa_html = browser.html
nasa_soup = bs(nasa_html, 'lxml')

In [5]:
# The first 'list_text' container in the parsed page holds the newest article.
first_article = nasa_soup.find('div', class_="list_text")

# Pull each field's <div> out of that container, then read its text.
date_tag = first_article.find('div', class_="list_date")
news_date = date_tag.text

title_tag = first_article.find('div', class_="content_title")
news_title = title_tag.text

teaser_tag = first_article.find('div', class_="article_teaser_body")
news_p = teaser_tag.text

print(f"Date: {news_date} \nTitle: {news_title} \nParagraph: {news_p}")

Date: December 18, 2019 
Title: NASA's Mars 2020 Rover Completes Its First Drive 
Paragraph: In a 10-plus-hour marathon, the rover steered, turned and drove in 3-foot (1-meter) increments over small ramps.


## JPL Mars Space Images - Featured Image
* Visit the url for JPL Featured Space Image [here](https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars).
* Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called `featured_image_url`.
* Make sure to find the image url to the full size `.jpg` image.
* Make sure to save a complete url string for this image.

In [6]:
# JPL space-images search page with the category filter set to Mars.
# NOTE: the name `img_url` is rebound to a list of hemisphere image URLs in a
# later cell, so it only holds this page URL until that cell runs.
img_url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(img_url)

In [7]:
# Snapshot the page currently loaded in the browser and parse it with the
# built-in 'html.parser' backend.
img_html = browser.html
img_soup = bs(img_html, 'html.parser')

In [8]:
# The anchor with classes "button fancybox" carries the site-relative image
# path in its data-fancybox-href attribute.
# NOTE(review): the captured output shows a '/mediumsize/' path, while the task
# text asks for the full-size .jpg — confirm this is the intended image.
fancybox_anchor = img_soup.find('a', class_="button fancybox")
current_href = fancybox_anchor['data-fancybox-href']

# Prefix the JPL host to turn the relative path into a complete URL.
featured_image_url = f'https://www.jpl.nasa.gov{current_href}'
print(f'Featured Image_url: {featured_image_url}')

Featured Image_url: https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA14579_ip.jpg


## Mars Weather
* Visit the Mars Weather Twitter account [here](https://twitter.com/marswxreport?lang=en) and scrape the latest Mars weather tweet from the page. Save the tweet text for the weather report as a variable called `mars_weather`.

In [9]:
# Open the Mars Weather report account timeline (English locale via ?lang=en).
twiiter_url = 'https://twitter.com/marswxreport?lang=en'
browser.visit(twiiter_url)

In [10]:
# Snapshot the loaded timeline and parse it with lxml.
# NOTE(review): the weather cell below reads the tweet through the live browser
# (find_by_xpath), not through wthr_soup — confirm whether this parse is still needed.
wthr_html = browser.html
wthr_soup = bs(wthr_html, 'lxml')

In [12]:
# Match the text paragraph of ANY tweet in the stream instead of one hard-coded
# tweet id, so the cell keeps returning the newest tweet as reports are posted.
# Matches come back in document order, so `.first` is the top-most tweet.
# NOTE(review): a pinned tweet or retweet at the top of the timeline would be
# picked up instead of the latest weather report — confirm for this account.
wthr_xpath = '//*[starts-with(@id, "stream-item-tweet-")]/div[1]/div[2]/div[2]/p'
mars_weather = browser.find_by_xpath(wthr_xpath).first.text
print(f'Mars Weather: {mars_weather}')

Mars Weather: InSight sol 377 (2019-12-18) low -97.5ºC (-143.6ºF) high -19.9ºC (-3.9ºF)
winds from the SSE at 6.4 m/s (14.3 mph) gusting to 21.0 m/s (47.1 mph)
pressure at 6.50 hPa


## Mars Facts
* Visit the Mars Facts webpage [here](https://space-facts.com/mars/) and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.
* Use Pandas to convert the data to an HTML table string.

In [13]:
# pd.read_html is given the URL directly and returns a list of DataFrames,
# one per table it finds on the page.
# The browser.visit call only navigates the splinter window to the same page;
# `tables` does not depend on it.
fact_url = 'https://space-facts.com/mars/'
tables = pd.read_html(fact_url)
browser.visit(fact_url)

In [14]:
# The second parsed table (index 1) is the Mars/Earth comparison; use its
# label column as the row index, then render the frame as an HTML string.
comparison_table = tables[1]
mars_fact = comparison_table.set_index(keys=['Mars - Earth Comparison'])
mars_fact_html = mars_fact.to_html()

## Mars Hemispheres
* Visit the USGS Astrogeology site [here](https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars) to obtain high resolution images for each of Mars's hemispheres.
* You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.
* Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys `img_url` and `title`.
* Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [15]:
# USGS Astrogeology search results for "hemisphere enhanced", target Mars.
atro_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(atro_url)

In [16]:
# Snapshot the search-results page and parse it with lxml.
products_html = browser.html
products_soup = bs(products_html, 'lxml')

# Each <div class="item"> is one search result; its detail-page link is read
# from it in the next cell.
products_items = products_soup.find_all('div', class_='item')

In [17]:
# Collect the site-relative detail-page href from every search-result item.
visit_urls = [
    item.find('a', class_="itemLink product-item")['href']
    for item in products_items
]

In [18]:
baseURL = 'https://astrogeology.usgs.gov'
titles = []
img_url = []

# Open each hemisphere's detail page and record its image URL and title.
# Both lists are appended once per page, so matching positions belong together.
for visit in visit_urls:
    browser.visit(f'{baseURL}{visit}')
    hemi_soup = bs(browser.html, 'lxml')

    # The <img class="wide-image"> src is site-relative; prefix the host.
    wide_image = hemi_soup.find('img', class_="wide-image")
    img_url.append(f"{baseURL}{wide_image['src']}")

    title_heading = hemi_soup.find('h2', class_="title")
    titles.append(title_heading.text)

In [19]:
# Build one {'title', 'img_url'} record per scraped hemisphere.
# zip() follows however many entries were actually collected, instead of
# assuming exactly four (which raised IndexError on fewer results and silently
# dropped any beyond the fourth).
hemisphere_image_urls = [
    {'title': title, 'img_url': url}
    for title, url in zip(titles, img_url)
]

hemisphere_image_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/cfa62af2557222a02478f1fcd781d445_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/3cdd1cbf5e0813bba925c9030d13b62e_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/ae209b4e408bb6c3e67b6af38168cf28_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://astrogeology.usgs.gov/cache/images/7cf2da4bf549ed01c17f206327be4db7_valles_marineris_enhanced.tif_full.jpg'}]

In [20]:
# All scraping cells above are done with the browser; close the session.
browser.quit()

## MongoDB
* Store the return value in Mongo as a Python dictionary.

In [23]:
# Gather every scraped value into a single dictionary, keyed by the name of
# the variable that holds it.
mars_data = dict(
    news_date=news_date,
    news_title=news_title,
    news_p=news_p,
    featured_image_url=featured_image_url,
    mars_weather=mars_weather,
    mars_fact_html=mars_fact_html,
    hemisphere_image_urls=hemisphere_image_urls,
)
mars_data

{'news_date': 'December 18, 2019',
 'news_title': "NASA's Mars 2020 Rover Completes Its First Drive",
 'news_p': 'In a 10-plus-hour marathon, the rover steered, turned and drove in 3-foot (1-meter) increments over small ramps.',
 'featured_image_url': 'https://www.jpl.nasa.gov/spaceimages/images/mediumsize/PIA14579_ip.jpg',
 'mars_weather': 'InSight sol 377 (2019-12-18) low -97.5ºC (-143.6ºF) high -19.9ºC (-3.9ºF)\nwinds from the SSE at 6.4 m/s (14.3 mph) gusting to 21.0 m/s (47.1 mph)\npressure at 6.50 hPa',
 'mars_fact_html': '<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Mars</th>\n      <th>Earth</th>\n    </tr>\n    <tr>\n      <th>Mars - Earth Comparison</th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Diameter:</th>\n      <td>6,779 km</td>\n      <td>12,742 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg</td>\n      <td>5.97 × 10^24 kg</t