# Web Scraping Challenge

In [5]:
#Import dependencies
from splinter import Browser
import pandas as pd
from bs4 import BeautifulSoup as bs
import requests
import os
import pymongo
import time

In [6]:
from scrape_mars import scrape

In [7]:
data = scrape()



In [8]:
data

{'title': [<title>News  – NASA’s Mars Exploration Program </title>],
 'paragraph': [<p>Managed by the Mars Exploration Program and the Jet Propulsion Laboratory for NASA’s Science Mission Directorate</p>],
 'featured_img': 'https://jpl.nasa.gov/spaceimages/images/largesize/PIA12831_hires.jpg',
 'facts': None,
 'hemi': [{'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
   'title': 'Cerberus Hemisphere Enhanced'},
  {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
   'title': 'Schiaparelli Hemisphere Enhanced'},
  {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
   'title': 'Syrtis Major Hemisphere Enhanced'},
  {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
   'title': 'Valles Marineris Hemisphere Enhanced'}]}

### NASA Mars News

- Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. 
- Assign the text to variables that you can reference later.

In [17]:
#Getting the NASA news Page and parsing the file.
page = requests.get('https://mars.nasa.gov/news/')
soup = bs(page.content, 'html.parser')
title = soup.find_all('title')[0].text
paragraph = soup.find_all('p')[0].text

#get rid of tags
title
paragraph

'Managed by the Mars Exploration Program and the Jet Propulsion Laboratory for NASA’s Science Mission Directorate'

### JPL Mars Space Images - Featured Image
- Visit the url for JPL Featured Space Image here.
- Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.
- Make sure to find the image url to the full size .jpg image.
- Make sure to save a complete url string for this image.

In [16]:
#Splinter chromedriver setup
!which chromedriver
executable_path = {'executable_path': '/usr/local/bin/chromedriver'}
browser = Browser('chrome', **executable_path, headless=False)

/usr/local/bin/chromedriver


In [17]:
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [18]:
time.sleep(3)

In [7]:
btn = browser.find_by_id('full_image')
btn.click()

In [8]:
more_info = browser.find_link_by_partial_text('more info')
more_info.click()



In [9]:
html = browser.html
img_soup = bs(html, 'html.parser')

In [10]:
img_url_rel = img_soup.select_one('figure.lede a img').get('src')
img_url_rel

'/spaceimages/images/largesize/PIA19346_hires.jpg'

In [11]:
img_url = f'https://jpl.nasa.gov{img_url_rel}'
img_url

'https://jpl.nasa.gov/spaceimages/images/largesize/PIA19346_hires.jpg'

### Mars Facts
Visit the Mars Facts webpage and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

In [22]:
#Read website URL using pandas
url = 'https://space-facts.com/mars/'
d = pd.read_html(url)

#Converted the table to an html file
d[0].columns = ['Description', 'Values']
d[0].to_html('table.html')

d[0]

Unnamed: 0,Description,Values
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


### Mars Hemispheres
- Visit the USGS Astrogeology site to obtain high resolution images for each of Mar's hemispheres.
- You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.
- Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys img_url and title.
- Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [13]:
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

In [14]:
time.sleep(3)

In [15]:
links = browser.find_by_css('a.product-item h3')

In [15]:
len(links)

4

In [16]:
hemisphere_image_urls = []

for i in range(len(links)):
    hemisphere = {}
    browser.find_by_css('a.product-item h3')[i].click()
    sample_elem = browser.links.find_by_text('Sample').first
    hemisphere['img_url'] = sample_elem['href']
    hemisphere['title'] = browser.find_by_css('h2.title').text
    hemisphere_image_urls.append(hemisphere)
    browser.back()
    
hemisphere_image_urls

[{'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

In [17]:
browser.quit()