# Web Scraping Challenge

In [1]:
#Import dependencies
import os
import shutil
import time

import pandas as pd
import pymongo
import requests
from bs4 import BeautifulSoup as bs
from splinter import Browser

### NASA Mars News

- Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. 
- Assign the text to variables that you can reference later.

In [2]:
#Getting the NASA news Page and parsing the file.
# The timeout keeps this cell from hanging indefinitely on a stalled connection,
# and raise_for_status() stops on an HTTP error instead of parsing an error page.
page = requests.get('https://mars.nasa.gov/news/?page=0&per_page=40&order=publish_date+desc%2Ccreated_at+desc&search=&category=19%2C165%2C184%2C204&blank_scope=Latest', timeout=30)
page.raise_for_status()
soup = bs(page.content, 'html.parser')
title = soup.find_all('title')
paragraph = soup.find_all('p')

#get rid of tags
# Keep plain-text copies of the first <title> and first <p> match so they can be
# referenced later; each is None when the page had no such element.
# NOTE(review): <title> is the document title element and the recorded run found
# only one <p>; the article list is presumably rendered client-side, so confirm
# these are the intended "latest news" title and paragraph.
news_title = title[0].get_text(strip=True) if title else None
news_p = paragraph[0].get_text(strip=True) if paragraph else None

# Only the final expression of a cell is displayed, so the raw matches are shown
# once here (a bare `title` line above it would display nothing).
paragraph

[<p>Managed by the Mars Exploration Program and the Jet Propulsion Laboratory for NASA’s Science Mission Directorate</p>]

### JPL Mars Space Images - Featured Image
- Visit the URL for the JPL Featured Space Image: https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars
- Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.
- Make sure to find the image url to the full size .jpg image.
- Make sure to save a complete url string for this image.

In [3]:
#Splinter chromedriver setup
# Resolve chromedriver from PATH so the notebook is not tied to one machine's
# layout; fall back to the previously hard-coded location when it is not on PATH.
chromedriver_path = shutil.which('chromedriver') or '/usr/local/bin/chromedriver'
# Echo the path that will be used (replaces the shell-only `!which chromedriver`).
print(chromedriver_path)
executable_path = {'executable_path': chromedriver_path}
# headless=False opens a visible Chrome window driven by Splinter.
browser = Browser('chrome', **executable_path, headless=False)

/usr/local/bin/chromedriver


In [4]:
# Point the Splinter browser at the JPL space-images listing (query string: category=Mars).
url = 'https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars'
browser.visit(url)

In [5]:
# Pause for three seconds before interacting with the page — presumably to let it finish loading.
time.sleep(3)

In [6]:
# Look up the element whose id is "full_image" and click it.
btn = browser.find_by_id('full_image')
btn.click()

In [7]:
# Follow the link whose text contains "more info".
# Uses the `browser.links` finder, matching the hemisphere loop's
# `browser.links.find_by_text(...)`, instead of the legacy
# `find_link_by_partial_text` shortcut.
more_info = browser.links.find_by_partial_text('more info')
more_info.click()



In [8]:
# Take the browser's current page HTML and parse it with BeautifulSoup's built-in html.parser.
html = browser.html
img_soup = bs(html, 'html.parser')

In [9]:
# Pick the first <img> nested in a link inside <figure class="lede">,
# then read its src attribute (a site-relative path in the recorded run).
lede_img = img_soup.select_one('figure.lede a img')
img_url_rel = lede_img.get('src')
img_url_rel

'/spaceimages/images/largesize/PIA23354_hires.jpg'

In [10]:
# Prefix the site-relative path with the JPL host to form a complete URL.
img_url = f'https://jpl.nasa.gov{img_url_rel}'
# The section brief asks for the result in `featured_image_url`;
# `img_url` is kept as well so existing references keep working.
featured_image_url = img_url
featured_image_url

'https://jpl.nasa.gov/spaceimages/images/largesize/PIA23354_hires.jpg'

### Mars Facts
Visit the Mars Facts webpage and use Pandas to scrape the table containing facts about the planet including Diameter, Mass, etc.

In [11]:
#Read website URL using pandas
url = 'https://space-facts.com/mars/'
d = pd.read_html(url)

#Converted the table to an html file
d[0].columns = ['Description', 'Values']
d[0].to_html('table.html')

### Mars Hemispheres
- Visit the USGS Astrogeology site to obtain high-resolution images for each of Mars's hemispheres.
- You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.
- Save both the image url string for the full resolution hemisphere image, and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys img_url and title.
- Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [12]:
# Load the USGS Astrogeology search results (query "hemisphere enhanced", target Mars) in the browser.
url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url)

In [13]:
# Pause for three seconds before querying the page — presumably to let the results finish loading.
time.sleep(3)

In [14]:
# Collect the <h3> headings nested inside links with class "product-item".
links = browser.find_by_css('a.product-item h3')

In [15]:
# Sanity check: number of matched headings (the recorded run shows 4).
len(links)

4

In [16]:
# Build one {'img_url', 'title'} record per hemisphere result.
hemisphere_image_urls = []

for idx in range(len(links)):
    # The headings are looked up again on every pass rather than reusing `links`
    # (presumably because earlier element handles go stale after navigating).
    browser.find_by_css('a.product-item h3')[idx].click()
    # On the detail page, take the href of the first link whose text is "Sample".
    sample_link = browser.links.find_by_text('Sample').first
    hemisphere_image_urls.append({
        'img_url': sample_link['href'],
        'title': browser.find_by_css('h2.title').text,
    })
    # Go back to the results list before the next pass.
    browser.back()

hemisphere_image_urls

[{'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

In [17]:
# Close the Splinter-driven browser now that the last scraping cell has run.
browser.quit()