## Web Scraping Assignment 

In [95]:
# import dependencies 
import pandas as pd 
from bs4 import BeautifulSoup
import requests
import pymongo
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

### NASA Mars News 

Scrape the NASA Mars News Site and collect the latest News Title and Paragraph Text. Assign the text to variables that you can reference later.

In [97]:
# Open a PyMongo client against the MongoDB server on this machine (port 27017)
conn = 'mongodb://localhost:27017'
client = pymongo.MongoClient(host=conn)

In [99]:
# NASA Mars news listing to scrape; the query string is broken up one parameter group per line
nasa_url = (
    "https://mars.nasa.gov/news/"
    "?page=0&per_page=40"
    "&order=publish_date+desc%2Ccreated_at+desc"
    "&search="
    "&category=19%2C165%2C184%2C204"
    "&blank_scope=Latest"
)

In [101]:
# Retrieve page with the requests module.
# The timeout keeps this cell from hanging indefinitely if the server never answers,
# and raise_for_status() stops here on an HTTP error instead of parsing an error page.
response = requests.get(nasa_url, timeout=30)
response.raise_for_status()

# Create BeautifulSoup object; parse with 'html.parser'
soup = BeautifulSoup(response.text, 'html.parser')

In [103]:
# Save latest news title to variable.
# The text of these divs comes back padded with newlines, so strip it before storing.
latest_title = soup.find_all('div', class_='content_title')[0].text.strip()
print(latest_title)

# Save the teaser paragraph of the same (first) article, stripped the same way
latest_paragraph = soup.find_all('div', class_='rollover_description_inner')[0].text.strip()
print(latest_paragraph)



NASA's Perseverance Rover Begins Its First Science Campaign on Mars



The six-wheeled scientist is heading south to explore Jezero Crater’s lakebed in search of signs of ancient microbial life.



### JPL Mars Space Images

- Visit the url for JPL Featured Space Image.

- Use splinter to navigate the site and find the image url for the current Featured Mars Image and assign the url string to a variable called featured_image_url.

- Make sure to find the image url to the full size .jpg image.

- Make sure to save a complete url string for this image.

In [104]:
# Download (or reuse the cached) chromedriver and launch a visible Chrome window through splinter
driver_path = ChromeDriverManager().install()
executable_path = {'executable_path': driver_path}
browser = Browser('chrome', headless=False, **executable_path)



Current google-chrome version is 91.0.4472
Get LATEST driver version for 91.0.4472
Driver [/Users/ashleypatricia/.wdm/drivers/chromedriver/mac64/91.0.4472.101/chromedriver] found in cache


In [105]:
# Base location that is later prefixed to the image path scraped from the page
image_url = "https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/"
# Landing page of the JPL space image site
url = "https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html"

# Load the landing page in the browser
browser.visit(url)

In [106]:
 # HTML object
html = browser.html
# Hand the rendered page source to Beautiful Soup
soup = BeautifulSoup(html, 'html.parser')
# Collect the <img> tags carrying the featured-image classes, then read the first one's src
header_images = soup.find_all('img', class_='headerimage fade-in')
image = header_images[0]['src']
image

'image/featured/mars3.jpg'

In [107]:
# Build the complete image url by prefixing the scraped relative path with the site base url
final_img = image_url + image
# The assignment asks for this value under the name featured_image_url;
# final_img is kept as well so existing references keep working.
featured_image_url = final_img
featured_image_url

'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/image/featured/mars3.jpg'

### Mars Facts

- Visit the Mars Facts webpage (https://space-facts.com/mars/) and use Pandas to scrape the table containing facts about the planet, including Diameter, Mass, etc.

- Use Pandas to convert the data to an HTML table string.

In [110]:
# Url that we are scraping
facts_url = "https://space-facts.com/mars/"
# Open the facts page in the browser window
browser.visit(facts_url)
# Read the site with pandas; the result is a list of DataFrames (two tables in the output below)
# NOTE(review): read_html is given facts_url rather than browser.html, so it does not
# appear to use the page loaded by browser.visit above - confirm the visit is needed.
mars_facts = pd.read_html(facts_url)
mars_facts

[                      0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:            2 (Phobos & Deimos)
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC
 8          Recorded By:           Egyptian astronomers,
   Mars - Earth Comparison             Mars            Earth
 0               Diameter:         6,779 km        12,742 km
 1                   Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 2                  Moons:                2                1
 3      Distance from Sun:   227,943,824 km   149,598,262 km
 4         Length of Year:   687 Earth days      365.24 days
 5            Temperature:     -87 to -5 °C      -88 to 58°C,
           

In [112]:
# Take the first scraped table (the Mars fact sheet) as the working table.
# Copy it so that renaming its columns later does not also alter the frame held inside mars_facts.
facts_table = mars_facts[0].copy()
facts_table

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [115]:
# Replace the default 0/1 column headers with descriptive labels
column_labels = ["Aspect of Mars", "Value"]
facts_table.columns = column_labels
facts_table

Unnamed: 0,Aspect of Mars,Value
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [119]:
# Render the facts table as an HTML <table> string and keep it in html_table
# NOTE(review): with default arguments the output includes the integer row index as a
# leading <th> column (see the printed markup) - pass index=False if that is unwanted.
html_table = facts_table.to_html()
# Print the raw markup so it can be inspected
print(html_table)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Aspect of Mars</th>
      <th>Value</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>0</th>
      <td>Equatorial Diameter:</td>
      <td>6,792 km</td>
    </tr>
    <tr>
      <th>1</th>
      <td>Polar Diameter:</td>
      <td>6,752 km</td>
    </tr>
    <tr>
      <th>2</th>
      <td>Mass:</td>
      <td>6.39 × 10^23 kg (0.11 Earths)</td>
    </tr>
    <tr>
      <th>3</th>
      <td>Moons:</td>
      <td>2 (Phobos &amp; Deimos)</td>
    </tr>
    <tr>
      <th>4</th>
      <td>Orbit Distance:</td>
      <td>227,943,824 km (1.38 AU)</td>
    </tr>
    <tr>
      <th>5</th>
      <td>Orbit Period:</td>
      <td>687 days (1.9 years)</td>
    </tr>
    <tr>
      <th>6</th>
      <td>Surface Temperature:</td>
      <td>-87 to -5 °C</td>
    </tr>
    <tr>
      <th>7</th>
      <td>First Record:</td>
      <td>2nd millennium BC</td>
    </tr>
    <tr>
      <th>8</th>
 

### Mars Hemispheres

- Visit the USGS Astrogeology site (https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars) to obtain high resolution images for each of Mars's hemispheres.

- You will need to click each of the links to the hemispheres in order to find the image url to the full resolution image.

- Save both the image url string for the full resolution hemisphere image and the Hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys img_url and title.

- Append the dictionary with the image url string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [140]:
# Site root, kept separately from the search page url
USGSimage_url = "https://astrogeology.usgs.gov" 
# Search results page listing the enhanced Mars hemisphere products
USGS_url = "https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars"

# Load the search page in the browser and grab its rendered source
browser.visit(USGS_url)
html = browser.html
# Parse the page the browser just loaded. The previous code parsed response.text,
# which still holds the NASA news page fetched with requests, not this USGS page.
soup = BeautifulSoup(html, 'html.parser')

In [146]:
# Find all hemisphere info and images (use inspect to get the class)
# First <div> whose class is "collapsible results"
allhemispheres = soup.find("div", class_="collapsible results")
# Find the items within the collapsible results (each hemisphere)
# NOTE(review): this lookup runs on the whole soup, not on allhemispheres, so any
# <div class="item"> anywhere in the page is matched - confirm that is intended.
each_hemisphere = soup.find_all('div', class_ = 'item')

# List that starts out empty here
hemispheres_list = []