# Module 12 Challenge: Web Scraping - Mission to Mars
#### by Rosie Gianan

In [1]:
from urllib.parse import urljoin

import pandas as pd
from bs4 import BeautifulSoup
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [2]:
# Setup splinter: resolve the ChromeDriver binary through webdriver_manager and
# keep the result in the keyword-argument dict that the later cells unpack
# into Browser('chrome', **executable_path, ...).
chromedriver_location = ChromeDriverManager().install()
executable_path = dict(executable_path=chromedriver_location)

### NASA Mars News
* Scrape the [Mars News Site](https://redplanetscience.com/) and collect the latest News Title and Paragraph Text.

In [3]:
# Visit the Mars News Site and capture its HTML.
news_url = 'https://redplanetscience.com/'
browser  = Browser('chrome', **executable_path, headless=False)

try:
    browser.visit(news_url)

    # Wait (up to 5 s) for the article containers that the next cell parses,
    # in case the list is injected by script after the initial page load.
    # The boolean result is deliberately ignored: if nothing appears, the
    # parsing cell is where the problem becomes visible.
    browser.is_element_present_by_css('div.list_text', wait_time=5)

    news_html = browser.html
finally:
    # Always close the Chrome window, even when the visit fails, so that a
    # failed run does not leave an orphaned browser/driver process behind.
    browser.quit()

# Convert the captured html to a soup object
news_soup = BeautifulSoup(news_html, 'html.parser')

In [4]:
# Collect every article container (<div class="list_text">) found in the soup
news_texts = news_soup.find_all('div', class_='list_text')

# The first entry of the ResultSet is treated as the latest news item
latest_news = news_texts[0]

# Locate the title and teaser tags inside that item, then read their text
title_tag  = latest_news.find('div', class_='content_title')
teaser_tag = latest_news.find('div', class_='article_teaser_body')
news_title = title_tag.text
news_p     = teaser_tag.text

# Print a three-line banner followed by the scraped values
banner_rule = "=================================================================================================="
banner_text = "=  Latests scraped news                                                                          ="
for banner_line in (banner_rule, banner_text, banner_rule):
    print(banner_line)
print(f"news_title: {news_title}")
print(f"news_p    : {news_p}")

=  Latests scraped news                                                                          =
news_title: NASA Wins Two Emmy Awards for Interactive Mission Coverage
news_p    : NASA-JPL's coverage of the Mars InSight landing earns one of the two wins, making this the NASA center's second Emmy.


### JPL Mars Space Images—Featured Image
* Visit the URL for the [Featured Space Image site](https://spaceimages-mars.com).
* Find the image URL for the current Featured Mars Image.

In [5]:
# Visit the Featured Space Image site and capture its HTML.
image_url = 'https://spaceimages-mars.com'
browser   = Browser('chrome', **executable_path, headless=False)

try:
    browser.visit(image_url)
    image_html = browser.html
finally:
    # Always close the Chrome window, even when the visit fails, so that a
    # failed run does not leave an orphaned browser/driver process behind.
    browser.quit()

# Convert the captured html to a soup object
image_soup = BeautifulSoup(image_html, 'html.parser')

In [6]:
# Search for the container(s) that hold the featured image link
image_srcs = image_soup.find_all('div', class_='floating_text_area')

# Stays None when no usable link is found, so later code can test for it
# instead of hitting a NameError.
featured_image_url = None

# Loop through list of image_srcs
for image_src in image_srcs:

    # Look the anchor up explicitly: a missing <a> or a missing href would
    # otherwise surface as TypeError / KeyError, which the previous
    # `except AttributeError` handler never caught.
    image_link = image_src.find('a', href=True)
    if image_link is None:
        continue

    # Find the URL for the current Featured Mars Image
    image_filename = image_link['href']

    # Save the complete URL string; urljoin also copes with hrefs that start
    # with "/" or are already absolute.
    featured_image_url = urljoin(f"{image_url}/", image_filename)
    print (f"URL for the current Featured Mars Image: {featured_image_url}")

if featured_image_url is None:
    print ("No featured image link found in any 'floating_text_area' div")

URL for the current Featured Mars Image: https://spaceimages-mars.com/image/featured/mars3.jpg


### Mars Facts
* Visit the [Mars Facts webpage](https://galaxyfacts-mars.com) 
* Use Pandas to scrape the table containing facts about the planet including diameter, mass, etc.
* Use Pandas to convert the data to an HTML table string.

In [7]:
# Visit the Mars Facts webpage and capture its HTML.
# (pandas is imported in the notebook's import cell at the top.)
facts_url = 'https://galaxyfacts-mars.com'
browser   = Browser('chrome', **executable_path, headless=False)

try:
    browser.visit(facts_url)
    facts_html = browser.html
finally:
    # Always close the Chrome window, even when the visit fails, so that a
    # failed run does not leave an orphaned browser/driver process behind.
    browser.quit()

# Convert the captured html to a soup object
facts_soup = BeautifulSoup(facts_html, 'html.parser')

In [8]:
# Let pandas fetch the facts page and parse its HTML tables into DataFrames
tables = pd.read_html(facts_url)

# The first parsed table is the Mars/Earth comparison; label its three
# columns and index the rows by the 'Description' column.
# NOTE(review): the page's own header row ("Mars - Earth Comparison") is kept
# as the first data row, as the displayed output shows - confirm that this is
# intended.
comparison_table         = tables[0]
comparison_table.columns = ['Description', 'Mars', 'Earth']
facts_df                 = comparison_table.set_index('Description')

facts_df

Unnamed: 0_level_0,Mars,Earth
Description,Unnamed: 1_level_1,Unnamed: 2_level_1
Mars - Earth Comparison,Mars,Earth
Diameter:,"6,779 km","12,742 km"
Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
Moons:,2,1
Distance from Sun:,"227,943,824 km","149,598,262 km"
Length of Year:,687 Earth days,365.24 days
Temperature:,-87 to -5 °C,-88 to 58°C


In [9]:
# Render the Mars facts DataFrame as an HTML <table> string (buf=None makes
# to_html return the markup instead of writing it anywhere), then show it.
facts_html_string = facts_df.to_html(buf=None)
print(facts_html_string)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th></th>
      <th>Mars</th>
      <th>Earth</th>
    </tr>
    <tr>
      <th>Description</th>
      <th></th>
      <th></th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <th>Mars - Earth Comparison</th>
      <td>Mars</td>
      <td>Earth</td>
    </tr>
    <tr>
      <th>Diameter:</th>
      <td>6,779 km</td>
      <td>12,742 km</td>
    </tr>
    <tr>
      <th>Mass:</th>
      <td>6.39 × 10^23 kg</td>
      <td>5.97 × 10^24 kg</td>
    </tr>
    <tr>
      <th>Moons:</th>
      <td>2</td>
      <td>1</td>
    </tr>
    <tr>
      <th>Distance from Sun:</th>
      <td>227,943,824 km</td>
      <td>149,598,262 km</td>
    </tr>
    <tr>
      <th>Length of Year:</th>
      <td>687 Earth days</td>
      <td>365.24 days</td>
    </tr>
    <tr>
      <th>Temperature:</th>
      <td>-87 to -5 °C</td>
      <td>-88 to 58°C</td>
    </tr>
  </tbody>
</table>


### Mars Hemispheres
* Visit the [astrogeology site](https://marshemispheres.com/) to obtain high-resolution images for each hemisphere of Mars.
* Find the image URL to the full-resolution image.
* Save the image URL string for the full resolution hemisphere image and the hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys `img_url` and `title`.
* Append the dictionary with the image URL string and the hemisphere title to a list. This list will contain one dictionary for each hemisphere.

In [10]:
# Visit the astrogeology site and capture its HTML.
hemisphere_url = 'https://marshemispheres.com/'
browser        = Browser('chrome', **executable_path, headless=False)

try:
    browser.visit(hemisphere_url)
    hemisphere_html = browser.html
finally:
    # Always close the Chrome window, even when the visit fails, so that a
    # failed run does not leave an orphaned browser/driver process behind.
    browser.quit()

# Convert the captured html to a soup object
hemisphere_soup = BeautifulSoup(hemisphere_html, 'html.parser')

In [11]:
# Search the item by class name
item_hemisphere_images = hemisphere_soup.find_all('div', class_="item")

# Loop through item_hemisphere_images and save the title and URLs of each
# hemisphere as one dictionary per item.
mars_hemisphere_images_urls = []

for item in item_hemisphere_images:

    # Look up the three tags explicitly. A missing tag or attribute would
    # otherwise raise IndexError / TypeError / KeyError, none of which the
    # previous `except AttributeError` handler caught.
    title_tag = item.find('h3')
    link_tag  = item.find('a', href=True)
    thumb_tag = item.find('img', src=True)

    if title_tag is None or link_tag is None or thumb_tag is None:
        print("Skipping an 'item' div without the expected <h3>, <a href> and <img src> tags")
        continue

    # Build absolute URLs; urljoin also copes with hrefs that start with "/"
    # or are already absolute.
    # NOTE(review): 'img_url' is the link to the hemisphere's detail page and
    # 'img_src_url' is the thumbnail shown in the list (see the output below);
    # neither is the full-resolution image the section description asks for.
    # Fetching that would require visiting each detail page - TODO confirm.
    mars_hemispheres_images = {
        'title':       title_tag.text,
        'img_url':     urljoin(hemisphere_url, link_tag['href']),
        'img_src_url': urljoin(hemisphere_url, thumb_tag['src']),
    }
    mars_hemisphere_images_urls.append(mars_hemispheres_images)

mars_hemisphere_images_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/cerberus.html',
  'img_src_url': 'https://marshemispheres.com/images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/schiaparelli.html',
  'img_src_url': 'https://marshemispheres.com/images/08eac6e22c07fb1fe72223a79252de20_schiaparelli_enhanced.tif_thumb.png'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/syrtis.html',
  'img_src_url': 'https://marshemispheres.com/images/55a0a1e2796313fdeafb17c35925e8ac_syrtis_major_enhanced.tif_thumb.png'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_url': 'https://marshemispheres.com/valles.html',
  'img_src_url': 'https://marshemispheres.com/images/4e59980c1c57f89c680c0e1ccabbeff1_valles_marineris_enhanced.tif_thumb.png'}]