### Dependencies and start up material

In [1]:
# Dependencies
from bs4 import BeautifulSoup
import requests
import pandas as pd
import pymongo

In [2]:
# Add splinter depencies
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager

In [3]:
# Setup splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

[WDM] - Current google-chrome version is 89.0.4389
[WDM] - Get LATEST driver version for 89.0.4389


[WDM] - Get LATEST driver version for 89.0.4389
[WDM] - Trying to download new driver from https://chromedriver.storage.googleapis.com/89.0.4389.23/chromedriver_mac64.zip
[WDM] - Driver has been saved in cache [/Users/Haley/.wdm/drivers/chromedriver/mac64/89.0.4389.23]


## NASA Mars News

In [4]:
# URL of page to be scraped
url1 = 'https://mars.nasa.gov/news/'

In [5]:
# Retrieve page with the requests module
browser.visit(url1)

In [6]:
# Create BeautifulSoup object; parse with 'html.parser'
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [7]:
# Navigates to the first item in the list
content = soup.find_all('div', class_='list_text')[0]

In [8]:
# Collect the latest news title and paragraph text
title_result = content.find('div', class_="content_title")
paragraph_result = content.find('div', class_='article_teaser_body')

# Save them in variables
news_title = title_result.text.strip()
print(news_title)
news_p = paragraph_result.text.strip()
print(news_p)

NASA Ingenuity Mars Helicopter Prepares for First Flight
Now uncocooned from its protective carbon-fiber shield, the helicopter is being readied for its next steps.


## JPL Mars Space Images - Featured Image

In [9]:
# URL of page to be scraped
url2 = 'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html'
browser.visit(url2)

In [10]:
# Use splinter to navigate the site and find the image url for the current Featured Mars Image

html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Save result of image
image_url = soup.find(class_="headerimage fade-in")['src']

# Assign the url string to a variable
featured_image_url = f'{url2}/{image_url}'
print(featured_image_url)

https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html/image/featured/mars1.jpg


## Mars Facts

In [11]:
# URL of page to be scraped
url3 = 'https://space-facts.com/mars/'

In [12]:
# Grab tables from the website
tables = pd.read_html(url3)

In [13]:
# Grab the first table containing facts about the planet including Diameter, Mass, etc.
df = tables[0]

In [14]:
# Clean Up DF
df = df.rename(columns={0:"Description"})
df = df.rename(columns={1:"Mars"})
df = df.set_index('Description')
df

Unnamed: 0_level_0,Mars
Description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [15]:
# Use Pandas to convert the data to a HTML table string.
mars_facts = df.to_html('mars_facts.html')

## Mars Hemispheres

In [16]:
# URL of page to be scraped
url4 = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'
browser.visit(url4)
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [17]:
# Navigate to the section that contains the images
sidebar = soup.find('div', class_='collapsible results')
items = sidebar.find_all('div', class_='item')
hemisphere_image_urls = []

In [18]:
for item in items:
    # Error handling
    try: 
        # Find the image title
        image = item.find('div', class_='description')
        title = image.h3.text
        # Find the image url
        image_url = image.a['href']
        base_url = 'https://astrogeology.usgs.gov'
        # Make connection to full resoltuion image by navigating to link
        browser.visit(base_url+image_url)
        html = browser.html
        soup = BeautifulSoup(html, 'html.parser')
        image_src = soup.find('li').a['href']
        # Confirm results are being extracted
        print(title)
        print(image_src)
        print(' ')
        # Add title and image url to dict then add to list
        image_dict = {'title':title, 
                    'image_url':image_src}
        hemisphere_image_urls.append(image_dict)
    except Exception as e:
        print(e)

Cerberus Hemisphere Enhanced
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg
 
Schiaparelli Hemisphere Enhanced
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg
 
Syrtis Major Hemisphere Enhanced
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg
 
Valles Marineris Hemisphere Enhanced
https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg
 


### Quit Connection with Browser

In [20]:
# Quit browser
browser.quit()