In [1]:
# Import Splinter, BeautifulSoup, and Pandas
from splinter import Browser
from bs4 import BeautifulSoup
import pandas as pd
from webdriver_manager.chrome import ChromeDriverManager
import requests
import pymongo
import pathlib
import time

In [2]:
# Path to chromedriver

In [3]:
# Set the executable path and initialize the chrome browser in splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)



Current google-chrome version is 96.0.4664
Get LATEST driver version for 96.0.4664
Get LATEST driver version for 96.0.4664
Trying to download new driver from https://chromedriver.storage.googleapis.com/96.0.4664.45/chromedriver_mac64.zip
Driver has been saved in cache [/Users/vanessaandjaviervillagomez/.wdm/drivers/chromedriver/mac64/96.0.4664.45]


## Visit the NASA mars news site

In [4]:
# Visit the mars nasa news site
nasa_url = "https://mars.nasa.gov/news/"
browser.visit(nasa_url)
# Optional delay for loading the page
time.sleep(2)

In [5]:
# Convert the browser html to a soup object and then quit the browser
html = browser.html
soup = BeautifulSoup(html, 'html.parser')
browser.quit()

In [6]:
# .find() the content title and save it as `news_title`
# latest news as of December 15, 2021 - "NASA's Ingenuity Mars Helicopter Reaches a Total of 30 Minutes Aloft"
latest_news = soup.find('li', class_='slide')
news_title = latest_news.find('div', class_='content_title').text
news_title


"NASA-JPL's 'On a Mission' Podcast New Season Rolls Out With Mars Rovers"

In [7]:
# .find() the paragraph text
nasa_paragraph = latest_news.find('div', class_='article_teaser_body').text
nasa_paragraph

'With the first episode available now, Season Four shares the personal stories of the people who’ve helped put NASA’s six-wheeled explorers on the Red Planet.   '

## JPL Space Images Featured Image

In [8]:
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)



Current google-chrome version is 96.0.4664
Get LATEST driver version for 96.0.4664
Driver [/Users/vanessaandjaviervillagomez/.wdm/drivers/chromedriver/mac64/96.0.4664.45/chromedriver] found in cache


In [9]:
# Visit JPL space images Mars URL 
jpl_url = "https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/index.html"
browser.visit(jpl_url)


In [10]:
# Find and click the full image button
image_btn = browser.links.find_by_partial_text('FULL IMAGE').click()

In [11]:
# Find the more info button and click that
#browser.links.find_by_partial_text('more info').click()
#no elements could be found with link by partial text "more info"

In [12]:
# Parse the resulting html with soup
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [13]:
# find the relative image url
relative_image = soup.find('img', class_='headerimage fade-in')['src']
relative_image

'image/featured/mars2.jpg'

In [14]:
# Use the base url to create an absolute url
featured_image_url = 'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/'+relative_image
featured_image_url


'https://data-class-jpl-space.s3.amazonaws.com/JPL_Space/image/featured/mars2.jpg'

In [15]:
browser.quit()

## Mars Facts

In [16]:
# Create a dataframe from the space-facts.com mars page
mars_url = 'https://space-facts.com/mars/'

mars_df = pd.read_html(mars_url)[0]
mars_df

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 (Phobos & Deimos)
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [17]:
# clean the dataframe and export to HTML
mars_df.columns=['description', 'value']
mars_df.set_index('description', inplace=True)
mars_df

Unnamed: 0_level_0,value
description,Unnamed: 1_level_1
Equatorial Diameter:,"6,792 km"
Polar Diameter:,"6,752 km"
Mass:,6.39 × 10^23 kg (0.11 Earths)
Moons:,2 (Phobos & Deimos)
Orbit Distance:,"227,943,824 km (1.38 AU)"
Orbit Period:,687 days (1.9 years)
Surface Temperature:,-87 to -5 °C
First Record:,2nd millennium BC
Recorded By:,Egyptian astronomers


In [18]:
mars_html_table = mars_df.to_html("app/templates/mars_facts.html", index=True)


## Mars Hemispheres

In [19]:
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)



Current google-chrome version is 96.0.4664
Get LATEST driver version for 96.0.4664
Driver [/Users/vanessaandjaviervillagomez/.wdm/drivers/chromedriver/mac64/96.0.4664.45/chromedriver] found in cache


In [20]:
# visit the USGS astrogeology page for hemisphere data from Mars
hemis_url = 'https://marshemispheres.com/'
browser.visit(hemis_url)


In [21]:
# Convert the browser html to a soup object and then quit the browser
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

In [22]:
# First, get a list of all of the hemispheres
img_titles = soup.find_all('h3')
img_titles

[<h3>Cerberus Hemisphere Enhanced</h3>,
 <h3>Schiaparelli Hemisphere Enhanced</h3>,
 <h3>Syrtis Major Hemisphere Enhanced</h3>,
 <h3>Valles Marineris Hemisphere Enhanced</h3>,
 <h3>Back</h3>]

In [23]:
# Next, loop through those links, click the link, find the sample anchor, return the href
hemisphere_image_urls = []

for i in range(len(img_titles)):
    
    #python dictionary to store the data
    hemispheres = {}
    
    # We have to find the elements on each loop to avoid a stale element exception
    img_title = soup.find_all('h3')[i].text
    
    # Next, we find the Sample image anchor tag and extract the href
    sample_img = browser.links.find_by_text('Sample').first
    hemisphers['img_url'] = sample_img['href']
    
    html = browser.html
    soup = BeautifulSoup(html, 'html.parser')
    
    # Get Hemisphere title
    hemispheres['title'] = browser.find_by_css('h3.title').text
    
    # Append hemisphere object to list
    #hemispheres['img_url'] = hemis_img_url
    #hemis_image.append(hemispheres)
    
    # Finally, we navigate backwards with browser.back()
    #browser.back()
    

ElementDoesNotExist: no elements could be found with link by text "Sample"

In [None]:
# view the hemisphere urls to make sure they look good


In [20]:
# view the hemisphere urls to make sure they look good


[{'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/cerberus_enhanced.tif/full.jpg',
  'title': 'Cerberus Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/schiaparelli_enhanced.tif/full.jpg',
  'title': 'Schiaparelli Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/syrtis_major_enhanced.tif/full.jpg',
  'title': 'Syrtis Major Hemisphere Enhanced'},
 {'img_url': 'https://astropedia.astrogeology.usgs.gov/download/Mars/Viking/valles_marineris_enhanced.tif/full.jpg',
  'title': 'Valles Marineris Hemisphere Enhanced'}]

In [None]:
browser.quit()