In [24]:
#! pip install splinter
#! pip install webdriver_manager

Collecting webdriver_manager
  Downloading webdriver_manager-3.2.2-py2.py3-none-any.whl (16 kB)
Collecting configparser
  Downloading configparser-5.0.1-py3-none-any.whl (22 kB)
Collecting crayons
  Downloading crayons-0.4.0-py2.py3-none-any.whl (4.6 kB)
Installing collected packages: configparser, crayons, webdriver-manager
Successfully installed configparser-5.0.1 crayons-0.4.0 webdriver-manager-3.2.2


# Step 1: Scraping

In [1]:
from bs4 import BeautifulSoup
import os
import requests
from splinter import Browser
from webdriver_manager.chrome import ChromeDriverManager
import pandas as pd

## NASA Mars News

In [2]:
# Start a visible (non-headless) Chrome session driven by splinter.
# ChromeDriverManager().install() returns the path of a chromedriver binary
# for this machine, so the notebook no longer depends on a Windows-only
# 'chromedriver.exe' sitting next to it.
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

In [3]:
# Open the NASA Mars News page in the Chrome session.
url = "https://mars.nasa.gov/news"
browser.visit(url)

# The parsing step looks for 'ul.item_list' / 'li.slide'. Give those elements
# up to 5 seconds to show up so the HTML is not read before the article list
# is present (the call returns as soon as the element is found).
# The trailing ';' keeps the boolean result out of the cell output.
browser.is_element_present_by_css('ul.item_list li.slide', wait_time=5);


In [4]:
# Parse the HTML currently loaded in the browser.
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Find the news list and its first article slide.
news = soup.find('ul', class_='item_list')
first = news.find('li', class_='slide') if news is not None else None
if first is None:
    # Fail with a readable message instead of an AttributeError on None.
    raise RuntimeError(
        "No news article found on the page - it may not have finished "
        "loading, or its layout has changed."
    )

# Extract the headline and teaser of the first article. Surrounding
# whitespace is stripped (the raw teaser text ends with a trailing space).
header = first.find('div', class_='content_title').get_text().strip()
paragraph = first.find('div', class_='article_teaser_body').get_text().strip()

# Print first header and paragraph
print(header)
print(paragraph)


A Martian Roundtrip: NASA's Perseverance Rover Sample Tubes
Marvels of engineering, the rover's sample tubes must be tough enough to safely bring Red Planet samples on the long journey back to Earth in immaculate condition. 


## JPL Mars Space Images - Featured Image

In [5]:
# Shut down any Chrome session opened earlier in this kernel before starting
# a new one; otherwise its window and chromedriver process are left running.
try:
    browser.quit()
except NameError:
    # No earlier browser exists (cell run on its own) - nothing to close.
    pass

# Start a fresh visible Chrome session for the featured-image section.
# ChromeDriverManager().install() returns the path of a chromedriver binary
# for this machine instead of relying on a local 'chromedriver.exe'.
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

In [6]:
# Point the browser at the JPL space-images page, filtered to the Mars
# category, where the featured image lives.
image_url = "https://www.jpl.nasa.gov/spaceimages/?search=&category=Mars"

browser.visit(image_url)

In [7]:
# Click the link whose text contains 'FULL IMAGE'.
# browser.links.find_by_partial_text(...) replaces the deprecated
# browser.click_link_by_partial_text(...) helper; .click() acts on the first
# matching link, as the old helper did.
browser.links.find_by_partial_text('FULL IMAGE').click()



In [8]:
# Best-effort pause (at most 1 second) so the 'more info' link has time to
# appear after the previous click; the boolean result is deliberately ignored.
browser.is_element_present_by_text('more info', wait_time=1)

# Click the link whose text contains 'more info'.
# browser.links.find_by_partial_text(...) replaces the deprecated
# browser.click_link_by_partial_text(...) helper.
browser.links.find_by_partial_text('more info').click()

In [9]:
# Re-parse whatever page the browser is showing now and pull out the link
# to the featured image.
html = browser.html
soup = BeautifulSoup(html, 'html.parser')

# Walk down: first <article> -> <figure class="lede"> -> first <a> -> href
feat_img = soup.article
img = feat_img.find('figure', attrs={'class': 'lede'})
img_link = img.find('a')['href']

print(img_link)

/spaceimages/images/largesize/PIA18328_hires.jpg


In [10]:
# The scraped href is site-relative, so prefix it with the JPL host.
main_url = 'https://www.jpl.nasa.gov'

# Keep the absolute URL in a variable so later steps can reuse it instead of
# rebuilding it; the printed output is unchanged.
featured_image_url = main_url + img_link

print(featured_image_url)

https://www.jpl.nasa.gov/spaceimages/images/largesize/PIA18328_hires.jpg


## Mars Facts


In [11]:
facts_url = "https://space-facts.com/mars/"

# pd.read_html returns a list with one DataFrame per <table> it finds at the
# URL; the first table is the one used here.
facts = pd.read_html(facts_url)

# Work on a copy so the frame stored in `facts` is not mutated, then label
# the two columns and index the rows by description (no inplace=True).
df = facts[0].copy()
df.columns = ['Description', 'Value']
df = df.set_index('Description')

# Render the table as an HTML string.
html_string = df.to_html()

html_string

'<table border="1" class="dataframe">\n  <thead>\n    <tr style="text-align: right;">\n      <th></th>\n      <th>Value</th>\n    </tr>\n    <tr>\n      <th>Description</th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>Equatorial Diameter:</th>\n      <td>6,792 km</td>\n    </tr>\n    <tr>\n      <th>Polar Diameter:</th>\n      <td>6,752 km</td>\n    </tr>\n    <tr>\n      <th>Mass:</th>\n      <td>6.39 × 10^23 kg (0.11 Earths)</td>\n    </tr>\n    <tr>\n      <th>Moons:</th>\n      <td>2 (Phobos &amp; Deimos)</td>\n    </tr>\n    <tr>\n      <th>Orbit Distance:</th>\n      <td>227,943,824 km (1.38 AU)</td>\n    </tr>\n    <tr>\n      <th>Orbit Period:</th>\n      <td>687 days (1.9 years)</td>\n    </tr>\n    <tr>\n      <th>Surface Temperature:</th>\n      <td>-87 to -5 °C</td>\n    </tr>\n    <tr>\n      <th>First Record:</th>\n      <td>2nd millennium BC</td>\n    </tr>\n    <tr>\n      <th>Recorded By:</th>\n      <td>Egyptian astronomers</td>\n    </tr>\

## Mars Hemispheres

In [12]:
# Search-results page on astrogeology.usgs.gov listing the enhanced Mars
# hemisphere products; load it in the browser.
hemi_url = 'https://astrogeology.usgs.gov/search/results?q=hemisphere+enhanced&k1=target&v1=Mars'

browser.visit(hemi_url)

In [13]:
# Parse the search-results page currently loaded in the browser.
hemi_html = browser.html
soup = BeautifulSoup(hemi_html, 'html.parser')

# Each result on the page is a <div class="item">.
items = soup.find_all('div', class_='item')

# hrefs and image sources on the site are relative to this host.
hemi_main = 'https://astrogeology.usgs.gov'

# One {"title", "img_final"} dict per hemisphere.
img_urls = []

for item in items:
    title = item.find('h3').text
    partial_img = item.find('a', class_='itemLink product-item')['href']

    # Visit the result's detail page and parse it under its own names, so
    # `soup` keeps referring to the search-results page.
    browser.visit(hemi_main + partial_img)
    detail_html = browser.html
    detail_soup = BeautifulSoup(detail_html, 'html.parser')

    # Build the absolute URL of the image shown on the detail page.
    img_final = hemi_main + detail_soup.find('img', class_='wide-image')['src']
    img_urls.append({"title": title, "img_final": img_final})

img_urls

[{'title': 'Cerberus Hemisphere Enhanced',
  'img_final': 'https://astrogeology.usgs.gov/cache/images/f5e372a36edfa389625da6d0cc25d905_cerberus_enhanced.tif_full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'img_final': 'https://astrogeology.usgs.gov/cache/images/3778f7b43bbbc89d6e3cfabb3613ba93_schiaparelli_enhanced.tif_full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'img_final': 'https://astrogeology.usgs.gov/cache/images/555e6403a6ddd7ba16ddb0e471cadcf7_syrtis_major_enhanced.tif_full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'img_final': 'https://astrogeology.usgs.gov/cache/images/b3c7c6c9138f57b4756be9b9c43e3a48_valles_marineris_enhanced.tif_full.jpg'}]

[]