In [28]:
# Import Splinter, BeautifulSoup, and Pandas
from splinter import Browser
from bs4 import BeautifulSoup as soup
import pandas as pd
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By

In [29]:
# Set up Splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

## Scrape NASA Mars News

In [3]:
# Visit the Mars news site
url = 'https://redplanetscience.com/'
browser.visit(url)

In [4]:
# Convert the browser html to a soup object
html = browser.html
news_soup = soup(html, 'html.parser')

In [5]:
#Use CSS selector
list_text = news_soup.select_one('div.list_text')
list_text.find('div', class_='content_title')
# Use the parent element to find the first a tag and save it as `news_title`
news_title = list_text.find('div', class_='content_title').get_text()
news_title

"Robotic Toolkit Added to NASA's Mars 2020 Rover"

In [6]:
# Use the parent element to find the paragraph text
news_p = list_text.find('div', class_='article_teaser_body').get_text()
news_p

"The bit carousel, which lies at the heart of the rover's Sample Caching System, is now aboard NASA's newest rover. "

## Scrape JPL Mars Space Images—Featured Image

In [7]:
url = 'https://spaceimages-mars.com/'
browser.visit(url)

In [8]:
full_image_elem = browser.find_by_tag('button')[1]
full_image_elem.click()

In [9]:
# Parse html with soup
html = browser.html
img_soup = soup(html, 'html.parser')

In [10]:
img_url_rel = img_soup.find('img', class_='headerimage fade-in').get('src')
img_url_rel

'image/featured/mars1.jpg'

In [11]:
#Store as variable
featured_image_url = f'https://spaceimages-mars.com/{img_url_rel}'
featured_image_url 

'https://spaceimages-mars.com/image/featured/mars1.jpg'

### Scrape Mars Facts

In [12]:
mars_facts_url = 'https://galaxyfacts-mars.com/'
mars_facts = pd.read_html(mars_facts_url)
mars_facts

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [13]:
# Select info we want
mars_facts[1]

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 ( Phobos & Deimos )
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [14]:
mars_facts[0]

Unnamed: 0,0,1,2
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


In [15]:
df1 = mars_facts[1]
df1

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 ( Phobos & Deimos )
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [16]:
#Make top row the column names 
new_header = df1.iloc[0] 
df1.columns = new_header

## Scrape Mars Hemispheres

In [149]:
url = 'https://marshemispheres.com/'
browser.visit(url)

In [150]:
html = browser.html
hemisphere_soup = soup(html, 'html.parser')
hemispheres_all = hemisphere_soup.find_all('div', class_='description')
hemispheres_all

[<div class="description">
 <a class="itemLink product-item" href="cerberus.html">
 <h3>Cerberus Hemisphere Enhanced</h3>
 </a>
 <span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/>
 <p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p>
 </div>,
 <div class="description">
 <a class="itemLink product-item" href="schiaparelli.html">
 <h3>Schiaparelli Hemisphere Enhanced</h3>
 </a>
 <span class="subtitle" style="float:left">image/tiff 35 MB</span><span class="pubDate" style="float:right"></span><br/>
 <p>Mosaic of the Schiaparelli hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. The images were acquired in 1980 during early northern…</p>
 </div>,
 <div class="description">
 <a class="itemLink product-item" 

title = hemispheres_all.find('a', class_= 'product-item')
title

In [151]:
browser.find_by_css('a.product-item img')[0].click()
#browser.find_by_css('a.product-item img')[i].click()

In [152]:
hemisphere_soup = soup(html, 'html.parser')

In [153]:
name = hemisphere_soup.find('div', class_='wrapper')
name

<div class="wrapper">
<div class="container">
<div class="widget block bar">
<a href="https://astrogeology.usgs.gov/search" style="float:right;text-decoration:none;">
<img alt="Astropedia" src="images/astropedia-logo-main.png" style="width:200px;border:none;float:right;"/>
<div style="clear:both;font-size:.8em;float:right;color:#888;">Lunar and Planetary Cartographic Catalog</div>
</a>
<div style="float:left;height:60px;">
</div>
</div>
<div class="full-content">
<section class="block" id="results-accordian">
<div class="result-list" data-section="product" id="product-section">
<div class="accordian">
<h2>Products</h2>
<span class="count">4 Results</span>
<span class="collapse">Collapse</span>
</div>
<div class="collapsible results">
<div class="item">
<a class="itemLink product-item" href="cerberus.html"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/></a>
<div class="description">
<a class=

In [154]:
container = name.find('div', class_='container')
print(container)

<div class="container">
<div class="widget block bar">
<a href="https://astrogeology.usgs.gov/search" style="float:right;text-decoration:none;">
<img alt="Astropedia" src="images/astropedia-logo-main.png" style="width:200px;border:none;float:right;"/>
<div style="clear:both;font-size:.8em;float:right;color:#888;">Lunar and Planetary Cartographic Catalog</div>
</a>
<div style="float:left;height:60px;">
</div>
</div>
<div class="full-content">
<section class="block" id="results-accordian">
<div class="result-list" data-section="product" id="product-section">
<div class="accordian">
<h2>Products</h2>
<span class="count">4 Results</span>
<span class="collapse">Collapse</span>
</div>
<div class="collapsible results">
<div class="item">
<a class="itemLink product-item" href="cerberus.html"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/></a>
<div class="description">
<a class="itemLink product-item

In [155]:
#title2 = title.find('h2', class_='title')
title = container.find('h3').text
title

'Cerberus Hemisphere Enhanced'

In [156]:
elem = browser.links.find_by_text('Sample').first
imgurl = elem['href']
print(imgurl)

https://marshemispheres.com/images/full.jpg


In [157]:
browser.back()

In [186]:
url = 'https://marshemispheres.com/'
browser.visit(url)

In [187]:
html = browser.html
hemisphere_soup = soup(html, 'html.parser')

hemispheres_all = hemisphere_soup.find_all('div', class_='description')

titles_list = []
img_links = []
counter = 0

for x in range(4):
    browser.find_by_css('a.product-item img')[int(counter)].click()
    
#for hemisphere in hemispheres_all:
    #click on thumbnail 
    #browser.find_by_css('a.product-item img')[0].click()
    hemisphere_soup = soup(html, 'html.parser')
    #finding img title
    name = hemisphere_soup.find('div', class_='wrapper')
    container = name.find('div', class_='container')
    #get title
    title = container.find('h3').text
    titles_list.append(title)
    #click on Sample  
    elem = browser.links.find_by_text('Sample').first
    #finding image url 
    imgurl = elem['href']
    img_links.append(imgurl)
    browser.back()
    counter = counter + 1

In [188]:
titles_list

['Cerberus Hemisphere Enhanced',
 'Cerberus Hemisphere Enhanced',
 'Cerberus Hemisphere Enhanced',
 'Cerberus Hemisphere Enhanced']

In [185]:
img_links

['https://marshemispheres.com/images/full.jpg',
 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg',
 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg',
 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg']