In [28]:
# Import Splinter, BeautifulSoup, and Pandas
from splinter import Browser
from bs4 import BeautifulSoup as soup
import pandas as pd
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By

In [29]:
# Set up Splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

## Scrape NASA Mars News

In [3]:
# Visit the Mars news site
url = 'https://redplanetscience.com/'
browser.visit(url)

In [4]:
# Convert the browser html to a soup object
html = browser.html
news_soup = soup(html, 'html.parser')

In [5]:
#Use CSS selector
list_text = news_soup.select_one('div.list_text')
list_text.find('div', class_='content_title')
# Use the parent element to find the first a tag and save it as `news_title`
news_title = list_text.find('div', class_='content_title').get_text()
news_title

"Robotic Toolkit Added to NASA's Mars 2020 Rover"

In [6]:
# Use the parent element to find the paragraph text
news_p = list_text.find('div', class_='article_teaser_body').get_text()
news_p

"The bit carousel, which lies at the heart of the rover's Sample Caching System, is now aboard NASA's newest rover. "

## Scrape JPL Mars Space Images—Featured Image

In [7]:
url = 'https://spaceimages-mars.com/'
browser.visit(url)

In [8]:
full_image_elem = browser.find_by_tag('button')[1]
full_image_elem.click()

In [9]:
# Parse html with soup
html = browser.html
img_soup = soup(html, 'html.parser')

In [10]:
img_url_rel = img_soup.find('img', class_='headerimage fade-in').get('src')
img_url_rel

'image/featured/mars1.jpg'

In [11]:
#Store as variable
featured_image_url = f'https://spaceimages-mars.com/{img_url_rel}'
featured_image_url 

'https://spaceimages-mars.com/image/featured/mars1.jpg'

### Scrape Mars Facts

In [12]:
mars_facts_url = 'https://galaxyfacts-mars.com/'
mars_facts = pd.read_html(mars_facts_url)
mars_facts

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [13]:
# Select info we want
mars_facts[1]

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 ( Phobos & Deimos )
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [14]:
mars_facts[0]

Unnamed: 0,0,1,2
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


In [15]:
df1 = mars_facts[1]
df1

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 ( Phobos & Deimos )
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [16]:
#Make top row the column names 
new_header = df1.iloc[0] 
df1.columns = new_header

## Scrape Mars Hemispheres

In [30]:
url = 'https://marshemispheres.com/'
browser.visit(url)

In [31]:
html = browser.html
hemisphere_soup = soup(html, 'html.parser')
hemispheres_all = hemisphere_soup.find_all('div', class_='description')
hemispheres_all

[<div class="description">
 <a class="itemLink product-item" href="cerberus.html">
 <h3>Cerberus Hemisphere Enhanced</h3>
 </a>
 <span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/>
 <p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p>
 </div>,
 <div class="description">
 <a class="itemLink product-item" href="schiaparelli.html">
 <h3>Schiaparelli Hemisphere Enhanced</h3>
 </a>
 <span class="subtitle" style="float:left">image/tiff 35 MB</span><span class="pubDate" style="float:right"></span><br/>
 <p>Mosaic of the Schiaparelli hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. The images were acquired in 1980 during early northern…</p>
 </div>,
 <div class="description">
 <a class="itemLink product-item" 

In [36]:
all_hems = hemisphere_soup.find_all('div', class_='collapsible results')
all_hems

[<div class="collapsible results">
 <div class="item">
 <a class="itemLink product-item" href="cerberus.html"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/></a>
 <div class="description">
 <a class="itemLink product-item" href="cerberus.html">
 <h3>Cerberus Hemisphere Enhanced</h3>
 </a>
 <span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/>
 <p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p>
 </div>
 <!-- end description -->
 </div>
 <div class="item">
 <a class="itemLink product-item" href="schiaparelli.html"><img alt="Schiaparelli Hemisphere Enhanced thumbnail" class="thumb" src="images/08eac6e22c07fb1fe72223a79252de20_schiaparelli_enhanced.tif_thumb.png"/></a>
 <div c

get the LINKS (div class description > a href) 
click the link

image: (div class wide-image-wrapper > img > src)

You will need to click each of the links to the hemispheres in order to find the image URL to the full-resolution image.


Save the image URL string for the full resolution hemisphere image and the hemisphere title containing the hemisphere name. Use a Python dictionary to store the data using the keys img_url and title.

In [39]:
# for hemisphere in all_hems:
#     description = hemisphere.find('a')
#     link = description.find('href')
#     browser.links.find_by_partial_href('.html').click()

    
#     title = img.find('alt')
#     print('----------')
#     print(title)
#     print(link)
#     print('https://marshemispheres.com/' + link)
    
for hemisphere in all_hems:
      # hem = hemisphere_all.find('div')
    img = all_hems.find('img').text
    link = img.find('src')
    title = img.find('alt')
    print('----------')
    print(title)
    print('https://marshemispheres.com/' + link)

AttributeError: ResultSet object has no attribute 'find'. You're probably treating a list of elements like a single element. Did you call find_all() when you meant to call find()?

In [70]:
hemisphere_soup.find_all('div', class_='item')

[<div class="item">
 <a class="itemLink product-item" href="cerberus.html"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/></a>
 <div class="description">
 <a class="itemLink product-item" href="cerberus.html">
 <h3>Cerberus Hemisphere Enhanced</h3>
 </a>
 <span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/>
 <p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p>
 </div>
 <!-- end description -->
 </div>,
 <div class="item">
 <a class="itemLink product-item" href="schiaparelli.html"><img alt="Schiaparelli Hemisphere Enhanced thumbnail" class="thumb" src="images/08eac6e22c07fb1fe72223a79252de20_schiaparelli_enhanced.tif_thumb.png"/></a>
 <div class="description">
 <a class="ite

In [35]:
for x in range(4):
    html = browser.html
    hemisphere_soup = soup(html, 'html.parser')
    hemispheres_all = hemisphere_soup.find_all('div', class_='collapsible results')
    
    
    for hemisphere in hemispheres_all:
       # hem = hemisphere_all.find('div')
        img = hemisphere.find('img')
        link = img.find('src')
        title = img.find('alt')
        print('----------')
        print(title)
        print('https://marshemispheres.com/' + link)

----------
None


TypeError: can only concatenate str (not "NoneType") to str

In [40]:
html = browser.html 
hemisphere_soup = soup(html, 'html.parser')

In [58]:
#soup.find_all('div', class_='collapsible results')
hemisphere_soup.find_all('div', class_='item')

[<div class="item">
 <a class="itemLink product-item" href="cerberus.html"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/></a>
 <div class="description">
 <a class="itemLink product-item" href="cerberus.html">
 <h3>Cerberus Hemisphere Enhanced</h3>
 </a>
 <span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/>
 <p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p>
 </div>
 <!-- end description -->
 </div>,
 <div class="item">
 <a class="itemLink product-item" href="schiaparelli.html"><img alt="Schiaparelli Hemisphere Enhanced thumbnail" class="thumb" src="images/08eac6e22c07fb1fe72223a79252de20_schiaparelli_enhanced.tif_thumb.png"/></a>
 <div class="description">
 <a class="ite

In [33]:
hemisphere_im1 = hemisphere_soup.find('img', class_='thumb').get('src')
hemisphere_im1

'images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png'

In [42]:
hem_url = f'{url}{hemisphere_im1}'
print(hem_url)

https://marshemispheres.com/images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png


In [43]:
list1[0]

<img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/>

In [57]:
image_div = hemisphere_soup.find_all('div', class_='item')
print(image_div)

[<div class="item">
<a class="itemLink product-item" href="cerberus.html"><img alt="Cerberus Hemisphere Enhanced thumbnail" class="thumb" src="images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png"/></a>
<div class="description">
<a class="itemLink product-item" href="cerberus.html">
<h3>Cerberus Hemisphere Enhanced</h3>
</a>
<span class="subtitle" style="float:left">image/tiff 21 MB</span><span class="pubDate" style="float:right"></span><br/>
<p>Mosaic of the Cerberus hemisphere of Mars projected into point perspective, a view similar to that which one would see from a spacecraft. This mosaic is composed of 104 Viking Orbiter images acquired…</p>
</div>
<!-- end description -->
</div>, <div class="item">
<a class="itemLink product-item" href="schiaparelli.html"><img alt="Schiaparelli Hemisphere Enhanced thumbnail" class="thumb" src="images/08eac6e22c07fb1fe72223a79252de20_schiaparelli_enhanced.tif_thumb.png"/></a>
<div class="description">
<a class="itemLink product-

In [36]:
hemisphere_im1 = hemisphere_soup.find('img', class_='thumb').get('src')
hemisphere_im1

'images/39d3266553462198bd2fbc4d18fbed17_cerberus_enhanced.tif_thumb.png'

In [None]:
hemisphere_im = browser.find_by_tag('img')
hemisphere_im.click()

In [None]:
hemisphere_image_urls = []
