In [286]:
# Import Splinter, BeautifulSoup, and Pandas
from splinter import Browser
from bs4 import BeautifulSoup as soup
import pandas as pd
from webdriver_manager.chrome import ChromeDriverManager
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.by import By

In [287]:
# Set up Splinter
executable_path = {'executable_path': ChromeDriverManager().install()}
browser = Browser('chrome', **executable_path, headless=False)

[WDM] - Downloading: 100%|████████████████████████████████████████████████████████| 6.68M/6.68M [00:00<00:00, 12.3MB/s]


## Scrape NASA Mars News

In [3]:
# Visit the Mars news site
url = 'https://redplanetscience.com/'
browser.visit(url)

In [4]:
# Convert the browser html to a soup object
html = browser.html
news_soup = soup(html, 'html.parser')

In [5]:
#Use CSS selector
list_text = news_soup.select_one('div.list_text')
list_text.find('div', class_='content_title')
# Use the parent element to find the first a tag and save it as `news_title`
news_title = list_text.find('div', class_='content_title').get_text()
news_title

"Robotic Toolkit Added to NASA's Mars 2020 Rover"

In [6]:
# Use the parent element to find the paragraph text
news_p = list_text.find('div', class_='article_teaser_body').get_text()
news_p

"The bit carousel, which lies at the heart of the rover's Sample Caching System, is now aboard NASA's newest rover. "

## Scrape JPL Mars Space Images—Featured Image

In [7]:
url = 'https://spaceimages-mars.com/'
browser.visit(url)

In [8]:
full_image_elem = browser.find_by_tag('button')[1]
full_image_elem.click()

In [9]:
# Parse html with soup
html = browser.html
img_soup = soup(html, 'html.parser')

In [10]:
img_url_rel = img_soup.find('img', class_='headerimage fade-in').get('src')
img_url_rel

'image/featured/mars1.jpg'

In [11]:
#Store as variable
featured_image_url = f'https://spaceimages-mars.com/{img_url_rel}'
featured_image_url 

'https://spaceimages-mars.com/image/featured/mars1.jpg'

### Scrape Mars Facts

In [12]:
mars_facts_url = 'https://galaxyfacts-mars.com/'
mars_facts = pd.read_html(mars_facts_url)
mars_facts

[                         0                1                2
 0  Mars - Earth Comparison             Mars            Earth
 1                Diameter:         6,779 km        12,742 km
 2                    Mass:  6.39 × 10^23 kg  5.97 × 10^24 kg
 3                   Moons:                2                1
 4       Distance from Sun:   227,943,824 km   149,598,262 km
 5          Length of Year:   687 Earth days      365.24 days
 6             Temperature:     -87 to -5 °C      -88 to 58°C,
                       0                              1
 0  Equatorial Diameter:                       6,792 km
 1       Polar Diameter:                       6,752 km
 2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
 3                Moons:          2 ( Phobos & Deimos )
 4       Orbit Distance:       227,943,824 km (1.38 AU)
 5         Orbit Period:           687 days (1.9 years)
 6  Surface Temperature:                   -87 to -5 °C
 7         First Record:              2nd millennium BC

In [13]:
# Select info we want
mars_facts[1]

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 ( Phobos & Deimos )
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [14]:
mars_facts[0]

Unnamed: 0,0,1,2
0,Mars - Earth Comparison,Mars,Earth
1,Diameter:,"6,779 km","12,742 km"
2,Mass:,6.39 × 10^23 kg,5.97 × 10^24 kg
3,Moons:,2,1
4,Distance from Sun:,"227,943,824 km","149,598,262 km"
5,Length of Year:,687 Earth days,365.24 days
6,Temperature:,-87 to -5 °C,-88 to 58°C


In [15]:
df1 = mars_facts[1]
df1

Unnamed: 0,0,1
0,Equatorial Diameter:,"6,792 km"
1,Polar Diameter:,"6,752 km"
2,Mass:,6.39 × 10^23 kg (0.11 Earths)
3,Moons:,2 ( Phobos & Deimos )
4,Orbit Distance:,"227,943,824 km (1.38 AU)"
5,Orbit Period:,687 days (1.9 years)
6,Surface Temperature:,-87 to -5 °C
7,First Record:,2nd millennium BC
8,Recorded By:,Egyptian astronomers


In [293]:
#Make top row the column names 
new_header = df1.iloc[0] 
df1.columns = new_header

In [294]:
#convert to html string 
df1 = df1.to_html
df1

<bound method DataFrame.to_html of 0  Equatorial Diameter:                       6,792 km
0  Equatorial Diameter:                       6,792 km
1       Polar Diameter:                       6,752 km
2                 Mass:  6.39 × 10^23 kg (0.11 Earths)
3                Moons:          2 ( Phobos & Deimos )
4       Orbit Distance:       227,943,824 km (1.38 AU)
5         Orbit Period:           687 days (1.9 years)
6  Surface Temperature:                   -87 to -5 °C
7         First Record:              2nd millennium BC
8          Recorded By:           Egyptian astronomers>

## Scrape Mars Hemispheres

In [288]:
url = 'https://marshemispheres.com/'
browser.visit(url)

In [289]:
html = browser.html
hemisphere_soup = soup(html, 'html.parser')

hemispheres_all = hemisphere_soup.find_all('div', class_='description')

titles_list = []
img_links = []
counter = 0

hem_dicts = []

for x in range(4):
    browser.find_by_css('a.product-item img')[int(counter)].click()
    hemisphere_soup = soup(html, 'html.parser')
    
    #get titles
    title = hemisphere_soup.body.find_all('h3')[int(counter)].text
    print(title)
    titles_list.append(title)
    
  
    #click on Sample  
    elem = browser.links.find_by_text('Sample').first
    #finding image url 
    imgurl = elem['href']
    print(imgurl)
    img_links.append(imgurl)
    browser.back()
    counter = counter + 1
    
    hem_dicts.append({'title': title,
                      'imgurl': imgurl        
    })

Cerberus Hemisphere Enhanced
https://marshemispheres.com/images/full.jpg
Schiaparelli Hemisphere Enhanced
https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg
Syrtis Major Hemisphere Enhanced
https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg
Valles Marineris Hemisphere Enhanced
https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg


In [278]:
titles_list

['Cerberus Hemisphere Enhanced',
 'Schiaparelli Hemisphere Enhanced',
 'Syrtis Major Hemisphere Enhanced',
 'Valles Marineris Hemisphere Enhanced']

In [279]:
img_links

['https://marshemispheres.com/images/full.jpg',
 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg',
 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg',
 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg']

In [291]:
browser.quit()

In [290]:
hem_dicts

[{'title': 'Cerberus Hemisphere Enhanced',
  'imgurl': 'https://marshemispheres.com/images/full.jpg'},
 {'title': 'Schiaparelli Hemisphere Enhanced',
  'imgurl': 'https://marshemispheres.com/images/schiaparelli_enhanced-full.jpg'},
 {'title': 'Syrtis Major Hemisphere Enhanced',
  'imgurl': 'https://marshemispheres.com/images/syrtis_major_enhanced-full.jpg'},
 {'title': 'Valles Marineris Hemisphere Enhanced',
  'imgurl': 'https://marshemispheres.com/images/valles_marineris_enhanced-full.jpg'}]